updated parameters and retrieval prompt to massively improve query quality

This commit is contained in:
Philipp Mock 2026-02-19 15:36:32 +01:00
parent 9abda1d867
commit 08610e0b64
3 changed files with 28 additions and 15 deletions

View File

@ -203,12 +203,16 @@ class LocalRAG:
history_str = self._format_history(chat_history) history_str = self._format_history(chat_history)
search_query = question search_query = question
rag_query_instruction = (
"Do not return a list of references but prioritize meaningful text from abstracts, results and discussion sections."
)
print(f"[RAG] User question: {question!r}") print(f"[RAG] User question: {question!r}")
# 1) If we have history, rephrase the question into a standalone query for better retrieval # 1) If we have history, rephrase the question into a standalone query for better retrieval
if history_str.strip(): if history_str.strip():
rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged. rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking.
Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged.
Chat history: Chat history:
{history_str} {history_str}
@ -220,10 +224,12 @@ Standalone question:"""
search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question
print(f"[RAG] Standalone search query (rephrased): {search_query!r}") print(f"[RAG] Standalone search query (rephrased): {search_query!r}")
print(f"[RAG] Query sent to vector store: {search_query!r}") retrieval_query = f"{search_query}\n\n{rag_query_instruction}"
print(f"[RAG] Search query: {search_query!r}")
print(f"[RAG] Retrieval query sent to vector store: {retrieval_query!r}")
# 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar) # 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar)
docs_with_scores = self.vectorstore.similarity_search_with_score(search_query, k=k) docs_with_scores = self.vectorstore.similarity_search_with_score(retrieval_query, k=k)
docs = [doc for doc, _ in docs_with_scores] docs = [doc for doc, _ in docs_with_scores]
retrieved = self._docs_scores_to_retrieved(docs_with_scores) retrieved = self._docs_scores_to_retrieved(docs_with_scores)
if docs_with_scores: if docs_with_scores:
@ -233,7 +239,8 @@ Standalone question:"""
# 3) Answer using conversation history + retrieved context # 3) Answer using conversation history + retrieved context
history_block = f"Chat history:\n{history_str}\n\n" if history_str else "" history_block = f"Chat history:\n{history_str}\n\n" if history_str else ""
answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. If you don't know the answer, say so. Keep the conversation coherent. answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question.
If you don't know the answer, say so. Keep the conversation coherent.
{history_block}Relevant context from documents: {history_block}Relevant context from documents:
@ -251,7 +258,7 @@ Answer:"""
def main(): def main():
"""Example usage""" """Example usage"""
print("=" * 60) print("=" * 60)
print("Local RAG with LangChain, Ollama, and FAISS") print("Local RAG with LangChain, Ollama/OpenAI, and FAISS")
print("=" * 60) print("=" * 60)
# Initialize # Initialize
@ -268,7 +275,7 @@ def main():
rag.list_documents() rag.list_documents()
# Query # Query
question = "What do the documents say about modality for perceived message perception?" question = "What do you know about modality for perceived message perception?"
result = rag.query(question) result = rag.query(question)
print(f"\nQuestion: {question}") print(f"\nQuestion: {question}")
print(f"Answer: {result['answer']}") print(f"Answer: {result['answer']}")

View File

@ -99,7 +99,7 @@ def chat(request: ChatRequest):
print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)") print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)")
for i, chunk in enumerate(retrieved): for i, chunk in enumerate(retrieved):
content = chunk.get("content", "") content = chunk.get("content", "")
preview = (content[:80] + "...") if len(content) > 80 else content preview = (content[:150] + "...") if len(content) > 150 else content
print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}") print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
else: else:
print(f"\n[RAG] Retrieved 0 chunks") print(f"\n[RAG] Retrieved 0 chunks")

View File

@ -9,13 +9,15 @@
body { body {
font-family: "SF Mono", "Consolas", "Monaco", monospace; font-family: "SF Mono", "Consolas", "Monaco", monospace;
margin: 0; margin: 0;
min-height: 100vh; height: 100vh;
overflow: hidden;
background: #0f0f12; background: #0f0f12;
color: #e4e4e7; color: #e4e4e7;
display: flex; display: flex;
flex-direction: column; flex-direction: column;
} }
header { header {
flex-shrink: 0;
padding: 1rem 1.5rem; padding: 1rem 1.5rem;
border-bottom: 1px solid #27272a; border-bottom: 1px solid #27272a;
background: #18181b; background: #18181b;
@ -33,6 +35,7 @@
} }
#messages { #messages {
flex: 1; flex: 1;
min-height: 0;
overflow-y: auto; overflow-y: auto;
padding: 1.5rem; padding: 1.5rem;
display: flex; display: flex;
@ -108,6 +111,7 @@
display: block; display: block;
} }
#input-area { #input-area {
flex-shrink: 0;
padding: 1rem 1.5rem 1.5rem; padding: 1rem 1.5rem 1.5rem;
border-top: 1px solid #27272a; border-top: 1px solid #27272a;
background: #18181b; background: #18181b;
@ -138,6 +142,7 @@
#input { #input {
flex: 1; flex: 1;
min-height: 44px; min-height: 44px;
height: 44px;
max-height: 160px; max-height: 160px;
padding: 0.6rem 1rem; padding: 0.6rem 1rem;
font: inherit; font: inherit;
@ -152,7 +157,8 @@
#input::placeholder { color: #71717a; } #input::placeholder { color: #71717a; }
#input:focus { border-color: #52525b; } #input:focus { border-color: #52525b; }
#send { #send {
padding: 0.6rem 1.2rem; height: 44px;
padding: 0 1.2rem;
font: inherit; font: inherit;
font-size: 0.85rem; font-size: 0.85rem;
font-weight: 500; font-weight: 500;
@ -179,8 +185,8 @@
</head> </head>
<body> <body>
<header> <header>
<h1>Local RAG Chat</h1> <h1>RAG Chat</h1>
<p>Ask questions about your documents. Answers are generated from the vector store + Ollama / OpenAI.</p> <p>Ask questions about your documents. Answers are generated from a local vector store + Ollama / OpenAI.</p>
</header> </header>
<div id="messages"></div> <div id="messages"></div>
@ -219,11 +225,11 @@
} catch (_) {} } catch (_) {}
})(); })();
function appendMessage(role, text, isError = false) { function appendMessage(role, text, isError = false, llmProvider = null) {
text = text ?? ''; text = text ?? '';
const div = document.createElement('div'); const div = document.createElement('div');
div.className = 'msg ' + (isError ? 'error' : role); div.className = 'msg ' + (isError ? 'error' : role);
const label = role === 'user' ? 'You' : 'RAG'; let label = role === 'user' ? 'You' : (llmProvider === 'openai' ? 'ChatGPT + RAG' : 'Ollama + RAG');
let body; let body;
if (role === 'assistant' && !isError) { if (role === 'assistant' && !isError) {
const rawHtml = marked.parse(text, { gfm: true, breaks: true }); const rawHtml = marked.parse(text, { gfm: true, breaks: true });
@ -288,9 +294,9 @@
console.groupEnd(); console.groupEnd();
} }
if (data.error) { if (data.error) {
appendMessage('assistant', data.error, true); appendMessage('assistant', data.error, true, providerEl.value);
} else { } else {
appendMessage('assistant', (data.answer || '(No response)').trim()); appendMessage('assistant', (data.answer || '(No response)').trim(), false, providerEl.value);
} }
} catch (err) { } catch (err) {
setLoading(false); setLoading(false);