From 08610e0b64b11fc6c58c4d7da5f0e8bd889e38e4 Mon Sep 17 00:00:00 2001 From: Philipp Mock Date: Thu, 19 Feb 2026 15:36:32 +0100 Subject: [PATCH] updated parameters and retrieval prompt to massively improve query quality --- local_rag.py | 19 +++++++++++++------ server.py | 2 +- templates/chat.html | 22 ++++++++++++++-------- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/local_rag.py b/local_rag.py index 4745412..128749b 100644 --- a/local_rag.py +++ b/local_rag.py @@ -203,12 +203,16 @@ class LocalRAG: history_str = self._format_history(chat_history) search_query = question + rag_query_instruction = ( + "Do not return a list of references but prioritize meaningful text from abstracts, results and discussion sections." + ) print(f"[RAG] User question: {question!r}") # 1) If we have history, rephrase the question into a standalone query for better retrieval if history_str.strip(): - rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged. + rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. + Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged. 
Chat history: {history_str} @@ -220,10 +224,12 @@ Standalone question:""" search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question print(f"[RAG] Standalone search query (rephrased): {search_query!r}") - print(f"[RAG] Query sent to vector store: {search_query!r}") + retrieval_query = f"{search_query}\n\n{rag_query_instruction}" + print(f"[RAG] Search query: {search_query!r}") + print(f"[RAG] Retrieval query sent to vector store: {retrieval_query!r}") # 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar) - docs_with_scores = self.vectorstore.similarity_search_with_score(search_query, k=k) + docs_with_scores = self.vectorstore.similarity_search_with_score(retrieval_query, k=k) docs = [doc for doc, _ in docs_with_scores] retrieved = self._docs_scores_to_retrieved(docs_with_scores) if docs_with_scores: @@ -233,7 +239,8 @@ Standalone question:""" # 3) Answer using conversation history + retrieved context history_block = f"Chat history:\n{history_str}\n\n" if history_str else "" - answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. If you don't know the answer, say so. Keep the conversation coherent. + answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. + If you don't know the answer, say so. Keep the conversation coherent. {history_block}Relevant context from documents: @@ -251,7 +258,7 @@ Answer:""" def main(): """Example usage""" print("=" * 60) - print("Local RAG with LangChain, Ollama, and FAISS") + print("Local RAG with LangChain, Ollama/OpenAI, and FAISS") print("=" * 60) # Initialize @@ -268,7 +275,7 @@ def main(): rag.list_documents() # Query - question = "What do the documents say about modality for perceived message perception?" 
+ question = "What do you know about modality for perceived message perception?" result = rag.query(question) print(f"\nQuestion: {question}") print(f"Answer: {result['answer']}") diff --git a/server.py b/server.py index 838799b..bdcb0da 100644 --- a/server.py +++ b/server.py @@ -99,7 +99,7 @@ def chat(request: ChatRequest): print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)") for i, chunk in enumerate(retrieved): content = chunk.get("content", "") - preview = (content[:80] + "...") if len(content) > 80 else content + preview = (content[:150] + "...") if len(content) > 150 else content print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}") else: print(f"\n[RAG] Retrieved 0 chunks") diff --git a/templates/chat.html b/templates/chat.html index 8e1d340..4258c46 100644 --- a/templates/chat.html +++ b/templates/chat.html @@ -9,13 +9,15 @@ body { font-family: "SF Mono", "Consolas", "Monaco", monospace; margin: 0; - min-height: 100vh; + height: 100vh; + overflow: hidden; background: #0f0f12; color: #e4e4e7; display: flex; flex-direction: column; } header { + flex-shrink: 0; padding: 1rem 1.5rem; border-bottom: 1px solid #27272a; background: #18181b; @@ -33,6 +35,7 @@ } #messages { flex: 1; + min-height: 0; overflow-y: auto; padding: 1.5rem; display: flex; @@ -108,6 +111,7 @@ display: block; } #input-area { + flex-shrink: 0; padding: 1rem 1.5rem 1.5rem; border-top: 1px solid #27272a; background: #18181b; @@ -138,6 +142,7 @@ #input { flex: 1; min-height: 44px; + height: 44px; max-height: 160px; padding: 0.6rem 1rem; font: inherit; @@ -152,7 +157,8 @@ #input::placeholder { color: #71717a; } #input:focus { border-color: #52525b; } #send { - padding: 0.6rem 1.2rem; + height: 44px; + padding: 0 1.2rem; font: inherit; font-size: 0.85rem; font-weight: 500; @@ -179,8 +185,8 @@
-

Local RAG Chat

-

Ask questions about your documents. Answers are generated from the vector store + Ollama / OpenAI.

+

RAG Chat

+

Ask questions about your documents. Answers are generated from a local vector store + Ollama / OpenAI.

@@ -219,11 +225,11 @@ } catch (_) {} })(); - function appendMessage(role, text, isError = false) { + function appendMessage(role, text, isError = false, llmProvider = null) { text = text ?? ''; const div = document.createElement('div'); div.className = 'msg ' + (isError ? 'error' : role); - const label = role === 'user' ? 'You' : 'RAG'; + let label = role === 'user' ? 'You' : (llmProvider === 'openai' ? 'ChatGPT + RAG' : 'Ollama + RAG'); let body; if (role === 'assistant' && !isError) { const rawHtml = marked.parse(text, { gfm: true, breaks: true }); @@ -288,9 +294,9 @@ console.groupEnd(); } if (data.error) { - appendMessage('assistant', data.error, true); + appendMessage('assistant', data.error, true, providerEl.value); } else { - appendMessage('assistant', (data.answer || '(No response)').trim()); + appendMessage('assistant', (data.answer || '(No response)').trim(), false, providerEl.value); } } catch (err) { setLoading(false);