From 08610e0b64b11fc6c58c4d7da5f0e8bd889e38e4 Mon Sep 17 00:00:00 2001 From: Philipp Mock Date: Thu, 19 Feb 2026 15:36:32 +0100 Subject: [PATCH] updated parameters and retrieval prompt to massively improve query quality --- local_rag.py | 19 +++++++++++++------ server.py | 2 +- templates/chat.html | 22 ++++++++++++++-------- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/local_rag.py b/local_rag.py index 4745412..128749b 100644 --- a/local_rag.py +++ b/local_rag.py @@ -203,12 +203,16 @@ class LocalRAG: history_str = self._format_history(chat_history) search_query = question + rag_query_instruction = ( + "Do not return a list of references but prioritize meaningful text from abstracts, results and discussion sections." + ) print(f"[RAG] User question: {question!r}") # 1) If we have history, rephrase the question into a standalone query for better retrieval if history_str.strip(): - rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged. + rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. + Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged. 
Chat history: {history_str} @@ -220,10 +224,12 @@ Standalone question:""" search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question print(f"[RAG] Standalone search query (rephrased): {search_query!r}") - print(f"[RAG] Query sent to vector store: {search_query!r}") + retrieval_query = f"{search_query}\n\n{rag_query_instruction}" + print(f"[RAG] Search query: {search_query!r}") + print(f"[RAG] Retrieval query sent to vector store: {retrieval_query!r}") # 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar) - docs_with_scores = self.vectorstore.similarity_search_with_score(search_query, k=k) + docs_with_scores = self.vectorstore.similarity_search_with_score(retrieval_query, k=k) docs = [doc for doc, _ in docs_with_scores] retrieved = self._docs_scores_to_retrieved(docs_with_scores) if docs_with_scores: @@ -233,7 +239,8 @@ Standalone question:""" # 3) Answer using conversation history + retrieved context history_block = f"Chat history:\n{history_str}\n\n" if history_str else "" - answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. If you don't know the answer, say so. Keep the conversation coherent. + answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. + If you don't know the answer, say so. Keep the conversation coherent. {history_block}Relevant context from documents: @@ -251,7 +258,7 @@ Answer:""" def main(): """Example usage""" print("=" * 60) - print("Local RAG with LangChain, Ollama, and FAISS") + print("Local RAG with LangChain, Ollama/OpenAI, and FAISS") print("=" * 60) # Initialize @@ -268,7 +275,7 @@ def main(): rag.list_documents() # Query - question = "What do the documents say about modality for perceived message perception?" 
+ question = "What do you know about modality for perceived message perception?" result = rag.query(question) print(f"\nQuestion: {question}") print(f"Answer: {result['answer']}") diff --git a/server.py b/server.py index 838799b..bdcb0da 100644 --- a/server.py +++ b/server.py @@ -99,7 +99,7 @@ def chat(request: ChatRequest): print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)") for i, chunk in enumerate(retrieved): content = chunk.get("content", "") - preview = (content[:80] + "...") if len(content) > 80 else content + preview = (content[:150] + "...") if len(content) > 150 else content print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}") else: print(f"\n[RAG] Retrieved 0 chunks") diff --git a/templates/chat.html b/templates/chat.html index 8e1d340..4258c46 100644 --- a/templates/chat.html +++ b/templates/chat.html @@ -9,13 +9,15 @@ body { font-family: "SF Mono", "Consolas", "Monaco", monospace; margin: 0; - min-height: 100vh; + height: 100vh; + overflow: hidden; background: #0f0f12; color: #e4e4e7; display: flex; flex-direction: column; } header { + flex-shrink: 0; padding: 1rem 1.5rem; border-bottom: 1px solid #27272a; background: #18181b; @@ -33,6 +35,7 @@ } #messages { flex: 1; + min-height: 0; overflow-y: auto; padding: 1.5rem; display: flex; @@ -108,6 +111,7 @@ display: block; } #input-area { + flex-shrink: 0; padding: 1rem 1.5rem 1.5rem; border-top: 1px solid #27272a; background: #18181b; @@ -138,6 +142,7 @@ #input { flex: 1; min-height: 44px; + height: 44px; max-height: 160px; padding: 0.6rem 1rem; font: inherit; @@ -152,7 +157,8 @@ #input::placeholder { color: #71717a; } #input:focus { border-color: #52525b; } #send { - padding: 0.6rem 1.2rem; + height: 44px; + padding: 0 1.2rem; font: inherit; font-size: 0.85rem; font-weight: 500; @@ -179,8 +185,8 @@
-

Local RAG Chat

-

Ask questions about your documents. Answers are generated from the vector store + Ollama / OpenAI.

+

RAG Chat

+

Ask questions about your documents. Answers are generated from a local vector store + Ollama / OpenAI.

@@ -219,11 +225,11 @@ } catch (_) {} })(); - function appendMessage(role, text, isError = false) { + function appendMessage(role, text, isError = false, llmProvider = null) { text = text ?? ''; const div = document.createElement('div'); div.className = 'msg ' + (isError ? 'error' : role); - const label = role === 'user' ? 'You' : 'RAG'; + let label = role === 'user' ? 'You' : (llmProvider === 'openai' ? 'ChatGPT + RAG' : 'Ollama + RAG'); let body; if (role === 'assistant' && !isError) { const rawHtml = marked.parse(text, { gfm: true, breaks: true }); @@ -288,9 +294,9 @@ console.groupEnd(); } if (data.error) { - appendMessage('assistant', data.error, true); + appendMessage('assistant', data.error, true, providerEl.value); } else { - appendMessage('assistant', (data.answer || '(No response)').trim()); + appendMessage('assistant', (data.answer || '(No response)').trim(), false, providerEl.value); } } catch (err) { setLoading(false);