From 4364ddcd71f719cf535e6f90e3d395f6ebd7bbad Mon Sep 17 00:00:00 2001
From: Philipp Mock
Date: Wed, 4 Feb 2026 15:31:24 +0100
Subject: [PATCH] Add a script to index all documents (IWM articles) from a
 local folder; uses a hardcoded path, so it won't work on other machines
---
add_pdfs.py | 18 ++++++++++
local_rag.py | 85 ++++++++++++++++++++++++++++++++++-----------
server.py | 12 ++++++-
templates/chat.html | 5 ++-
4 files changed, 98 insertions(+), 22 deletions(-)
create mode 100644 add_pdfs.py
diff --git a/add_pdfs.py b/add_pdfs.py
new file mode 100644
index 0000000..ebf1a10
--- /dev/null
+++ b/add_pdfs.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+"""Add all PDFs under a folder to the RAG vector store. Run from project root."""
+from pathlib import Path
+
+from local_rag import LocalRAG
+
+DATA_ROOT = Path("/Users/Philipp/Desktop/workspace/python/gpt_publikationen/data_vs")
+VECTORSTORE_PATH = "./vectorstore"
+
+if __name__ == "__main__":
+ pdfs = sorted(p for p in DATA_ROOT.rglob("*") if p.suffix.lower() == ".pdf")
+ print(f"Found {len(pdfs)} PDF(s) under {DATA_ROOT}")
+ if not pdfs:
+ raise SystemExit("No PDFs found.")
+
+ rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH)
+ rag.add_documents([str(p) for p in pdfs])
+ print("Done.")
diff --git a/local_rag.py b/local_rag.py
index 403b040..32bb24d 100644
--- a/local_rag.py
+++ b/local_rag.py
@@ -4,11 +4,12 @@ Minimal dependencies, simple code
"""
import os
from pathlib import Path
+
from langchain_community.document_loaders import PyPDFLoader, TextLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
+from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
+from langchain_text_splitters import RecursiveCharacterTextSplitter
class LocalRAG:
@@ -25,8 +26,8 @@ class LocalRAG:
# Text splitter
self.text_splitter = RecursiveCharacterTextSplitter(
- chunk_size=1000,
- chunk_overlap=200
+ chunk_size=1500,
+ chunk_overlap=300
)
# Ollama LLM
@@ -133,16 +134,21 @@ class LocalRAG:
return doc_list
- def query(self, question, k=4):
- """Query the RAG system. Returns dict with 'answer' and 'retrieved' (list of chunks with content, source, page)."""
- if self.vectorstore is None:
- return {
- "answer": "Error: No documents loaded. Please add documents first.",
- "retrieved": [],
- }
+ def _format_history(self, chat_history):
+ """Format chat history as a string for prompts."""
+ lines = []
+ for turn in chat_history or []:
+ role = (turn.get("role") or "").lower()
+ content = (turn.get("content") or "").strip()
+ if role == "user":
+ lines.append(f"User: {content}")
+ elif role == "assistant":
+ lines.append(f"Assistant: {content}")
+ return "\n".join(lines) if lines else ""
- docs = self.vectorstore.similarity_search(question, k=k)
- retrieved = [
+ def _docs_to_retrieved(self, docs):
+ """Convert document list to retrieved chunks format for API."""
+ return [
{
"content": doc.page_content,
"source": doc.metadata.get("source", ""),
@@ -151,20 +157,59 @@ class LocalRAG:
for doc in docs
]
- # Combine context from documents
+ def query(self, question, k=8):
+ """Query the RAG system (no conversation history). Returns dict with 'answer' and 'retrieved'."""
+ return self.query_with_history(question, chat_history=[], k=k)
+
+ def query_with_history(self, question, chat_history=None, k=8):
+ """Query the RAG with conversation history: rephrase question using history for retrieval,
+ then answer with full conversation + retrieved context in the prompt.
+ Returns dict with 'answer' and 'retrieved' (list of chunks with content, source, page).
+ """
+ if self.vectorstore is None:
+ return {
+ "answer": "Error: No documents loaded. Please add documents first.",
+ "retrieved": [],
+ }
+
+ history_str = self._format_history(chat_history)
+ search_query = question
+
+ print(f"[RAG] User question: {question!r}")
+
+ # 1) If we have history, rephrase the question into a standalone query for better retrieval
+ if history_str.strip():
+ rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged.
+
+Chat history:
+{history_str}
+
+Latest user question: {question}
+
+Standalone question:"""
+ rephrase_response = self.llm.invoke(rephrase_prompt)
+ search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question
+ print(f"[RAG] Standalone search query (rephrased): {search_query!r}")
+
+ print(f"[RAG] Query sent to vector store: {search_query!r}")
+
+ # 2) Retrieve documents using the (rephrased) query
+ docs = self.vectorstore.similarity_search(search_query, k=k)
+ retrieved = self._docs_to_retrieved(docs)
context = "\n\n".join([doc.page_content for doc in docs])
- prompt = f"""Use the following context to answer the question.
-If you don't know the answer, say that you don't know instead of making up an answer.
+ # 3) Answer using conversation history + retrieved context
+ history_block = f"Chat history:\n{history_str}\n\n" if history_str else ""
+ answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. If you don't know the answer, say so. Keep the conversation coherent.
+
+{history_block}Relevant context from documents:
-Context:
{context}
-Question: {question}
+Current question: {question}
Answer:"""
-
- response = self.llm.invoke(prompt)
+ response = self.llm.invoke(answer_prompt)
answer = response.content if hasattr(response, "content") else str(response)
return {"answer": answer, "retrieved": retrieved}
diff --git a/server.py b/server.py
index 54e71a8..548ecfd 100644
--- a/server.py
+++ b/server.py
@@ -18,8 +18,14 @@ rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL)
app = FastAPI(title="Local RAG Chat", version="1.0.0")
+class ChatMessage(BaseModel):
+ role: str # "user" | "assistant"
+ content: str
+
+
class ChatRequest(BaseModel):
message: str
+ history: list[ChatMessage] = [] # previous turns for conversation context
class RetrievedChunk(BaseModel):
@@ -49,7 +55,11 @@ def chat(request: ChatRequest):
if not request.message or not request.message.strip():
return ChatResponse(answer="", error="Message cannot be empty")
try:
- result = rag.query(request.message.strip())
+ chat_history = [{"role": m.role, "content": m.content} for m in request.history]
+ result = rag.query_with_history(
+ request.message.strip(),
+ chat_history=chat_history,
+ )
answer = result["answer"]
retrieved = result.get("retrieved", [])
diff --git a/templates/chat.html b/templates/chat.html
index fa414cb..22267f7 100644
--- a/templates/chat.html
+++ b/templates/chat.html
@@ -140,6 +140,7 @@
const messagesEl = document.getElementById('messages');
const inputEl = document.getElementById('input');
const sendBtn = document.getElementById('send');
+ const chatHistory = [];
function appendMessage(role, text, isError = false) {
const div = document.createElement('div');
@@ -148,6 +149,7 @@
div.innerHTML = '' + label + '' + escapeHtml(text);
messagesEl.appendChild(div);
messagesEl.scrollTop = messagesEl.scrollHeight;
+ chatHistory.push({ role: role, content: text });
}
function escapeHtml(s) {
@@ -179,10 +181,11 @@
setLoading(true);
try {
+ const history = chatHistory.slice(0, -1);
const res = await fetch('/api/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ message: text })
+ body: JSON.stringify({ message: text, history: history })
});
const data = await res.json();
setLoading(false);