From 4879439f275bd7cffff3a00478cc9d4aa6138d07 Mon Sep 17 00:00:00 2001
From: Philipp Mock
Date: Wed, 11 Feb 2026 15:21:26 +0100
Subject: [PATCH] switch ollama model to gpt-oss:20b, clear vector store before
 re-adding PDFs, tune chunking, and expose FAISS retrieval scores
---
add_pdfs.py | 5 +++++
local_rag.py | 28 ++++++++++++++++++++++------
server.py | 3 ++-
3 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/add_pdfs.py b/add_pdfs.py
index ebf1a10..c1bed00 100644
--- a/add_pdfs.py
+++ b/add_pdfs.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
"""Add all PDFs under a folder to the RAG vector store. Run from project root."""
+import shutil
from pathlib import Path
from local_rag import LocalRAG
@@ -8,6 +9,10 @@ DATA_ROOT = Path("/Users/Philipp/Desktop/workspace/python/gpt_publikationen/data
VECTORSTORE_PATH = "./vectorstore"
if __name__ == "__main__":
+ if Path(VECTORSTORE_PATH).exists():
+ shutil.rmtree(VECTORSTORE_PATH)
+ print(f"Cleared existing vector store: {VECTORSTORE_PATH}")
+
pdfs = sorted(p for p in DATA_ROOT.rglob("*") if p.suffix.lower() == ".pdf")
print(f"Found {len(pdfs)} PDF(s) under {DATA_ROOT}")
if not pdfs:
diff --git a/local_rag.py b/local_rag.py
index 32bb24d..2f62ebb 100644
--- a/local_rag.py
+++ b/local_rag.py
@@ -26,8 +26,8 @@ class LocalRAG:
# Text splitter
self.text_splitter = RecursiveCharacterTextSplitter(
- chunk_size=1500,
- chunk_overlap=300
+ chunk_size=2000,
+ chunk_overlap=400
)
# Ollama LLM
@@ -157,6 +157,18 @@ class LocalRAG:
for doc in docs
]
+ def _docs_scores_to_retrieved(self, docs_with_scores):
+ """Convert (Document, score) list to retrieved chunks format with score. FAISS returns L2 distance (lower = more similar)."""
+ return [
+ {
+ "content": doc.page_content,
+ "source": doc.metadata.get("source", ""),
+ "page": doc.metadata.get("page"),
+ "score": float(score),
+ }
+ for doc, score in docs_with_scores
+ ]
+
def query(self, question, k=8):
"""Query the RAG system (no conversation history). Returns dict with 'answer' and 'retrieved'."""
return self.query_with_history(question, chat_history=[], k=k)
@@ -193,9 +205,13 @@ Standalone question:"""
print(f"[RAG] Query sent to vector store: {search_query!r}")
- # 2) Retrieve documents using the (rephrased) query
- docs = self.vectorstore.similarity_search(search_query, k=k)
- retrieved = self._docs_to_retrieved(docs)
+ # 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar)
+ docs_with_scores = self.vectorstore.similarity_search_with_score(search_query, k=k)
+ docs = [doc for doc, _ in docs_with_scores]
+ retrieved = self._docs_scores_to_retrieved(docs_with_scores)
+ if docs_with_scores:
+ scores = [f"{s:.3f}" for _, s in docs_with_scores]
+ print(f"[RAG] Retrieved {len(docs)} chunk(s), scores (L2 dist): [{', '.join(scores)}]")
context = "\n\n".join([doc.page_content for doc in docs])
# 3) Answer using conversation history + retrieved context
@@ -222,7 +238,7 @@ def main():
print("=" * 60)
# Initialize
- rag = LocalRAG(ollama_model="mistral:7b")
+ rag = LocalRAG(ollama_model="gpt-oss:20b")
# Add documents (uncomment and add your file paths)
# rag.add_documents([
diff --git a/server.py b/server.py
index 548ecfd..3e9979a 100644
--- a/server.py
+++ b/server.py
@@ -12,7 +12,7 @@ from local_rag import LocalRAG
# Initialize RAG once at startup
VECTORSTORE_PATH = "./vectorstore"
-OLLAMA_MODEL = "mistral:7b"
+OLLAMA_MODEL = "gpt-oss:20b"
rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL)
app = FastAPI(title="Local RAG Chat", version="1.0.0")
@@ -32,6 +32,7 @@ class RetrievedChunk(BaseModel):
content: str
source: str
page: int | None
+ score: float | None = None # L2 distance from FAISS (lower = more similar)
class ChatResponse(BaseModel):