From 4879439f275bd7cffff3a00478cc9d4aa6138d07 Mon Sep 17 00:00:00 2001
From: Philipp Mock
Date: Wed, 11 Feb 2026 15:21:26 +0100
Subject: [PATCH] switched ollama model, added script to add pdfs to the vector store, tuned RAG parameters

---
Review notes (this area sits between the three-dash separator and the diff and
is not part of the commit message):

 * Layout: one record per line restored. Indentation and blank context lines
   were re-derived from the hunk line counts and the diffstat; confirm them
   against the target files before applying.
 * add_pdfs.py: the file is modified, not created (the Subject reads as if the
   script were new). It now removes ./vectorstore with shutil.rmtree() before
   the PDF scan, so the store is deleted even when the scan finds nothing and
   the "if not pdfs:" branch is taken. NOTE(review): consider clearing only
   after the scan has found at least one PDF, e.g.
       if pdfs and Path(VECTORSTORE_PATH).exists(): shutil.rmtree(...)
   placed after the "Found ... PDF(s)" print. What the "if not pdfs:" branch
   does is outside this patch; verify.
 * local_rag.py: splitter chunk_size 1500 to 2000, chunk_overlap 300 to 400.
   query_with_history now calls similarity_search_with_score and converts the
   result with the new _docs_scores_to_retrieved helper, which adds a "score"
   key (float) to every retrieved chunk and logs the scores. The in-code
   comments describe the score as FAISS L2 distance (lower = more similar);
   NOTE(review): that depends on how the index was built; confirm.
 * local_rag.py main() and server.py OLLAMA_MODEL: model switched from
   "mistral:7b" to "gpt-oss:20b".
 * server.py: RetrievedChunk gains an optional field, score: float | None = None.

 add_pdfs.py  |  5 +++++
 local_rag.py | 28 ++++++++++++++++++++++------
 server.py    |  3 ++-
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/add_pdfs.py b/add_pdfs.py
index ebf1a10..c1bed00 100644
--- a/add_pdfs.py
+++ b/add_pdfs.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """Add all PDFs under a folder to the RAG vector store. Run from project root."""
+import shutil
 from pathlib import Path
 
 from local_rag import LocalRAG
@@ -8,6 +9,10 @@ DATA_ROOT = Path("/Users/Philipp/Desktop/workspace/python/gpt_publikationen/data
 VECTORSTORE_PATH = "./vectorstore"
 
 if __name__ == "__main__":
+    if Path(VECTORSTORE_PATH).exists():
+        shutil.rmtree(VECTORSTORE_PATH)
+        print(f"Cleared existing vector store: {VECTORSTORE_PATH}")
+
     pdfs = sorted(p for p in DATA_ROOT.rglob("*") if p.suffix.lower() == ".pdf")
     print(f"Found {len(pdfs)} PDF(s) under {DATA_ROOT}")
     if not pdfs:
diff --git a/local_rag.py b/local_rag.py
index 32bb24d..2f62ebb 100644
--- a/local_rag.py
+++ b/local_rag.py
@@ -26,8 +26,8 @@ class LocalRAG:
 
         # Text splitter
         self.text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=1500,
-            chunk_overlap=300
+            chunk_size=2000,
+            chunk_overlap=400
         )
 
         # Ollama LLM
@@ -157,6 +157,18 @@ class LocalRAG:
             for doc in docs
         ]
 
+    def _docs_scores_to_retrieved(self, docs_with_scores):
+        """Convert (Document, score) list to retrieved chunks format with score. FAISS returns L2 distance (lower = more similar)."""
+        return [
+            {
+                "content": doc.page_content,
+                "source": doc.metadata.get("source", ""),
+                "page": doc.metadata.get("page"),
+                "score": float(score),
+            }
+            for doc, score in docs_with_scores
+        ]
+
     def query(self, question, k=8):
         """Query the RAG system (no conversation history). Returns dict with 'answer' and 'retrieved'."""
         return self.query_with_history(question, chat_history=[], k=k)
@@ -193,9 +205,13 @@ Standalone question:"""
 
         print(f"[RAG] Query sent to vector store: {search_query!r}")
 
-        # 2) Retrieve documents using the (rephrased) query
-        docs = self.vectorstore.similarity_search(search_query, k=k)
-        retrieved = self._docs_to_retrieved(docs)
+        # 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar)
+        docs_with_scores = self.vectorstore.similarity_search_with_score(search_query, k=k)
+        docs = [doc for doc, _ in docs_with_scores]
+        retrieved = self._docs_scores_to_retrieved(docs_with_scores)
+        if docs_with_scores:
+            scores = [f"{s:.3f}" for _, s in docs_with_scores]
+            print(f"[RAG] Retrieved {len(docs)} chunk(s), scores (L2 dist): [{', '.join(scores)}]")
         context = "\n\n".join([doc.page_content for doc in docs])
 
         # 3) Answer using conversation history + retrieved context
@@ -222,7 +238,7 @@ def main():
     print("=" * 60)
 
     # Initialize
-    rag = LocalRAG(ollama_model="mistral:7b")
+    rag = LocalRAG(ollama_model="gpt-oss:20b")
 
     # Add documents (uncomment and add your file paths)
     # rag.add_documents([
diff --git a/server.py b/server.py
index 548ecfd..3e9979a 100644
--- a/server.py
+++ b/server.py
@@ -12,7 +12,7 @@ from local_rag import LocalRAG
 
 # Initialize RAG once at startup
 VECTORSTORE_PATH = "./vectorstore"
-OLLAMA_MODEL = "mistral:7b"
+OLLAMA_MODEL = "gpt-oss:20b"
 rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL)
 
 app = FastAPI(title="Local RAG Chat", version="1.0.0")
@@ -32,6 +32,7 @@ class RetrievedChunk(BaseModel):
     content: str
     source: str
     page: int | None
+    score: float | None = None  # L2 distance from FAISS (lower = more similar)
 
 
 class ChatResponse(BaseModel):