From 4364ddcd71f719cf535e6f90e3d395f6ebd7bbad Mon Sep 17 00:00:00 2001 From: Philipp Mock Date: Wed, 4 Feb 2026 15:31:24 +0100 Subject: [PATCH] added a script to add all documents (IWM articles) from a local folder. won't work on other machines --- add_pdfs.py | 18 ++++++++++ local_rag.py | 85 ++++++++++++++++++++++++++++++++++----------- server.py | 12 ++++++- templates/chat.html | 5 ++- 4 files changed, 98 insertions(+), 22 deletions(-) create mode 100644 add_pdfs.py diff --git a/add_pdfs.py b/add_pdfs.py new file mode 100644 index 0000000..ebf1a10 --- /dev/null +++ b/add_pdfs.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Add all PDFs under a folder to the RAG vector store. Run from project root.""" +from pathlib import Path + +from local_rag import LocalRAG + +DATA_ROOT = Path("/Users/Philipp/Desktop/workspace/python/gpt_publikationen/data_vs") +VECTORSTORE_PATH = "./vectorstore" + +if __name__ == "__main__": + pdfs = sorted(p for p in DATA_ROOT.rglob("*") if p.suffix.lower() == ".pdf") + print(f"Found {len(pdfs)} PDF(s) under {DATA_ROOT}") + if not pdfs: + raise SystemExit("No PDFs found.") + + rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH) + rag.add_documents([str(p) for p in pdfs]) + print("Done.") diff --git a/local_rag.py b/local_rag.py index 403b040..32bb24d 100644 --- a/local_rag.py +++ b/local_rag.py @@ -4,11 +4,12 @@ Minimal dependencies, simple code """ import os from pathlib import Path + from langchain_community.document_loaders import PyPDFLoader, TextLoader -from langchain_text_splitters import RecursiveCharacterTextSplitter -from langchain_huggingface import HuggingFaceEmbeddings from langchain_community.vectorstores import FAISS +from langchain_huggingface import HuggingFaceEmbeddings from langchain_ollama import ChatOllama +from langchain_text_splitters import RecursiveCharacterTextSplitter class LocalRAG: @@ -25,8 +26,8 @@ class LocalRAG: # Text splitter self.text_splitter = RecursiveCharacterTextSplitter( - chunk_size=1000, - chunk_overlap=200 + chunk_size=1500, + chunk_overlap=300 ) # Ollama LLM @@ -133,16 +134,21 @@ class LocalRAG: return doc_list - def query(self, question, k=4): - """Query the RAG system. Returns dict with 'answer' and 'retrieved' (list of chunks with content, source, page).""" - if self.vectorstore is None: - return { - "answer": "Error: No documents loaded. Please add documents first.", - "retrieved": [], - } + def _format_history(self, chat_history): + """Format chat history as a string for prompts.""" + lines = [] + for turn in chat_history or []: + role = (turn.get("role") or "").lower() + content = (turn.get("content") or "").strip() + if role == "user": + lines.append(f"User: {content}") + elif role == "assistant": + lines.append(f"Assistant: {content}") + return "\n".join(lines) if lines else "" - docs = self.vectorstore.similarity_search(question, k=k) - retrieved = [ + def _docs_to_retrieved(self, docs): + """Convert document list to retrieved chunks format for API.""" + return [ { "content": doc.page_content, "source": doc.metadata.get("source", ""), @@ -151,20 +157,59 @@ class LocalRAG: for doc in docs ] - # Combine context from documents + def query(self, question, k=8): + """Query the RAG system (no conversation history). Returns dict with 'answer' and 'retrieved'.""" + return self.query_with_history(question, chat_history=[], k=k) + + def query_with_history(self, question, chat_history=None, k=8): + """Query the RAG with conversation history: rephrase question using history for retrieval, + then answer with full conversation + retrieved context in the prompt. + Returns dict with 'answer' and 'retrieved' (list of chunks with content, source, page). + """ + if self.vectorstore is None: + return { + "answer": "Error: No documents loaded. Please add documents first.", + "retrieved": [], + } + + history_str = self._format_history(chat_history) + search_query = question + + print(f"[RAG] User question: {question!r}") + + # 1) If we have history, rephrase the question into a standalone query for better retrieval + if history_str.strip(): + rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged. + +Chat history: +{history_str} + +Latest user question: {question} + +Standalone question:""" + rephrase_response = self.llm.invoke(rephrase_prompt) + search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question + print(f"[RAG] Standalone search query (rephrased): {search_query!r}") + + print(f"[RAG] Query sent to vector store: {search_query!r}") + + # 2) Retrieve documents using the (rephrased) query + docs = self.vectorstore.similarity_search(search_query, k=k) + retrieved = self._docs_to_retrieved(docs) context = "\n\n".join([doc.page_content for doc in docs]) - prompt = f"""Use the following context to answer the question. -If you don't know the answer, say that you don't know instead of making up an answer. + # 3) Answer using conversation history + retrieved context + history_block = f"Chat history:\n{history_str}\n\n" if history_str else "" + answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. If you don't know the answer, say so. Keep the conversation coherent. + +{history_block}Relevant context from documents: -Context: {context} -Question: {question} +Current question: {question} Answer:""" - - response = self.llm.invoke(prompt) + response = self.llm.invoke(answer_prompt) answer = response.content if hasattr(response, "content") else str(response) return {"answer": answer, "retrieved": retrieved} diff --git a/server.py b/server.py index 54e71a8..548ecfd 100644 --- a/server.py +++ b/server.py @@ -18,8 +18,14 @@ rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL) app = FastAPI(title="Local RAG Chat", version="1.0.0") +class ChatMessage(BaseModel): + role: str # "user" | "assistant" + content: str + + class ChatRequest(BaseModel): message: str + history: list[ChatMessage] = [] # previous turns for conversation context class RetrievedChunk(BaseModel): @@ -49,7 +55,11 @@ def chat(request: ChatRequest): if not request.message or not request.message.strip(): return ChatResponse(answer="", error="Message cannot be empty") try: - result = rag.query(request.message.strip()) + chat_history = [{"role": m.role, "content": m.content} for m in request.history] + result = rag.query_with_history( + request.message.strip(), + chat_history=chat_history, + ) answer = result["answer"] retrieved = result.get("retrieved", []) diff --git a/templates/chat.html b/templates/chat.html index fa414cb..22267f7 100644 --- a/templates/chat.html +++ b/templates/chat.html @@ -140,6 +140,7 @@ const messagesEl = document.getElementById('messages'); const inputEl = document.getElementById('input'); const sendBtn = document.getElementById('send'); + const chatHistory = []; function appendMessage(role, text, isError = false) { const div = document.createElement('div'); @@ -148,6 +149,7 @@ div.innerHTML = '' + label + '' + escapeHtml(text); messagesEl.appendChild(div); messagesEl.scrollTop = messagesEl.scrollHeight; + chatHistory.push({ role: role, content: text }); } function escapeHtml(s) { @@ -179,10 +181,11 @@ setLoading(true); try { + const history = chatHistory.slice(0, -1); const res = await fetch('/api/chat', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ message: text }) + body: JSON.stringify({ message: text, history: history }) }); const data = await res.json(); setLoading(false);