diff --git a/local_rag.py b/local_rag.py
index 2f62ebb..4745412 100644
--- a/local_rag.py
+++ b/local_rag.py
@@ -1,5 +1,5 @@
 """
-Local RAG setup with LangChain, Ollama, and FAISS
+Local RAG setup with LangChain, Ollama/OpenAI, and FAISS
 Minimal dependencies, simple code
 """
 import os
@@ -9,34 +9,51 @@
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
 from langchain_community.vectorstores import FAISS
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
 from langchain_text_splitters import RecursiveCharacterTextSplitter


 class LocalRAG:
-    def __init__(self, vectorstore_path="./vectorstore", ollama_model="mistral:7b"):
-        """Initialize local RAG system"""
+    def __init__(
+        self,
+        vectorstore_path="./vectorstore",
+        llm_provider="ollama",
+        ollama_model="gpt-oss:20b",
+        openai_model="gpt-5.2",
+        ollama_base_url="http://localhost:11434",
+    ):
+        """Initialize local RAG system. llm_provider: 'ollama' or 'openai'."""
         self.vectorstore_path = vectorstore_path
-        self.ollama_model = ollama_model
-
+        self.llm_provider = llm_provider
+
         # Embeddings
         print("Loading embeddings model...")
         self.embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
-
+
         # Text splitter
         self.text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=2000, chunk_overlap=400
         )
-
-        # Ollama LLM
-        print(f"Connecting to Ollama (model: {ollama_model})...")
-        self.llm = ChatOllama(
-            model=ollama_model,
-            base_url="http://localhost:11434"
-        )
-
+
+        # LLM (Ollama or OpenAI)
+        if llm_provider == "openai":
+            api_key = os.environ.get("OPENAI_API_KEY")
+            if not api_key:
+                raise ValueError(
+                    "OPENAI_API_KEY environment variable is required when llm_provider='openai'"
+                )
+            print(f"Using OpenAI (model: {openai_model})...")
+            self.llm = ChatOpenAI(model=openai_model, api_key=api_key)
+        else:
+            print(f"Using Ollama (model: {ollama_model})...")
+            self.llm = ChatOllama(
+                model=ollama_model,
+                base_url=ollama_base_url
+            )
+
         # Vector store (load if exists, otherwise None)
         self.vectorstore = None
         self._load_vectorstore()
diff --git a/requirements.txt b/requirements.txt
index 153a7cb..b1c1fdb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 langchain
 langchain-community
 langchain-ollama
+langchain-openai
 langchain-text-splitters
 langchain-huggingface
 faiss-cpu
diff --git a/server.py b/server.py
index 3e9979a..92d9556 100644
--- a/server.py
+++ b/server.py
@@ -10,10 +10,18 @@
 from pydantic import BaseModel

 from local_rag import LocalRAG

-# Initialize RAG once at startup
-VECTORSTORE_PATH = "./vectorstore"
+# LLM provider: "ollama" or "openai"
+LLM_PROVIDER = "openai"
 OLLAMA_MODEL = "gpt-oss:20b"
-rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL)
+OPENAI_MODEL = "gpt-5.2"
+
+VECTORSTORE_PATH = "./vectorstore"
+rag = LocalRAG(
+    vectorstore_path=VECTORSTORE_PATH,
+    llm_provider=LLM_PROVIDER,
+    ollama_model=OLLAMA_MODEL,
+    openai_model=OPENAI_MODEL,
+)

 app = FastAPI(title="Local RAG Chat", version="1.0.0")
@@ -64,15 +72,16 @@ def chat(request: ChatRequest):
         answer = result["answer"]
         retrieved = result.get("retrieved", [])

-        # Server-side console trace: log retrieved chunks before LLM answer
+        # Server-side console trace: shorter chunk logs + raw LLM response
         if retrieved:
-            print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s) for query: {request.message[:80]!r}")
+            print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)")
             for i, chunk in enumerate(retrieved):
                 content = chunk.get("content", "")
-                preview = (content[:1000] + "...") if len(content) > 1000 else content
-                print(f"  [{i + 1}] source={chunk.get('source', '')} page={chunk.get('page')} | {preview!r}")
+                preview = (content[:80] + "...") if len(content) > 80 else content
+                print(f"  [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
         else:
-            print(f"\n[RAG] Retrieved 0 chunks for query: {request.message[:80]!r}")
+            print(f"\n[RAG] Retrieved 0 chunks")
+        print(f"[RAG] LLM response:\n{answer}")

         return ChatResponse(answer=answer, retrieved=retrieved)
     except Exception as e:
diff --git a/templates/chat.html b/templates/chat.html
index 22267f7..16654e3 100644
--- a/templates/chat.html
+++ b/templates/chat.html
@@ -45,9 +45,45 @@
       border-radius: 8px;
       font-size: 0.9rem;
       line-height: 1.5;
-      white-space: pre-wrap;
       word-break: break-word;
     }
+    .msg.user, .msg.error {
+      white-space: pre-wrap;
+    }
+    .msg.assistant .markdown-body {
+      white-space: normal;
+    }
+    .msg.assistant .markdown-body h1, .msg.assistant .markdown-body h2, .msg.assistant .markdown-body h3 {
+      margin: 0.75em 0 0.35em;
+      font-size: 1em;
+      font-weight: 600;
+    }
+    .msg.assistant .markdown-body h1:first-child, .msg.assistant .markdown-body h2:first-child, .msg.assistant .markdown-body h3:first-child { margin-top: 0; }
+    .msg.assistant .markdown-body p { margin: 0.5em 0; }
+    .msg.assistant .markdown-body p:first-child { margin-top: 0; }
+    .msg.assistant .markdown-body p:last-child { margin-bottom: 0; }
+    .msg.assistant .markdown-body pre {
+      margin: 0.5em 0;
+      padding: 0.6rem;
+      background: #18181b;
+      border-radius: 6px;
+      overflow-x: auto;
+      font-size: 0.85em;
+    }
+    .msg.assistant .markdown-body code {
+      background: #18181b;
+      padding: 0.15em 0.35em;
+      border-radius: 4px;
+      font-size: 0.9em;
+    }
+    .msg.assistant .markdown-body pre code {
+      padding: 0;
+      background: none;
+    }
+    .msg.assistant .markdown-body ul, .msg.assistant .markdown-body ol { margin: 0.5em 0; padding-left: 1.4em; }
+    .msg.assistant .markdown-body li { margin: 0.25em 0; }
+    .msg.assistant .markdown-body a { color: #60a5fa; text-decoration: none; }
+    .msg.assistant .markdown-body a:hover { text-decoration: underline; }
     .msg.user {
       align-self: flex-end;
       background: #3f3f46;
@@ -120,11 +156,13 @@
       color: #71717a;
     }
   </style>
+  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+  <script src="https://cdn.jsdelivr.net/npm/dompurify@3/dist/purify.min.js"></script>
 </head>
 <body>

   <h1>Local RAG Chat</h1>

-  <p class="subtitle">Ask questions about your documents. Answers are generated from the vector store + Ollama.</p>
+  <p class="subtitle">Ask questions about your documents. Answers are generated from the vector store + Ollama / OpenAI.</p>

@@ -143,10 +181,18 @@
     const chatHistory = [];

     function appendMessage(role, text, isError = false) {
+      text = text ?? '';
       const div = document.createElement('div');
       div.className = 'msg ' + (isError ? 'error' : role);
       const label = role === 'user' ? 'You' : 'RAG';
-      div.innerHTML = '<span class="label">' + label + '</span>' + escapeHtml(text);
+      let body;
+      if (role === 'assistant' && !isError) {
+        const rawHtml = marked.parse(text, { gfm: true, breaks: true });
+        body = '<div class="markdown-body">' + DOMPurify.sanitize(rawHtml) + '</div>';
+      } else {
+        body = escapeHtml(text);
+      }
+      div.innerHTML = '<span class="label">' + label + '</span>' + body;
       messagesEl.appendChild(div);
       messagesEl.scrollTop = messagesEl.scrollHeight;
       chatHistory.push({ role: role, content: text });
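
Reviewer note, not part of the patch: a minimal sketch of how the new constructor can be exercised, assuming the patched local_rag.py is importable and, for the OpenAI path, that OPENAI_API_KEY is exported. The LLM_PROVIDER env-var switch and the .invoke() smoke test below are illustrations only; the diff itself hardcodes the provider in server.py.

# Sketch: exercising the provider switch from the patched LocalRAG.
import os

from local_rag import LocalRAG

# Hypothetical env-var switch -- server.py in the diff hardcodes LLM_PROVIDER.
provider = os.environ.get("LLM_PROVIDER", "ollama")

rag = LocalRAG(
    vectorstore_path="./vectorstore",
    llm_provider=provider,        # "ollama" or "openai"
    ollama_model="gpt-oss:20b",   # values mirror the new constructor defaults
    openai_model="gpt-5.2",
)

# ChatOllama and ChatOpenAI are both LangChain chat models, so a plain
# .invoke() call is enough to confirm the selected backend responds.
print(rag.llm.invoke("Reply with the single word: ok").content)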
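Reviewer note, not part of the patch: the shortened console trace can be checked end to end with a stdlib-only client. The /chat path and port 8000 are assumptions (the route decorator and the uvicorn command sit outside this diff); the "message" field matches request.message in server.py, and "answer"/"retrieved" match the ChatResponse fields.

# Sketch: hypothetical client for the chat endpoint (assumed to be POST /chat).
import json
import urllib.request

payload = json.dumps({"message": "What do my documents cover?"}).encode()
req = urllib.request.Request(
    "http://localhost:8000/chat",  # assumed route and default uvicorn port
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)

print(body["answer"])               # the answer also traced on the server console
for chunk in body["retrieved"]:     # metadata shown by the shortened chunk logs
    print(chunk.get("source"), chunk.get("page"), chunk.get("score"))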