---
README.md | 88 +++++++++++++++++++++++++--------------------
server.py | 44 +++++++++++++++++++----
templates/chat.html | 45 ++++++++++++++++++++++-
3 files changed, 131 insertions(+), 46 deletions(-)
diff --git a/README.md b/README.md
index 4f871e3..0ac7d33 100644
--- a/README.md
+++ b/README.md
@@ -1,78 +1,90 @@
# Local RAG Setup
-Minimal RAG implementation with LangChain, Ollama, and FAISS.
+Minimal RAG implementation with LangChain, FAISS, and support for either Ollama or OpenAI (API key required).
## Dependencies
- `langchain` - Core framework
-- `langchain-community` - Community integrations (loaders, vectorstores)
+- `langchain-community` - Loaders, vectorstores
- `langchain-ollama` - Ollama integration
-- `langchain-text-splitters` - Text splitting utilities
+- `langchain-openai` - OpenAI integration
+- `langchain-text-splitters` - Text splitting
- `langchain-huggingface` - HuggingFace embeddings
- `faiss-cpu` - Vector search
- `sentence-transformers` - Embeddings
- `pypdf` - PDF loading
-- `fastapi` - Web server and API
+- `fastapi` - Web server
- `uvicorn` - ASGI server
## Installation
```bash
-# Create conda environment
conda create -n local_rag python=3.10 -y
conda activate local_rag
-
-# Install dependencies
pip install -r requirements.txt
```
-## Setup Ollama
+## Setup
+
+### Ollama (optional)
```bash
-# Make sure Ollama is running
ollama serve
-
-# Pull a model (in another terminal)
-ollama pull llama2
+ollama pull mistral
```
-## Usage
+### OpenAI (optional)
-Edit `local_rag.py` and uncomment the example code:
+Set the API key when using OpenAI:
+
+```bash
+export OPENAI_API_KEY="your-key"
+```
+
+## Add Documents
+
+**Option 1:** Add PDFs from a folder via script. Edit `DATA_ROOT` in [add_pdfs.py](add_pdfs.py) to point at your folder, then run:
+
+```bash
+python add_pdfs.py
+```
+
+The script clears the existing vector store and indexes all supported files recursively (`.pdf`, `.txt`, `.md`).
+
+**Option 2:** Use `local_rag.py` programmatically:
```python
-# Add documents
-rag.add_documents([
- "path/to/document1.pdf",
- "path/to/document2.txt"
-])
-
-# Query
-question = "What is this document about?"
-answer = rag.query(question)
-print(f"Answer: {answer}")
+from local_rag import LocalRAG
+rag = LocalRAG()
+rag.add_documents(["path/to/doc1.pdf", "path/to/doc2.txt"])
```
-Run:
-```bash
-python local_rag.py
-```
+## Chat GUI
-## Chat GUI (FastAPI)
-
-A simple web chat interface is included. Start the server:
+Start the server:
```bash
uvicorn server:app --reload
```
-Then open [http://localhost:8000](http://localhost:8000) in your browser. The chat view uses the same RAG system: your messages are answered using the vector store and Ollama. Ensure your vector store is populated (e.g. by running the document-add steps in `local_rag.py` once) and that Ollama is running.
+Open [http://localhost:8000](http://localhost:8000). The chat UI provides:
+
+- **Provider switch** – Toggle between Ollama and OpenAI without restart (OpenAI requires `OPENAI_API_KEY`)
+- **Conversation history** – Multi-turn chat with context
+- **Markdown** – Assistant replies rendered as markdown (headings, code, lists, links)
+
+Ensure the vector store is populated and at least one provider (Ollama or OpenAI) is configured.
+
+## API
+
+- `POST /api/chat` – `{ "message": "...", "history": [...], "llm_provider": "ollama"|"openai" }`
+- `GET /api/providers` – `{ "ollama": true, "openai": true|false }`
+- `GET /api/health` – Health and vectorstore status
## How it works
-1. **Load documents** - PDFs or text files
-2. **Split into chunks** - 1000 chars with 200 overlap
-3. **Create embeddings** - Using sentence-transformers
-4. **Store in FAISS** - Fast similarity search
-5. **Query** - Retrieve relevant chunks and generate answer with Ollama
-
+1. **Load documents** – PDFs or text via PyPDFLoader / TextLoader
+2. **Chunk** – RecursiveCharacterTextSplitter (2000 chars, 400 overlap)
+3. **Embed** – sentence-transformers/all-MiniLM-L6-v2
+4. **Store** – FAISS vector store (similarity search with scores)
+5. **Query** – Retrieve chunks, optionally rephrase with conversation history, generate answer with selected LLM
diff --git a/server.py b/server.py
index 92d9556..838799b 100644
--- a/server.py
+++ b/server.py
@@ -2,6 +2,7 @@
FastAPI server for Local RAG with chat GUI.
Run with: uvicorn server:app --reload
"""
+import os
from pathlib import Path
from fastapi import FastAPI, HTTPException
@@ -10,18 +11,28 @@ from pydantic import BaseModel
from local_rag import LocalRAG
-# LLM provider: "ollama" or "openai"
-LLM_PROVIDER = "openai"
OLLAMA_MODEL = "gpt-oss:20b"
OPENAI_MODEL = "gpt-5.2"
-
VECTORSTORE_PATH = "./vectorstore"
-rag = LocalRAG(
+
+# Dual RAG instances for on-the-fly provider switching
+rag_ollama = LocalRAG(
vectorstore_path=VECTORSTORE_PATH,
- llm_provider=LLM_PROVIDER,
+ llm_provider="ollama",
ollama_model=OLLAMA_MODEL,
openai_model=OPENAI_MODEL,
)
+rag_openai = None
+if os.environ.get("OPENAI_API_KEY"):
+ try:
+ rag_openai = LocalRAG(
+ vectorstore_path=VECTORSTORE_PATH,
+ llm_provider="openai",
+ ollama_model=OLLAMA_MODEL,
+ openai_model=OPENAI_MODEL,
+ )
+ except Exception as e:
+ print(f"OpenAI RAG not available: {e}")
app = FastAPI(title="Local RAG Chat", version="1.0.0")
@@ -34,6 +45,7 @@ class ChatMessage(BaseModel):
class ChatRequest(BaseModel):
message: str
history: list[ChatMessage] = [] # previous turns for conversation context
+ llm_provider: str = "ollama" # "ollama" | "openai"
class RetrievedChunk(BaseModel):
@@ -58,11 +70,21 @@ def chat_view():
return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
+def _get_rag(provider: str):
+ """Return the RAG instance for the given provider. Fall back to Ollama if OpenAI unavailable."""
+ if provider == "openai" and rag_openai is not None:
+ return rag_openai
+ return rag_ollama
+
+
@app.post("/api/chat", response_model=ChatResponse)
def chat(request: ChatRequest):
"""Handle a chat message and return the RAG answer."""
if not request.message or not request.message.strip():
return ChatResponse(answer="", error="Message cannot be empty")
+ if request.llm_provider == "openai" and rag_openai is None:
+ return ChatResponse(answer="", error="OpenAI not configured. Set OPENAI_API_KEY.")
+ rag = _get_rag(request.llm_provider)
try:
chat_history = [{"role": m.role, "content": m.content} for m in request.history]
result = rag.query_with_history(
@@ -81,7 +103,9 @@ def chat(request: ChatRequest):
print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
else:
print(f"\n[RAG] Retrieved 0 chunks")
- print(f"[RAG] LLM response:\n{answer}")
+ provider_label = "OpenAI" if request.llm_provider == "openai" else "Ollama"
+ model_name = OPENAI_MODEL if request.llm_provider == "openai" else OLLAMA_MODEL
+ print(f"[RAG] LLM response ({provider_label} / {model_name}):\n{answer}")
return ChatResponse(answer=answer, retrieved=retrieved)
except Exception as e:
@@ -91,10 +115,16 @@ def chat(request: ChatRequest):
@app.get("/api/health")
def health():
"""Health check and vector store status."""
- has_docs = rag.vectorstore is not None
+ has_docs = rag_ollama.vectorstore is not None
return {"status": "ok", "vectorstore_loaded": has_docs}
+@app.get("/api/providers")
+def providers():
+ """Return which LLM providers are available."""
+ return {"ollama": True, "openai": rag_openai is not None}
+
+
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/templates/chat.html b/templates/chat.html
index 16654e3..8e1d340 100644
--- a/templates/chat.html
+++ b/templates/chat.html
@@ -112,6 +112,24 @@
border-top: 1px solid #27272a;
background: #18181b;
}
+ #provider-row {
+ display: flex;
+ align-items: center;
+ gap: 0.5rem;
+ margin-bottom: 0.5rem;
+ font-size: 0.8rem;
+ color: #71717a;
+ }
+ #provider-row label { flex-shrink: 0; }
+ #provider {
+ padding: 0.25rem 0.5rem;
+ font: inherit;
+ font-size: 0.85rem;
+ color: #e4e4e7;
+ background: #27272a;
+ border: 1px solid #3f3f46;
+ border-radius: 6px;
+ }
#input-row {
display: flex;
gap: 0.5rem;
@@ -168,6 +186,13 @@