diff --git a/README.md b/README.md index 4f871e3..0ac7d33 100644 --- a/README.md +++ b/README.md @@ -1,78 +1,90 @@ # Local RAG Setup -Minimal RAG implementation with LangChain, Ollama, and FAISS. +Minimal RAG implementation with LangChain, FAISS, and support for either Ollama or OpenAI (API-key needed). ## Dependencies - `langchain` - Core framework -- `langchain-community` - Community integrations (loaders, vectorstores) +- `langchain-community` - Loaders, vectorstores - `langchain-ollama` - Ollama integration -- `langchain-text-splitters` - Text splitting utilities +- `langchain-openai` - OpenAI integration +- `langchain-text-splitters` - Text splitting - `langchain-huggingface` - HuggingFace embeddings - `faiss-cpu` - Vector search - `sentence-transformers` - Embeddings - `pypdf` - PDF loading -- `fastapi` - Web server and API +- `fastapi` - Web server - `uvicorn` - ASGI server ## Installation ```bash -# Create conda environment conda create -n local_rag python=3.10 -y conda activate local_rag - -# Install dependencies pip install -r requirements.txt ``` -## Setup Ollama +## Setup + +### Ollama (optional) ```bash -# Make sure Ollama is running ollama serve - -# Pull a model (in another terminal) -ollama pull llama2 +ollama pull mistral ``` -## Usage +### OpenAI (optional) -Edit `local_rag.py` and uncomment the example code: +Set the API key when using OpenAI: + +```bash +export OPENAI_API_KEY="your-key" +``` + +## Add Documents + +**Option 1:** Add PDFs from a folder via script. Edit `DATA_ROOT` in [add_pdfs.py](add_pdfs.py) to point at your folder, then run: + +```bash +python add_pdfs.py +``` + +The script clears the existing vector store and indexes all PDFs recursively. Supports `.pdf`, `.txt`, `.md`. + +**Option 2:** Use `local_rag.py` programmatically: ```python -# Add documents -rag.add_documents([ - "path/to/document1.pdf", - "path/to/document2.txt" -]) - -# Query -question = "What is this document about?" -answer = rag.query(question) -print(f"Answer: {answer}") +from local_rag import LocalRAG +rag = LocalRAG() +rag.add_documents(["path/to/doc1.pdf", "path/to/doc2.txt"]) ``` -Run: -```bash -python local_rag.py -``` +## Chat GUI -## Chat GUI (FastAPI) - -A simple web chat interface is included. Start the server: +Start the server: ```bash uvicorn server:app --reload ``` -Then open [http://localhost:8000](http://localhost:8000) in your browser. The chat view uses the same RAG system: your messages are answered using the vector store and Ollama. Ensure your vector store is populated (e.g. by running the document-add steps in `local_rag.py` once) and that Ollama is running. +Open [http://localhost:8000](http://localhost:8000). The chat UI provides: + +- **Provider switch** – Toggle between Ollama and OpenAI without restart (OpenAI requires `OPENAI_API_KEY`) +- **Conversation history** – Multi-turn chat with context +- **Markdown** – Assistant replies rendered as markdown (headings, code, lists, links) + +Ensure the vector store is populated and at least one provider (Ollama or OpenAI) is configured. + +## API + +- `POST /api/chat` – `{ "message": "...", "history": [...], "llm_provider": "ollama"|"openai" }` +- `GET /api/providers` – `{ "ollama": true, "openai": true|false }` +- `GET /api/health` – Health and vectorstore status ## How it works -1. **Load documents** - PDFs or text files -2. **Split into chunks** - 1000 chars with 200 overlap -3. **Create embeddings** - Using sentence-transformers -4. **Store in FAISS** - Fast similarity search -5. **Query** - Retrieve relevant chunks and generate answer with Ollama - +1. **Load documents** – PDFs or text via PyPDFLoader / TextLoader +2. **Chunk** – RecursiveCharacterTextSplitter (2000 chars, 400 overlap) +3. **Embed** – sentence-transformers/all-MiniLM-L6-v2 +4. **Store** – FAISS vector store (similarity search with scores) +5. **Query** – Retrieve chunks, optionally rephrase with conversation history, generate answer with selected LLM diff --git a/server.py b/server.py index 92d9556..838799b 100644 --- a/server.py +++ b/server.py @@ -2,6 +2,7 @@ FastAPI server for Local RAG with chat GUI. Run with: uvicorn server:app --reload """ +import os from pathlib import Path from fastapi import FastAPI, HTTPException @@ -10,18 +11,28 @@ from pydantic import BaseModel from local_rag import LocalRAG -# LLM provider: "ollama" or "openai" -LLM_PROVIDER = "openai" OLLAMA_MODEL = "gpt-oss:20b" OPENAI_MODEL = "gpt-5.2" - VECTORSTORE_PATH = "./vectorstore" -rag = LocalRAG( + +# Dual RAG instances for on-the-fly provider switching +rag_ollama = LocalRAG( vectorstore_path=VECTORSTORE_PATH, - llm_provider=LLM_PROVIDER, + llm_provider="ollama", ollama_model=OLLAMA_MODEL, openai_model=OPENAI_MODEL, ) +rag_openai = None +if os.environ.get("OPENAI_API_KEY"): + try: + rag_openai = LocalRAG( + vectorstore_path=VECTORSTORE_PATH, + llm_provider="openai", + ollama_model=OLLAMA_MODEL, + openai_model=OPENAI_MODEL, + ) + except Exception as e: + print(f"OpenAI RAG not available: {e}") app = FastAPI(title="Local RAG Chat", version="1.0.0") @@ -34,6 +45,7 @@ class ChatMessage(BaseModel): class ChatRequest(BaseModel): message: str history: list[ChatMessage] = [] # previous turns for conversation context + llm_provider: str = "ollama" # "ollama" | "openai" class RetrievedChunk(BaseModel): @@ -58,11 +70,21 @@ def chat_view(): return HTMLResponse(content=html_path.read_text(encoding="utf-8")) +def _get_rag(provider: str): + """Return the RAG instance for the given provider. Fall back to Ollama if OpenAI unavailable.""" + if provider == "openai" and rag_openai is not None: + return rag_openai + return rag_ollama + + @app.post("/api/chat", response_model=ChatResponse) def chat(request: ChatRequest): """Handle a chat message and return the RAG answer.""" if not request.message or not request.message.strip(): return ChatResponse(answer="", error="Message cannot be empty") + if request.llm_provider == "openai" and rag_openai is None: + return ChatResponse(answer="", error="OpenAI not configured. Set OPENAI_API_KEY.") + rag = _get_rag(request.llm_provider) try: chat_history = [{"role": m.role, "content": m.content} for m in request.history] result = rag.query_with_history( @@ -81,7 +103,9 @@ def chat(request: ChatRequest): print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}") else: print(f"\n[RAG] Retrieved 0 chunks") - print(f"[RAG] LLM response:\n{answer}") + provider_label = "OpenAI" if request.llm_provider == "openai" else "Ollama" + model_name = OPENAI_MODEL if request.llm_provider == "openai" else OLLAMA_MODEL + print(f"[RAG] LLM response ({provider_label} / {model_name}):\n{answer}") return ChatResponse(answer=answer, retrieved=retrieved) except Exception as e: @@ -91,10 +115,16 @@ def chat(request: ChatRequest): @app.get("/api/health") def health(): """Health check and vector store status.""" - has_docs = rag.vectorstore is not None + has_docs = rag_ollama.vectorstore is not None return {"status": "ok", "vectorstore_loaded": has_docs} +@app.get("/api/providers") +def providers(): + """Return which LLM providers are available.""" + return {"ollama": True, "openai": rag_openai is not None} + + if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/templates/chat.html b/templates/chat.html index 16654e3..8e1d340 100644 --- a/templates/chat.html +++ b/templates/chat.html @@ -112,6 +112,24 @@ border-top: 1px solid #27272a; background: #18181b; } + #provider-row { + display: flex; + align-items: center; + gap: 0.5rem; + margin-bottom: 0.5rem; + font-size: 0.8rem; + color: #71717a; + } + #provider-row label { flex-shrink: 0; } + #provider { + padding: 0.25rem 0.5rem; + font: inherit; + font-size: 0.85rem; + color: #e4e4e7; + background: #27272a; + border: 1px solid #3f3f46; + border-radius: 6px; + } #input-row { display: flex; gap: 0.5rem; @@ -168,6 +186,13 @@
+
+ + +
@@ -178,8 +203,22 @@ const messagesEl = document.getElementById('messages'); const inputEl = document.getElementById('input'); const sendBtn = document.getElementById('send'); + const providerEl = document.getElementById('provider'); const chatHistory = []; + (async function initProviders() { + try { + const res = await fetch('/api/providers'); + const data = await res.json(); + if (!data.openai) { + const opt = providerEl.querySelector('option[value="openai"]'); + opt.disabled = true; + opt.textContent = 'OpenAI (not configured)'; + if (providerEl.value === 'openai') providerEl.value = 'ollama'; + } + } catch (_) {} + })(); + function appendMessage(role, text, isError = false) { text = text ?? ''; const div = document.createElement('div'); @@ -231,7 +270,11 @@ const res = await fetch('/api/chat', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ message: text, history: history }) + body: JSON.stringify({ + message: text, + history: history, + llm_provider: providerEl.value + }) }); const data = await res.json(); setLoading(false);