""" FastAPI server for Local RAG with chat GUI. Run with: uvicorn server:app --reload """ from pathlib import Path from fastapi import FastAPI, HTTPException from fastapi.responses import HTMLResponse from pydantic import BaseModel from local_rag import LocalRAG # LLM provider: "ollama" or "openai" LLM_PROVIDER = "openai" OLLAMA_MODEL = "gpt-oss:20b" OPENAI_MODEL = "gpt-5.2" VECTORSTORE_PATH = "./vectorstore" rag = LocalRAG( vectorstore_path=VECTORSTORE_PATH, llm_provider=LLM_PROVIDER, ollama_model=OLLAMA_MODEL, openai_model=OPENAI_MODEL, ) app = FastAPI(title="Local RAG Chat", version="1.0.0") class ChatMessage(BaseModel): role: str # "user" | "assistant" content: str class ChatRequest(BaseModel): message: str history: list[ChatMessage] = [] # previous turns for conversation context class RetrievedChunk(BaseModel): content: str source: str page: int | None score: float | None = None # L2 distance from FAISS (lower = more similar) class ChatResponse(BaseModel): answer: str error: str | None = None retrieved: list[RetrievedChunk] | None = None @app.get("/", response_class=HTMLResponse) def chat_view(): """Serve the chat GUI.""" html_path = Path(__file__).parent / "templates" / "chat.html" if not html_path.exists(): raise HTTPException(status_code=500, detail="Chat template not found") return HTMLResponse(content=html_path.read_text(encoding="utf-8")) @app.post("/api/chat", response_model=ChatResponse) def chat(request: ChatRequest): """Handle a chat message and return the RAG answer.""" if not request.message or not request.message.strip(): return ChatResponse(answer="", error="Message cannot be empty") try: chat_history = [{"role": m.role, "content": m.content} for m in request.history] result = rag.query_with_history( request.message.strip(), chat_history=chat_history, ) answer = result["answer"] retrieved = result.get("retrieved", []) # Server-side console trace: shorter chunk logs + raw LLM response if retrieved: print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)") for i, chunk in enumerate(retrieved): content = chunk.get("content", "") preview = (content[:80] + "...") if len(content) > 80 else content print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}") else: print(f"\n[RAG] Retrieved 0 chunks") print(f"[RAG] LLM response:\n{answer}") return ChatResponse(answer=answer, retrieved=retrieved) except Exception as e: return ChatResponse(answer="", error=str(e)) @app.get("/api/health") def health(): """Health check and vector store status.""" has_docs = rag.vectorstore is not None return {"status": "ok", "vectorstore_loaded": has_docs} if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000)