""" FastAPI server for Local RAG with chat GUI. Run with: uvicorn server:app --reload """ from pathlib import Path from fastapi import FastAPI, HTTPException from fastapi.responses import HTMLResponse from pydantic import BaseModel from local_rag import LocalRAG # Initialize RAG once at startup VECTORSTORE_PATH = "./vectorstore" OLLAMA_MODEL = "mistral:7b" rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL) app = FastAPI(title="Local RAG Chat", version="1.0.0") class ChatMessage(BaseModel): role: str # "user" | "assistant" content: str class ChatRequest(BaseModel): message: str history: list[ChatMessage] = [] # previous turns for conversation context class RetrievedChunk(BaseModel): content: str source: str page: int | None class ChatResponse(BaseModel): answer: str error: str | None = None retrieved: list[RetrievedChunk] | None = None @app.get("/", response_class=HTMLResponse) def chat_view(): """Serve the chat GUI.""" html_path = Path(__file__).parent / "templates" / "chat.html" if not html_path.exists(): raise HTTPException(status_code=500, detail="Chat template not found") return HTMLResponse(content=html_path.read_text(encoding="utf-8")) @app.post("/api/chat", response_model=ChatResponse) def chat(request: ChatRequest): """Handle a chat message and return the RAG answer.""" if not request.message or not request.message.strip(): return ChatResponse(answer="", error="Message cannot be empty") try: chat_history = [{"role": m.role, "content": m.content} for m in request.history] result = rag.query_with_history( request.message.strip(), chat_history=chat_history, ) answer = result["answer"] retrieved = result.get("retrieved", []) # Server-side console trace: log retrieved chunks before LLM answer if retrieved: print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s) for query: {request.message[:80]!r}") for i, chunk in enumerate(retrieved): content = chunk.get("content", "") preview = (content[:1000] + "...") if len(content) > 1000 else content print(f" [{i + 1}] source={chunk.get('source', '')} page={chunk.get('page')} | {preview!r}") else: print(f"\n[RAG] Retrieved 0 chunks for query: {request.message[:80]!r}") return ChatResponse(answer=answer, retrieved=retrieved) except Exception as e: return ChatResponse(answer="", error=str(e)) @app.get("/api/health") def health(): """Health check and vector store status.""" has_docs = rag.vectorstore is not None return {"status": "ok", "vectorstore_loaded": has_docs} if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000)