# localRAG/server.py
"""
FastAPI server for Local RAG with chat GUI.
Run with: uvicorn server:app --reload
"""
from pathlib import Path
from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from local_rag import LocalRAG
# ---- Configuration -------------------------------------------------------
# LLM provider: "ollama" or "openai"
LLM_PROVIDER = "openai"
# Model used when LLM_PROVIDER == "ollama"
OLLAMA_MODEL = "gpt-oss:20b"
# Model used when LLM_PROVIDER == "openai"
OPENAI_MODEL = "gpt-5.2"
# On-disk location of the FAISS vector store (relative to the working dir)
VECTORSTORE_PATH = "./vectorstore"

# Single module-level RAG pipeline shared by all request handlers.
# NOTE(review): constructed at import time — presumably loads the vector
# store eagerly; verify LocalRAG tolerates a missing store on first run.
rag = LocalRAG(
vectorstore_path=VECTORSTORE_PATH,
llm_provider=LLM_PROVIDER,
ollama_model=OLLAMA_MODEL,
openai_model=OPENAI_MODEL,
)

# ASGI application served by uvicorn (see __main__ guard below).
app = FastAPI(title="Local RAG Chat", version="1.0.0")
class ChatMessage(BaseModel):
    """A single prior turn of the conversation sent by the client."""

    role: str  # "user" | "assistant"
    content: str  # the turn's text
class ChatRequest(BaseModel):
    """Request body for POST /api/chat."""

    message: str  # the new user message to answer
    # NOTE: pydantic copies field defaults per instance, so the mutable
    # default list here is safe (unlike a plain-Python default argument).
    history: list[ChatMessage] = []  # previous turns for conversation context
class RetrievedChunk(BaseModel):
    """One retrieved document chunk echoed back to the client for display."""

    content: str  # chunk text
    source: str  # originating document (path or name)
    page: int | None  # page number when the source is paginated
    score: float | None = None  # L2 distance from FAISS (lower = more similar)
class ChatResponse(BaseModel):
    """Response body for POST /api/chat.

    Exactly one of ``answer``/``error`` carries content: on failure the
    handler returns an empty answer plus a human-readable ``error``.
    """

    answer: str  # the LLM's answer ("" when error is set)
    error: str | None = None  # error message, or None on success
    retrieved: list[RetrievedChunk] | None = None  # supporting chunks, if any
@app.get("/", response_class=HTMLResponse)
def chat_view():
"""Serve the chat GUI."""
html_path = Path(__file__).parent / "templates" / "chat.html"
if not html_path.exists():
raise HTTPException(status_code=500, detail="Chat template not found")
return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.post("/api/chat", response_model=ChatResponse)
def chat(request: ChatRequest):
"""Handle a chat message and return the RAG answer."""
if not request.message or not request.message.strip():
return ChatResponse(answer="", error="Message cannot be empty")
try:
chat_history = [{"role": m.role, "content": m.content} for m in request.history]
result = rag.query_with_history(
request.message.strip(),
chat_history=chat_history,
)
answer = result["answer"]
retrieved = result.get("retrieved", [])
# Server-side console trace: shorter chunk logs + raw LLM response
if retrieved:
print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)")
for i, chunk in enumerate(retrieved):
content = chunk.get("content", "")
preview = (content[:80] + "...") if len(content) > 80 else content
print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
else:
print(f"\n[RAG] Retrieved 0 chunks")
print(f"[RAG] LLM response:\n{answer}")
return ChatResponse(answer=answer, retrieved=retrieved)
except Exception as e:
return ChatResponse(answer="", error=str(e))
@app.get("/api/health")
def health():
"""Health check and vector store status."""
has_docs = rag.vectorstore is not None
return {"status": "ok", "vectorstore_loaded": has_docs}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)