Added OpenAI support and Markdown rendering in the chat window
This commit is contained in:
parent
4879439f27
commit
650f73a74b
37
local_rag.py
37
local_rag.py
@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Local RAG setup with LangChain, Ollama, and FAISS
|
Local RAG setup with LangChain, Ollama/OpenAI, and FAISS
|
||||||
Minimal dependencies, simple code
|
Minimal dependencies, simple code
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
@ -9,14 +9,22 @@ from langchain_community.document_loaders import PyPDFLoader, TextLoader
|
|||||||
from langchain_community.vectorstores import FAISS
|
from langchain_community.vectorstores import FAISS
|
||||||
from langchain_huggingface import HuggingFaceEmbeddings
|
from langchain_huggingface import HuggingFaceEmbeddings
|
||||||
from langchain_ollama import ChatOllama
|
from langchain_ollama import ChatOllama
|
||||||
|
from langchain_openai import ChatOpenAI
|
||||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||||
|
|
||||||
|
|
||||||
class LocalRAG:
|
class LocalRAG:
|
||||||
def __init__(self, vectorstore_path="./vectorstore", ollama_model="mistral:7b"):
|
def __init__(
|
||||||
"""Initialize local RAG system"""
|
self,
|
||||||
|
vectorstore_path="./vectorstore",
|
||||||
|
llm_provider="ollama",
|
||||||
|
ollama_model="gpt-oss:20b",
|
||||||
|
openai_model="gpt-5.2",
|
||||||
|
ollama_base_url="http://localhost:11434",
|
||||||
|
):
|
||||||
|
"""Initialize local RAG system. llm_provider: 'ollama' or 'openai'."""
|
||||||
self.vectorstore_path = vectorstore_path
|
self.vectorstore_path = vectorstore_path
|
||||||
self.ollama_model = ollama_model
|
self.llm_provider = llm_provider
|
||||||
|
|
||||||
# Embeddings
|
# Embeddings
|
||||||
print("Loading embeddings model...")
|
print("Loading embeddings model...")
|
||||||
@ -30,12 +38,21 @@ class LocalRAG:
|
|||||||
chunk_overlap=400
|
chunk_overlap=400
|
||||||
)
|
)
|
||||||
|
|
||||||
# Ollama LLM
|
# LLM (Ollama or OpenAI)
|
||||||
print(f"Connecting to Ollama (model: {ollama_model})...")
|
if llm_provider == "openai":
|
||||||
self.llm = ChatOllama(
|
api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
model=ollama_model,
|
if not api_key:
|
||||||
base_url="http://localhost:11434"
|
raise ValueError(
|
||||||
)
|
"OPENAI_API_KEY environment variable is required when llm_provider='openai'"
|
||||||
|
)
|
||||||
|
print(f"Using OpenAI (model: {openai_model})...")
|
||||||
|
self.llm = ChatOpenAI(model=openai_model, api_key=api_key)
|
||||||
|
else:
|
||||||
|
print(f"Using Ollama (model: {ollama_model})...")
|
||||||
|
self.llm = ChatOllama(
|
||||||
|
model=ollama_model,
|
||||||
|
base_url=ollama_base_url
|
||||||
|
)
|
||||||
|
|
||||||
# Vector store (load if exists, otherwise None)
|
# Vector store (load if exists, otherwise None)
|
||||||
self.vectorstore = None
|
self.vectorstore = None
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
langchain
|
langchain
|
||||||
langchain-community
|
langchain-community
|
||||||
langchain-ollama
|
langchain-ollama
|
||||||
|
langchain-openai
|
||||||
langchain-text-splitters
|
langchain-text-splitters
|
||||||
langchain-huggingface
|
langchain-huggingface
|
||||||
faiss-cpu
|
faiss-cpu
|
||||||
|
|||||||
25
server.py
25
server.py
@ -10,10 +10,18 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from local_rag import LocalRAG
|
from local_rag import LocalRAG
|
||||||
|
|
||||||
# Initialize RAG once at startup
|
# LLM provider: "ollama" or "openai"
|
||||||
VECTORSTORE_PATH = "./vectorstore"
|
LLM_PROVIDER = "openai"
|
||||||
OLLAMA_MODEL = "gpt-oss:20b"
|
OLLAMA_MODEL = "gpt-oss:20b"
|
||||||
rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL)
|
OPENAI_MODEL = "gpt-5.2"
|
||||||
|
|
||||||
|
VECTORSTORE_PATH = "./vectorstore"
|
||||||
|
rag = LocalRAG(
|
||||||
|
vectorstore_path=VECTORSTORE_PATH,
|
||||||
|
llm_provider=LLM_PROVIDER,
|
||||||
|
ollama_model=OLLAMA_MODEL,
|
||||||
|
openai_model=OPENAI_MODEL,
|
||||||
|
)
|
||||||
|
|
||||||
app = FastAPI(title="Local RAG Chat", version="1.0.0")
|
app = FastAPI(title="Local RAG Chat", version="1.0.0")
|
||||||
|
|
||||||
@ -64,15 +72,16 @@ def chat(request: ChatRequest):
|
|||||||
answer = result["answer"]
|
answer = result["answer"]
|
||||||
retrieved = result.get("retrieved", [])
|
retrieved = result.get("retrieved", [])
|
||||||
|
|
||||||
# Server-side console trace: log retrieved chunks before LLM answer
|
# Server-side console trace: shorter chunk logs + raw LLM response
|
||||||
if retrieved:
|
if retrieved:
|
||||||
print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s) for query: {request.message[:80]!r}")
|
print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)")
|
||||||
for i, chunk in enumerate(retrieved):
|
for i, chunk in enumerate(retrieved):
|
||||||
content = chunk.get("content", "")
|
content = chunk.get("content", "")
|
||||||
preview = (content[:1000] + "...") if len(content) > 1000 else content
|
preview = (content[:80] + "...") if len(content) > 80 else content
|
||||||
print(f" [{i + 1}] source={chunk.get('source', '')} page={chunk.get('page')} | {preview!r}")
|
print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
|
||||||
else:
|
else:
|
||||||
print(f"\n[RAG] Retrieved 0 chunks for query: {request.message[:80]!r}")
|
print(f"\n[RAG] Retrieved 0 chunks")
|
||||||
|
print(f"[RAG] LLM response:\n{answer}")
|
||||||
|
|
||||||
return ChatResponse(answer=answer, retrieved=retrieved)
|
return ChatResponse(answer=answer, retrieved=retrieved)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@ -45,9 +45,45 @@
|
|||||||
border-radius: 8px;
|
border-radius: 8px;
|
||||||
font-size: 0.9rem;
|
font-size: 0.9rem;
|
||||||
line-height: 1.5;
|
line-height: 1.5;
|
||||||
white-space: pre-wrap;
|
|
||||||
word-break: break-word;
|
word-break: break-word;
|
||||||
}
|
}
|
||||||
|
.msg.user, .msg.error {
|
||||||
|
white-space: pre-wrap;
|
||||||
|
}
|
||||||
|
.msg.assistant .markdown-body {
|
||||||
|
white-space: normal;
|
||||||
|
}
|
||||||
|
.msg.assistant .markdown-body h1, .msg.assistant .markdown-body h2, .msg.assistant .markdown-body h3 {
|
||||||
|
margin: 0.75em 0 0.35em;
|
||||||
|
font-size: 1em;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
.msg.assistant .markdown-body h1:first-child, .msg.assistant .markdown-body h2:first-child, .msg.assistant .markdown-body h3:first-child { margin-top: 0; }
|
||||||
|
.msg.assistant .markdown-body p { margin: 0.5em 0; }
|
||||||
|
.msg.assistant .markdown-body p:first-child { margin-top: 0; }
|
||||||
|
.msg.assistant .markdown-body p:last-child { margin-bottom: 0; }
|
||||||
|
.msg.assistant .markdown-body pre {
|
||||||
|
margin: 0.5em 0;
|
||||||
|
padding: 0.6rem;
|
||||||
|
background: #18181b;
|
||||||
|
border-radius: 6px;
|
||||||
|
overflow-x: auto;
|
||||||
|
font-size: 0.85em;
|
||||||
|
}
|
||||||
|
.msg.assistant .markdown-body code {
|
||||||
|
background: #18181b;
|
||||||
|
padding: 0.15em 0.35em;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
.msg.assistant .markdown-body pre code {
|
||||||
|
padding: 0;
|
||||||
|
background: none;
|
||||||
|
}
|
||||||
|
.msg.assistant .markdown-body ul, .msg.assistant .markdown-body ol { margin: 0.5em 0; padding-left: 1.4em; }
|
||||||
|
.msg.assistant .markdown-body li { margin: 0.25em 0; }
|
||||||
|
.msg.assistant .markdown-body a { color: #60a5fa; text-decoration: none; }
|
||||||
|
.msg.assistant .markdown-body a:hover { text-decoration: underline; }
|
||||||
.msg.user {
|
.msg.user {
|
||||||
align-self: flex-end;
|
align-self: flex-end;
|
||||||
background: #3f3f46;
|
background: #3f3f46;
|
||||||
@ -120,11 +156,13 @@
|
|||||||
color: #71717a;
|
color: #71717a;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<header>
|
<header>
|
||||||
<h1>Local RAG Chat</h1>
|
<h1>Local RAG Chat</h1>
|
||||||
<p>Ask questions about your documents. Answers are generated from the vector store + Ollama.</p>
|
<p>Ask questions about your documents. Answers are generated from the vector store + Ollama / OpenAI.</p>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<div id="messages"></div>
|
<div id="messages"></div>
|
||||||
@ -143,10 +181,18 @@
|
|||||||
const chatHistory = [];
|
const chatHistory = [];
|
||||||
|
|
||||||
function appendMessage(role, text, isError = false) {
|
function appendMessage(role, text, isError = false) {
|
||||||
|
text = text ?? '';
|
||||||
const div = document.createElement('div');
|
const div = document.createElement('div');
|
||||||
div.className = 'msg ' + (isError ? 'error' : role);
|
div.className = 'msg ' + (isError ? 'error' : role);
|
||||||
const label = role === 'user' ? 'You' : 'RAG';
|
const label = role === 'user' ? 'You' : 'RAG';
|
||||||
div.innerHTML = '<span class="label">' + label + '</span>' + escapeHtml(text);
|
let body;
|
||||||
|
if (role === 'assistant' && !isError) {
|
||||||
|
const rawHtml = marked.parse(text, { gfm: true, breaks: true });
|
||||||
|
body = '<div class="markdown-body">' + DOMPurify.sanitize(rawHtml) + '</div>';
|
||||||
|
} else {
|
||||||
|
body = escapeHtml(text);
|
||||||
|
}
|
||||||
|
div.innerHTML = '<span class="label">' + label + '</span>' + body;
|
||||||
messagesEl.appendChild(div);
|
messagesEl.appendChild(div);
|
||||||
messagesEl.scrollTop = messagesEl.scrollHeight;
|
messagesEl.scrollTop = messagesEl.scrollHeight;
|
||||||
chatHistory.push({ role: role, content: text });
|
chatHistory.push({ role: role, content: text });
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user