added OpenAI support and markdown in the chat window

Philipp Mock 2026-02-11 15:58:34 +01:00
parent 4879439f27
commit 650f73a74b
4 changed files with 98 additions and 25 deletions

View File

@@ -1,5 +1,5 @@
 """
-Local RAG setup with LangChain, Ollama, and FAISS
+Local RAG setup with LangChain, Ollama/OpenAI, and FAISS
 Minimal dependencies, simple code
 """
 import os
@@ -9,34 +9,51 @@ from langchain_community.document_loaders import PyPDFLoader, TextLoader
 from langchain_community.vectorstores import FAISS
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
 from langchain_text_splitters import RecursiveCharacterTextSplitter


 class LocalRAG:
-    def __init__(self, vectorstore_path="./vectorstore", ollama_model="mistral:7b"):
-        """Initialize local RAG system"""
+    def __init__(
+        self,
+        vectorstore_path="./vectorstore",
+        llm_provider="ollama",
+        ollama_model="gpt-oss:20b",
+        openai_model="gpt-5.2",
+        ollama_base_url="http://localhost:11434",
+    ):
+        """Initialize local RAG system. llm_provider: 'ollama' or 'openai'."""
         self.vectorstore_path = vectorstore_path
-        self.ollama_model = ollama_model
+        self.llm_provider = llm_provider

         # Embeddings
         print("Loading embeddings model...")
         self.embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )

         # Text splitter
         self.text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=2000,
             chunk_overlap=400
         )

-        # Ollama LLM
-        print(f"Connecting to Ollama (model: {ollama_model})...")
-        self.llm = ChatOllama(
-            model=ollama_model,
-            base_url="http://localhost:11434"
-        )
+        # LLM (Ollama or OpenAI)
+        if llm_provider == "openai":
+            api_key = os.environ.get("OPENAI_API_KEY")
+            if not api_key:
+                raise ValueError(
+                    "OPENAI_API_KEY environment variable is required when llm_provider='openai'"
+                )
+            print(f"Using OpenAI (model: {openai_model})...")
+            self.llm = ChatOpenAI(model=openai_model, api_key=api_key)
+        else:
+            print(f"Using Ollama (model: {ollama_model})...")
+            self.llm = ChatOllama(
+                model=ollama_model,
+                base_url=ollama_base_url
+            )

         # Vector store (load if exists, otherwise None)
         self.vectorstore = None
         self._load_vectorstore()
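
A minimal usage sketch of the new constructor (model names taken from the defaults above; the OpenAI key value is a placeholder):

    import os
    from local_rag import LocalRAG

    # Local Ollama (the default); no API key required.
    rag = LocalRAG(llm_provider="ollama", ollama_model="gpt-oss:20b")

    # OpenAI: the constructor raises ValueError if OPENAI_API_KEY is unset.
    os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder, set a real key
    rag = LocalRAG(llm_provider="openai", openai_model="gpt-5.2")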

View File

@@ -1,6 +1,7 @@
 langchain
 langchain-community
 langchain-ollama
+langchain-openai
 langchain-text-splitters
 langchain-huggingface
 faiss-cpu
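
Note that a fresh environment now also needs the langchain-openai package; reinstalling with pip install -r requirements.txt picks it up.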

View File

@@ -10,10 +10,18 @@ from pydantic import BaseModel
 from local_rag import LocalRAG

-# Initialize RAG once at startup
-VECTORSTORE_PATH = "./vectorstore"
+# LLM provider: "ollama" or "openai"
+LLM_PROVIDER = "openai"
 OLLAMA_MODEL = "gpt-oss:20b"
-rag = LocalRAG(vectorstore_path=VECTORSTORE_PATH, ollama_model=OLLAMA_MODEL)
+OPENAI_MODEL = "gpt-5.2"
+VECTORSTORE_PATH = "./vectorstore"
+rag = LocalRAG(
+    vectorstore_path=VECTORSTORE_PATH,
+    llm_provider=LLM_PROVIDER,
+    ollama_model=OLLAMA_MODEL,
+    openai_model=OPENAI_MODEL,
+)

 app = FastAPI(title="Local RAG Chat", version="1.0.0")
@@ -64,15 +72,16 @@ def chat(request: ChatRequest):
         answer = result["answer"]
         retrieved = result.get("retrieved", [])

-        # Server-side console trace: log retrieved chunks before LLM answer
+        # Server-side console trace: shorter chunk logs + raw LLM response
         if retrieved:
-            print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s) for query: {request.message[:80]!r}")
+            print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)")
             for i, chunk in enumerate(retrieved):
                 content = chunk.get("content", "")
-                preview = (content[:1000] + "...") if len(content) > 1000 else content
-                print(f"  [{i + 1}] source={chunk.get('source', '')} page={chunk.get('page')} | {preview!r}")
+                preview = (content[:80] + "...") if len(content) > 80 else content
+                print(f"  [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
         else:
-            print(f"\n[RAG] Retrieved 0 chunks for query: {request.message[:80]!r}")
+            print(f"\n[RAG] Retrieved 0 chunks")
+        print(f"[RAG] LLM response:\n{answer}")

         return ChatResponse(answer=answer, retrieved=retrieved)
     except Exception as e:
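
Since rag is constructed at import time with LLM_PROVIDER = "openai", the server now fails fast at startup when OPENAI_API_KEY is unset. A sketch of calling the chat handler from a client; the /chat route path and port are assumptions, since the route decorator is outside this diff:

    import requests

    resp = requests.post(
        "http://localhost:8000/chat",  # path assumed; decorator not shown in diff
        json={"message": "What do my documents say about FAISS?"},
    )
    resp.raise_for_status()
    data = resp.json()
    print(data["answer"])  # same text the server logs as "[RAG] LLM response"
    for chunk in data["retrieved"]:
        print(chunk.get("source"), chunk.get("page"), chunk.get("score"))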

View File

@@ -45,9 +45,45 @@
       border-radius: 8px;
       font-size: 0.9rem;
       line-height: 1.5;
-      white-space: pre-wrap;
       word-break: break-word;
     }
+    .msg.user, .msg.error {
+      white-space: pre-wrap;
+    }
+    .msg.assistant .markdown-body {
+      white-space: normal;
+    }
+    .msg.assistant .markdown-body h1, .msg.assistant .markdown-body h2, .msg.assistant .markdown-body h3 {
+      margin: 0.75em 0 0.35em;
+      font-size: 1em;
+      font-weight: 600;
+    }
+    .msg.assistant .markdown-body h1:first-child, .msg.assistant .markdown-body h2:first-child, .msg.assistant .markdown-body h3:first-child { margin-top: 0; }
+    .msg.assistant .markdown-body p { margin: 0.5em 0; }
+    .msg.assistant .markdown-body p:first-child { margin-top: 0; }
+    .msg.assistant .markdown-body p:last-child { margin-bottom: 0; }
+    .msg.assistant .markdown-body pre {
+      margin: 0.5em 0;
+      padding: 0.6rem;
+      background: #18181b;
+      border-radius: 6px;
+      overflow-x: auto;
+      font-size: 0.85em;
+    }
+    .msg.assistant .markdown-body code {
+      background: #18181b;
+      padding: 0.15em 0.35em;
+      border-radius: 4px;
+      font-size: 0.9em;
+    }
+    .msg.assistant .markdown-body pre code {
+      padding: 0;
+      background: none;
+    }
+    .msg.assistant .markdown-body ul, .msg.assistant .markdown-body ol { margin: 0.5em 0; padding-left: 1.4em; }
+    .msg.assistant .markdown-body li { margin: 0.25em 0; }
+    .msg.assistant .markdown-body a { color: #60a5fa; text-decoration: none; }
+    .msg.assistant .markdown-body a:hover { text-decoration: underline; }
     .msg.user {
       align-self: flex-end;
       background: #3f3f46;
@@ -120,11 +156,13 @@
       color: #71717a;
     }
   </style>
+  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+  <script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
 </head>
 <body>
   <header>
     <h1>Local RAG Chat</h1>
-    <p>Ask questions about your documents. Answers are generated from the vector store + Ollama.</p>
+    <p>Ask questions about your documents. Answers are generated from the vector store + Ollama / OpenAI.</p>
   </header>

   <div id="messages"></div>
@@ -143,10 +181,18 @@
     const chatHistory = [];

     function appendMessage(role, text, isError = false) {
+      text = text ?? '';
       const div = document.createElement('div');
       div.className = 'msg ' + (isError ? 'error' : role);
       const label = role === 'user' ? 'You' : 'RAG';
-      div.innerHTML = '<span class="label">' + label + '</span>' + escapeHtml(text);
+      let body;
+      if (role === 'assistant' && !isError) {
+        const rawHtml = marked.parse(text, { gfm: true, breaks: true });
+        body = '<div class="markdown-body">' + DOMPurify.sanitize(rawHtml) + '</div>';
+      } else {
+        body = escapeHtml(text);
+      }
+      div.innerHTML = '<span class="label">' + label + '</span>' + body;
       messagesEl.appendChild(div);
       messagesEl.scrollTop = messagesEl.scrollHeight;
       chatHistory.push({ role: role, content: text });
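
The new render path is parse-then-sanitize: marked converts assistant markdown to HTML, and DOMPurify strips unsafe markup before it reaches innerHTML. A condensed sketch of just that step (same calls as in appendMessage above; target stands for any hypothetical container element):

    // Parse markdown first, then sanitize before touching the DOM.
    const rawHtml = marked.parse('**bold** and a [link](https://example.com)', { gfm: true, breaks: true });
    const safeHtml = DOMPurify.sanitize(rawHtml);  // drops <script>, inline event handlers, etc.
    target.innerHTML = safeHtml;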