updated parameters and retrieval prompt to massively improve query quality
This commit is contained in:
parent
9abda1d867
commit
08610e0b64
19
local_rag.py
19
local_rag.py
@ -203,12 +203,16 @@ class LocalRAG:
|
|||||||
|
|
||||||
history_str = self._format_history(chat_history)
|
history_str = self._format_history(chat_history)
|
||||||
search_query = question
|
search_query = question
|
||||||
|
rag_query_instruction = (
|
||||||
|
"Do not return a list of references but prioritize meaningful text from abstracts, results and discussion sections."
|
||||||
|
)
|
||||||
|
|
||||||
print(f"[RAG] User question: {question!r}")
|
print(f"[RAG] User question: {question!r}")
|
||||||
|
|
||||||
# 1) If we have history, rephrase the question into a standalone query for better retrieval
|
# 1) If we have history, rephrase the question into a standalone query for better retrieval
|
||||||
if history_str.strip():
|
if history_str.strip():
|
||||||
rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking. Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged.
|
rephrase_prompt = f"""Given this chat history and the latest user question, write a single standalone question that captures what the user is asking.
|
||||||
|
Do not answer it; only output the standalone question. If the latest question is already clear on its own, output it unchanged.
|
||||||
|
|
||||||
Chat history:
|
Chat history:
|
||||||
{history_str}
|
{history_str}
|
||||||
@ -220,10 +224,12 @@ Standalone question:"""
|
|||||||
search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question
|
search_query = (rephrase_response.content if hasattr(rephrase_response, "content") else str(rephrase_response)).strip() or question
|
||||||
print(f"[RAG] Standalone search query (rephrased): {search_query!r}")
|
print(f"[RAG] Standalone search query (rephrased): {search_query!r}")
|
||||||
|
|
||||||
print(f"[RAG] Query sent to vector store: {search_query!r}")
|
retrieval_query = f"{search_query}\n\n{rag_query_instruction}"
|
||||||
|
print(f"[RAG] Search query: {search_query!r}")
|
||||||
|
print(f"[RAG] Retrieval query sent to vector store: {retrieval_query!r}")
|
||||||
|
|
||||||
# 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar)
|
# 2) Retrieve documents with scores (FAISS: L2 distance, lower = more similar)
|
||||||
docs_with_scores = self.vectorstore.similarity_search_with_score(search_query, k=k)
|
docs_with_scores = self.vectorstore.similarity_search_with_score(retrieval_query, k=k)
|
||||||
docs = [doc for doc, _ in docs_with_scores]
|
docs = [doc for doc, _ in docs_with_scores]
|
||||||
retrieved = self._docs_scores_to_retrieved(docs_with_scores)
|
retrieved = self._docs_scores_to_retrieved(docs_with_scores)
|
||||||
if docs_with_scores:
|
if docs_with_scores:
|
||||||
@ -233,7 +239,8 @@ Standalone question:"""
|
|||||||
|
|
||||||
# 3) Answer using conversation history + retrieved context
|
# 3) Answer using conversation history + retrieved context
|
||||||
history_block = f"Chat history:\n{history_str}\n\n" if history_str else ""
|
history_block = f"Chat history:\n{history_str}\n\n" if history_str else ""
|
||||||
answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question. If you don't know the answer, say so. Keep the conversation coherent.
|
answer_prompt = f"""You are an assistant for question-answering. Use the chat history (if any) and the retrieved context below to answer the current question.
|
||||||
|
If you don't know the answer, say so. Keep the conversation coherent.
|
||||||
|
|
||||||
{history_block}Relevant context from documents:
|
{history_block}Relevant context from documents:
|
||||||
|
|
||||||
@ -251,7 +258,7 @@ Answer:"""
|
|||||||
def main():
|
def main():
|
||||||
"""Example usage"""
|
"""Example usage"""
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
print("Local RAG with LangChain, Ollama, and FAISS")
|
print("Local RAG with LangChain, Ollama/OpenAI, and FAISS")
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
|
|
||||||
# Initialize
|
# Initialize
|
||||||
@ -268,7 +275,7 @@ def main():
|
|||||||
rag.list_documents()
|
rag.list_documents()
|
||||||
|
|
||||||
# Query
|
# Query
|
||||||
question = "What do the documents say about modality for perceived message perception?"
|
question = "What do you know about modality for perceived message perception?"
|
||||||
result = rag.query(question)
|
result = rag.query(question)
|
||||||
print(f"\nQuestion: {question}")
|
print(f"\nQuestion: {question}")
|
||||||
print(f"Answer: {result['answer']}")
|
print(f"Answer: {result['answer']}")
|
||||||
|
|||||||
@ -99,7 +99,7 @@ def chat(request: ChatRequest):
|
|||||||
print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)")
|
print(f"\n[RAG] Retrieved {len(retrieved)} chunk(s)")
|
||||||
for i, chunk in enumerate(retrieved):
|
for i, chunk in enumerate(retrieved):
|
||||||
content = chunk.get("content", "")
|
content = chunk.get("content", "")
|
||||||
preview = (content[:80] + "...") if len(content) > 80 else content
|
preview = (content[:150] + "...") if len(content) > 150 else content
|
||||||
print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
|
print(f" [{i + 1}] {chunk.get('source', '')} p.{chunk.get('page', '?')} s={chunk.get('score')} | {preview!r}")
|
||||||
else:
|
else:
|
||||||
print(f"\n[RAG] Retrieved 0 chunks")
|
print(f"\n[RAG] Retrieved 0 chunks")
|
||||||
|
|||||||
@ -9,13 +9,15 @@
|
|||||||
body {
|
body {
|
||||||
font-family: "SF Mono", "Consolas", "Monaco", monospace;
|
font-family: "SF Mono", "Consolas", "Monaco", monospace;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
min-height: 100vh;
|
height: 100vh;
|
||||||
|
overflow: hidden;
|
||||||
background: #0f0f12;
|
background: #0f0f12;
|
||||||
color: #e4e4e7;
|
color: #e4e4e7;
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
}
|
}
|
||||||
header {
|
header {
|
||||||
|
flex-shrink: 0;
|
||||||
padding: 1rem 1.5rem;
|
padding: 1rem 1.5rem;
|
||||||
border-bottom: 1px solid #27272a;
|
border-bottom: 1px solid #27272a;
|
||||||
background: #18181b;
|
background: #18181b;
|
||||||
@ -33,6 +35,7 @@
|
|||||||
}
|
}
|
||||||
#messages {
|
#messages {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
overflow-y: auto;
|
overflow-y: auto;
|
||||||
padding: 1.5rem;
|
padding: 1.5rem;
|
||||||
display: flex;
|
display: flex;
|
||||||
@ -108,6 +111,7 @@
|
|||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
#input-area {
|
#input-area {
|
||||||
|
flex-shrink: 0;
|
||||||
padding: 1rem 1.5rem 1.5rem;
|
padding: 1rem 1.5rem 1.5rem;
|
||||||
border-top: 1px solid #27272a;
|
border-top: 1px solid #27272a;
|
||||||
background: #18181b;
|
background: #18181b;
|
||||||
@ -138,6 +142,7 @@
|
|||||||
#input {
|
#input {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
min-height: 44px;
|
min-height: 44px;
|
||||||
|
height: 44px;
|
||||||
max-height: 160px;
|
max-height: 160px;
|
||||||
padding: 0.6rem 1rem;
|
padding: 0.6rem 1rem;
|
||||||
font: inherit;
|
font: inherit;
|
||||||
@ -152,7 +157,8 @@
|
|||||||
#input::placeholder { color: #71717a; }
|
#input::placeholder { color: #71717a; }
|
||||||
#input:focus { border-color: #52525b; }
|
#input:focus { border-color: #52525b; }
|
||||||
#send {
|
#send {
|
||||||
padding: 0.6rem 1.2rem;
|
height: 44px;
|
||||||
|
padding: 0 1.2rem;
|
||||||
font: inherit;
|
font: inherit;
|
||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
@ -179,8 +185,8 @@
|
|||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<header>
|
<header>
|
||||||
<h1>Local RAG Chat</h1>
|
<h1>RAG Chat</h1>
|
||||||
<p>Ask questions about your documents. Answers are generated from the vector store + Ollama / OpenAI.</p>
|
<p>Ask questions about your documents. Answers are generated from a local vector store + Ollama / OpenAI.</p>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<div id="messages"></div>
|
<div id="messages"></div>
|
||||||
@ -219,11 +225,11 @@
|
|||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
})();
|
})();
|
||||||
|
|
||||||
function appendMessage(role, text, isError = false) {
|
function appendMessage(role, text, isError = false, llmProvider = null) {
|
||||||
text = text ?? '';
|
text = text ?? '';
|
||||||
const div = document.createElement('div');
|
const div = document.createElement('div');
|
||||||
div.className = 'msg ' + (isError ? 'error' : role);
|
div.className = 'msg ' + (isError ? 'error' : role);
|
||||||
const label = role === 'user' ? 'You' : 'RAG';
|
let label = role === 'user' ? 'You' : (llmProvider === 'openai' ? 'ChatGPT + RAG' : 'Ollama + RAG');
|
||||||
let body;
|
let body;
|
||||||
if (role === 'assistant' && !isError) {
|
if (role === 'assistant' && !isError) {
|
||||||
const rawHtml = marked.parse(text, { gfm: true, breaks: true });
|
const rawHtml = marked.parse(text, { gfm: true, breaks: true });
|
||||||
@ -288,9 +294,9 @@
|
|||||||
console.groupEnd();
|
console.groupEnd();
|
||||||
}
|
}
|
||||||
if (data.error) {
|
if (data.error) {
|
||||||
appendMessage('assistant', data.error, true);
|
appendMessage('assistant', data.error, true, providerEl.value);
|
||||||
} else {
|
} else {
|
||||||
appendMessage('assistant', (data.answer || '(No response)').trim());
|
appendMessage('assistant', (data.answer || '(No response)').trim(), false, providerEl.value);
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setLoading(false);
|
setLoading(false);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user