# localRAG/local_rag.py
# 2026-01-26 19:49:02 +01:00
#
# 156 lines
# 5.1 KiB
# Python

"""
Local RAG setup with LangChain, Ollama, and FAISS
Minimal dependencies, simple code
"""
import os
from pathlib import Path
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_ollama import ChatOllama
class LocalRAG:
    """Local retrieval-augmented generation pipeline.

    Combines a HuggingFace sentence-embedding model, a FAISS vector store
    persisted under ``vectorstore_path``, and a chat model served by Ollama.
    Progress is reported with ``print``.
    """

    def __init__(
        self,
        vectorstore_path="./vectorstore",
        ollama_model="llama2",
        *,
        embedding_model="sentence-transformers/all-MiniLM-L6-v2",
        base_url="http://localhost:11434",
        chunk_size=1000,
        chunk_overlap=200,
    ):
        """Initialize the local RAG system.

        Args:
            vectorstore_path: Directory where the FAISS index is loaded from
                and saved to.
            ollama_model: Name of the Ollama model used to generate answers.
            embedding_model: HuggingFace model name used for embeddings.
            base_url: URL of the Ollama server.
            chunk_size: ``chunk_size`` passed to the text splitter.
            chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        """
        self.vectorstore_path = vectorstore_path
        self.ollama_model = ollama_model

        # Embeddings
        print("Loading embeddings model...")
        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

        # Text splitter
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

        # Ollama LLM
        print(f"Connecting to Ollama (model: {ollama_model})...")
        self.llm = ChatOllama(model=ollama_model, base_url=base_url)

        # Vector store (load if exists, otherwise None)
        self.vectorstore = None
        self._load_vectorstore()

    def _load_vectorstore(self):
        """Load the persisted vector store if an index file exists.

        Leaves ``self.vectorstore`` untouched when no ``index.faiss`` file is
        present, and resets it to ``None`` when loading fails.
        """
        index_file = os.path.join(self.vectorstore_path, "index.faiss")
        if not os.path.exists(index_file):
            return

        try:
            # SECURITY: allow_dangerous_deserialization=True makes FAISS
            # unpickle the stored docstore. Only point vectorstore_path at
            # directories written by this application / a trusted source.
            self.vectorstore = FAISS.load_local(
                self.vectorstore_path,
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
        except Exception as e:
            # Best effort: a broken index must not prevent start-up.
            print(f"Could not load vector store: {e}")
            self.vectorstore = None
        else:
            print(f"Loaded existing vector store from {self.vectorstore_path}")

    @staticmethod
    def _make_loader(path):
        """Return a document loader for *path*, or ``None`` if unsupported.

        ``.pdf`` files get a ``PyPDFLoader``; ``.txt`` and ``.md`` files get a
        ``TextLoader``. The suffix comparison is case-insensitive.
        """
        suffix = path.suffix.lower()
        if suffix == ".pdf":
            return PyPDFLoader(str(path))
        if suffix in (".txt", ".md"):
            return TextLoader(str(path))
        return None

    def add_documents(self, file_paths):
        """Add documents to the vector store and persist it.

        Missing files and unsupported file types are skipped with a warning.
        When nothing could be loaded, the store is neither modified nor saved.

        Args:
            file_paths: A list of file paths (``str`` or path-like). A single
                path is also accepted and treated as a one-element list.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(file_paths, (str, os.PathLike)):
            file_paths = [file_paths]

        print(f"\nLoading {len(file_paths)} document(s)...")
        all_docs = []
        for file_path in file_paths:
            path = Path(file_path)
            if not path.exists():
                print(f"Warning: {file_path} not found, skipping")
                continue

            loader = self._make_loader(path)
            if loader is None:
                print(f"Warning: Unsupported file type {path.suffix}, skipping")
                continue

            docs = loader.load()
            chunks = self.text_splitter.split_documents(docs)
            all_docs.extend(chunks)
            print(f"  - {path.name}: {len(chunks)} chunks")

        if not all_docs:
            print("No documents loaded!")
            return

        # Create or update vector store
        print(f"\nCreating embeddings for {len(all_docs)} chunks...")
        new_store = FAISS.from_documents(all_docs, self.embeddings)
        if self.vectorstore is None:
            self.vectorstore = new_store
        else:
            self.vectorstore.merge_from(new_store)

        # Save
        os.makedirs(self.vectorstore_path, exist_ok=True)
        self.vectorstore.save_local(self.vectorstore_path)
        print(f"Vector store saved to {self.vectorstore_path}")

    def query(self, question, k=4):
        """Answer *question* using the indexed documents as context.

        Args:
            question: The question to ask.
            k: Number of chunks requested from the similarity search.

        Returns:
            The model's answer as a string, or an error message string when
            no vector store is available.
        """
        if self.vectorstore is None:
            return "Error: No documents loaded. Please add documents first."

        print("\nSearching for relevant documents...")
        docs = self.vectorstore.similarity_search(question, k=k)
        print(f"Found {len(docs)} relevant documents")

        # Combine context from documents
        context = "\n\n".join(doc.page_content for doc in docs)

        # The prompt is assembled from explicit newline-terminated pieces so
        # its text does not depend on how this method is indented.
        prompt = (
            "Use the following context to answer the question.\n"
            "If you don't know the answer, say that you don't know "
            "instead of making up an answer.\n"
            "Context:\n"
            f"{context}\n"
            f"Question: {question}\n"
            "Answer:"
        )

        print("Generating answer with Ollama...")
        response = self.llm.invoke(prompt)
        # Fall back to str() for responses that carry no `content` attribute.
        answer = response.content if hasattr(response, "content") else str(response)
        return answer
def main():
    """Run a small demonstration of the local RAG pipeline.

    Prints a banner, builds a ``LocalRAG`` that uses the ``llama2`` Ollama
    model, and indexes two sample files. The query example is left
    commented out.
    """
    print("=" * 60)
    print("Local RAG with LangChain, Ollama, and FAISS")
    print("=" * 60)

    # Initialize
    rag = LocalRAG(ollama_model="llama2")

    # Add documents (replace these sample paths with your own files)
    rag.add_documents([
        "diverses/local_rag/test1.pdf",
        "diverses/local_rag/test2.txt",
    ])

    # Query (uncomment to ask a question once documents are indexed)
    # question = "What is this document about?"
    # answer = rag.query(question)
    # print(f"\nQuestion: {question}")
    # print(f"Answer: {answer}")

    print("\nSetup complete! Uncomment the code above to add documents and query.")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()