#!/usr/bin/env python3
"""Add all PDFs under a folder to the RAG vector store. Run from project root."""
import shutil
from pathlib import Path
from typing import List

# Folder to scan for PDFs (recursively).
DATA_ROOT = Path("/Users/Philipp/Desktop/workspace/python/gpt_publikationen/data_vs")

# Must match server.py so the chat loads the same index.
VECTORSTORE_PATH = "./vectorstore"

# If True: delete the existing vector store folder, then index all PDFs from scratch.
# If False: load the existing index (if any) and merge new chunks from these PDFs into it.
CLEAR_VECTORSTORE_FIRST = True


def find_pdfs(root: Path) -> List[Path]:
    """Return every PDF file below *root* (searched recursively), sorted by path.

    The ``.pdf`` suffix is matched case-insensitively. Only regular files are
    returned, so a directory whose name happens to end in ``.pdf`` is skipped.
    A *root* that contains no matching files yields an empty list.
    """
    return sorted(
        p for p in Path(root).rglob("*") if p.suffix.lower() == ".pdf" and p.is_file()
    )


def main(
    data_root: Path = DATA_ROOT,
    vectorstore_path: str = VECTORSTORE_PATH,
    clear_first: bool = CLEAR_VECTORSTORE_FIRST,
) -> None:
    """Index all PDFs under *data_root* into the vector store at *vectorstore_path*.

    Args:
        data_root: Folder that is scanned recursively for PDF files.
        vectorstore_path: Folder holding the vector store.
        clear_first: If true, an existing *vectorstore_path* folder is deleted
            before indexing; otherwise it is left in place and handed to
            ``LocalRAG`` as is.

    Raises:
        SystemExit: If no PDF is found. This is checked *before* anything is
            deleted, so a wrong or empty *data_root* cannot wipe a working index.
    """
    pdfs = find_pdfs(data_root)
    print(f"Found {len(pdfs)} PDF(s) under {data_root}")
    if not pdfs:
        raise SystemExit("No PDFs found.")

    # Imported here rather than at module level so the cheap "no PDFs" check can
    # fail first, but still before the destructive step below: a missing or
    # broken local_rag module must not cost us the existing index either.
    from local_rag import LocalRAG

    if clear_first and Path(vectorstore_path).exists():
        shutil.rmtree(vectorstore_path)
        print(f"Cleared existing vector store: {vectorstore_path}")
    elif not clear_first:
        print(f"Appending to existing vector store (if any): {vectorstore_path}")

    rag = LocalRAG(vectorstore_path=vectorstore_path)
    rag.add_documents([str(p) for p in pdfs])
    print("Done.")


if __name__ == "__main__":
    main()