77 load_index_from_storage ,
88 Settings ,
99)
10- from llama_index .llms .ollama import Ollama
11- from llama_index .embeddings .huggingface import HuggingFaceEmbedding
10+ from llama_index .llms .openai import OpenAI
11+ from llama_index .embeddings .openai import OpenAIEmbedding
1212from llama_index .readers .file import PDFReader
1313
14+ from dotenv import load_dotenv
15+
16+ load_dotenv ()
17+
1418# Config
1519ROOT_DIR = Path (__file__ ).resolve ().parent .parent
1620DATA_DIR = ROOT_DIR / "data"
@@ -20,7 +24,13 @@ async def build_and_save_index():
2024 """Builds the index from PDF files and saves it."""
2125 print ("Loading PDF documents..." )
2226 reader = PDFReader ()
23- documents = reader .load_data (folder_path = DATA_DIR )
27+ pdf_files = list (DATA_DIR .glob ("**/*.pdf" ))
28+
29+ documents = []
30+ for pdf_file in pdf_files :
31+ documents .extend (reader .load_data (file = pdf_file ))
32+
33+ print (f"Loaded { len (documents )} document chunks from { len (pdf_files )} PDF files" )
2434
2535 print ("Building index..." )
2636 index = VectorStoreIndex .from_documents (documents )
@@ -31,7 +41,7 @@ async def build_and_save_index():
3141
3242async def load_or_build_index ():
3343 """Loads existing index or builds a new one if not found."""
34- if os .path .exists (STORAGE_DIR ):
44+ if os .path .exists (STORAGE_DIR ) or False :
3545 print (f"Loading existing index from { STORAGE_DIR } /..." )
3646 storage_context = StorageContext .from_defaults (persist_dir = STORAGE_DIR )
3747 index = load_index_from_storage (storage_context )
@@ -41,8 +51,11 @@ async def load_or_build_index():
4151
4252async def main ():
4353 # Step 1: Setup global Settings
44- Settings .embed_model = HuggingFaceEmbedding (model_name = "BAAI/bge-base-en-v1.5" )
45- Settings .llm = Ollama (model = "llama3.1" , request_timeout = 360.0 )
54+ Settings .embed_model = OpenAIEmbedding (api_key = os .getenv ("OPENAI_API_KEY" ))
55+ Settings .llm = OpenAI (
56+ api_key = os .getenv ("OPENAI_API_KEY" ),
57+ model = "gpt-4o-mini"
58+ )
4659
4760 # Step 2: Load or build the index
4861 index = await load_or_build_index ()
0 commit comments