@@ -42,12 +42,12 @@ def generate_collection(
4242 for idx , (_ , row ) in enumerate (
4343 df_docs .iterrows (), start = 1
4444 ): # Using _ for unused variable
45- content = row ["Contents " ]
45+ content = row ["content " ]
4646
4747 if not isinstance (content , str ):
4848 logger .warning (
4949 "Skipping document due to missing or invalid content." ,
50- filename = row ["Filename " ],
50+ filename = row ["file_name " ],
5151 )
5252 continue
5353
@@ -56,34 +56,34 @@ def generate_collection(
5656 embedding_model = retriever_config .embedding_model ,
5757 task_type = EmbeddingTaskType .RETRIEVAL_DOCUMENT ,
5858 contents = content ,
59- title = str (row ["Filename " ]),
59+ title = str (row ["file_name " ]),
6060 )
6161 except google .api_core .exceptions .InvalidArgument as e :
6262 # Check if it's the known "Request payload size exceeds the limit" error
6363 # If so, downgrade it to a warning
6464 if "400 Request payload size exceeds the limit" in str (e ):
6565 logger .warning (
6666 "Skipping document due to size limit." ,
67- filename = row ["Filename " ],
67+ filename = row ["file_name " ],
6868 )
6969 continue
7070 # Log the full traceback for other InvalidArgument errors
7171 logger .exception (
7272 "Error encoding document (InvalidArgument)." ,
73- filename = row ["Filename " ],
73+ filename = row ["file_name " ],
7474 )
7575 continue
7676 except Exception :
7777 # Log the full traceback for any other errors
7878 logger .exception (
7979 "Error encoding document (general)." ,
80- filename = row ["Filename " ],
80+ filename = row ["file_name " ],
8181 )
8282 continue
8383
8484 payload = {
85- "filename" : row ["Filename " ],
86- "metadata" : row ["Metadata " ],
85+ "filename" : row ["file_name " ],
86+ "metadata" : row ["meta_data " ],
8787 "text" : content ,
8888 }
8989
0 commit comments