Skip to content

Commit 1dcbe0b

Browse files
authored
safeguard wherever reading s3 paths[0] (#451)
1 parent 8f81253 commit 1dcbe0b

File tree

3 files changed

+18
-5
lines changed

3 files changed

+18
-5
lines changed

ai_ta_backend/rabbitmq/ingest.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,22 +305,27 @@ def _ingest_single(ingest_method: Callable, s3_path: str, force_embeddings: bool
305305
self.posthog.capture('distinct_id_of_the_user', event='ingest_failure',
306306
properties={
307307
'course_name': course_name,
308-
's3_path': s3_paths,
308+
's3_path': s3_path,
309309
'kwargs': kwargs,
310310
'error': err_msg
311311
})
312312
return success_status
313313
except Exception as e:
314314
err = f"❌❌ Error in /ingest: `{inspect.currentframe().f_code.co_name}`: {e}\nTraceback:\n", traceback.format_exc() # type: ignore
315+
# Prefer s3_path when it is defined (set inside the loop); otherwise use the first item of a non-empty s3_paths list, or s3_paths itself in every other case (e.g. a single string or an empty list)
316+
try:
317+
error_s3_path = s3_path if 's3_path' in locals() else (s3_paths[0] if isinstance(s3_paths, list) and len(s3_paths) > 0 else s3_paths)
318+
except (NameError, TypeError, IndexError):
319+
error_s3_path = str(s3_paths) if s3_paths else "unknown"
315320
success_status['failure_ingest'] = {
316-
's3_path': s3_path,
321+
's3_path': error_s3_path,
317322
'error': f"MAJOR ERROR DURING INGEST: {err}"
318323
}
319324
if self.posthog:
320325
self.posthog.capture('distinct_id_of_the_user', event='ingest_failure',
321326
properties={
322327
'course_name': course_name,
323-
's3_path': s3_paths,
328+
's3_path': error_s3_path,
324329
'kwargs': kwargs,
325330
'error': err
326331
})

ai_ta_backend/rabbitmq/ingest_canvas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def ingest_course_content(self,
253253
data = {
254254
'course_name': course_name,
255255
'readable_filename': readable_filename,
256-
's3_paths': s3_path,
256+
's3_paths': [s3_path],
257257
'base_url': f"{self.canvas_url}/courses/{canvas_course_id}",
258258
}
259259
print(f"Posting readable_filename: '{readable_filename}' with S3 path: '{s3_path}'")

ai_ta_backend/rabbitmq/rmqueue.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,16 @@ def addJobToIngestQueue(self, inputs, queue_name=None):
6262
logging.error("RabbitMQ is offline")
6363

6464
# SQL record first
65+
# Handle s3_paths: use a string as-is, or take the first element of a non-empty list; default to '' otherwise
66+
s3_path_value = ''
67+
if 's3_paths' in inputs:
68+
s3_paths = inputs['s3_paths']
69+
if isinstance(s3_paths, str):
70+
s3_path_value = s3_paths
71+
elif isinstance(s3_paths, list) and len(s3_paths) > 0:
72+
s3_path_value = s3_paths[0]
6573
doc_progress_payload = models.DocumentsInProgress(
66-
s3_path=inputs['s3_paths'][0] if 's3_paths' in inputs else '',
74+
s3_path=s3_path_value,
6775
readable_filename=inputs['readable_filename'],
6876
course_name=inputs['course_name']
6977
)

0 commit comments

Comments
 (0)