Skip to content

Commit a3f44e2

Browse files
authored
Merge pull request #456 from Center-for-AI-Innovation/fix-download-convo-larger-than-500
Fix download conversations larger than 500
2 parents 1dcbe0b + 229406e commit a3f44e2

File tree

4 files changed

+43
-25
lines changed

4 files changed

+43
-25
lines changed

.env.local

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,11 @@ NEXT_PUBLIC_KEYCLOAK_CLIENT_ID=illinois_chat
106106
# NOMIC_API_KEY=
107107
# SENTRY_DSN=
108108

109+
# =============================================================================
110+
# SMTP EMAIL CONFIGURATION
111+
# =============================================================================
112+
SES_HOST=
113+
EMAIL_SENDER=
114+
PASSWORD_SMTP=
115+
SES_PORT=
116+
USERNAME_SMTP=

.env.template

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,10 @@ NUMEXPR_MAX_THREADS=2
3636
# Embeddings
3737
EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B
3838
EMBEDDING_API_BASE=
39+
40+
# Email
41+
SES_HOST=
42+
EMAIL_SENDER=
43+
PASSWORD_SMTP=
44+
SES_PORT=
45+
USERNAME_SMTP=

ai_ta_backend/service/export_service.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import zipfile
77

88
import pandas as pd
9-
import requests
9+
import redis
1010
from injector import inject
1111

1212
from ai_ta_backend.database.aws import AWSStorage
@@ -89,7 +89,7 @@ def export_documents_json(self, course_name: str, from_date='', to_date=''):
8989

9090
curr_doc_count = 0
9191
# create a temporary directory
92-
temp_dir = tempfile.mkdtemp(prefix="export_")
92+
temp_dir = tempfile.mkdtemp(prefix="export_", dir=tempfile.gettempdir())
9393

9494
filename = course_name + '_' + str(uuid.uuid4()) + '_documents.jsonl'
9595
file_path = os.path.join(temp_dir, filename)
@@ -160,7 +160,7 @@ def export_convo_history_json(self, course_name: str, from_date='', to_date=''):
160160
last_id = response["data"][-1]['id']
161161
total_count = count
162162

163-
temp_dir = tempfile.mkdtemp(prefix="export_")
163+
temp_dir = tempfile.mkdtemp(prefix="export_", dir=tempfile.gettempdir())
164164

165165
filename = course_name[0:10] + '-convos.jsonl'
166166
file_path = os.path.join(temp_dir, filename)
@@ -231,7 +231,7 @@ def export_conversations(self, course_name: str, from_date: str, to_date: str, e
231231
last_id = response["data"][-1]['id']
232232
total_count = count
233233

234-
temp_dir = tempfile.mkdtemp(prefix="export_")
234+
temp_dir = tempfile.mkdtemp(prefix="export_", dir=tempfile.gettempdir())
235235

236236
filename = course_name[0:10] + '-convos.jsonl'
237237
file_path = os.path.join(temp_dir, filename)
@@ -538,13 +538,15 @@ def export_data_in_bg_extended(response, download_type, course_name, s3_path):
538538
os.remove(zip_file_path)
539539
s3_url = s3.generatePresignedUrl('get_object', os.environ['S3_BUCKET_NAME'], s3_path, 172800)
540540

541-
# Fetch course metadata to get admin emails
542-
headers = {"Authorization": f"Bearer {os.environ['VERCEL_READ_ONLY_API_KEY']}", "Content-Type": "application/json"}
543-
hget_url = str(os.environ['VERCEL_BASE_URL']) + "course_metadatas/" + course_name
544-
response = requests.get(hget_url, headers=headers)
545-
course_metadata = response.json()
546-
course_metadata = json.loads(course_metadata['result'])
547-
admin_emails = course_metadata['course_admins']
541+
# Look up the course admins' email addresses in Redis
542+
print("Connecting to Redis... with url: ", os.environ['REDIS_URL'])
543+
redis_client = redis.Redis.from_url(os.environ['REDIS_URL'], db=0)
544+
course_metadata_json = redis_client.hget('course_metadatas', key=course_name)
545+
if not course_metadata_json:
546+
raise ValueError(f"No course metadata found in Redis for project '{course_name}'")
547+
course_metadata = json.loads(course_metadata_json)
548+
admin_emails = course_metadata.get('course_admins', [])
549+
548550
bcc_emails = []
549551

550552
# Handle specific email cases
@@ -584,7 +586,7 @@ def export_data_in_bg(response, download_type, course_name, s3_path):
584586
response (dict): The response from the Supabase query.
585587
download_type (str): The type of download - 'documents' or 'conversations'.
586588
course_name (str): The name of the course.
587-
s3_path (str): The S3 path where the file will be uploaded.
589+
s3_path (str): The S3 path where the file will be uploaded.
588590
"""
589591
s3 = AWSStorage()
590592
sql = SQLDatabase()
@@ -595,7 +597,7 @@ def export_data_in_bg(response, download_type, course_name, s3_path):
595597
print("pre-defined s3_path: ", s3_path)
596598

597599
curr_doc_count = 0
598-
temp_dir = tempfile.mkdtemp(prefix="export_")
600+
temp_dir = tempfile.mkdtemp(prefix="export_", dir=tempfile.gettempdir())
599601

600602
filename = s3_path.split('/')[-1].split('.')[0] + '.jsonl'
601603
file_path = os.path.join(temp_dir, filename)
@@ -641,17 +643,18 @@ def export_data_in_bg(response, download_type, course_name, s3_path):
641643
# generate presigned URL
642644
s3_url = s3.generatePresignedUrl('get_object', os.environ['S3_BUCKET_NAME'], s3_path, 172800)
643645

644-
# get admin email IDs
645-
headers = {"Authorization": f"Bearer {os.environ['VERCEL_READ_ONLY_API_KEY']}", "Content-Type": "application/json"}
646+
# Look up the course admins' email addresses in Redis
647+
print("Connecting to Redis... with url: ", os.environ['REDIS_URL'])
648+
redis_client = redis.Redis.from_url(os.environ['REDIS_URL'], db=0)
649+
course_metadata_json = redis_client.hget('course_metadatas', key=course_name)
650+
if not course_metadata_json:
651+
raise ValueError(f"No course metadata found in Redis for project '{course_name}'")
652+
course_metadata = json.loads(course_metadata_json)
653+
admin_emails = course_metadata.get('course_admins', [])
646654

647-
hget_url = str(os.environ['VERCEL_BASE_URL']) + "course_metadatas/" + course_name
648-
response = requests.get(hget_url, headers=headers)
649-
course_metadata = response.json()
650-
course_metadata = json.loads(course_metadata['result'])
651-
admin_emails = course_metadata['course_admins']
652655
bcc_emails = []
653656

654-
# check for Kastan's email and move to bcc
657+
# If the dev's email address is among the admin emails, move it to BCC
655658
if 'rohan13@illinois.edu' in admin_emails:
656659
admin_emails.remove('rohan13@illinois.edu')
657660
bcc_emails.append('rohan13@illinois.edu')
@@ -707,7 +710,7 @@ def export_data_in_bg_emails(response, download_type, course_name, s3_path, emai
707710

708711
curr_doc_count = 0
709712

710-
temp_dir = tempfile.mkdtemp(prefix="export_")
713+
temp_dir = tempfile.mkdtemp(prefix="export_", dir=tempfile.gettempdir())
711714

712715
filename = s3_path.split('/')[-1].split('.')[0] + '.jsonl'
713716
file_path = os.path.join(temp_dir, filename)

ai_ta_backend/utils/export_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def _initialize_file_paths(course_name: str):
1616
base_name = _initialize_base_name(course_name)
1717

1818
# Create a unique temp directory for this course_name
19-
temp_dir = tempfile.mkdtemp(prefix=f"{course_name}_")
19+
temp_dir = tempfile.mkdtemp(prefix=f"{course_name}_", dir=tempfile.gettempdir())
2020

2121
file_paths = {
2222
"zip": os.path.join(temp_dir, base_name + ".zip"),
@@ -258,7 +258,7 @@ def _append_to_jsonl(convo_data, jsonl_file_path, error_log):
258258

259259

260260
def _create_zip(file_paths, error_log):
261-
temp_dir = tempfile.mkdtemp(prefix="export_")
261+
temp_dir = tempfile.mkdtemp(prefix="export_", dir=tempfile.gettempdir())
262262
zip_file_path = os.path.join(temp_dir, os.path.basename(file_paths['zip']))
263263
error_log_path = os.path.join(temp_dir, 'error.log')
264264
with open(error_log_path, 'w') as log_file:
@@ -286,7 +286,7 @@ def _create_zip(file_paths, error_log):
286286

287287

288288
def _create_zip_for_user_convo_export(markdown_dir, media_dir, error_log):
289-
temp_dir = tempfile.mkdtemp(prefix="user_convo_export_")
289+
temp_dir = tempfile.mkdtemp(prefix="user_convo_export_", dir=tempfile.gettempdir())
290290
zip_file_path = os.path.join(temp_dir, 'user_convo_export.zip')
291291
error_log_path = os.path.join(temp_dir, 'error.log')
292292

0 commit comments

Comments
 (0)