def get_content_type(file_path: str) -> str:
    """Determine the MIME type of a file based on its extension.

    Args:
        file_path (str): Path (or bare filename) of the file. Only the name
            is inspected; the file is never opened.

    Returns:
        str: MIME type (e.g., 'video/mp4', 'image/png', 'application/json').
            Returns 'application/octet-stream' if the type cannot be
            determined from the extension.
    """
    # Imported locally so the function is self-contained: the module-level
    # imports of this file are not guaranteed to include `mimetypes`.
    import mimetypes

    # guess_type returns (type, encoding); the encoding (e.g. gzip) is not
    # needed for the Content-Type metadata set by the upload helpers.
    content_type, _ = mimetypes.guess_type(file_path)
    # A generic binary type is the safe default when the extension is unknown.
    return content_type or 'application/octet-stream'
client.upload_fileobj(data, bucket_name, os.path.basename(file_path), ExtraArgs={'ACL': 'public-read', 'ContentType': content_type}) # URL encode the filename for the URL encoded_filename = quote(os.path.basename(file_path)) diff --git a/services/v1/s3/upload.py b/services/v1/s3/upload.py index 49fc3898..00003893 100644 --- a/services/v1/s3/upload.py +++ b/services/v1/s3/upload.py @@ -23,6 +23,7 @@ from urllib.parse import urlparse, unquote, quote import uuid import re +from services.file_management import get_content_type logger = logging.getLogger(__name__) @@ -82,11 +83,15 @@ def stream_upload_to_s3(file_url, custom_filename=None, make_public=False, downl # Start a multipart upload logger.info(f"Starting multipart upload for {filename} to bucket {bucket_name}") acl = 'public-read' if make_public else 'private' - + + # Detect content type from filename + content_type = get_content_type(filename) + multipart_upload = s3_client.create_multipart_upload( Bucket=bucket_name, Key=filename, - ACL=acl + ACL=acl, + ContentType=content_type ) upload_id = multipart_upload['UploadId']