Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions services/file_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,16 @@ def download_file(url, storage_path="/tmp/"):
os.remove(local_filename)
raise e

# Media types for files whose outermost layer is a compression wrapper.
# Keys are the encoding names reported by mimetypes.guess_type().
_COMPRESSED_CONTENT_TYPES = {
    'gzip': 'application/gzip',
    'bzip2': 'application/x-bzip2',
    'xz': 'application/x-xz',
    'compress': 'application/x-compress',
}


def get_content_type(file_path):
    """Determine the MIME type of a file based on its extension.

    The lookup is purely name-based; the file is never opened.

    Args:
        file_path (str): Path (or bare filename) of the file.

    Returns:
        str: MIME type (e.g., 'video/mp4', 'image/png', 'application/json').
            For compressed files (e.g. '.tar.gz', '.json.gz') the type of the
            compression container is returned (e.g. 'application/gzip'),
            because that is what the stored bytes actually are.
            Returns 'application/octet-stream' if the type cannot be
            determined.
    """
    # Imported locally so this helper works even if the module does not
    # import mimetypes at the top level.
    import mimetypes

    content_type, encoding = mimetypes.guess_type(file_path)

    # guess_type() reports the type of the *decoded* payload plus a separate
    # encoding (meant for a Content-Encoding header). Callers only set a
    # content type, so describing 'x.tar.gz' as 'application/x-tar' would
    # mislabel gzip bytes; describe the compressed container instead.
    if encoding is not None:
        return _COMPRESSED_CONTENT_TYPES.get(encoding, 'application/octet-stream')

    return content_type if content_type else 'application/octet-stream'

5 changes: 5 additions & 0 deletions services/gcp_toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from google.cloud import storage
from google.cloud.run_v2 import JobsClient, RunJobRequest
from google.api_core.exceptions import GoogleAPIError
from services.file_management import get_content_type

# Configure logging
logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -65,6 +66,10 @@ def upload_to_gcs(file_path, bucket_name=GCP_BUCKET_NAME):
logger.info(f"Uploading file to Google Cloud Storage: {file_path}")
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(os.path.basename(file_path))

# Set content type before uploading
blob.content_type = get_content_type(file_path)

blob.upload_from_filename(file_path)
logger.info(f"File uploaded successfully to GCS: {blob.public_url}")
return blob.public_url
Expand Down
6 changes: 5 additions & 1 deletion services/s3_toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import boto3
import logging
from urllib.parse import urlparse, quote
from services.file_management import get_content_type

logger = logging.getLogger(__name__)

Expand All @@ -36,9 +37,12 @@ def upload_to_s3(file_path, s3_url, access_key, secret_key, bucket_name, region)
client = session.client('s3', endpoint_url=s3_url)

try:
# Detect content type from file extension
content_type = get_content_type(file_path)

# Upload the file to the specified S3 bucket
with open(file_path, 'rb') as data:
client.upload_fileobj(data, bucket_name, os.path.basename(file_path), ExtraArgs={'ACL': 'public-read'})
client.upload_fileobj(data, bucket_name, os.path.basename(file_path), ExtraArgs={'ACL': 'public-read', 'ContentType': content_type})

# URL encode the filename for the URL
encoded_filename = quote(os.path.basename(file_path))
Expand Down
9 changes: 7 additions & 2 deletions services/v1/s3/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from urllib.parse import urlparse, unquote, quote
import uuid
import re
from services.file_management import get_content_type

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -82,11 +83,15 @@ def stream_upload_to_s3(file_url, custom_filename=None, make_public=False, downl
# Start a multipart upload
logger.info(f"Starting multipart upload for {filename} to bucket {bucket_name}")
acl = 'public-read' if make_public else 'private'


# Detect content type from filename
content_type = get_content_type(filename)

multipart_upload = s3_client.create_multipart_upload(
Bucket=bucket_name,
Key=filename,
ACL=acl
ACL=acl,
ContentType=content_type
)

upload_id = multipart_upload['UploadId']
Expand Down