Skip to content

Commit 969601a

Browse files
committed
Improve metadata extraction: probe the media URL directly instead of downloading the whole file first
1 parent 487f248 commit 969601a

4 files changed

Lines changed: 200 additions & 31 deletions

File tree

.env.local.minio.n8n

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Local Development Environment Configuration
2+
# No Code Architect Toolkit - Local Setup with MinIO
3+
4+
# App Configuration
5+
APP_NAME=NCAToolkit
6+
APP_DEBUG=true
7+
APP_DOMAIN=localhost:8080
8+
APP_URL=http://localhost:8080
9+
10+
# API Configuration
11+
API_KEY=local-dev-key-123
12+
13+
# MinIO S3-Compatible Storage Configuration
14+
S3_ENDPOINT_URL=http://minio:9000
15+
S3_ACCESS_KEY=minioadmin
16+
S3_SECRET_KEY=minioadmin123
17+
S3_REGION=us-east-1
18+
S3_BUCKET_NAME=nca-toolkit-local
19+
20+
# Optional: Gunicorn Configuration
21+
GUNICORN_WORKERS=2
22+
GUNICORN_TIMEOUT=300
23+
24+
# n8n Configuration
25+
N8N_HOST=localhost
26+
N8N_PORT=5678
27+
N8N_PROTOCOL=http
28+
WEBHOOK_URL=http://localhost:5678/
29+
GENERIC_TIMEZONE=UTC

.env.minio.example

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Production Environment Configuration with MinIO
2+
# No Code Architect Toolkit
3+
4+
# The name of your application.
5+
APP_NAME=NCAToolkit
6+
7+
# Debug mode setting. Set to `false` for production environments.
8+
APP_DEBUG=false
9+
10+
# Your app's domain or subdomain, without the 'http://' or 'https://' prefix.
11+
APP_DOMAIN=nca.example.com
12+
13+
# MinIO subdomain, without the 'http://' or 'https://' prefix. Set it to a subdomain of your own domain; docker-compose uses it for routing.
14+
MINIO_DOMAIN=minio.nca.example.com
15+
16+
# Full application URL is automatically configured; no modification required.
17+
APP_URL=https://${APP_DOMAIN}
18+
19+
# SSL settings
20+
SSL_EMAIL=user@example.com
21+
22+
# API_KEY
23+
# Purpose: Used for API authentication.
24+
# Requirement: Mandatory.
25+
API_KEY=your_api_key_here
26+
27+
# MinIO S3-Compatible Storage Configuration
28+
# Purpose: MinIO provides S3-compatible object storage locally
29+
# Requirement: Mandatory for MinIO setup
30+
S3_ENDPOINT_URL=https://${MINIO_DOMAIN}
31+
S3_ACCESS_KEY=your_minio_access_key
32+
S3_SECRET_KEY=your_minio_secret_key
33+
S3_REGION=us-east-1
34+
S3_BUCKET_NAME=nca-toolkit-prod
35+
36+
# MinIO Admin Configuration
37+
# Purpose: Root credentials for MinIO server administration
38+
# Requirement: Mandatory - change these default values for production
39+
MINIO_ROOT_USER=your_minio_root_user
40+
MINIO_ROOT_PASSWORD=your_secure_minio_password
41+
42+
# Optional: Gunicorn Configuration for production
43+
GUNICORN_WORKERS=4
44+
GUNICORN_TIMEOUT=300

docker-compose.prod.minio.yml

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
services:
2+
traefik:
3+
image: "traefik"
4+
restart: unless-stopped
5+
env_file:
6+
- .env.minio
7+
command:
8+
- "--api=true"
9+
- "--api.insecure=true"
10+
- "--providers.docker=true"
11+
- "--providers.docker.exposedbydefault=false"
12+
- "--entrypoints.web.address=:80"
13+
- "--entrypoints.web.http.redirections.entryPoint.to=websecure"
14+
- "--entrypoints.web.http.redirections.entrypoint.scheme=https"
15+
- "--entrypoints.websecure.address=:443"
16+
- "--certificatesresolvers.mytlschallenge.acme.tlschallenge=true"
17+
- "--certificatesresolvers.mytlschallenge.acme.email=${SSL_EMAIL}"
18+
- "--certificatesresolvers.mytlschallenge.acme.storage=/letsencrypt/acme.json"
19+
ports:
20+
- "80:80"
21+
- "443:443"
22+
volumes:
23+
- traefik_data:/letsencrypt
24+
- /var/run/docker.sock:/var/run/docker.sock:ro
25+
networks:
26+
- nca-network
27+
28+
ncat:
29+
image: stephengpope/no-code-architects-toolkit:latest
30+
env_file:
31+
- .env.minio
32+
labels:
33+
- traefik.enable=true
34+
- traefik.http.routers.ncat.rule=Host(`${APP_DOMAIN}`)
35+
- traefik.http.routers.ncat.tls=true
36+
- traefik.http.routers.ncat.entrypoints=web,websecure
37+
- traefik.http.routers.ncat.tls.certresolver=mytlschallenge
38+
volumes:
39+
- storage:/var/www/html/storage/app
40+
- logs:/var/www/html/storage/logs
41+
restart: unless-stopped
42+
depends_on:
43+
- minio
44+
- minio-init
45+
networks:
46+
- nca-network
47+
48+
minio:
49+
image: minio/minio:latest
50+
command: server /data --console-address ":9001"
51+
env_file:
52+
- .env.minio
53+
ports:
54+
- "9000:9000"
55+
- "9001:9001"
56+
environment:
57+
MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
58+
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin123}
59+
volumes:
60+
- minio_data:/data
61+
restart: unless-stopped
62+
networks:
63+
- nca-network
64+
labels:
65+
- traefik.enable=true
66+
- traefik.http.routers.minio.rule=Host(`${MINIO_DOMAIN}`)
67+
- traefik.http.routers.minio.tls=true
68+
- traefik.http.routers.minio.entrypoints=web,websecure
69+
- traefik.http.routers.minio.tls.certresolver=mytlschallenge
70+
- traefik.http.routers.minio.service=minio
71+
- traefik.http.services.minio.loadbalancer.server.port=9000
72+
73+
minio-init:
74+
image: minio/mc:latest
75+
env_file:
76+
- .env.minio
77+
depends_on:
78+
- minio
79+
entrypoint: >
80+
/bin/sh -c "
81+
sleep 10;
82+
/usr/bin/mc alias set myminio http://minio:9000 ${MINIO_ROOT_USER:-minioadmin} ${MINIO_ROOT_PASSWORD:-minioadmin123};
83+
/usr/bin/mc mb myminio/${S3_BUCKET_NAME:-nca-toolkit-prod} --ignore-existing;
84+
/usr/bin/mc anonymous set public myminio/${S3_BUCKET_NAME:-nca-toolkit-prod};
85+
echo 'MinIO bucket ${S3_BUCKET_NAME:-nca-toolkit-prod} created and configured as public';
86+
"
87+
networks:
88+
- nca-network
89+
90+
volumes:
91+
traefik_data:
92+
driver: local
93+
storage:
94+
driver: local
95+
logs:
96+
driver: local
97+
minio_data:
98+
driver: local
99+
100+
networks:
101+
nca-network:
102+
driver: bridge

services/v1/media/metadata.py

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import subprocess
2121
import json
2222
import logging
23-
from services.file_management import download_file
23+
import requests
2424
from config import LOCAL_STORAGE_PATH
2525

2626
# Set up logging
@@ -30,45 +30,49 @@
3030
def get_media_metadata(media_url, job_id=None):
3131
"""
3232
Extract metadata from a media file including video/audio properties.
33-
33+
Uses ffprobe direct streaming to avoid downloading the entire file.
34+
3435
Args:
3536
media_url (str): URL of the media file to analyze
3637
job_id (str, optional): Unique job identifier
37-
38+
3839
Returns:
3940
dict: Dictionary containing all available metadata for the media file
4041
"""
4142
logger.info(f"Starting metadata extraction for {media_url}")
42-
43-
# Download the file
44-
input_filename = download_file(media_url, os.path.join(LOCAL_STORAGE_PATH, f"{job_id}_metadata_input"))
45-
logger.info(f"Downloaded media to local file: {input_filename}")
46-
43+
4744
try:
4845
# Initialize metadata dictionary
4946
metadata = {}
50-
51-
# Get file size
52-
metadata['filesize'] = os.path.getsize(input_filename)
53-
metadata['filesize_mb'] = round(metadata['filesize'] / (1024 * 1024), 2) # Convert to MB
54-
55-
# Run ffprobe to get detailed metadata
47+
48+
# Get file size from HTTP HEAD request (without downloading)
49+
try:
50+
head_response = requests.head(media_url, allow_redirects=True, timeout=10)
51+
if 'content-length' in head_response.headers:
52+
metadata['filesize'] = int(head_response.headers['content-length'])
53+
metadata['filesize_mb'] = round(metadata['filesize'] / (1024 * 1024), 2) # Convert to MB
54+
except Exception as e:
55+
logger.warning(f"Could not retrieve file size from HEAD request: {str(e)}")
56+
57+
# Run ffprobe directly on the URL with reduced probing
5658
ffprobe_command = [
5759
'ffprobe',
5860
'-v', 'quiet',
5961
'-print_format', 'json',
6062
'-show_format',
6163
'-show_streams',
62-
input_filename
64+
'-analyzeduration', '100K',
65+
'-probesize', '100K',
66+
media_url
6367
]
64-
65-
logger.info(f"Running ffprobe command: {' '.join(ffprobe_command)}")
68+
69+
logger.info(f"Running ffprobe command on URL")
6670
result = subprocess.run(ffprobe_command, capture_output=True, text=True)
67-
71+
6872
if result.returncode != 0:
6973
logger.error(f"Error during ffprobe: {result.stderr}")
7074
raise Exception(f"ffprobe error: {result.stderr}")
71-
75+
7276
probe_data = json.loads(result.stdout)
7377

7478
# Get format information
@@ -155,19 +159,9 @@ def get_media_metadata(media_url, job_id=None):
155159
# Add flags indicating presence of streams
156160
metadata['has_video'] = has_video
157161
metadata['has_audio'] = has_audio
158-
159-
# Clean up the downloaded file
160-
if os.path.exists(input_filename):
161-
os.remove(input_filename)
162-
logger.info(f"Removed temporary file: {input_filename}")
163-
162+
164163
return metadata
165-
164+
166165
except Exception as e:
167166
logger.error(f"Metadata extraction failed: {str(e)}")
168-
169-
# Clean up temporary file if it exists
170-
if 'input_filename' in locals() and os.path.exists(input_filename):
171-
os.remove(input_filename)
172-
173167
raise

0 commit comments

Comments
 (0)