4242YTDLP_WAIT = 10
4343YTDLP_MAX_REDIRECTS = 5
4444
45- VIDEO_DATA = ""
45+ VIDEO_DATA_SOURCE = os . getenv ( "VIDEO_DATA_SOURCE" )
4646
4747
4848logger = structlog .get_logger (logger_name = __name__ )
@@ -421,23 +421,22 @@ def _try_youtube_dl(worker, ydl, site, page):
421421
422422
423423def get_video_captures (site , source = "youtube" ):
424- if not VIDEO_DATA :
424+ if not VIDEO_DATA_SOURCE :
425425 return None
426426
427- if VIDEO_DATA and VIDEO_DATA .startswith ("postgresql" ):
427+ if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE .startswith ("postgresql" ):
428428 import psycopg
429429
430- pg_url = VIDEO_DATA
431430 account_id = site .account_id if site .account_id else None
432431 seed = site .metadata .ait_seed_id if site .metadata .ait_seed_id else None
433432 if source == "youtube" :
434- containing_page_url_pattern = "http://youtube.com/watch"
433+ containing_page_url_pattern = "http://youtube.com/watch" # intentionally "http", not "https": video data canonicalization uses "http"
435434 # support other sources here
436435 else :
437436 containing_page_url_pattern = None
438437 if account_id and seed and source :
439438 pg_query = (
440- "SELECT containing_page_url from video where account_id = %s and seed = %s and containing_page_url like %s" ,
439+ "SELECT distinct( containing_page_url) from video where account_id = %s and seed = %s and containing_page_url like %s" ,
441440 (
442441 account_id ,
443442 seed ,
@@ -451,10 +450,11 @@ def get_video_captures(site, source="youtube"):
451450 )
452451 else :
453452 return None
454- with psycopg .connect (pg_url ) as conn :
453+ with psycopg .connect (VIDEO_DATA_SOURCE ) as conn :
455454 with conn .cursor (row_factory = psycopg .rows .scalar_row ) as cur :
456455 cur .execute (pg_query )
457456 return cur .fetchall ()
457+ return None
458458
459459
460460@metrics .brozzler_ytdlp_duration_seconds .time ()
0 commit comments