Skip to content

Commit b5258f4

Browse files
author
Barbara Miller
committed
VIDEO_DATA_SOURCE
1 parent 9d279a1 commit b5258f4

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

brozzler/ydl.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
YTDLP_WAIT = 10
4343
YTDLP_MAX_REDIRECTS = 5
4444

45-
VIDEO_DATA = ""
45+
VIDEO_DATA_SOURCE = os.getenv("VIDEO_DATA_SOURCE")
4646

4747

4848
logger = structlog.get_logger(logger_name=__name__)
@@ -421,23 +421,22 @@ def _try_youtube_dl(worker, ydl, site, page):
421421

422422

423423
def get_video_captures(site, source="youtube"):
424-
if not VIDEO_DATA:
424+
if not VIDEO_DATA_SOURCE:
425425
return None
426426

427-
if VIDEO_DATA and VIDEO_DATA.startswith("postgresql"):
427+
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
428428
import psycopg
429429

430-
pg_url = VIDEO_DATA
431430
account_id = site.account_id if site.account_id else None
432431
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
433432
if source == "youtube":
434-
containing_page_url_pattern = "http://youtube.com/watch"
433+
containing_page_url_pattern = "http://youtube.com/watch" # yes, video data canonicalization uses "http"
435434
# support other sources here
436435
else:
437436
containing_page_url_pattern = None
438437
if account_id and seed and source:
439438
pg_query = (
440-
"SELECT containing_page_url from video where account_id = %s and seed = %s and containing_page_url like %s",
439+
"SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_url like %s",
441440
(
442441
account_id,
443442
seed,
@@ -451,10 +450,11 @@ def get_video_captures(site, source="youtube"):
451450
)
452451
else:
453452
return None
454-
with psycopg.connect(pg_url) as conn:
453+
with psycopg.connect(VIDEO_DATA_SOURCE) as conn:
455454
with conn.cursor(row_factory=psycopg.rows.scalar_row) as cur:
456455
cur.execute(pg_query)
457456
return cur.fetchall()
457+
return None
458458

459459

460460
@metrics.brozzler_ytdlp_duration_seconds.time()

0 commit comments

Comments
 (0)