Skip to content

Commit ecd7f19

Browse files
authored
fix: update pytube to yt-dlp (#428)
Add tests and use yt-dlp instead of the outdated and failing pytube package.
1 parent e1d79c1 commit ecd7f19

5 files changed

Lines changed: 46 additions & 26 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ dependencies = [
2828
"ipykernel>=6.29.4,<7",
2929
"tenacity>=8.3.0,<9",
3030
"pillow-heif>=0.16.0,<0.17",
31-
"pytube==15.0.0",
3231
"anthropic>=0.31.0,<0.32",
3332
"pydantic>=2.0.0,<3",
3433
"av>=11.0.0,<12",
@@ -39,6 +38,7 @@ dependencies = [
3938
"dotenv>=0.9.9,<0.10",
4039
"pymupdf>=1.23.0,<2",
4140
"google-genai>=1.0.0,<2",
41+
"yt-dlp>=2025.3.31",
4242
]
4343

4444
[project.urls]

tests/integ/test_tools.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
vit_nsfw_classification,
3838
)
3939

40+
4041
def test_owlv2_object_detection():
4142
img = ski.data.coins()
4243
result = owlv2_object_detection(

tests/unit/tools/test_video.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66

77
from vision_agent.utils.video import extract_frames_from_video
8+
from vision_agent.tools import extract_frames_and_timestamps
89

910

1011
def test_extract_frames_from_video():
@@ -44,6 +45,28 @@ def test_extract_frames_with_input_video_has_no_fps():
4445
assert len(res) == 0
4546

4647

48+
def test_extract_frames_and_timestamps_from_local_video():
49+
video_path = _create_video(duration=2)
50+
res = extract_frames_and_timestamps(video_path, fps=24)
51+
assert isinstance(res, list)
52+
assert len(res) == 48
53+
assert all("frame" in item and "timestamp" in item for item in res)
54+
55+
56+
def test_extract_frames_and_timestamps_from_http():
57+
res = extract_frames_and_timestamps(
58+
"https://www.w3schools.com/tags/mov_bbb.mp4", fps=0.2
59+
)
60+
assert isinstance(res, list)
61+
assert len(res) == 2
62+
assert all("frame" in item and "timestamp" in item for item in res)
63+
64+
65+
def test_extract_frames_and_timestamps_invalid_local_file():
66+
res = extract_frames_and_timestamps("non_existing_file.mp4", fps=1.0)
67+
assert res == []
68+
69+
4770
def _create_video(
4871
*, duration: int = 3, fps: int = 24, fps_video_prop: Optional[int] = 24
4972
) -> str:

uv.lock

Lines changed: 11 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vision_agent/tools/tools.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from IPython.display import display
2020
from PIL import Image, ImageDraw, ImageFont
2121
from pillow_heif import register_heif_opener # type: ignore
22-
from pytube import YouTube # type: ignore
22+
import yt_dlp # type: ignore
2323
import pymupdf # type: ignore
2424
from google import genai # type: ignore
2525
from google.genai import types # type: ignore
@@ -3174,7 +3174,6 @@ def extract_frames_and_timestamps(
31743174
[{"frame": np.ndarray, "timestamp": 0.0}, ...]
31753175
"""
31763176
if isinstance(fps, str):
3177-
# fps could be a string when it's passed in from a web endpoint deployment
31783177
fps = float(fps)
31793178

31803179
def reformat(
@@ -3194,23 +3193,20 @@ def reformat(
31943193
)
31953194
):
31963195
with tempfile.TemporaryDirectory() as temp_dir:
3197-
yt = YouTube(str(video_uri))
3198-
# Download the highest resolution video
3199-
video = (
3200-
yt.streams.filter(progressive=True, file_extension="mp4")
3201-
.order_by("resolution")
3202-
.desc()
3203-
.first()
3204-
)
3205-
if not video:
3206-
raise Exception("No suitable video stream found")
3207-
video_file_path = video.download(output_path=temp_dir)
3196+
ydl_opts = {
3197+
"outtmpl": os.path.join(temp_dir, "%(title)s.%(ext)s"),
3198+
"format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
3199+
"quiet": True,
3200+
}
3201+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
3202+
info = ydl.extract_info(str(video_uri), download=True)
3203+
video_file_path = ydl.prepare_filename(info)
32083204

32093205
return reformat(extract_frames_from_video(video_file_path, fps))
3206+
32103207
elif str(video_uri).startswith(("http", "https")):
32113208
_, image_suffix = os.path.splitext(video_uri)
32123209
with tempfile.NamedTemporaryFile(delete=False, suffix=image_suffix) as tmp_file:
3213-
# Download the video and save it to the temporary file
32143210
with urllib.request.urlopen(str(video_uri)) as response:
32153211
tmp_file.write(response.read())
32163212
return reformat(extract_frames_from_video(tmp_file.name, fps))

0 commit comments

Comments
 (0)