Skip to content

Commit 78839c2

Browse files
authored
[Release 0.27] Cherry-pick of 'Remove old pyav backend, rely on TC instead' (#9481)
1 parent 883dcaf commit 78839c2

12 files changed

Lines changed: 109 additions & 771 deletions

File tree

.github/scripts/setup-env.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@ conda activate ci
3434
conda install --quiet --yes libjpeg-turbo -c pytorch
3535
pip install --progress-bar=off --upgrade setuptools==72.1.0
3636

37-
# See https://github.com/pytorch/vision/issues/6790
38-
if [[ "${PYTHON_VERSION}" != "3.11" ]]; then
39-
pip install --progress-bar=off av!=10.0.0
40-
fi
41-
4237
echo '::endgroup::'
4338

4439
if [[ "${OS_TYPE}" == windows && "${GPU_ARCH_TYPE}" == cuda ]]; then

.github/workflows/docs.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ jobs:
3434
CONDA_PATH=$(which conda)
3535
eval "$(${CONDA_PATH} shell.bash hook)"
3636
conda activate ci
37-
# FIXME: not sure why we need this. `ldd torchvision/video_reader.so` shows that it
38-
# already links against the one pulled from conda. However, at runtime it pulls from
39-
# /lib64
40-
# Should we maybe always do this in `./.github/scripts/setup-env.sh` so that we don't
41-
# have to pay attention in all other workflows?
37+
38+
echo '::group::Install TorchCodec and ffmpeg'
39+
conda install --quiet --yes ffmpeg
40+
pip install --progress-bar=off --pre torchcodec --index-url="https://download.pytorch.org/whl/nightly/cpu"
4241
export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}"
42+
echo '::endgroup::'
4343
4444
cd docs
4545

gallery/others/plot_optical_flow.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,10 @@ def plot(imgs, **imshow_kwargs):
4747
plt.tight_layout()
4848

4949
# %%
50-
# Reading Videos Using Torchvision
50+
# Reading Videos Using TorchCodec
5151
# --------------------------------
52-
# We will first read a video using :func:`~torchvision.io.read_video`.
53-
# Alternatively one can use the new :class:`~torchvision.io.VideoReader` API (if
54-
# torchvision is built from source).
52+
# We will first read a video using
53+
# `TorchCodec <https://github.com/pytorch/torchcodec>`_.
5554
# The video we will use here is free to use from `pexels.com
5655
# <https://www.pexels.com/video/a-man-playing-a-game-of-basketball-5192157/>`_,
5756
# credits go to `Pavel Danilyuk <https://www.pexels.com/@pavel-danilyuk>`_.
@@ -67,16 +66,16 @@ def plot(imgs, **imshow_kwargs):
6766
_ = urlretrieve(video_url, video_path)
6867

6968
# %%
70-
# :func:`~torchvision.io.read_video` returns the video frames, audio frames and
71-
# the metadata associated with the video. In our case, we only need the video
72-
# frames.
69+
# We use :class:`~torchcodec.decoders.VideoDecoder` to decode the video frames.
70+
# TorchCodec returns frames in NCHW format by default.
7371
#
7472
# Here we will just make 2 predictions between 2 pre-selected pairs of frames,
7573
# namely frames (100, 101) and (150, 151). Each of these pairs corresponds to a
7674
# single model input.
7775

78-
from torchvision.io import read_video
79-
frames, _, _ = read_video(str(video_path), output_format="TCHW")
76+
from torchcodec.decoders import VideoDecoder
77+
decoder = VideoDecoder(str(video_path))
78+
frames = decoder[:]
8079

8180
img1_batch = torch.stack([frames[100], frames[150]])
8281
img2_batch = torch.stack([frames[101], frames[151]])
@@ -85,7 +84,7 @@ def plot(imgs, **imshow_kwargs):
8584

8685
# %%
8786
# The RAFT model accepts RGB images. We first get the frames from
88-
# :func:`~torchvision.io.read_video` and resize them to ensure their dimensions
87+
# the decoder and resize them to ensure their dimensions
8988
# are divisible by 8. Note that we explicitly use ``antialias=False``, because
9089
# this is how those models were trained. Then we use the transforms bundled into
9190
# the weights in order to preprocess the input and rescale its values to the

test/common_utils.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import torch.testing
1919

2020
from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
21-
from torchvision import io, tv_tensors
21+
from torchvision import tv_tensors
2222
from torchvision.transforms._functional_tensor import _max_value as get_max_value
2323
from torchvision.transforms.v2.functional import to_image, to_pil_image
2424
from torchvision.utils import _Image_fromarray
@@ -158,6 +158,8 @@ def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu
158158

159159

160160
def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
161+
from datasets_utils import create_video_file
162+
161163
names = []
162164
for i in range(num_videos):
163165
if sizes is None:
@@ -168,10 +170,9 @@ def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
168170
f = 5
169171
else:
170172
f = fps[i]
171-
data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8)
172-
name = os.path.join(tmpdir, f"{i}.mp4")
173-
names.append(name)
174-
io.write_video(name, data, fps=f)
173+
name = f"{i}.mp4"
174+
create_video_file(tmpdir, name, size=(size, 3, 300, 400), fps=f)
175+
names.append(os.path.join(tmpdir, name))
175176

176177
return names
177178

test/datasets_utils.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class LazyImporter:
6666
"""
6767

6868
MODULES = (
69-
"av",
69+
"torchcodec",
7070
"lmdb",
7171
"pycocotools",
7272
"requests",
@@ -669,17 +669,24 @@ class VideoDatasetTestCase(DatasetTestCase):
669669
670670
- Overwrites the 'FEATURE_TYPES' class attribute to expect two :class:`torch.Tensor` s for the video and audio as
671671
well as an integer label.
672-
- Overwrites the 'REQUIRED_PACKAGES' class attribute to require PyAV (``av``).
672+
- Overwrites the 'REQUIRED_PACKAGES' class attribute to require TorchCodec (``torchcodec``).
673+
- Skips on non-Linux platforms and CUDA-only environments.
673674
- Adds the 'DEFAULT_FRAMES_PER_CLIP' class attribute. If no 'frames_per_clip' is provided by 'inject_fake_data()'
674675
and it is the last parameter without a default value in the dataset constructor, the value of the
675676
'DEFAULT_FRAMES_PER_CLIP' class attribute is appended to the output.
676677
"""
677678

678679
FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
679-
REQUIRED_PACKAGES = ("av",)
680+
REQUIRED_PACKAGES = ("torchcodec",)
680681

681682
FRAMES_PER_CLIP = 1
682683

684+
@classmethod
685+
def setUpClass(cls):
686+
if platform.system() != "Linux":
687+
raise unittest.SkipTest("Video dataset tests are only supported on Linux.")
688+
super().setUpClass()
689+
683690
def __init__(self, *args, **kwargs):
684691
super().__init__(*args, **kwargs)
685692
self.dataset_args = self._set_default_frames_per_clip(self.dataset_args)
@@ -864,13 +871,12 @@ def shape_test_for_stereo(
864871
assert dw == mw
865872

866873

867-
@requires_lazy_imports("av")
874+
@requires_lazy_imports("torchcodec")
868875
def create_video_file(
869876
root: Union[pathlib.Path, str],
870877
name: Union[pathlib.Path, str],
871878
size: Union[Sequence[int], int] = (1, 3, 10, 10),
872879
fps: float = 25,
873-
**kwargs: Any,
874880
) -> pathlib.Path:
875881
"""Create a video file from random data.
876882
@@ -881,14 +887,15 @@ def create_video_file(
881887
``(num_frames, num_channels, height, width)``. If scalar, the value is used for the height and width.
882888
If not provided, ``num_frames=1`` and ``num_channels=3`` are assumed.
883889
fps (float): Frame rate in frames per second.
884-
kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`.
885890
886891
Returns:
887-
pathlib.Path: Path to the created image file.
892+
pathlib.Path: Path to the created video file.
888893
889894
Raises:
890-
UsageError: If PyAV is not available.
895+
UsageError: If TorchCodec is not available.
891896
"""
897+
from torchcodec.encoders import VideoEncoder
898+
892899
if isinstance(size, int):
893900
size = (size, size)
894901
if len(size) == 2:
@@ -902,11 +909,14 @@ def create_video_file(
902909

903910
video = create_image_or_video_tensor(size)
904911
file = pathlib.Path(root) / name
905-
torchvision.io.write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs)
912+
913+
encoder = VideoEncoder(video, frame_rate=fps)
914+
encoder.to_file(str(file))
915+
906916
return file
907917

908918

909-
@requires_lazy_imports("av")
919+
@requires_lazy_imports("torchcodec")
910920
def create_video_folder(
911921
root: Union[str, pathlib.Path],
912922
name: Union[str, pathlib.Path],
@@ -933,7 +943,7 @@ def create_video_folder(
933943
List[pathlib.Path]: Paths to all created video files.
934944
935945
Raises:
936-
UsageError: If PyAV is not available.
946+
UsageError: If TorchCodec is not available.
937947
938948
.. seealso::
939949
@@ -944,7 +954,7 @@ def create_video_folder(
944954
def size(idx):
945955
num_frames = 1
946956
num_channels = 3
947-
# The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and
957+
# The 'libx264' video codec requires the height and
948958
# width of the video to be divisible by 2.
949959
height, width = (torch.randint(2, 6, size=(2,), dtype=torch.int) * 2).tolist()
950960
return (num_frames, num_channels, height, width)

test/test_datasets_samplers.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
1+
import sys
2+
13
import pytest
24
import torch
35
from common_utils import assert_equal, get_list_of_videos
4-
from torchvision import io
56
from torchvision.datasets.samplers import DistributedSampler, RandomClipSampler, UniformClipSampler
67
from torchvision.datasets.video_utils import VideoClips
78

9+
try:
10+
import torchcodec # noqa: F401
11+
12+
_torchcodec_available = True
13+
except ImportError:
14+
_torchcodec_available = False
15+
816

9-
@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
17+
@pytest.mark.skipif(
18+
not (_torchcodec_available and sys.platform == "linux"),
19+
reason="this test requires torchcodec (linux only)",
20+
)
1021
class TestDatasetsSamplers:
1122
def test_random_clip_sampler(self, tmpdir):
1223
video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25])

test/test_datasets_video_utils.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
1+
import sys
2+
13
import pytest
24
import torch
35
from common_utils import assert_equal, get_list_of_videos
4-
from torchvision import io
56
from torchvision.datasets.video_utils import unfold, VideoClips
67

8+
try:
9+
import torchcodec # noqa: F401
10+
11+
_torchcodec_available = True
12+
except ImportError:
13+
_torchcodec_available = False
14+
15+
_requires_torchcodec = pytest.mark.skipif(
16+
not (_torchcodec_available and sys.platform == "linux"),
17+
reason="this test requires torchcodec (linux only)",
18+
)
19+
720

821
class TestVideo:
922
def test_unfold(self):
@@ -31,7 +44,7 @@ def test_unfold(self):
3144
)
3245
assert_equal(r, expected)
3346

34-
@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
47+
@_requires_torchcodec
3548
def test_video_clips(self, tmpdir):
3649
video_list = get_list_of_videos(tmpdir, num_videos=3)
3750
video_clips = VideoClips(video_list, 5, 5, num_workers=2)
@@ -55,7 +68,7 @@ def test_video_clips(self, tmpdir):
5568
assert video_idx == v_idx
5669
assert clip_idx == c_idx
5770

58-
@pytest.mark.skipif(not io.video._av_available(), reason="this test requires av")
71+
@_requires_torchcodec
5972
def test_video_clips_custom_fps(self, tmpdir):
6073
video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6])
6174
num_frames = 4

0 commit comments

Comments
 (0)