-
Notifications
You must be signed in to change notification settings - Fork 79
Add pixel formats and color handling to VideoEncoder GPU #1125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
cd5f8aa
939240b
b538d13
0480f1f
351a55d
ffcf872
a671f31
a9ad8e0
261549d
f4d777c
da3a6d7
1dc7690
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -780,9 +780,9 @@ def test_pixel_format_errors(self, method, device, tmp_path): | |
| if device == "cuda": | ||
| with pytest.raises( | ||
| RuntimeError, | ||
| match="GPU Video encoding currently only supports the NV12 pixel format. Do not set pixel_format to use NV12", | ||
| match="GPU encoding only supports nv12 and yuv420p pixel formats, got yuv444p", | ||
| ): | ||
| getattr(encoder, method)(**valid_params, pixel_format="yuv420p") | ||
| getattr(encoder, method)(**valid_params, pixel_format="yuv444p") | ||
| return | ||
|
|
||
| with pytest.raises( | ||
|
|
@@ -1353,8 +1353,14 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range | |
| ], | ||
| ) | ||
| @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) | ||
| # TODO-VideoEncoder: Enable additional pixel formats ("yuv420p", "yuv444p") | ||
| def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method): | ||
| @pytest.mark.parametrize("pixel_format", ("nv12", "yuv420p", None)) | ||
| # BT.601, BT.709, BT.2020 | ||
| @pytest.mark.parametrize("color_space", ("bt470bg", "bt709", "bt2020nc")) | ||
| # Full/PC range, Limited/TV range | ||
| @pytest.mark.parametrize("color_range", ("pc", "tv")) | ||
| def test_nvenc_against_ffmpeg_cli( | ||
| self, tmp_path, format_codec, method, pixel_format, color_space, color_range | ||
| ): | ||
| # Encode with FFmpeg CLI using nvenc codecs | ||
| format, codec = format_codec | ||
| device = "cuda" | ||
|
|
@@ -1385,7 +1391,14 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method): | |
| codec, # Use specified NVENC hardware encoder | ||
| ] | ||
|
|
||
| ffmpeg_cmd.extend(["-pix_fmt", "nv12"]) # Output format is always NV12 | ||
| if color_space: | ||
| ffmpeg_cmd.extend(["-colorspace", color_space]) | ||
| if color_range: | ||
| ffmpeg_cmd.extend(["-color_range", color_range]) | ||
| if pixel_format: | ||
| ffmpeg_cmd.extend(["-pix_fmt", pixel_format]) | ||
| else: # VideoEncoder will default to yuv420p for nvenc codecs | ||
| ffmpeg_cmd.extend(["-pix_fmt", "yuv420p"]) | ||
| if codec == "av1_nvenc": | ||
| ffmpeg_cmd.extend(["-rc", "constqp"]) # Set rate control mode for AV1 | ||
| ffmpeg_cmd.extend(["-qp", str(qp)]) # Use lossless qp for other codecs | ||
|
|
@@ -1396,18 +1409,24 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method): | |
| encoder_extra_options = {"qp": qp} | ||
| if codec == "av1_nvenc": | ||
| encoder_extra_options["rc"] = 0 # constqp mode | ||
| if color_space: | ||
| encoder_extra_options["colorspace"] = color_space | ||
| if color_range: | ||
| encoder_extra_options["color_range"] = color_range | ||
| if method == "to_file": | ||
| encoder_output_path = str(tmp_path / f"nvenc_output.{format}") | ||
| encoder.to_file( | ||
| dest=encoder_output_path, | ||
| codec=codec, | ||
| pixel_format=pixel_format, | ||
| extra_options=encoder_extra_options, | ||
| ) | ||
| encoder_output = encoder_output_path | ||
| elif method == "to_tensor": | ||
| encoder_output = encoder.to_tensor( | ||
| format=format, | ||
| codec=codec, | ||
| pixel_format=pixel_format, | ||
| extra_options=encoder_extra_options, | ||
| ) | ||
| elif method == "to_file_like": | ||
|
|
@@ -1416,6 +1435,7 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method): | |
| file_like=file_like, | ||
| format=format, | ||
| codec=codec, | ||
| pixel_format=pixel_format, | ||
| extra_options=encoder_extra_options, | ||
| ) | ||
| encoder_output = file_like.getvalue() | ||
|
|
@@ -1426,13 +1446,39 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method): | |
| encoder_frames = self.decode(encoder_output).data | ||
|
|
||
| assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] | ||
| # The combination of full range + bt709 results in worse accuracy | ||
| percentage = 91 if color_range == "full" and color_space == "bt709" else 96 | ||
| for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): | ||
| assert psnr(ff_frame, enc_frame) > 25 | ||
| assert_tensor_close_on_at_least(ff_frame, enc_frame, percentage=96, atol=2) | ||
| assert_tensor_close_on_at_least( | ||
| ff_frame, enc_frame, percentage=percentage, atol=2 | ||
| ) | ||
|
|
||
| if method == "to_file": | ||
| ffmpeg_metadata = self._get_video_metadata(ffmpeg_encoded_path, ["pix_fmt"]) | ||
| encoder_metadata = self._get_video_metadata(encoder_output, ["pix_fmt"]) | ||
| # pix_fmt nv12 is stored as yuv420p in metadata | ||
| assert encoder_metadata["pix_fmt"] == "yuv420p" | ||
| assert ffmpeg_metadata["pix_fmt"] == "yuv420p" | ||
|
Comment on lines
-1433
to
-1434
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like we're not assert pix_fmt anymore, which makes it hard to verify that the changes in this PR are correct. IIRC, passing NV12 actually resulted in a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suspect the format changes occur based on codec implementation. By adding back this assertion, I observed a deprecated pixel format
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The new assertions you added are good. But I personally still do not understand why passing NV12 actually ends up being reported yuv420. Until we get a good understanding on that, I think we should refrain from allowing |
||
| metadata_fields = ["pix_fmt", "color_range", "color_space"] | ||
| ffmpeg_metadata = self._get_video_metadata( | ||
| ffmpeg_encoded_path, metadata_fields | ||
| ) | ||
| encoder_metadata = self._get_video_metadata(encoder_output, metadata_fields) | ||
| assert ( | ||
| encoder_metadata["color_range"] | ||
| == ffmpeg_metadata["color_range"] | ||
| == color_range | ||
| ) | ||
| assert ( | ||
| encoder_metadata["color_space"] | ||
| == ffmpeg_metadata["color_space"] | ||
| == color_space | ||
| ) | ||
| if color_range == "pc" and codec != "av1_nvenc": | ||
| # This format represents full range (pc) with yuv420p pixel format | ||
| # It's deprecated, but is set automatically in h264 and hevc NVENC codecs | ||
| expected_pix_fmt = "yuvj420p" | ||
| else: | ||
| # av1_nvenc does not utilize the yuvj420p pixel format | ||
| expected_pix_fmt = "yuv420p" | ||
| assert ( | ||
| encoder_metadata["pix_fmt"] | ||
| == ffmpeg_metadata["pix_fmt"] | ||
| == expected_pix_fmt | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you share how these were derived? What were the original values that were used as reference?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These follow the pattern described in the note in
CudaCommon, I can add a comment referencing that note heretorchcodec/src/torchcodec/_core/CUDACommon.cpp
Lines 43 to 44 in ee8ce04
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The note is about YUV -> RGB so it's not 100% targeted to what the matrices are doing. But yes, add a ref to that note, it's still useful.
You asked me offline whether we should update the note to explain limited range: yes, we should :)
There's a TODO in the note for that, but I never had the chance to do it - and frankly I forgot the underlying logic lol. If you'd like to give it a go, please do it - in a follow-up PR.