diff --git a/Changelog b/Changelog index 2eb785fe44145..4217449438a9d 100644 --- a/Changelog +++ b/Changelog @@ -16,6 +16,8 @@ version : - APV decoder and APV raw bitstream muxing and demuxing - APV parser - APV encoding support through a libopenapv wrapper +- VVC decoder supports all content of SCC (Screen Content Coding): + IBC (Intra Block Copy), Palette Mode and ACT (Adaptive Color Transform) version 7.1: diff --git a/MAINTAINERS b/MAINTAINERS index d1d87752b9193..0fba390938552 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -176,6 +176,7 @@ Codecs: dss_sp.c Oleksij Rempel dv.c Roman Shaposhnik dvbsubdec.c Anshul Maheshwari + dxv.*, dxvenc.* Emma Worley eacmv*, eaidct*, eat* Peter Ross exif.c, exif.h Thilo Borgmann ffv1* [2] Michael Niedermayer diff --git a/Makefile b/Makefile index e2250f6bc6f2c..877b0071f6cc7 100644 --- a/Makefile +++ b/Makefile @@ -19,14 +19,20 @@ vpath %/fate_config.sh.template $(SRC_PATH) TESTTOOLS = audiogen videogen rotozoom tiny_psnr tiny_ssim base64 audiomatch HOSTPROGS := $(TESTTOOLS:%=tests/%) doc/print_options -ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale swresample +ALLFFLIBS = \ + avcodec \ + avdevice \ + avfilter \ + avformat \ + avutil \ + swscale \ + swresample \ # $(FFLIBS-yes) needs to be in linking order FFLIBS-$(CONFIG_AVDEVICE) += avdevice FFLIBS-$(CONFIG_AVFILTER) += avfilter FFLIBS-$(CONFIG_AVFORMAT) += avformat FFLIBS-$(CONFIG_AVCODEC) += avcodec -FFLIBS-$(CONFIG_POSTPROC) += postproc FFLIBS-$(CONFIG_SWRESAMPLE) += swresample FFLIBS-$(CONFIG_SWSCALE) += swscale @@ -104,8 +110,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \ ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \ MMI-OBJS LSX-OBJS LASX-OBJS RV-OBJS RVV-OBJS RVVB-OBJS \ - OBJS SLIBOBJS SHLIBOBJS STLIBOBJS HOSTOBJS TESTOBJS \ - SIMD128-OBJS + OBJS SHLIBOBJS STLIBOBJS HOSTOBJS TESTOBJS SIMD128-OBJS define RESET $(1) := diff --git a/configure b/configure index 0609dac4abc90..89a766b403d55 100755 --- a/configure +++ b/configure @@ -249,7 +249,7 @@ External library support: --enable-liblensfun enable lensfun lens correction [no] --enable-libmodplug enable ModPlug via libmodplug [no] --enable-libmp3lame enable MP3 encoding via libmp3lame [no] - --enable-liboapv enable APV encoding/decoding via liboapv [no] + --enable-liboapv enable APV encoding via liboapv [no] --enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no] --enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no] --enable-libopencv enable video filtering via libopencv [no] @@ -2661,6 +2661,7 @@ CONFIG_EXTRA=" vp56dsp vp8dsp vulkan_encode + vvc_sei wma_freqs wmv2dsp " @@ -2910,6 +2911,7 @@ mpegvideoenc_select="aandcttables fdctdsp me_cmp mpegvideo pixblockdsp" msmpeg4dec_select="h263_decoder" msmpeg4enc_select="h263_encoder" vc1dsp_select="h264chroma qpeldsp startcode" +vvc_sei_select="atsc_a53 golomb" wmv2dsp_select="qpeldsp" # decoders / encoders @@ -3147,7 +3149,7 @@ vp6f_decoder_select="vp6_decoder" vp7_decoder_select="h264pred videodsp vp8dsp" vp8_decoder_select="h264pred videodsp vp8dsp" vp9_decoder_select="videodsp vp9_parser vp9_superframe_split_bsf" -vvc_decoder_select="cabac cbs_h266 golomb videodsp" +vvc_decoder_select="cabac cbs_h266 golomb videodsp vvc_sei" wcmv_decoder_select="inflate_wrapper" webp_decoder_select="vp8_decoder exif" wmalossless_decoder_select="llauddsp" @@ -3747,6 +3749,7 @@ wav_demuxer_select="riffdec" wav_muxer_select="riffenc" webm_chunk_muxer_select="webm_muxer"
webm_dash_manifest_demuxer_select="matroska_demuxer" +whip_muxer_deps_any="dtls_protocol" wtv_demuxer_select="mpegts_demuxer riffdec" wtv_muxer_select="mpegts_muxer riffenc" xmv_demuxer_select="riffdec" @@ -3845,6 +3848,9 @@ srtp_protocol_select="rtp_protocol srtp" tcp_protocol_select="network" tls_protocol_deps_any="gnutls openssl schannel securetransport libtls mbedtls" tls_protocol_select="tcp_protocol" +# TODO: Support libtls, mbedtls, and gnutls. +dtls_protocol_deps_any="openssl" +dtls_protocol_select="udp_protocol" udp_protocol_select="network" udplite_protocol_select="network" unix_protocol_deps="sys_un_h" @@ -3879,6 +3885,7 @@ ass_filter_deps="libass" avgblur_opencl_filter_deps="opencl" avgblur_vulkan_filter_deps="vulkan spirv_compiler" azmq_filter_deps="libzmq" +blackdetect_vulkan_filter_deps="vulkan spirv_compiler" blackframe_filter_deps="gpl" blend_vulkan_filter_deps="vulkan spirv_compiler" boxblur_filter_deps="gpl" @@ -3986,6 +3993,7 @@ vpp_amf_filter_deps="amf" scale_qsv_filter_deps="libmfx" scale_qsv_filter_select="qsvvpp" scdet_filter_select="scene_sad" +scdet_vulkan_filter_deps="vulkan spirv_compiler" select_filter_select="scene_sad" sharpness_vaapi_filter_deps="vaapi" showcqt_filter_deps="avformat swscale" @@ -4159,6 +4167,8 @@ if test "$target_os_default" = aix; then arch_default=$(uname -p) strip_default="strip -X32_64" nm_default="nm -g -X32_64" +elif test "$MSYSTEM_CARCH" != ""; then + arch_default="$MSYSTEM_CARCH" else arch_default=$(uname -m) fi @@ -5293,7 +5303,7 @@ case "$arch" in arm*|iPad*|iPhone*) arch="arm" ;; - loongarch*) + loongarch*|loong64) arch="loongarch" ;; mips*|IP*) @@ -7192,6 +7202,14 @@ enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/r } enabled vapoursynth && require_headers "vapoursynth/VSScript4.h vapoursynth/VapourSynth4.h" +enabled openssl && { + enabled whip_muxer && { + $pkg_config --exists --print-errors "openssl >= 1.0.1k" || + require_pkg_config openssl "openssl >= 1.0.1k" openssl/ssl.h SSL_library_init || + require_pkg_config openssl "openssl >= 1.0.1k" openssl/ssl.h OPENSSL_init_ssl + } +} + if enabled gcrypt; then GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" diff --git a/doc/APIchanges b/doc/APIchanges index d0869561f39db..91710bb27d6ff 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -2,6 +2,9 @@ The last version increases of all libraries were on 2025-03-28 API changes, most recent first: +2025-05-21 - xxxxxxxxxx - lavu 60.3.100 - avassert.h + Add av_unreachable() and av_assume() macros. + 2025-02-xx - xxxxxxxxxx - lavfi 10.10.100 - avfilter.h Add avfilter_link_get_hw_frames_ctx(). 
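As a rough sketch of how the new avassert.h additions listed above might be used. The argument forms shown here are assumptions only (a short explanatory message for av_unreachable(), a boolean condition for av_assume()); consult libavutil/avassert.h for the exact macro definitions.

    #include <stdint.h>
    #include "libavutil/avassert.h"

    /* Hypothetical helper: the default branch cannot be taken because the
     * caller validated fmt, so state that for readers and for the compiler
     * (the macro is expected to count as unreachable, hence no return here). */
    static int bytes_per_sample(int fmt)
    {
        switch (fmt) {
        case 0:  return 1;
        case 1:  return 2;
        case 2:  return 4;
        default: av_unreachable("fmt was validated by the caller");
        }
    }

    static void halve(uint8_t *dst, const uint8_t *src, int len)
    {
        /* Let the optimizer assume len is a positive multiple of 4, so the
         * loop can be vectorized without a scalar tail. */
        av_assume(len > 0 && (len & 3) == 0);
        for (int i = 0; i < len; i++)
            dst[i] = src[i] / 2;
    }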
diff --git a/doc/examples/filter_audio.c b/doc/examples/filter_audio.c index 8b237e2adf3b6..02222f591417c 100644 --- a/doc/examples/filter_audio.c +++ b/doc/examples/filter_audio.c @@ -270,7 +270,6 @@ int main(int argc, char *argv[]) AVFilterGraph *graph; AVFilterContext *src, *sink; AVFrame *frame; - uint8_t errstr[1024]; float duration; int err, nb_frames, i; @@ -354,7 +353,6 @@ int main(int argc, char *argv[]) return 0; fail: - av_strerror(err, errstr, sizeof(errstr)); - fprintf(stderr, "%s\n", errstr); + fprintf(stderr, "%s\n", av_err2str(err)); return 1; } diff --git a/doc/examples/qsv_decode.c b/doc/examples/qsv_decode.c index 5a6f3625aa6ee..ec91109480835 100644 --- a/doc/examples/qsv_decode.c +++ b/doc/examples/qsv_decode.c @@ -219,11 +219,8 @@ int main(int argc, char **argv) ret = decode_packet(decoder_ctx, frame, sw_frame, NULL, output_ctx); finish: - if (ret < 0) { - char buf[1024]; - av_strerror(ret, buf, sizeof(buf)); - fprintf(stderr, "%s\n", buf); - } + if (ret < 0) + fprintf(stderr, "%s\n", av_err2str(ret)); avformat_close_input(&input_ctx); diff --git a/doc/filters.texi b/doc/filters.texi index 679b71f29065c..63f55f5794e70 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -8634,7 +8634,7 @@ Filter out noisy pixels from @code{bitplane} set above. Default is disabled. @end table -@section blackdetect +@section blackdetect, blackdetect_vulkan Detect video intervals that are (almost) completely black. Can be useful to detect chapter transitions, commercials, or invalid @@ -8687,6 +8687,12 @@ the input video format, the range is [0-255] for YUV full-range formats and [16-235] for YUV non full-range formats. Default value is 0.10. + +@item alpha +If true, check the alpha channel instead of the luma channel. Detects frames +which are (almost) transparent, instead of frames which are almost black. + +Default value is disabled. @end table The following example sets the maximum pixel threshold to the minimum @@ -16259,6 +16265,16 @@ and @code{(oh-ph)/2}. Set the output placement width/height expressions, default values are @code{ow} and @code{oh}. +@item rotate +Rotate the input frame clockwise by the specified angle. + +@table @samp +@item 0, 360 +@item 90 +@item 180 +@item 270 +@end table + @item fps Set the output frame rate. This can be rational, e.g. @code{60000/1001}. If set to the special string @code{none} (the default), input timestamps will @@ -16311,6 +16327,18 @@ to double the input image resolution: -vf "libplacebo=w=iw*2:h=ih*2:extra_opts='upscaler=custom\:upscaler_preset=ewa_lanczos\:upscaler_blur=0.9812505644269356'" @end example + +@item shader_cache +File path of a cache directory that libplacebo will use to store and load +cached shader objects. This cache is not cleaned up automatically. If the +path does not end in a directory separator, the generated filenames will be +effectively prefixed by the last path component. All directories must already +exist. 
+ +@example +-vf "libplacebo=shader_cache=/tmp/pl-shader-" +@end example + @item colorspace @item color_primaries @item color_trc diff --git a/doc/htmlxref.cnf b/doc/htmlxref.cnf new file mode 100644 index 0000000000000..079c848a651a6 --- /dev/null +++ b/doc/htmlxref.cnf @@ -0,0 +1,6 @@ +ffmpeg mono ./ffmpeg.html +ffmpeg-filters mono ./ffmpeg-filters.html +ffmpeg-formats mono ./ffmpeg-formats.html +ffmpeg-resampler mono ./ffmpeg-resampler.html +ffmpeg-scaler mono ./ffmpeg-scaler.html +ffmpeg-utils mono ./ffmpeg-utils.html diff --git a/doc/muxers.texi b/doc/muxers.texi index 04b7f20b7e8cb..30c95c3d34e3b 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -3879,4 +3879,51 @@ ffmpeg -f webm_dash_manifest -i video1.webm \ manifest.xml @end example +@anchor{whip} +@section whip + +WebRTC (Real-Time Communication) muxer that supports sub-second latency streaming according to +the WHIP (WebRTC-HTTP ingestion protocol) specification. + +It uses HTTP as a signaling protocol to exchange SDP capabilities and ICE lite candidates. Then, +it uses STUN binding requests and responses to establish a session over UDP. Subsequently, it +initiates a DTLS handshake to exchange the SRTP encryption keys. Lastly, it splits video and +audio frames into RTP packets and encrypts them using SRTP. + +Ensure that you use H.264 without B frames and Opus for the audio codec. For example, to convert +an input file with @command{ffmpeg} to WebRTC: +@example +ffmpeg -re -i input.mp4 -acodec libopus -ar 48000 -ac 2 \ + -vcodec libx264 -profile:v baseline -tune zerolatency -threads 1 -bf 0 \ + -f whip "http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream" +@end example + +For this example, we have employed low latency options, resulting in an end-to-end latency of +approximately 150ms. + +@subsection Options + +This muxer supports the following options: + +@table @option + +@item handshake_timeout @var{integer} +Set the timeout in milliseconds for ICE and DTLS handshake. +Default value is 5000. + +@item pkt_size @var{integer} +Set the maximum size, in bytes, of RTP packets that send out. +Default value is 1500. + +@item authorization @var{string} +The optional Bearer token for WHIP Authorization. + +@item cert_file @var{string} +The optional certificate file path for DTLS. + +@item key_file @var{string} +The optional private key file path for DTLS. + +@end table + @c man end MUXERS diff --git a/doc/swscale-v2.txt b/doc/swscale-v2.txt new file mode 100644 index 0000000000000..3ae2b27036d50 --- /dev/null +++ b/doc/swscale-v2.txt @@ -0,0 +1,344 @@ +New swscale design to change everything (tm) +============================================ + +SwsGraph +-------- + +The entry point to the new architecture, SwsGraph is what coordinates +multiple "passes". These can include cascaded scaling passes, error diffusion +dithering, and so on. Or we could have separate passes for the vertical and +horizontal scaling. In between each SwsPass lies a fully allocated image buffer. +Graph passes may have different levels of threading, e.g. we can have a single +threaded error diffusion pass following a multi-threaded scaling pass. + +SwsGraph is internally recreated whenever the image format, dimensions or +settings change in any way. sws_scale_frame() is itself just a light-weight +wrapper that runs ff_sws_graph_create() whenever the format changes, splits +interlaced images into separate fields, and calls ff_sws_graph_run() on each. + +From the point of view of SwsGraph itself, all inputs are progressive. 
+ +SwsOp / SwsOpList +----------------- + +This is the newly introduced abstraction layer between the high-level format +handling logic and the low-level backing implementation. Each SwsOp is designed +to be as small and atomic as possible, with the possible exception of the +read / write operations due to their numerous variants. + +The basic idea is to split logic between three major components: + +1. The high-level format "business logic", which generates in a very + naive way a sequence of operations guaranteed to get you from point A + to point B. This logic is written with correctness in mind only, and + ignoring any performance concerns or low-level implementation decisions. + Semantically, everything is always decoded from the input format to + normalized (real valued) RGB, and then encoded back to output format. + + This code lives in libswscale/format.c + +2. The optimizer. This is where the "magic" happens, so to speak. The + optimizer's job is to take the abstract sequence of operations + produced by the high-level format analysis code and incrementally + optimize it. Each optimization step is designed to be minute and provably + lossless, or otherwise guarded behind the BITEXACT flag. This ensures that + the resulting output is always identical, no matter how many layers of + optimization we add. + + This code lives in libswscale/ops.c + +3. The compiler. Once we have a sequence of operations as output by the + optimizer, we "compile" this down to a callable function. This is then + applied by the dispatch wrapper by striping it over the input image. + + See libswscale/ops_backend.c for the reference backend, or + libswscale/x86/ops.c for a more complex SIMD example. + +This overall approach has a considerable number of benefits: + +1. It allows us to verify correctness of logic and spot semantic errors at a + very high level, by simply looking at the sequence of operations (available + by default at debug / verbose log level), without having to dig through the + multiple levels of complicated, interwoven format handling code that is + legacy swscale. + +2. Because most of the intelligence lives inside the powerful optimizer, we get + fast paths "for free" for any suitable format conversion, rather than having + to enumerate them one by one. SIMD code itself can be written in a very + general way and does not need to be tied to specific pixel formats - subsequent + low-level implementations can be strung together without much overhead. + +3. We can in the future, with relative ease, compile these operations + down to SPIR-V (or even LLVM IR) and generate efficient GPU or + target-machine specific implementations. This also opens the door to + adding hardware frame support to libswscale, and even transparently using + GPU acceleration for CPU frames. + +4. Platform-specific SIMD can be reduced down to a comparatively small set of + optimized routines, while still providing 100% coverage for all possible + pixel formats and operations. (As of writing, the x86 example backend has + about 60 unique implementations, of which 20 are trivial swizzles, 10 are + read/write ops, 10 are pixel type conversions and the remaining 20 are the + various logic/arithmetic ops). + +5. Backends hide behind a layer of abstraction offering them a considerable + degree of flexibility in how they want to implement their operations. For + example, the x86 backend has a dedicated function for compiling compatible + operations down to a single in-place pshufb instruction.
+ + Platform specific low level data is self-contained within its own setup() + function and private data structure, eliminating all reads into SwsContext + or the possibility of conflicts between platforms. + +6. We can compute an exact reference result for each operation with fixed + precision (ff_sws_op_apply_q), and use that to e.g. measure the amount of + error introduced by dithering, or even catch bugs in the reference C + implementation. (In theory - currently checkasm just compares against C) + +Examples of SwsOp in action +--------------------------- + +For illustration, here is the sequence of operations currently generated by +my prototype, for a conversion from RGB24 to YUV444P: + +Unoptimized operation list: + [ u8 .... -> ....] SWS_OP_READ : 3 elem(s) packed >> 0 + [ u8 .... -> ....] SWS_OP_SWIZZLE : 0123 + [ u8 .... -> ....] SWS_OP_RSHIFT : >> 0 + [ u8 .... -> ....] SWS_OP_CLEAR : {_ _ _ 0} + [ u8 .... -> ....] SWS_OP_CONVERT : u8 -> f32 + [f32 .... -> ....] SWS_OP_LINEAR : diag3+alpha [[1/255 0 0 0 0] [0 1/255 0 0 0] [0 0 1/255 0 0] [0 0 0 1 1]] + [f32 .... -> ....] SWS_OP_LINEAR : matrix3 [[0.299000 0.587000 0.114000 0 0] [-0.168736 -0.331264 1/2 0 0] [1/2 -0.418688 -57/701 0 0] [0 0 0 1 0]] + [f32 .... -> ....] SWS_OP_LINEAR : diag3+off3 [[219 0 0 0 16] [0 224 0 0 128] [0 0 224 0 128] [0 0 0 1 0]] + [f32 .... -> ....] SWS_OP_DITHER : 16x16 matrix + [f32 .... -> ....] SWS_OP_MAX : {0 0 0 0} <= x + [f32 .... -> ....] SWS_OP_MIN : x <= {255 255 255 _} + [f32 .... -> ....] SWS_OP_CONVERT : f32 -> u8 + [ u8 .... -> ....] SWS_OP_LSHIFT : << 0 + [ u8 .... -> ....] SWS_OP_SWIZZLE : 0123 + [ u8 .... -> ....] SWS_OP_WRITE : 3 elem(s) planar >> 0 + +This is optimized into the following sequence: + +Optimized operation list: + [ u8 XXXX -> +++X] SWS_OP_READ : 3 elem(s) packed >> 0 + [ u8 ...X -> +++X] SWS_OP_CONVERT : u8 -> f32 + [f32 ...X -> ...X] SWS_OP_LINEAR : matrix3+off3 [[0.256788 0.504129 0.097906 0 16] [-0.148223 -0.290993 112/255 0 128] [112/255 -0.367788 -0.071427 0 128] [0 0 0 1 0]] + [f32 ...X -> ...X] SWS_OP_DITHER : 16x16 matrix + [f32 ...X -> +++X] SWS_OP_CONVERT : f32 -> u8 + [ u8 ...X -> +++X] SWS_OP_WRITE : 3 elem(s) planar >> 0 + (X = unused, + = exact, 0 = zero) + +The extra metadata on the left of the operation list is just a dump of the +internal state used by the optimizer during optimization. It keeps track of +knowledge about the pixel values, such as their value range, whether or not +they're exact integers, and so on. + +In this example, you can see that the input values are exact (except for +the alpha channel, which is undefined), until the first SWS_OP_LINEAR +multiplies them by a noninteger constant. They regain their exact integer +status only after the (truncating) conversion to U8 in the output step. + +Example of more aggressive optimization +--------------------------------------- + +Conversion pass for gray -> rgb48: +Unoptimized operation list: + [ u8 .... -> ....] SWS_OP_READ : 1 elem(s) planar >> 0 + [ u8 .... -> ....] SWS_OP_SWIZZLE : 0123 + [ u8 .... -> ....] SWS_OP_RSHIFT : >> 0 + [ u8 .... -> ....] SWS_OP_CLEAR : {_ 0 0 0} + [ u8 .... -> ....] SWS_OP_CONVERT : u8 -> f32 + [f32 .... -> ....] SWS_OP_LINEAR : luma+alpha [[1/255 0 0 0 0] [0 1 0 0 0] [0 0 1 0 0] [0 0 0 1 1]] + [f32 .... -> ....] SWS_OP_LINEAR : matrix3 [[1 0 701/500 0 0] [1 -0.344136 -0.714136 0 0] [1 443/250 0 0 0] [0 0 0 1 0]] + [f32 .... -> ....] SWS_OP_LINEAR : diag3 [[65535 0 0 0 0] [0 65535 0 0 0] [0 0 65535 0 0] [0 0 0 1 0]] + [f32 .... -> ....] 
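To make the semantics of the optimized list concrete, here is a plain-C rendering of what those operations do for a single pixel. This is only an illustrative sketch: the real backends process whole blocks, apply the 16x16 dither matrix (omitted here), and are generated by the compiler described later in this document rather than written by hand.

    #include <stdint.h>

    /* One pixel of the optimized RGB24 -> YUV444P chain shown above:
     * READ (3 packed u8) -> CONVERT u8->f32 -> LINEAR matrix3+off3
     * -> DITHER (skipped here) -> CONVERT f32->u8 (truncating) -> WRITE (planar) */
    static void rgb24_pixel_to_yuv(const uint8_t rgb[3],
                                   uint8_t *y, uint8_t *u, uint8_t *v)
    {
        /* SWS_OP_CONVERT: u8 -> f32 */
        float r = rgb[0], g = rgb[1], b = rgb[2];

        /* SWS_OP_LINEAR, matrix3+off3: the folded BT.601 limited-range matrix
         * from the optimized list, with the 16/128/128 offsets merged in. */
        float yf =  0.256788f * r + 0.504129f * g + 0.097906f * b +  16.0f;
        float uf = -0.148223f * r - 0.290993f * g + (112.0f/255.0f) * b + 128.0f;
        float vf =  (112.0f/255.0f) * r - 0.367788f * g - 0.071427f * b + 128.0f;

        /* SWS_OP_CONVERT: f32 -> u8 (truncating); the MAX/MIN clamps from the
         * unoptimized list are gone because the matrix output already stays
         * within [0,255] for 8-bit input. */
        *y = (uint8_t)yf;
        *u = (uint8_t)uf;
        *v = (uint8_t)vf;
    }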
SWS_OP_MAX : {0 0 0 0} <= x + [f32 .... -> ....] SWS_OP_MIN : x <= {65535 65535 65535 _} + [f32 .... -> ....] SWS_OP_CONVERT : f32 -> u16 + [u16 .... -> ....] SWS_OP_LSHIFT : << 0 + [u16 .... -> ....] SWS_OP_SWIZZLE : 0123 + [u16 .... -> ....] SWS_OP_WRITE : 3 elem(s) packed >> 0 + +Optimized operation list: + [ u8 XXXX -> +XXX] SWS_OP_READ : 1 elem(s) planar >> 0 + [ u8 .XXX -> +XXX] SWS_OP_CONVERT : u8 -> u16 (expand) + [u16 .XXX -> +++X] SWS_OP_SWIZZLE : 0003 + [u16 ...X -> +++X] SWS_OP_WRITE : 3 elem(s) packed >> 0 + (X = unused, + = exact, 0 = zero) + +Here, the optimizer has managed to eliminate all of the unnecessary linear +operations on previously zero'd values, turn the resulting column matrix into +a swizzle operation, avoid the unnecessary dither (and round trip via float) +because the pixel values are guaranteed to be bit exact, and finally, turns +the multiplication by 65535 / 255 = 257 into a simple integer expand operation. + +As a final bonus, the x86 backend further optimizes this into a 12-byte shuffle: + pshufb = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1} + +time=208 us, ref=4212 us, speedup=20.236x faster (single thread) +time=57 us, ref=472 us, speedup=8.160x faster (multi thread) + +Compiler and underlying implementation layer (SwsOpChain) +--------------------------------------------------------- + +While the backend API is flexible enough to permit more exotic implementations +(e.g. using JIT code generation), we establish a common set of helpers for use +in "traditional" SIMD implementations. + +The basic idea is to have one "kernel" (or implementation) per operation, +and then just chain a list of these kernels together as separate function +calls. For best performance, we want to keep data in vector registers in +between function calls using a custom calling convention, thus avoiding any +unnecessary memory accesses. Additionally, we want the per-kernel overhead to +be as low as possible, with each kernel ideally just jumping directly into +the next kernel. + +As a result, we arrive at a design where we first divide the image into small +chunks, or "blocks", and then dispatch the "chain" of kernels on each chunk in +sequence. Each kernel processes a fixed number of pixels, with the overall +entry point taking care of looping. Remaining pixels (the "tail") are handled +generically by the backend-invariant dispatch code (located in ops.c), using a +partial memcpy into a suitably sized temporary buffer. + +To minimize the per-kernel function call overhead, we use a "continuation +passing style" for chaining kernels. Each operation computes its result and +then directly calls the next operation in the sequence, with the appropriate +internal function signature. 
+ +The C reference backend reads data into the stack and then passes the array +pointers to the next continuation as regular function arguments: + + void process(GlobalContext *ctx, OpContext *op, + block_t x, block_t y, block_t z, block_t w) + { + for (int i = 0; i < SWS_BLOCK_SIZE; i++) + // do something with x[i], y[i], z[i], w[i] + + op->next(ctx, &op[1], x, y, z, w); + } + +With type conversions pushing the new data onto the stack as well: + + void convert8to16(GlobalContext *ctx, OpContext *op, + block_t x, block_t y, block_t z, block_t w) + { + /* Pseudo-code */ + u16block_t x16 = (u16block_t) x; + u16block_t y16 = (u16block_t) y; + u16block_t z16 = (u16block_t) z; + u16block_t w16 = (u16block_t) w; + + op->next(ctx, &op[1], x16, y16, z16, w16); + } + +By contrast, the x86 backend always keeps the X/Y/Z/W values pinned in specific +vector registers (ymm0-ymm3 for the lower half, and ymm4-ymm7 for the second +half). + +Each kernel additionally has access to a 32 byte per-op context storing the +pointer to the next kernel plus 16 bytes of arbitrary private data. This is +used during construction of the function chain to place things like small +constants. + +In assembly, the per-kernel overhead looks like this: + + load $tmp, $arg1 + ... + add $arg1, 32 + jump $tmp + +This design gives vastly better performance than the alternative of returning +out to a central loop or "trampoline". This is partly because the order of +kernels within a chain is always the same, so the branch predictor can easily +remember the target address of each "jump" instruction. + +The only way to realistically improve on this design would be to directly +stitch the kernel body together using runtime code generation. + +Future considerations and limitations +------------------------------------- + +My current prototype has a number of severe limitations and opportunities +for improvements: + +1. It does not handle scaling at all. I am not yet entirely sure on how I want + to handle scaling; this includes handling of subsampled content. I have a + number of vague ideas in my head, but nothing where I can say with certainty + that it will work out well. + + It's possible that we won't come up with a perfect solution here, and will + need to decide on which set of compromises we are comfortable accepting: + + 1. Do we need the ability to scale YUV -> YUV by handling luma and chroma + independently? When downscaling 100x100 4:2:0 to 50x50 4:4:4, should we + support the option of reusing the chroma plane directly (even though + this would introduce a subpixel shift for typical chroma siting)? + + Looking towards zimg, I am also thinking that we probably also want to do + scaling on floating point values, since this is best for both performance + and accuracy, especially given that we need to go up to 32-bit intermediates + during scaling anyway. + + So far, the most promising approach seems to be to handle subsampled + input/output as a dedicated read/write operation type; perhaps even with a + fixed/static subsampling kernel. To avoid compromising on performance when + chroma resampling is not necessary, the optimizer could then relax the + pipeline to use non-interpolating read/writes when all intermediate + operations are component-independent. + +2. Since each operation is conceptually defined on 4-component pixels, we end + up defining a lot of variants of each implementation for each possible + *subset*. 
For example, we have four different implementations for + SWS_OP_SCALE in my current templates: + - op_scale_1000 + - op_scale_1001 + - op_scale_1110 + - op_scale_1111 + + This reflects the four different arrangements of pixel components that are + typically present (or absent). While best for performance, it does turn into + a bit of a chore when implementing these kernels. + + The only real alternative would be to either branch inside the kernel (bad), + or to use separate kernels for each individual component and chain them all + together. I have not yet tested whether the latter approach would be faster + after the latest round of refactors to the kernel glue code. + +3. I do not yet have any support for LUTs. But when I add them, something we + could do is have the optimizer pass automatically "promote" a sequence of + operations to LUTs. For example, any sequence that looks like: + + 1. [u8] SWS_OP_CONVERT -> X + 2. [X] ... // only per-component operations + 3. [X] SWS_OP_CONVERT -> Y + 4. [Y] SWS_OP_WRITE + + could be replaced by a LUT with 256 entries. This is especially important + for anything involving packed 8-bit input (e.g. rgb8, rgb4_byte). + + We also definitely want to hook this up to the existing CMS code for + transformations between different primaries. + +4. Because we rely on AVRational math to generate the coefficients for + operations, we need to be able to represent all pixel values as an + AVRational. However, this presents a challenge for 32-bit formats (e.g. + GRAY32, RGBA128), because their value range exceeds INT_MAX, which is the maximum + value representable by an AVRational. + + It's possible we may want to introduce an AVRational64 for this, or + perhaps more flexibly, extend AVRational to an AVFloating type which is + represented as { AVRational n; int exp; }, representing n * 2^exp. This + would preserve our ability to represent all pixel values exactly, while + opening up the range arbitrarily. + +5. Is there ever a situation where the use of floats introduces the risk of + non bit-exact output? For this reason, and possible performance advantages, + we may want to explore the use of a fixed-point 16 bit path as an alternative + to the floating point math. + + So far, I have managed to avoid any bit exactness issues inside the x86 + backend by ensuring that the order of linear operations is identical + between the C backend and the x86 backend, but this may not be practical + to guarantee on all backends. The x86 float code is also dramatically + faster than the old fixed point code, so I'm tentatively optimistic about + the lack of a need for a fixed point path.
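As a closing illustration of the kernel-chaining scheme from the compiler section above, here is a self-contained toy model of the continuation-passing dispatch. All names are invented for the sketch; the real backends pass four component blocks (X/Y/Z/W) plus a GlobalContext rather than a single byte array, and keep the data in vector registers between kernels.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define BLOCK_SIZE 16

    typedef struct OpCtx OpCtx;
    typedef void (*KernelFn)(const OpCtx *op, uint8_t block[BLOCK_SIZE]);

    /* Per-op slot, mirroring the 32-byte context described above: a pointer
     * to the *next* kernel plus a little private data for this kernel. */
    struct OpCtx {
        KernelFn next;
        uint8_t  priv[16];
    };

    static void op_add(const OpCtx *op, uint8_t block[BLOCK_SIZE])
    {
        for (int i = 0; i < BLOCK_SIZE; i++)
            block[i] += op->priv[0];
        op->next(&op[1], block);       /* tail-call the continuation */
    }

    static void op_shl(const OpCtx *op, uint8_t block[BLOCK_SIZE])
    {
        for (int i = 0; i < BLOCK_SIZE; i++)
            block[i] <<= op->priv[0];
        op->next(&op[1], block);
    }

    static void op_write(const OpCtx *op, uint8_t block[BLOCK_SIZE])
    {
        (void)op;                      /* end of the chain: no continuation */
        fwrite(block, 1, BLOCK_SIZE, stdout);
    }

    int main(void)
    {
        uint8_t block[BLOCK_SIZE];
        memset(block, 3, sizeof(block));

        /* "Compiled" chain: add 1, shift left by 2, write. Slot i carries the
         * private data of kernel i and the address of kernel i+1. */
        const OpCtx chain[] = {
            { op_shl,   { 1 } },   /* op_add: +1, continue with op_shl    */
            { op_write, { 2 } },   /* op_shl: <<2, continue with op_write */
            { NULL,     { 0 } },   /* op_write's slot                     */
        };

        op_add(&chain[0], block);  /* the dispatcher enters the first kernel */
        return 0;
    }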
diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 0e1eb1f62bcbe..ddf48923ea735 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -140,9 +140,9 @@ else endif # 1) Preprocess CSS to a minified version +%.css.min: TAG = SED %.css.min: %.css - # Must start with a tab in the real Makefile - sed 's!/\\*.*\\*/!!g' $< \ + $(M)sed 's!/\\*.*\\*/!!g' $< \ | tr '\n' ' ' \ | tr -s ' ' \ | sed 's/^ //; s/ $$//' \ @@ -151,6 +151,7 @@ endif ifdef CONFIG_RESOURCE_COMPRESSION # 2) Gzip the minified CSS +%.css.min.gz: TAG = GZIP %.css.min.gz: %.css.min $(M)gzip -nc9 $< > $@ @@ -159,6 +160,7 @@ ifdef CONFIG_RESOURCE_COMPRESSION $(BIN2C) $< $@ $(subst .,_,$(basename $(notdir $@))) # 4) Gzip the HTML file (no minification needed) +%.html.gz: TAG = GZIP %.html.gz: %.html $(M)gzip -nc9 $< > $@ @@ -197,7 +199,6 @@ endif include $(SRC_PATH)/ffbuild/arch.mak OBJS += $(OBJS-yes) -SLIBOBJS += $(SLIBOBJS-yes) SHLIBOBJS += $(SHLIBOBJS-yes) STLIBOBJS += $(STLIBOBJS-yes) FFLIBS := $($(NAME)_FFLIBS) $(FFLIBS-yes) $(FFLIBS) @@ -207,7 +208,6 @@ LDLIBS = $(FFLIBS:%=%$(BUILDSUF)) FFEXTRALIBS := $(LDLIBS:%=$(LD_LIB)) $(foreach lib,EXTRALIBS-$(NAME) $(FFLIBS:%=EXTRALIBS-%),$($(lib))) $(EXTRALIBS) OBJS := $(sort $(OBJS:%=$(SUBDIR)%)) -SLIBOBJS := $(sort $(SLIBOBJS:%=$(SUBDIR)%)) SHLIBOBJS := $(sort $(SHLIBOBJS:%=$(SUBDIR)%)) STLIBOBJS := $(sort $(STLIBOBJS:%=$(SUBDIR)%)) TESTOBJS := $(TESTOBJS:%=$(SUBDIR)tests/%) $(TESTPROGS:%=$(SUBDIR)tests/%.o) @@ -245,13 +245,12 @@ $(HOSTPROGS): %$(HOSTEXESUF): %.o $(OBJS): | $(sort $(dir $(OBJS))) $(HOBJS): | $(sort $(dir $(HOBJS))) $(HOSTOBJS): | $(sort $(dir $(HOSTOBJS))) -$(SLIBOBJS): | $(sort $(dir $(SLIBOBJS))) $(SHLIBOBJS): | $(sort $(dir $(SHLIBOBJS))) $(STLIBOBJS): | $(sort $(dir $(STLIBOBJS))) $(TESTOBJS): | $(sort $(dir $(TESTOBJS))) $(TOOLOBJS): | tools -OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS)) +OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS)) CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.ver *.version *.html.gz *.html.c *.css.gz *.css.c *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a @@ -263,4 +262,4 @@ endef $(eval $(RULES)) --include $(wildcard $(OBJS:.o=.d) $(HOSTOBJS:.o=.d) $(TESTOBJS:.o=.d) $(HOBJS:.o=.d) $(SHLIBOBJS:.o=.d) $(STLIBOBJS:.o=.d) $(SLIBOBJS:.o=.d)) $(OBJS:.o=$(DEFAULT_X86ASMD).d) +-include $(wildcard $(OBJS:.o=.d) $(HOSTOBJS:.o=.d) $(TESTOBJS:.o=.d) $(HOBJS:.o=.d) $(SHLIBOBJS:.o=.d) $(STLIBOBJS:.o=.d)) $(OBJS:.o=$(DEFAULT_X86ASMD).d) diff --git a/ffbuild/library.mak b/ffbuild/library.mak index 288c82a177041..569708c73b859 100644 --- a/ffbuild/library.mak +++ b/ffbuild/library.mak @@ -70,7 +70,7 @@ $(SUBDIR)lib$(NAME).ver: $(SUBDIR)lib$(NAME).v $(OBJS) $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(Q)cd ./$(SUBDIR) && $(LN_S) $(SLIBNAME_WITH_MAJOR) $(SLIBNAME) -$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver +$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SUBDIR)lib$(NAME).ver $(SLIB_CREATE_DEF_CMD) ifeq ($(RESPONSE_FILES),yes) $(Q)echo $$(filter %.o,$$^) > $$@.objs diff --git a/fftools/Makefile b/fftools/Makefile index 361a4fd574f97..b3c08ae5a0e8b 100644 --- a/fftools/Makefile +++ b/fftools/Makefile @@ -49,7 +49,6 @@ OBJS-ffprobe += \ fftools/textformat/tw_avio.o \ fftools/textformat/tw_buffer.o \ fftools/textformat/tw_stdout.o \ - $(OBJS-resman) \ OBJS-ffplay += 
fftools/ffplay_renderer.o @@ -93,4 +92,4 @@ uninstall-progs: $(RM) $(addprefix "$(BINDIR)/", $(ALLAVPROGS)) clean:: - $(RM) $(ALLAVPROGS) $(ALLAVPROGS_G) $(CLEANSUFFIXES:%=fftools/%) + $(RM) $(ALLAVPROGS) $(ALLAVPROGS_G) $(CLEANSUFFIXES:%=fftools/%) $(CLEANSUFFIXES:%=fftools/graph/%) $(CLEANSUFFIXES:%=fftools/textformat/%) diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c index 964770df23238..de607cac931b8 100644 --- a/fftools/ffmpeg.c +++ b/fftools/ffmpeg.c @@ -309,7 +309,7 @@ const AVIOInterruptCB int_cb = { decode_interrupt_cb, NULL }; static void ffmpeg_cleanup(int ret) { - if (print_graphs || print_graphs_file) + if ((print_graphs || print_graphs_file) && nb_output_files > 0) print_filtergraphs(filtergraphs, nb_filtergraphs, input_files, nb_input_files, output_files, nb_output_files); if (do_benchmark) { @@ -344,6 +344,9 @@ static void ffmpeg_cleanup(int ret) av_freep(&filter_nbthreads); + av_freep(&print_graphs_file); + av_freep(&print_graphs_format); + av_freep(&input_files); av_freep(&output_files); diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h index 7fbf0ad5326d6..7868f3d85ffc1 100644 --- a/fftools/ffmpeg.h +++ b/fftools/ffmpeg.h @@ -39,6 +39,7 @@ #include "libavfilter/avfilter.h" #include "libavutil/avutil.h" +#include "libavutil/bprint.h" #include "libavutil/dict.h" #include "libavutil/eval.h" #include "libavutil/fifo.h" @@ -352,6 +353,18 @@ typedef struct OutputFilterOptions { typedef struct InputFilter { struct FilterGraph *graph; uint8_t *name; + int index; + + // filter data type + enum AVMediaType type; + + AVFilterContext *filter; + + char *input_name; + + /* for filters that are not yet bound to an input stream, + * this stores the input linklabel, if any */ + uint8_t *linklabel; } InputFilter; typedef struct OutputFilter { @@ -359,6 +372,11 @@ typedef struct OutputFilter { struct FilterGraph *graph; uint8_t *name; + int index; + + AVFilterContext *filter; + + char *output_name; /* for filters that are not yet bound to an output stream, * this stores the output linklabel, if any */ @@ -381,6 +399,9 @@ typedef struct FilterGraph { int nb_inputs; OutputFilter **outputs; int nb_outputs; + + const char *graph_desc; + struct AVBPrint graph_print_buf; } FilterGraph; enum DecoderFlags { diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c index b774606562788..e0c40ffe00a93 100644 --- a/fftools/ffmpeg_filter.c +++ b/fftools/ffmpeg_filter.c @@ -21,7 +21,6 @@ #include #include "ffmpeg.h" -#include "ffmpeg_filter.h" #include "graph/graphprint.h" #include "libavfilter/avfilter.h" @@ -44,6 +43,42 @@ // FIXME private header, used for mid_pred() #include "libavcodec/mathops.h" +typedef struct FilterGraphPriv { + FilterGraph fg; + + // name used for logging + char log_name[32]; + + int is_simple; + // true when the filtergraph contains only meta filters + // that do not modify the frame data + int is_meta; + // source filters are present in the graph + int have_sources; + int disable_conversions; + + unsigned nb_outputs_done; + + int nb_threads; + + // frame for temporarily holding output from the filtergraph + AVFrame *frame; + // frame for sending output to the encoder + AVFrame *frame_enc; + + Scheduler *sch; + unsigned sch_idx; +} FilterGraphPriv; + +static FilterGraphPriv *fgp_from_fg(FilterGraph *fg) +{ + return (FilterGraphPriv*)fg; +} + +static const FilterGraphPriv *cfgp_from_cfg(const FilterGraph *fg) +{ + return (const FilterGraphPriv*)fg; +} // data that is local to the filter thread and not visible outside of it typedef struct FilterGraphThread { @@ -66,6 
+101,141 @@ typedef struct FilterGraphThread { uint8_t *eof_out; } FilterGraphThread; +typedef struct InputFilterPriv { + InputFilter ifilter; + + InputFilterOptions opts; + + // used to hold submitted input + AVFrame *frame; + + // source data type: AVMEDIA_TYPE_SUBTITLE for sub2video, + // same as type otherwise + enum AVMediaType type_src; + + int eof; + int bound; + int drop_warned; + uint64_t nb_dropped; + + // parameters configured for this input + int format; + + int width, height; + AVRational sample_aspect_ratio; + enum AVColorSpace color_space; + enum AVColorRange color_range; + + int sample_rate; + AVChannelLayout ch_layout; + + AVRational time_base; + + AVFrameSideData **side_data; + int nb_side_data; + + AVFifo *frame_queue; + + AVBufferRef *hw_frames_ctx; + + int displaymatrix_present; + int displaymatrix_applied; + int32_t displaymatrix[9]; + + int downmixinfo_present; + AVDownmixInfo downmixinfo; + + struct { + AVFrame *frame; + + int64_t last_pts; + int64_t end_pts; + + /// marks if sub2video_update should force an initialization + unsigned int initialize; + } sub2video; +} InputFilterPriv; + +static InputFilterPriv *ifp_from_ifilter(InputFilter *ifilter) +{ + return (InputFilterPriv*)ifilter; +} + +typedef struct FPSConvContext { + AVFrame *last_frame; + /* number of frames emitted by the video-encoding sync code */ + int64_t frame_number; + /* history of nb_frames_prev, i.e. the number of times the + * previous frame was duplicated by vsync code in recent + * do_video_out() calls */ + int64_t frames_prev_hist[3]; + + uint64_t dup_warning; + + int last_dropped; + int dropped_keyframe; + + enum VideoSyncMethod vsync_method; + + AVRational framerate; + AVRational framerate_max; + const AVRational *framerate_supported; + int framerate_clip; +} FPSConvContext; + +typedef struct OutputFilterPriv { + OutputFilter ofilter; + + void *log_parent; + char log_name[32]; + + /* desired output stream properties */ + int format; + int width, height; + int sample_rate; + AVChannelLayout ch_layout; + enum AVColorSpace color_space; + enum AVColorRange color_range; + + AVFrameSideData **side_data; + int nb_side_data; + + // time base in which the output is sent to our downstream + // does not need to match the filtersink's timebase + AVRational tb_out; + // at least one frame with the above timebase was sent + // to our downstream, so it cannot change anymore + int tb_out_locked; + + AVRational sample_aspect_ratio; + + AVDictionary *sws_opts; + AVDictionary *swr_opts; + + // those are only set if no format is specified and the encoder gives us multiple options + // They point directly to the relevant lists of the encoder. 
+ const int *formats; + const AVChannelLayout *ch_layouts; + const int *sample_rates; + const enum AVColorSpace *color_spaces; + const enum AVColorRange *color_ranges; + + AVRational enc_timebase; + int64_t trim_start_us; + int64_t trim_duration_us; + // offset for output timestamps, in AV_TIME_BASE_Q + int64_t ts_offset; + int64_t next_pts; + FPSConvContext fps; + + unsigned flags; +} OutputFilterPriv; + +static OutputFilterPriv *ofp_from_ofilter(OutputFilter *ofilter) +{ + return (OutputFilterPriv*)ofilter; +} + typedef struct FilterCommand { char *target; char *command; @@ -146,7 +316,7 @@ static void sub2video_push_ref(InputFilterPriv *ifp, int64_t pts) av_assert1(frame->data[0]); ifp->sub2video.last_pts = frame->pts = pts; - ret = av_buffersrc_add_frame_flags(ifp->filter, frame, + ret = av_buffersrc_add_frame_flags(ifp->ifilter.filter, frame, AV_BUFFERSRC_FLAG_KEEP_REF | AV_BUFFERSRC_FLAG_PUSH); if (ret != AVERROR_EOF && ret < 0) @@ -480,10 +650,10 @@ static OutputFilter *ofilter_alloc(FilterGraph *fg, enum AVMediaType type) ofp->format = -1; ofp->color_space = AVCOL_SPC_UNSPECIFIED; ofp->color_range = AVCOL_RANGE_UNSPECIFIED; - ofp->index = fg->nb_outputs - 1; + ofilter->index = fg->nb_outputs - 1; snprintf(ofp->log_name, sizeof(ofp->log_name), "%co%d", - av_get_media_type_string(type)[0], ofp->index); + av_get_media_type_string(type)[0], ofilter->index); return ofilter; } @@ -499,10 +669,10 @@ static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist, av_assert0(!ifp->bound); ifp->bound = 1; - if (ifp->type != ist->par->codec_type && - !(ifp->type == AVMEDIA_TYPE_VIDEO && ist->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) { + if (ifilter->type != ist->par->codec_type && + !(ifilter->type == AVMEDIA_TYPE_VIDEO && ist->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) { av_log(fgp, AV_LOG_ERROR, "Tried to connect %s stream to %s filtergraph input\n", - av_get_media_type_string(ist->par->codec_type), av_get_media_type_string(ifp->type)); + av_get_media_type_string(ist->par->codec_type), av_get_media_type_string(ifilter->type)); return AVERROR(EINVAL); } @@ -517,8 +687,12 @@ static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist, if (ret < 0) return ret; + ifilter->input_name = av_strdup(ifp->opts.name); + if (!ifilter->input_name) + return AVERROR(EINVAL); + ret = sch_connect(fgp->sch, - src, SCH_FILTER_IN(fgp->sch_idx, ifp->index)); + src, SCH_FILTER_IN(fgp->sch_idx, ifilter->index)); if (ret < 0) return ret; @@ -553,19 +727,23 @@ static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec, av_assert0(!ifp->bound); ifp->bound = 1; - if (ifp->type != dec->type) { + if (ifp->ifilter.type != dec->type) { av_log(fgp, AV_LOG_ERROR, "Tried to connect %s decoder to %s filtergraph input\n", - av_get_media_type_string(dec->type), av_get_media_type_string(ifp->type)); + av_get_media_type_string(dec->type), av_get_media_type_string(ifp->ifilter.type)); return AVERROR(EINVAL); } - ifp->type_src = ifp->type; + ifp->type_src = ifp->ifilter.type; ret = dec_filter_add(dec, &ifp->ifilter, &ifp->opts, vs, &src); if (ret < 0) return ret; - ret = sch_connect(fgp->sch, src, SCH_FILTER_IN(fgp->sch_idx, ifp->index)); + ifp->ifilter.input_name = av_strdup(ifp->opts.name); + if (!ifp->ifilter.input_name) + return AVERROR(EINVAL); + + ret = sch_connect(fgp->sch, src, SCH_FILTER_IN(fgp->sch_idx, ifp->ifilter.index)); if (ret < 0) return ret; @@ -634,8 +812,8 @@ int ofilter_bind_enc(OutputFilter *ofilter, unsigned sched_idx_enc, ofp->trim_start_us = opts->trim_start_us; ofp->trim_duration_us = 
opts->trim_duration_us; - ofp->name = av_strdup(opts->name); - if (!ofp->name) + ofilter->output_name = av_strdup(opts->name); + if (!ofilter->output_name) return AVERROR(EINVAL); ret = av_dict_copy(&ofp->sws_opts, opts->sws_opts, 0); @@ -655,7 +833,7 @@ int ofilter_bind_enc(OutputFilter *ofilter, unsigned sched_idx_enc, ofp->log_parent = NULL; av_strlcpy(ofp->log_name, fgp->log_name, sizeof(ofp->log_name)); } else - av_strlcatf(ofp->log_name, sizeof(ofp->log_name), "->%s", ofp->name); + av_strlcatf(ofp->log_name, sizeof(ofp->log_name), "->%s", ofilter->output_name); switch (ofilter->type) { case AVMEDIA_TYPE_VIDEO: @@ -714,7 +892,7 @@ int ofilter_bind_enc(OutputFilter *ofilter, unsigned sched_idx_enc, break; } - ret = sch_connect(fgp->sch, SCH_FILTER_OUT(fgp->sch_idx, ofp->index), + ret = sch_connect(fgp->sch, SCH_FILTER_OUT(fgp->sch_idx, ofilter->index), SCH_ENC(sched_idx_enc)); if (ret < 0) return ret; @@ -728,16 +906,16 @@ static int ofilter_bind_ifilter(OutputFilter *ofilter, InputFilterPriv *ifp, OutputFilterPriv *ofp = ofp_from_ofilter(ofilter); av_assert0(!ofilter->bound); - av_assert0(ofilter->type == ifp->type); + av_assert0(ofilter->type == ifp->ifilter.type); ofilter->bound = 1; av_freep(&ofilter->linklabel); - ofp->name = av_strdup(opts->name); - if (!ofp->name) + ofilter->output_name = av_strdup(opts->name); + if (!ofilter->output_name) return AVERROR(EINVAL); - av_strlcatf(ofp->log_name, sizeof(ofp->log_name), "->%s", ofp->name); + av_strlcatf(ofp->log_name, sizeof(ofp->log_name), "->%s", ofilter->output_name); return 0; } @@ -753,18 +931,18 @@ static int ifilter_bind_fg(InputFilterPriv *ifp, FilterGraph *fg_src, int out_id av_assert0(!ifp->bound); ifp->bound = 1; - if (ifp->type != ofilter_src->type) { + if (ifp->ifilter.type != ofilter_src->type) { av_log(fgp, AV_LOG_ERROR, "Tried to connect %s output to %s input\n", av_get_media_type_string(ofilter_src->type), - av_get_media_type_string(ifp->type)); + av_get_media_type_string(ifp->ifilter.type)); return AVERROR(EINVAL); } - ifp->type_src = ifp->type; + ifp->type_src = ifp->ifilter.type; memset(&opts, 0, sizeof(opts)); - snprintf(name, sizeof(name), "fg:%d:%d", fgp->fg.index, ifp->index); + snprintf(name, sizeof(name), "fg:%d:%d", fgp->fg.index, ifp->ifilter.index); opts.name = name; ret = ofilter_bind_ifilter(ofilter_src, ifp, &opts); @@ -772,7 +950,7 @@ static int ifilter_bind_fg(InputFilterPriv *ifp, FilterGraph *fg_src, int out_id return ret; ret = sch_connect(fgp->sch, SCH_FILTER_OUT(fg_src->index, out_idx), - SCH_FILTER_IN(fgp->sch_idx, ifp->index)); + SCH_FILTER_IN(fgp->sch_idx, ifp->ifilter.index)); if (ret < 0) return ret; @@ -795,7 +973,7 @@ static InputFilter *ifilter_alloc(FilterGraph *fg) if (!ifp->frame) return NULL; - ifp->index = fg->nb_inputs - 1; + ifilter->index = fg->nb_inputs - 1; ifp->format = -1; ifp->color_space = AVCOL_SPC_UNSPECIFIED; ifp->color_range = AVCOL_RANGE_UNSPECIFIED; @@ -832,10 +1010,11 @@ void fg_free(FilterGraph **pfg) av_frame_free(&ifp->opts.fallback); av_buffer_unref(&ifp->hw_frames_ctx); - av_freep(&ifp->linklabel); + av_freep(&ifilter->linklabel); av_freep(&ifp->opts.name); av_frame_side_data_free(&ifp->side_data, &ifp->nb_side_data); av_freep(&ifilter->name); + av_freep(&ifilter->input_name); av_freep(&fg->inputs[j]); } av_freep(&fg->inputs); @@ -849,14 +1028,14 @@ void fg_free(FilterGraph **pfg) av_freep(&ofilter->linklabel); av_freep(&ofilter->name); + av_freep(&ofilter->output_name); av_freep(&ofilter->apad); - av_freep(&ofp->name); av_channel_layout_uninit(&ofp->ch_layout); 
av_frame_side_data_free(&ofp->side_data, &ofp->nb_side_data); av_freep(&fg->outputs[j]); } av_freep(&fg->outputs); - av_freep(&fgp->graph_desc); + av_freep(&fg->graph_desc); av_frame_free(&fgp->frame); av_frame_free(&fgp->frame_enc); @@ -909,7 +1088,7 @@ int fg_create(FilterGraph **pfg, char *graph_desc, Scheduler *sch) } fg->class = &fg_class; - fgp->graph_desc = graph_desc; + fg->graph_desc = graph_desc; fgp->disable_conversions = !auto_conversion_filters; fgp->nb_threads = -1; fgp->sch = sch; @@ -928,7 +1107,7 @@ int fg_create(FilterGraph **pfg, char *graph_desc, Scheduler *sch) return AVERROR(ENOMEM);; graph->nb_threads = 1; - ret = graph_parse(fg, graph, fgp->graph_desc, &inputs, &outputs, + ret = graph_parse(fg, graph, fg->graph_desc, &inputs, &outputs, hw_device_for_filter()); if (ret < 0) goto fail; @@ -945,21 +1124,19 @@ int fg_create(FilterGraph **pfg, char *graph_desc, Scheduler *sch) for (AVFilterInOut *cur = inputs; cur; cur = cur->next) { InputFilter *const ifilter = ifilter_alloc(fg); - InputFilterPriv *ifp; if (!ifilter) { ret = AVERROR(ENOMEM); goto fail; } - ifp = ifp_from_ifilter(ifilter); - ifp->linklabel = cur->name; + ifilter->linklabel = cur->name; cur->name = NULL; - ifp->type = avfilter_pad_get_type(cur->filter_ctx->input_pads, + ifilter->type = avfilter_pad_get_type(cur->filter_ctx->input_pads, cur->pad_idx); - if (ifp->type != AVMEDIA_TYPE_VIDEO && ifp->type != AVMEDIA_TYPE_AUDIO) { + if (ifilter->type != AVMEDIA_TYPE_VIDEO && ifilter->type != AVMEDIA_TYPE_AUDIO) { av_log(fg, AV_LOG_FATAL, "Only video and audio filters supported " "currently.\n"); ret = AVERROR(ENOSYS); @@ -1070,23 +1247,22 @@ int fg_create_simple(FilterGraph **pfg, static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter) { - FilterGraphPriv *fgp = fgp_from_fg(fg); InputFilterPriv *ifp = ifp_from_ifilter(ifilter); InputStream *ist = NULL; - enum AVMediaType type = ifp->type; + enum AVMediaType type = ifilter->type; ViewSpecifier vs = { .type = VIEW_SPECIFIER_TYPE_NONE }; const char *spec; char *p; int i, ret; - if (ifp->linklabel && !strncmp(ifp->linklabel, "dec:", 4)) { + if (ifilter->linklabel && !strncmp(ifilter->linklabel, "dec:", 4)) { // bind to a standalone decoder int dec_idx; - dec_idx = strtol(ifp->linklabel + 4, &p, 0); + dec_idx = strtol(ifilter->linklabel + 4, &p, 0); if (dec_idx < 0 || dec_idx >= nb_decoders) { av_log(fg, AV_LOG_ERROR, "Invalid decoder index %d in filtergraph description %s\n", - dec_idx, fgp->graph_desc); + dec_idx, fg->graph_desc); return AVERROR(EINVAL); } @@ -1102,7 +1278,7 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter) av_log(fg, AV_LOG_ERROR, "Error binding a decoder to filtergraph input %s\n", ifilter->name); return ret; - } else if (ifp->linklabel) { + } else if (ifilter->linklabel) { StreamSpecifier ss; AVFormatContext *s; AVStream *st = NULL; @@ -1119,25 +1295,25 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter) OutputFilter *ofilter = fg_src->outputs[j]; if (!ofilter->bound && ofilter->linklabel && - !strcmp(ofilter->linklabel, ifp->linklabel)) { + !strcmp(ofilter->linklabel, ifilter->linklabel)) { av_log(fg, AV_LOG_VERBOSE, "Binding input with label '%s' to filtergraph output %d:%d\n", - ifp->linklabel, i, j); + ifilter->linklabel, i, j); ret = ifilter_bind_fg(ifp, fg_src, j); if (ret < 0) av_log(fg, AV_LOG_ERROR, "Error binding filtergraph input %s\n", - ifp->linklabel); + ifilter->linklabel); return ret; } } } // bind to an explicitly specified demuxer stream - file_idx = 
strtol(ifp->linklabel, &p, 0); + file_idx = strtol(ifilter->linklabel, &p, 0); if (file_idx < 0 || file_idx >= nb_input_files) { av_log(fg, AV_LOG_FATAL, "Invalid file index %d in filtergraph description %s.\n", - file_idx, fgp->graph_desc); + file_idx, fg->graph_desc); return AVERROR(EINVAL); } s = input_files[file_idx]->ctx; @@ -1171,14 +1347,14 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter) stream_specifier_uninit(&ss); if (!st) { av_log(fg, AV_LOG_FATAL, "Stream specifier '%s' in filtergraph description %s " - "matches no streams.\n", p, fgp->graph_desc); + "matches no streams.\n", p, fg->graph_desc); return AVERROR(EINVAL); } ist = input_files[file_idx]->streams[st->index]; av_log(fg, AV_LOG_VERBOSE, "Binding input with label '%s' to input stream %d:%d\n", - ifp->linklabel, ist->file->index, ist->index); + ifilter->linklabel, ist->file->index, ist->index); } else { ist = ist_find_unused(type); if (!ist) { @@ -1191,7 +1367,7 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter) av_log(fg, AV_LOG_VERBOSE, "Binding unlabeled input %d to input stream %d:%d\n", - ifp->index, ist->file->index, ist->index); + ifilter->index, ist->file->index, ist->index); } av_assert0(ist); @@ -1340,8 +1516,8 @@ static int configure_output_video_filter(FilterGraphPriv *fgp, AVFilterGraph *gr int ret; char name[255]; - snprintf(name, sizeof(name), "out_%s", ofp->name); - ret = avfilter_graph_create_filter(&ofp->filter, + snprintf(name, sizeof(name), "out_%s", ofilter->output_name); + ret = avfilter_graph_create_filter(&ofilter->filter, avfilter_get_by_name("buffersink"), name, NULL, NULL, graph); @@ -1360,7 +1536,7 @@ static int configure_output_video_filter(FilterGraphPriv *fgp, AVFilterGraph *gr av_strlcatf(args, sizeof(args), ":%s=%s", e->key, e->value); } - snprintf(name, sizeof(name), "scaler_out_%s", ofp->name); + snprintf(name, sizeof(name), "scaler_out_%s", ofilter->output_name); if ((ret = avfilter_graph_create_filter(&filter, avfilter_get_by_name("scale"), name, args, NULL, graph)) < 0) return ret; @@ -1396,14 +1572,14 @@ static int configure_output_video_filter(FilterGraphPriv *fgp, AVFilterGraph *gr pad_idx = 0; } - snprintf(name, sizeof(name), "trim_out_%s", ofp->name); + snprintf(name, sizeof(name), "trim_out_%s", ofilter->output_name); ret = insert_trim(fgp, ofp->trim_start_us, ofp->trim_duration_us, &last_filter, &pad_idx, name); if (ret < 0) return ret; - if ((ret = avfilter_link(last_filter, pad_idx, ofp->filter, 0)) < 0) + if ((ret = avfilter_link(last_filter, pad_idx, ofilter->filter, 0)) < 0) return ret; return 0; @@ -1419,8 +1595,8 @@ static int configure_output_audio_filter(FilterGraphPriv *fgp, AVFilterGraph *gr char name[255]; int ret; - snprintf(name, sizeof(name), "out_%s", ofp->name); - ret = avfilter_graph_create_filter(&ofp->filter, + snprintf(name, sizeof(name), "out_%s", ofilter->output_name); + ret = avfilter_graph_create_filter(&ofilter->filter, avfilter_get_by_name("abuffersink"), name, NULL, NULL, graph); if (ret < 0) @@ -1457,7 +1633,7 @@ static int configure_output_audio_filter(FilterGraphPriv *fgp, AVFilterGraph *gr if (args.len) { AVFilterContext *format; - snprintf(name, sizeof(name), "format_out_%s", ofp->name); + snprintf(name, sizeof(name), "format_out_%s", ofilter->output_name); ret = avfilter_graph_create_filter(&format, avfilter_get_by_name("aformat"), name, args.str, NULL, graph); @@ -1477,13 +1653,13 @@ static int configure_output_audio_filter(FilterGraphPriv *fgp, AVFilterGraph *gr fgp->have_sources = 1; } - 
snprintf(name, sizeof(name), "trim for output %s", ofp->name); + snprintf(name, sizeof(name), "trim for output %s", ofilter->output_name); ret = insert_trim(fgp, ofp->trim_start_us, ofp->trim_duration_us, &last_filter, &pad_idx, name); if (ret < 0) goto fail; - if ((ret = avfilter_link(last_filter, pad_idx, ofp->filter, 0)) < 0) + if ((ret = avfilter_link(last_filter, pad_idx, ofilter->filter, 0)) < 0) goto fail; fail: av_bprint_finalize(&args, NULL); @@ -1532,8 +1708,8 @@ static int configure_input_video_filter(FilterGraph *fg, AVFilterGraph *graph, snprintf(name, sizeof(name), "graph %d input from stream %s", fg->index, ifp->opts.name); - ifp->filter = avfilter_graph_alloc_filter(graph, buffer_filt, name); - if (!ifp->filter) { + ifilter->filter = avfilter_graph_alloc_filter(graph, buffer_filt, name); + if (!ifilter->filter) { ret = AVERROR(ENOMEM); goto fail; } @@ -1551,16 +1727,16 @@ static int configure_input_video_filter(FilterGraph *fg, AVFilterGraph *graph, par->side_data = ifp->side_data; par->nb_side_data = ifp->nb_side_data; - ret = av_buffersrc_parameters_set(ifp->filter, par); + ret = av_buffersrc_parameters_set(ifilter->filter, par); if (ret < 0) goto fail; av_freep(&par); - ret = avfilter_init_dict(ifp->filter, NULL); + ret = avfilter_init_dict(ifilter->filter, NULL); if (ret < 0) goto fail; - last_filter = ifp->filter; + last_filter = ifilter->filter; desc = av_pix_fmt_desc_get(ifp->format); av_assert0(desc); @@ -1654,7 +1830,7 @@ static int configure_input_audio_filter(FilterGraph *fg, AVFilterGraph *graph, av_bprintf(&args, ":channels=%d", ifp->ch_layout.nb_channels); snprintf(name, sizeof(name), "graph_%d_in_%s", fg->index, ifp->opts.name); - if ((ret = avfilter_graph_create_filter(&ifp->filter, abuffer_filt, + if ((ret = avfilter_graph_create_filter(&ifilter->filter, abuffer_filt, name, args.str, NULL, graph)) < 0) return ret; @@ -1663,11 +1839,11 @@ static int configure_input_audio_filter(FilterGraph *fg, AVFilterGraph *graph, return AVERROR(ENOMEM); par->side_data = ifp->side_data; par->nb_side_data = ifp->nb_side_data; - ret = av_buffersrc_parameters_set(ifp->filter, par); + ret = av_buffersrc_parameters_set(ifilter->filter, par); av_free(par); if (ret < 0) return ret; - last_filter = ifp->filter; + last_filter = ifilter->filter; snprintf(name, sizeof(name), "trim for input stream %s", ifp->opts.name); ret = insert_trim(fg, ifp->opts.trim_start_us, ifp->opts.trim_end_us, @@ -1684,7 +1860,7 @@ static int configure_input_audio_filter(FilterGraph *fg, AVFilterGraph *graph, static int configure_input_filter(FilterGraph *fg, AVFilterGraph *graph, InputFilter *ifilter, AVFilterInOut *in) { - switch (ifp_from_ifilter(ifilter)->type) { + switch (ifilter->type) { case AVMEDIA_TYPE_VIDEO: return configure_input_video_filter(fg, graph, ifilter, in); case AVMEDIA_TYPE_AUDIO: return configure_input_audio_filter(fg, graph, ifilter, in); default: av_assert0(0); return 0; @@ -1694,9 +1870,9 @@ static int configure_input_filter(FilterGraph *fg, AVFilterGraph *graph, static void cleanup_filtergraph(FilterGraph *fg, FilterGraphThread *fgt) { for (int i = 0; i < fg->nb_outputs; i++) - ofp_from_ofilter(fg->outputs[i])->filter = NULL; + fg->outputs[i]->filter = NULL; for (int i = 0; i < fg->nb_inputs; i++) - ifp_from_ifilter(fg->inputs[i])->filter = NULL; + fg->inputs[i]->filter = NULL; avfilter_graph_free(&fgt->graph); } @@ -1733,7 +1909,7 @@ static int configure_filtergraph(FilterGraph *fg, FilterGraphThread *fgt) AVFilterInOut *inputs, *outputs, *cur; int ret = AVERROR_BUG, i, simple = 
filtergraph_is_simple(fg); int have_input_eof = 0; - const char *graph_desc = fgp->graph_desc; + const char *graph_desc = fg->graph_desc; cleanup_filtergraph(fg, fgt); fgt->graph = avfilter_graph_alloc(); @@ -1810,7 +1986,7 @@ static int configure_filtergraph(FilterGraph *fg, FilterGraphThread *fgt) int nb_sd; OutputFilter *ofilter = fg->outputs[i]; OutputFilterPriv *ofp = ofp_from_ofilter(ofilter); - AVFilterContext *sink = ofp->filter; + AVFilterContext *sink = ofilter->filter; ofp->format = av_buffersink_get_format(sink); @@ -1850,6 +2026,7 @@ static int configure_filtergraph(FilterGraph *fg, FilterGraphThread *fgt) } for (int i = 0; i < fg->nb_inputs; i++) { + InputFilter *ifilter = fg->inputs[i]; InputFilterPriv *ifp = ifp_from_ifilter(fg->inputs[i]); AVFrame *tmp; while (av_fifo_read(ifp->frame_queue, &tmp, 1) >= 0) { @@ -1860,7 +2037,7 @@ static int configure_filtergraph(FilterGraph *fg, FilterGraphThread *fgt) if (ifp->displaymatrix_applied) av_frame_remove_side_data(tmp, AV_FRAME_DATA_DISPLAYMATRIX); } - ret = av_buffersrc_add_frame(ifp->filter, tmp); + ret = av_buffersrc_add_frame(ifilter->filter, tmp); } av_frame_free(&tmp); if (ret < 0) @@ -1870,9 +2047,9 @@ static int configure_filtergraph(FilterGraph *fg, FilterGraphThread *fgt) /* send the EOFs for the finished inputs */ for (int i = 0; i < fg->nb_inputs; i++) { - InputFilterPriv *ifp = ifp_from_ifilter(fg->inputs[i]); + InputFilter *ifilter = fg->inputs[i]; if (fgt->eof_in[i]) { - ret = av_buffersrc_add_frame(ifp->filter, NULL); + ret = av_buffersrc_add_frame(ifilter->filter, NULL); if (ret < 0) goto fail; have_input_eof = 1; @@ -1902,7 +2079,7 @@ static int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *fr if (ret < 0) return ret; - ifp->time_base = (ifp->type == AVMEDIA_TYPE_AUDIO) ? (AVRational){ 1, frame->sample_rate } : + ifp->time_base = (ifilter->type == AVMEDIA_TYPE_AUDIO) ? (AVRational){ 1, frame->sample_rate } : (ifp->opts.flags & IFILTER_FLAG_CFR) ? av_inv_q(ifp->opts.framerate) : frame->time_base; @@ -1992,12 +2169,11 @@ static int choose_input(const FilterGraph *fg, const FilterGraphThread *fgt) for (int i = 0; i < fg->nb_inputs; i++) { InputFilter *ifilter = fg->inputs[i]; - InputFilterPriv *ifp = ifp_from_ifilter(ifilter); if (fgt->eof_in[i]) continue; - nb_requests = av_buffersrc_get_nb_failed_requests(ifp->filter); + nb_requests = av_buffersrc_get_nb_failed_requests(ifilter->filter); if (nb_requests > nb_requests_max) { nb_requests_max = nb_requests; best_input = i; @@ -2041,7 +2217,7 @@ static int choose_out_timebase(OutputFilterPriv *ofp, AVFrame *frame) fr = fps->framerate; if (!fr.num) { - AVRational fr_sink = av_buffersink_get_frame_rate(ofp->filter); + AVRational fr_sink = av_buffersink_get_frame_rate(ofilter->filter); if (fr_sink.num > 0 && fr_sink.den > 0) fr = fr_sink; } @@ -2294,16 +2470,16 @@ static int close_output(OutputFilterPriv *ofp, FilterGraphThread *fgt) "No filtered frames for output stream, trying to " "initialize anyway.\n"); - ret = sch_filter_send(fgp->sch, fgp->sch_idx, ofp->index, frame); + ret = sch_filter_send(fgp->sch, fgp->sch_idx, ofp->ofilter.index, frame); if (ret < 0) { av_frame_unref(frame); return ret; } } - fgt->eof_out[ofp->index] = 1; + fgt->eof_out[ofp->ofilter.index] = 1; - ret = sch_filter_send(fgp->sch, fgp->sch_idx, ofp->index, NULL); + ret = sch_filter_send(fgp->sch, fgp->sch_idx, ofp->ofilter.index, NULL); return (ret == AVERROR_EOF) ? 
0 : ret; } @@ -2356,12 +2532,12 @@ static int fg_output_frame(OutputFilterPriv *ofp, FilterGraphThread *fgt, } // send the frame to consumers - ret = sch_filter_send(fgp->sch, fgp->sch_idx, ofp->index, frame_out); + ret = sch_filter_send(fgp->sch, fgp->sch_idx, ofp->ofilter.index, frame_out); if (ret < 0) { av_frame_unref(frame_out); - if (!fgt->eof_out[ofp->index]) { - fgt->eof_out[ofp->index] = 1; + if (!fgt->eof_out[ofp->ofilter.index]) { + fgt->eof_out[ofp->ofilter.index] = 1; fgp->nb_outputs_done++; } @@ -2394,13 +2570,13 @@ static int fg_output_step(OutputFilterPriv *ofp, FilterGraphThread *fgt, AVFrame *frame) { FilterGraphPriv *fgp = fgp_from_fg(ofp->ofilter.graph); - AVFilterContext *filter = ofp->filter; + AVFilterContext *filter = ofp->ofilter.filter; FrameData *fd; int ret; ret = av_buffersink_get_frame_flags(filter, frame, AV_BUFFERSINK_FLAG_NO_REQUEST); - if (ret == AVERROR_EOF && !fgt->eof_out[ofp->index]) { + if (ret == AVERROR_EOF && !fgt->eof_out[ofp->ofilter.index]) { ret = fg_output_frame(ofp, fgt, NULL); return (ret < 0) ? ret : 1; } else if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { @@ -2412,7 +2588,7 @@ static int fg_output_step(OutputFilterPriv *ofp, FilterGraphThread *fgt, return ret; } - if (fgt->eof_out[ofp->index]) { + if (fgt->eof_out[ofp->ofilter.index]) { av_frame_unref(frame); return 0; } @@ -2587,7 +2763,7 @@ static int sub2video_frame(InputFilter *ifilter, AVFrame *frame, int buffer) if (ifp->sub2video.end_pts < INT64_MAX) sub2video_update(ifp, INT64_MAX, NULL); - return av_buffersrc_add_frame(ifp->filter, NULL); + return av_buffersrc_add_frame(ifilter->filter, NULL); } ifp->width = frame->width ? frame->width : ifp->width; @@ -2604,16 +2780,16 @@ static int send_eof(FilterGraphThread *fgt, InputFilter *ifilter, InputFilterPriv *ifp = ifp_from_ifilter(ifilter); int ret; - if (fgt->eof_in[ifp->index]) + if (fgt->eof_in[ifilter->index]) return 0; - fgt->eof_in[ifp->index] = 1; + fgt->eof_in[ifilter->index] = 1; - if (ifp->filter) { + if (ifilter->filter) { pts = av_rescale_q_rnd(pts, tb, ifp->time_base, AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX); - ret = av_buffersrc_close(ifp->filter, pts, AV_BUFFERSRC_FLAG_PUSH); + ret = av_buffersrc_close(ifilter->filter, pts, AV_BUFFERSRC_FLAG_PUSH); if (ret < 0) return ret; } else { @@ -2682,7 +2858,7 @@ static int send_frame(FilterGraph *fg, FilterGraphThread *fgt, int need_reinit = 0, ret; /* determine if the parameters for this input changed */ - switch (ifp->type) { + switch (ifilter->type) { case AVMEDIA_TYPE_AUDIO: if (ifp->format != frame->format || ifp->sample_rate != frame->sample_rate || @@ -2802,7 +2978,7 @@ static int send_frame(FilterGraph *fg, FilterGraphThread *fgt, return AVERROR(ENOMEM); fd->wallclock[LATENCY_PROBE_FILTER_PRE] = av_gettime_relative(); - ret = av_buffersrc_add_frame_flags(ifp->filter, frame, + ret = av_buffersrc_add_frame_flags(ifilter->filter, frame, AV_BUFFERSRC_FLAG_PUSH); if (ret < 0) { av_frame_unref(frame); @@ -2821,7 +2997,7 @@ static void fg_thread_set_name(const FilterGraph *fg) OutputFilterPriv *ofp = ofp_from_ofilter(fg->outputs[0]); snprintf(name, sizeof(name), "%cf%s", av_get_media_type_string(ofp->ofilter.type)[0], - ofp->name); + ofp->ofilter.output_name); } else { snprintf(name, sizeof(name), "fc%d", fg->index); } diff --git a/fftools/ffmpeg_filter.h b/fftools/ffmpeg_filter.h deleted file mode 100644 index 94b94beece6be..0000000000000 --- a/fftools/ffmpeg_filter.h +++ /dev/null @@ -1,234 +0,0 @@ -/* - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef FFTOOLS_FFMPEG_FILTER_H -#define FFTOOLS_FFMPEG_FILTER_H - -#include "ffmpeg.h" - -#include - -#include "ffmpeg_sched.h" -#include "sync_queue.h" - -#include "libavfilter/avfilter.h" - -#include "libavutil/avutil.h" -#include "libavutil/dict.h" -#include "libavutil/fifo.h" -#include "libavutil/pixfmt.h" -#include "libavutil/rational.h" -#include "libavutil/bprint.h" -#include "libavutil/channel_layout.h" -#include "libavutil/downmix_info.h" - -typedef struct FilterGraphPriv { - FilterGraph fg; - - // name used for logging - char log_name[32]; - - int is_simple; - // true when the filtergraph contains only meta filters - // that do not modify the frame data - int is_meta; - // source filters are present in the graph - int have_sources; - int disable_conversions; - - unsigned nb_outputs_done; - - const char *graph_desc; - - int nb_threads; - - // frame for temporarily holding output from the filtergraph - AVFrame *frame; - // frame for sending output to the encoder - AVFrame *frame_enc; - - Scheduler *sch; - unsigned sch_idx; - - AVBPrint graph_print_buf; - -} FilterGraphPriv; - -static inline FilterGraphPriv *fgp_from_fg(FilterGraph *fg) -{ - return (FilterGraphPriv*)fg; -} - -static inline const FilterGraphPriv *cfgp_from_cfg(const FilterGraph *fg) -{ - return (const FilterGraphPriv*)fg; -} - -typedef struct InputFilterPriv { - InputFilter ifilter; - - InputFilterOptions opts; - - int index; - - AVFilterContext *filter; - - // used to hold submitted input - AVFrame *frame; - - /* for filters that are not yet bound to an input stream, - * this stores the input linklabel, if any */ - uint8_t *linklabel; - - // filter data type - enum AVMediaType type; - // source data type: AVMEDIA_TYPE_SUBTITLE for sub2video, - // same as type otherwise - enum AVMediaType type_src; - - int eof; - int bound; - int drop_warned; - uint64_t nb_dropped; - - // parameters configured for this input - int format; - - int width, height; - AVRational sample_aspect_ratio; - enum AVColorSpace color_space; - enum AVColorRange color_range; - - int sample_rate; - AVChannelLayout ch_layout; - - AVRational time_base; - - AVFrameSideData **side_data; - int nb_side_data; - - AVFifo *frame_queue; - - AVBufferRef *hw_frames_ctx; - - int displaymatrix_present; - int displaymatrix_applied; - int32_t displaymatrix[9]; - - int downmixinfo_present; - AVDownmixInfo downmixinfo; - - struct { - AVFrame *frame; - - int64_t last_pts; - int64_t end_pts; - - /// marks if sub2video_update should force an initialization - unsigned int initialize; - } sub2video; -} InputFilterPriv; - -static inline InputFilterPriv *ifp_from_ifilter(InputFilter *ifilter) -{ - return (InputFilterPriv*)ifilter; -} - -typedef struct FPSConvContext { - AVFrame *last_frame; - /* number of frames emitted by the video-encoding 
sync code */ - int64_t frame_number; - /* history of nb_frames_prev, i.e. the number of times the - * previous frame was duplicated by vsync code in recent - * do_video_out() calls */ - int64_t frames_prev_hist[3]; - - uint64_t dup_warning; - - int last_dropped; - int dropped_keyframe; - - enum VideoSyncMethod vsync_method; - - AVRational framerate; - AVRational framerate_max; - const AVRational *framerate_supported; - int framerate_clip; -} FPSConvContext; - - -typedef struct OutputFilterPriv { - OutputFilter ofilter; - - int index; - - void *log_parent; - char log_name[32]; - - char *name; - - AVFilterContext *filter; - - /* desired output stream properties */ - int format; - int width, height; - int sample_rate; - AVChannelLayout ch_layout; - enum AVColorSpace color_space; - enum AVColorRange color_range; - - AVFrameSideData **side_data; - int nb_side_data; - - // time base in which the output is sent to our downstream - // does not need to match the filtersink's timebase - AVRational tb_out; - // at least one frame with the above timebase was sent - // to our downstream, so it cannot change anymore - int tb_out_locked; - - AVRational sample_aspect_ratio; - - AVDictionary *sws_opts; - AVDictionary *swr_opts; - - // those are only set if no format is specified and the encoder gives us multiple options - // They point directly to the relevant lists of the encoder. - const int *formats; - const AVChannelLayout *ch_layouts; - const int *sample_rates; - const enum AVColorSpace *color_spaces; - const enum AVColorRange *color_ranges; - - AVRational enc_timebase; - int64_t trim_start_us; - int64_t trim_duration_us; - // offset for output timestamps, in AV_TIME_BASE_Q - int64_t ts_offset; - int64_t next_pts; - FPSConvContext fps; - - unsigned flags; -} OutputFilterPriv; - -static inline OutputFilterPriv *ofp_from_ofilter(OutputFilter *ofilter) -{ - return (OutputFilterPriv*)ofilter; -} - -#endif /* FFTOOLS_FFMPEG_FILTER_H */ diff --git a/fftools/ffprobe.c b/fftools/ffprobe.c index 80ce38e73bd2e..1346ed33c5c81 100644 --- a/fftools/ffprobe.c +++ b/fftools/ffprobe.c @@ -457,6 +457,43 @@ static inline int show_tags(AVTextFormatContext *tfc, AVDictionary *tags, int se return ret; } +static void print_displaymatrix(AVTextFormatContext *tfc, const int32_t matrix[9]) +{ + double rotation = av_display_rotation_get(matrix); + if (isnan(rotation)) + rotation = 0; + avtext_print_integers(tfc, "displaymatrix", (void*)matrix, 9, " %11d", 3, 4, 1); + print_int("rotation", rotation); +} + +static void print_mastering_display_metadata(AVTextFormatContext *tfc, + const AVMasteringDisplayMetadata *metadata) +{ + if (metadata->has_primaries) { + print_q("red_x", metadata->display_primaries[0][0], '/'); + print_q("red_y", metadata->display_primaries[0][1], '/'); + print_q("green_x", metadata->display_primaries[1][0], '/'); + print_q("green_y", metadata->display_primaries[1][1], '/'); + print_q("blue_x", metadata->display_primaries[2][0], '/'); + print_q("blue_y", metadata->display_primaries[2][1], '/'); + + print_q("white_point_x", metadata->white_point[0], '/'); + print_q("white_point_y", metadata->white_point[1], '/'); + } + + if (metadata->has_luminance) { + print_q("min_luminance", metadata->min_luminance, '/'); + print_q("max_luminance", metadata->max_luminance, '/'); + } +} + +static void print_context_light_level(AVTextFormatContext *tfc, + const AVContentLightMetadata *metadata) +{ + print_int("max_content", metadata->MaxCLL); + print_int("max_average", metadata->MaxFALL); +} + static void 
print_dovi_metadata(AVTextFormatContext *tfc, const AVDOVIMetadata *dovi) { if (!dovi) @@ -929,121 +966,98 @@ static void print_pkt_side_data(AVTextFormatContext *tfc, const AVPacketSideData *sd, SectionID id_data) { - const char *name = av_packet_side_data_name(sd->type); - - avtext_print_section_header(tfc, sd, id_data); - print_str("side_data_type", name ? name : "unknown"); - if (sd->type == AV_PKT_DATA_DISPLAYMATRIX && sd->size >= 9*4) { - double rotation = av_display_rotation_get((int32_t *)sd->data); - if (isnan(rotation)) - rotation = 0; - avtext_print_integers(tfc, "displaymatrix", sd->data, 9, " %11d", 3, 4, 1); - print_int("rotation", rotation); - } else if (sd->type == AV_PKT_DATA_STEREO3D) { - const AVStereo3D *stereo = (AVStereo3D *)sd->data; - print_str("type", av_stereo3d_type_name(stereo->type)); - print_int("inverted", !!(stereo->flags & AV_STEREO3D_FLAG_INVERT)); - print_str("view", av_stereo3d_view_name(stereo->view)); - print_str("primary_eye", av_stereo3d_primary_eye_name(stereo->primary_eye)); - print_int("baseline", stereo->baseline); - print_q("horizontal_disparity_adjustment", stereo->horizontal_disparity_adjustment, '/'); - print_q("horizontal_field_of_view", stereo->horizontal_field_of_view, '/'); - } else if (sd->type == AV_PKT_DATA_SPHERICAL) { - const AVSphericalMapping *spherical = (AVSphericalMapping *)sd->data; - print_str("projection", av_spherical_projection_name(spherical->projection)); - if (spherical->projection == AV_SPHERICAL_CUBEMAP) { - print_int("padding", spherical->padding); - } else if (spherical->projection == AV_SPHERICAL_EQUIRECTANGULAR_TILE) { - size_t l, t, r, b; - av_spherical_tile_bounds(spherical, par->width, par->height, - &l, &t, &r, &b); - print_int("bound_left", l); - print_int("bound_top", t); - print_int("bound_right", r); - print_int("bound_bottom", b); - } - - print_int("yaw", (double) spherical->yaw / (1 << 16)); - print_int("pitch", (double) spherical->pitch / (1 << 16)); - print_int("roll", (double) spherical->roll / (1 << 16)); - } else if (sd->type == AV_PKT_DATA_SKIP_SAMPLES && sd->size == 10) { - print_int("skip_samples", AV_RL32(sd->data)); - print_int("discard_padding", AV_RL32(sd->data + 4)); - print_int("skip_reason", AV_RL8(sd->data + 8)); - print_int("discard_reason", AV_RL8(sd->data + 9)); - } else if (sd->type == AV_PKT_DATA_MASTERING_DISPLAY_METADATA) { - AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data; - - if (metadata->has_primaries) { - print_q("red_x", metadata->display_primaries[0][0], '/'); - print_q("red_y", metadata->display_primaries[0][1], '/'); - print_q("green_x", metadata->display_primaries[1][0], '/'); - print_q("green_y", metadata->display_primaries[1][1], '/'); - print_q("blue_x", metadata->display_primaries[2][0], '/'); - print_q("blue_y", metadata->display_primaries[2][1], '/'); - - print_q("white_point_x", metadata->white_point[0], '/'); - print_q("white_point_y", metadata->white_point[1], '/'); - } - - if (metadata->has_luminance) { - print_q("min_luminance", metadata->min_luminance, '/'); - print_q("max_luminance", metadata->max_luminance, '/'); - } - } else if (sd->type == AV_PKT_DATA_CONTENT_LIGHT_LEVEL) { - AVContentLightMetadata *metadata = (AVContentLightMetadata *)sd->data; - print_int("max_content", metadata->MaxCLL); - print_int("max_average", metadata->MaxFALL); - } else if (sd->type == AV_PKT_DATA_AMBIENT_VIEWING_ENVIRONMENT) { - print_ambient_viewing_environment( - tfc, (const AVAmbientViewingEnvironment *)sd->data); - } else if (sd->type == 
AV_PKT_DATA_DYNAMIC_HDR10_PLUS) { - AVDynamicHDRPlus *metadata = (AVDynamicHDRPlus *)sd->data; - print_dynamic_hdr10_plus(tfc, metadata); - } else if (sd->type == AV_PKT_DATA_DOVI_CONF) { - AVDOVIDecoderConfigurationRecord *dovi = (AVDOVIDecoderConfigurationRecord *)sd->data; - const char *comp = "unknown"; - print_int("dv_version_major", dovi->dv_version_major); - print_int("dv_version_minor", dovi->dv_version_minor); - print_int("dv_profile", dovi->dv_profile); - print_int("dv_level", dovi->dv_level); - print_int("rpu_present_flag", dovi->rpu_present_flag); - print_int("el_present_flag", dovi->el_present_flag); - print_int("bl_present_flag", dovi->bl_present_flag); - print_int("dv_bl_signal_compatibility_id", dovi->dv_bl_signal_compatibility_id); - switch (dovi->dv_md_compression) - { - case AV_DOVI_COMPRESSION_NONE: comp = "none"; break; - case AV_DOVI_COMPRESSION_LIMITED: comp = "limited"; break; - case AV_DOVI_COMPRESSION_RESERVED: comp = "reserved"; break; - case AV_DOVI_COMPRESSION_EXTENDED: comp = "extended"; break; - } - print_str("dv_md_compression", comp); - } else if (sd->type == AV_PKT_DATA_AUDIO_SERVICE_TYPE) { - enum AVAudioServiceType *t = (enum AVAudioServiceType *)sd->data; - print_int("service_type", *t); - } else if (sd->type == AV_PKT_DATA_MPEGTS_STREAM_ID) { - print_int("id", *sd->data); - } else if (sd->type == AV_PKT_DATA_CPB_PROPERTIES) { - const AVCPBProperties *prop = (AVCPBProperties *)sd->data; - print_int("max_bitrate", prop->max_bitrate); - print_int("min_bitrate", prop->min_bitrate); - print_int("avg_bitrate", prop->avg_bitrate); - print_int("buffer_size", prop->buffer_size); - print_int("vbv_delay", prop->vbv_delay); - } else if (sd->type == AV_PKT_DATA_WEBVTT_IDENTIFIER || - sd->type == AV_PKT_DATA_WEBVTT_SETTINGS) { - if (do_show_data) - avtext_print_data(tfc, "data", sd->data, sd->size); - avtext_print_data_hash(tfc, "data_hash", sd->data, sd->size); - } else if (sd->type == AV_PKT_DATA_FRAME_CROPPING && sd->size >= sizeof(uint32_t) * 4) { - print_int("crop_top", AV_RL32(sd->data)); - print_int("crop_bottom", AV_RL32(sd->data + 4)); - print_int("crop_left", AV_RL32(sd->data + 8)); - print_int("crop_right", AV_RL32(sd->data + 12)); - } else if (sd->type == AV_PKT_DATA_AFD && sd->size > 0) { - print_int("active_format", *sd->data); - } + const char *name = av_packet_side_data_name(sd->type); + + avtext_print_section_header(tfc, sd, id_data); + print_str("side_data_type", name ? 
name : "unknown"); + if (sd->type == AV_PKT_DATA_DISPLAYMATRIX && sd->size >= 9*4) { + print_displaymatrix(tfc, (const int32_t*)sd->data); + } else if (sd->type == AV_PKT_DATA_STEREO3D) { + const AVStereo3D *stereo = (AVStereo3D *)sd->data; + print_str("type", av_stereo3d_type_name(stereo->type)); + print_int("inverted", !!(stereo->flags & AV_STEREO3D_FLAG_INVERT)); + print_str("view", av_stereo3d_view_name(stereo->view)); + print_str("primary_eye", av_stereo3d_primary_eye_name(stereo->primary_eye)); + print_int("baseline", stereo->baseline); + print_q("horizontal_disparity_adjustment", stereo->horizontal_disparity_adjustment, '/'); + print_q("horizontal_field_of_view", stereo->horizontal_field_of_view, '/'); + } else if (sd->type == AV_PKT_DATA_SPHERICAL) { + const AVSphericalMapping *spherical = (AVSphericalMapping *)sd->data; + print_str("projection", av_spherical_projection_name(spherical->projection)); + if (spherical->projection == AV_SPHERICAL_CUBEMAP) { + print_int("padding", spherical->padding); + } else if (spherical->projection == AV_SPHERICAL_EQUIRECTANGULAR_TILE) { + size_t l, t, r, b; + av_spherical_tile_bounds(spherical, par->width, par->height, + &l, &t, &r, &b); + print_int("bound_left", l); + print_int("bound_top", t); + print_int("bound_right", r); + print_int("bound_bottom", b); + } + + print_int("yaw", (double) spherical->yaw / (1 << 16)); + print_int("pitch", (double) spherical->pitch / (1 << 16)); + print_int("roll", (double) spherical->roll / (1 << 16)); + } else if (sd->type == AV_PKT_DATA_SKIP_SAMPLES && sd->size == 10) { + print_int("skip_samples", AV_RL32(sd->data)); + print_int("discard_padding", AV_RL32(sd->data + 4)); + print_int("skip_reason", AV_RL8(sd->data + 8)); + print_int("discard_reason", AV_RL8(sd->data + 9)); + } else if (sd->type == AV_PKT_DATA_MASTERING_DISPLAY_METADATA) { + print_mastering_display_metadata(tfc, (AVMasteringDisplayMetadata *)sd->data); + } else if (sd->type == AV_PKT_DATA_CONTENT_LIGHT_LEVEL) { + print_context_light_level(tfc, (AVContentLightMetadata *)sd->data); + } else if (sd->type == AV_PKT_DATA_AMBIENT_VIEWING_ENVIRONMENT) { + print_ambient_viewing_environment( + tfc, (const AVAmbientViewingEnvironment *)sd->data); + } else if (sd->type == AV_PKT_DATA_DYNAMIC_HDR10_PLUS) { + AVDynamicHDRPlus *metadata = (AVDynamicHDRPlus *)sd->data; + print_dynamic_hdr10_plus(tfc, metadata); + } else if (sd->type == AV_PKT_DATA_DOVI_CONF) { + AVDOVIDecoderConfigurationRecord *dovi = (AVDOVIDecoderConfigurationRecord *)sd->data; + const char *comp = "unknown"; + print_int("dv_version_major", dovi->dv_version_major); + print_int("dv_version_minor", dovi->dv_version_minor); + print_int("dv_profile", dovi->dv_profile); + print_int("dv_level", dovi->dv_level); + print_int("rpu_present_flag", dovi->rpu_present_flag); + print_int("el_present_flag", dovi->el_present_flag); + print_int("bl_present_flag", dovi->bl_present_flag); + print_int("dv_bl_signal_compatibility_id", dovi->dv_bl_signal_compatibility_id); + switch (dovi->dv_md_compression) + { + case AV_DOVI_COMPRESSION_NONE: comp = "none"; break; + case AV_DOVI_COMPRESSION_LIMITED: comp = "limited"; break; + case AV_DOVI_COMPRESSION_RESERVED: comp = "reserved"; break; + case AV_DOVI_COMPRESSION_EXTENDED: comp = "extended"; break; + } + print_str("dv_md_compression", comp); + } else if (sd->type == AV_PKT_DATA_AUDIO_SERVICE_TYPE) { + enum AVAudioServiceType *t = (enum AVAudioServiceType *)sd->data; + print_int("service_type", *t); + } else if (sd->type == AV_PKT_DATA_MPEGTS_STREAM_ID) { + 
print_int("id", *sd->data); + } else if (sd->type == AV_PKT_DATA_CPB_PROPERTIES) { + const AVCPBProperties *prop = (AVCPBProperties *)sd->data; + print_int("max_bitrate", prop->max_bitrate); + print_int("min_bitrate", prop->min_bitrate); + print_int("avg_bitrate", prop->avg_bitrate); + print_int("buffer_size", prop->buffer_size); + print_int("vbv_delay", prop->vbv_delay); + } else if (sd->type == AV_PKT_DATA_WEBVTT_IDENTIFIER || + sd->type == AV_PKT_DATA_WEBVTT_SETTINGS) { + if (do_show_data) + avtext_print_data(tfc, "data", sd->data, sd->size); + avtext_print_data_hash(tfc, "data_hash", sd->data, sd->size); + } else if (sd->type == AV_PKT_DATA_FRAME_CROPPING && sd->size >= sizeof(uint32_t) * 4) { + print_int("crop_top", AV_RL32(sd->data)); + print_int("crop_bottom", AV_RL32(sd->data + 4)); + print_int("crop_left", AV_RL32(sd->data + 8)); + print_int("crop_right", AV_RL32(sd->data + 12)); + } else if (sd->type == AV_PKT_DATA_AFD && sd->size > 0) { + print_int("active_format", *sd->data); + } } static void print_private_data(AVTextFormatContext *tfc, void *priv_data) @@ -1279,11 +1293,7 @@ static void print_frame_side_data(AVTextFormatContext *tfc, name = av_frame_side_data_name(sd->type); print_str("side_data_type", name ? name : "unknown"); if (sd->type == AV_FRAME_DATA_DISPLAYMATRIX && sd->size >= 9*4) { - double rotation = av_display_rotation_get((int32_t *)sd->data); - if (isnan(rotation)) - rotation = 0; - avtext_print_integers(tfc, "displaymatrix", sd->data, 9, " %11d", 3, 4, 1); - print_int("rotation", rotation); + print_displaymatrix(tfc, (const int32_t*)sd->data); } else if (sd->type == AV_FRAME_DATA_AFD && sd->size > 0) { print_int("active_format", *sd->data); } else if (sd->type == AV_FRAME_DATA_GOP_TIMECODE && sd->size >= 8) { @@ -1303,31 +1313,12 @@ static void print_frame_side_data(AVTextFormatContext *tfc, } avtext_print_section_footer(tfc); } else if (sd->type == AV_FRAME_DATA_MASTERING_DISPLAY_METADATA) { - AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data; - - if (metadata->has_primaries) { - print_q("red_x", metadata->display_primaries[0][0], '/'); - print_q("red_y", metadata->display_primaries[0][1], '/'); - print_q("green_x", metadata->display_primaries[1][0], '/'); - print_q("green_y", metadata->display_primaries[1][1], '/'); - print_q("blue_x", metadata->display_primaries[2][0], '/'); - print_q("blue_y", metadata->display_primaries[2][1], '/'); - - print_q("white_point_x", metadata->white_point[0], '/'); - print_q("white_point_y", metadata->white_point[1], '/'); - } - - if (metadata->has_luminance) { - print_q("min_luminance", metadata->min_luminance, '/'); - print_q("max_luminance", metadata->max_luminance, '/'); - } + print_mastering_display_metadata(tfc, (AVMasteringDisplayMetadata *)sd->data); } else if (sd->type == AV_FRAME_DATA_DYNAMIC_HDR_PLUS) { AVDynamicHDRPlus *metadata = (AVDynamicHDRPlus *)sd->data; print_dynamic_hdr10_plus(tfc, metadata); } else if (sd->type == AV_FRAME_DATA_CONTENT_LIGHT_LEVEL) { - AVContentLightMetadata *metadata = (AVContentLightMetadata *)sd->data; - print_int("max_content", metadata->MaxCLL); - print_int("max_average", metadata->MaxFALL); + print_context_light_level(tfc, (AVContentLightMetadata *)sd->data); } else if (sd->type == AV_FRAME_DATA_ICC_PROFILE) { const AVDictionaryEntry *tag = av_dict_get(sd->metadata, "name", NULL, AV_DICT_MATCH_CASE); if (tag) diff --git a/fftools/graph/graphprint.c b/fftools/graph/graphprint.c index fc94a75797935..e4c6886cf8eef 100644 --- a/fftools/graph/graphprint.c +++ 
b/fftools/graph/graphprint.c @@ -28,7 +28,7 @@ #include "graphprint.h" -#include "fftools/ffmpeg_filter.h" +#include "fftools/ffmpeg.h" #include "fftools/ffmpeg_mux.h" #include "libavutil/avassert.h" @@ -318,6 +318,7 @@ static void print_link(GraphPrintContext *gpc, AVFilterLink *link) if (hw_frames_ctx && hw_frames_ctx->data) print_hwframescontext(gpc, (AVHWFramesContext *)hw_frames_ctx->data); + av_buffer_unref(&hw_frames_ctx); } static char sanitize_char(const char c) @@ -478,19 +479,18 @@ static void init_sections(void) static void print_filtergraph_single(GraphPrintContext *gpc, FilterGraph *fg, AVFilterGraph *graph) { AVTextFormatContext *tfc = gpc->tfc; - FilterGraphPriv *fgp = fgp_from_fg(fg); AVDictionary *input_map = NULL; AVDictionary *output_map = NULL; print_int("graph_index", fg->index); print_fmt("name", "Graph %d.%d", gpc->id_prefix_num, fg->index); print_fmt("id", "Graph_%d_%d", gpc->id_prefix_num, fg->index); - print_str("description", fgp->graph_desc); + print_str("description", fg->graph_desc); print_section_header_id(gpc, SECTION_ID_GRAPH_INPUTS, "Input_File", 0); for (int i = 0; i < fg->nb_inputs; i++) { - InputFilterPriv *ifilter = ifp_from_ifilter(fg->inputs[i]); + InputFilter *ifilter = fg->inputs[i]; enum AVMediaType media_type = ifilter->type; avtext_print_section_header(tfc, NULL, SECTION_ID_GRAPH_INPUT); @@ -507,8 +507,8 @@ static void print_filtergraph_single(GraphPrintContext *gpc, FilterGraph *fg, AV if (ifilter->linklabel && ifilter->filter) av_dict_set(&input_map, ifilter->filter->name, (const char *)ifilter->linklabel, 0); - else if (ifilter->opts.name && ifilter->filter) - av_dict_set(&input_map, ifilter->filter->name, (const char *)ifilter->opts.name, 0); + else if (ifilter->input_name && ifilter->filter) + av_dict_set(&input_map, ifilter->filter->name, (const char *)ifilter->input_name, 0); print_str("media_type", av_get_media_type_string(media_type)); @@ -520,13 +520,13 @@ static void print_filtergraph_single(GraphPrintContext *gpc, FilterGraph *fg, AV print_section_header_id(gpc, SECTION_ID_GRAPH_OUTPUTS, "Output_File", 0); for (int i = 0; i < fg->nb_outputs; i++) { - OutputFilterPriv *ofilter = ofp_from_ofilter(fg->outputs[i]); + OutputFilter *ofilter = fg->outputs[i]; avtext_print_section_header(tfc, NULL, SECTION_ID_GRAPH_OUTPUT); print_int("output_index", ofilter->index); - print_str("name", ofilter->name); + print_str("name", ofilter->output_name); if (fg->outputs[i]->linklabel) print_str("link_label", (const char*)fg->outputs[i]->linklabel); @@ -536,11 +536,11 @@ static void print_filtergraph_single(GraphPrintContext *gpc, FilterGraph *fg, AV print_str("filter_name", ofilter->filter->filter->name); } - if (ofilter->name && ofilter->filter) - av_dict_set(&output_map, ofilter->filter->name, ofilter->name, 0); + if (ofilter->output_name && ofilter->filter) + av_dict_set(&output_map, ofilter->filter->name, ofilter->output_name, 0); - print_str("media_type", av_get_media_type_string(fg->outputs[i]->type)); + print_str("media_type", av_get_media_type_string(ofilter->type)); avtext_print_section_footer(tfc); // SECTION_ID_GRAPH_OUTPUT } @@ -556,7 +556,7 @@ static void print_filtergraph_single(GraphPrintContext *gpc, FilterGraph *fg, AV if (gpc->is_diagram) { print_fmt("name", "Graph %d.%d", gpc->id_prefix_num, fg->index); - print_str("description", fgp->graph_desc); + print_str("description", fg->graph_desc); print_str("id", sec_ctx.context_id); } @@ -780,6 +780,8 @@ static int print_streams(GraphPrintContext *gpc, InputFile **ifiles, int nb_ifil 
avtext_print_section_header(tfc, &sec_ctx, SECTION_ID_OUTPUTSTREAMS); + av_freep(&sec_ctx.context_id); + for (int i = 0; i < of->nb_streams; i++) { OutputStream *ost = of->streams[i]; const AVCodecDescriptor *codec_desc = avcodec_descriptor_get(ost->st->codecpar->codec_id); @@ -862,6 +864,8 @@ static void uninit_graphprint(GraphPrintContext *gpc) // Finalize the print buffer if it was initialized av_bprint_finalize(&gpc->pbuf, NULL); + + av_freep(&gpc); } static int init_graphprint(GraphPrintContext **pgpc, AVBPrint *target_buf) @@ -870,8 +874,6 @@ static int init_graphprint(GraphPrintContext **pgpc, AVBPrint *target_buf) AVTextFormatContext *tfc = NULL; AVTextWriterContext *wctx = NULL; GraphPrintContext *gpc = NULL; - char *w_args = NULL; - char *w_name; int ret; init_sections(); @@ -879,19 +881,7 @@ static int init_graphprint(GraphPrintContext **pgpc, AVBPrint *target_buf) av_bprint_init(target_buf, 0, AV_BPRINT_SIZE_UNLIMITED); - if (!print_graphs_format) - print_graphs_format = av_strdup("json"); - if (!print_graphs_format) { - ret = AVERROR(ENOMEM); - goto fail; - } - - w_name = av_strtok(print_graphs_format, "=", &w_args); - if (!w_name) { - av_log(NULL, AV_LOG_ERROR, "No name specified for the filter graph output format\n"); - ret = AVERROR(EINVAL); - goto fail; - } + const char *w_name = print_graphs_format ? print_graphs_format : "json"; text_formatter = avtext_get_formatter_by_name(w_name); if (!text_formatter) { @@ -908,6 +898,9 @@ static int init_graphprint(GraphPrintContext **pgpc, AVBPrint *target_buf) } AVTextFormatOptions tf_options = { .show_optional_fields = -1 }; + const char *w_args = print_graphs_format ? strchr(print_graphs_format, '=') : NULL; + if (w_args) + ++w_args; // consume '=' ret = avtext_context_open(&tfc, text_formatter, wctx, w_args, sections, FF_ARRAY_ELEMS(sections), tf_options, NULL); if (ret < 0) { goto fail; @@ -962,11 +955,10 @@ int print_filtergraph(FilterGraph *fg, AVFilterGraph *graph) { GraphPrintContext *gpc = NULL; AVTextFormatContext *tfc; - FilterGraphPriv *fgp = fgp_from_fg(fg); - AVBPrint *target_buf = &fgp->graph_print_buf; + AVBPrint *target_buf = &fg->graph_print_buf; int ret; - if (!fg || !fgp) { + if (!fg) { av_log(NULL, AV_LOG_ERROR, "Invalid filter graph provided\n"); return AVERROR(EINVAL); } @@ -1030,8 +1022,7 @@ static int print_filtergraphs_priv(FilterGraph **graphs, int nb_graphs, InputFil avtext_print_section_header(tfc, NULL, SECTION_ID_FILTERGRAPHS); for (int i = 0; i < nb_graphs; i++) { - FilterGraphPriv *fgp = fgp_from_fg(graphs[i]); - AVBPrint *graph_buf = &fgp->graph_print_buf; + AVBPrint *graph_buf = &graphs[i]->graph_print_buf; if (graph_buf->len > 0) { avtext_print_section_header(tfc, NULL, SECTION_ID_FILTERGRAPH); @@ -1048,8 +1039,7 @@ static int print_filtergraphs_priv(FilterGraph **graphs, int nb_graphs, InputFil OutputStream *ost = of->streams[i]; if (ost->fg_simple) { - FilterGraphPriv *fgp = fgp_from_fg(ost->fg_simple); - AVBPrint *graph_buf = &fgp->graph_print_buf; + AVBPrint *graph_buf = &ost->fg_simple->graph_print_buf; if (graph_buf->len > 0) { avtext_print_section_header(tfc, NULL, SECTION_ID_FILTERGRAPH); @@ -1080,7 +1070,6 @@ static int print_filtergraphs_priv(FilterGraph **graphs, int nb_graphs, InputFil } avio_write(avio, (const unsigned char *)target_buf.str, FFMIN(target_buf.len, target_buf.size - 1)); - avio_flush(avio); if ((ret = avio_closep(&avio)) < 0) av_log(NULL, AV_LOG_ERROR, "Error closing graph output file, loss of information possible: %s\n", av_err2str(ret)); @@ -1103,5 +1092,7 @@ static 
int print_filtergraphs_priv(FilterGraph **graphs, int nb_graphs, InputFil int print_filtergraphs(FilterGraph **graphs, int nb_graphs, InputFile **ifiles, int nb_ifiles, OutputFile **ofiles, int nb_ofiles) { - return print_filtergraphs_priv(graphs, nb_graphs, ifiles, nb_ifiles, ofiles, nb_ofiles); + int ret = print_filtergraphs_priv(graphs, nb_graphs, ifiles, nb_ifiles, ofiles, nb_ofiles); + ff_resman_uninit(); + return ret; } diff --git a/fftools/resources/.gitignore b/fftools/resources/.gitignore index 5f496535a605f..bda2c59a1c9ed 100644 --- a/fftools/resources/.gitignore +++ b/fftools/resources/.gitignore @@ -2,3 +2,5 @@ *.css.c *.html.gz *.css.gz +*.min +*.min.gz diff --git a/fftools/resources/resman.c b/fftools/resources/resman.c index a9e21626fa81c..aa53e96bf4cc6 100644 --- a/fftools/resources/resman.c +++ b/fftools/resources/resman.c @@ -32,7 +32,6 @@ #endif #include "resman.h" -#include "fftools/ffmpeg_filter.h" #include "libavutil/avassert.h" #include "libavutil/pixdesc.h" #include "libavutil/dict.h" @@ -61,7 +60,7 @@ typedef struct ResourceManagerContext { static AVMutex mutex = AV_MUTEX_INITIALIZER; -ResourceManagerContext *resman_ctx = NULL; +static ResourceManagerContext resman_ctx = { .class = &resman_class }; #if CONFIG_RESOURCE_COMPRESSION @@ -118,39 +117,11 @@ static int decompress_gzip(ResourceManagerContext *ctx, uint8_t *in, unsigned in } #endif -static ResourceManagerContext *get_resman_context(void) -{ - ResourceManagerContext *res = resman_ctx; - - ff_mutex_lock(&mutex); - - if (res) - goto end; - - res = av_mallocz(sizeof(ResourceManagerContext)); - if (!res) { - av_log(NULL, AV_LOG_ERROR, "Failed to allocate resource manager context\n"); - goto end; - } - - res->class = &resman_class; - resman_ctx = res; - -end: - ff_mutex_unlock(&mutex); - return res; -} - - void ff_resman_uninit(void) { ff_mutex_lock(&mutex); - if (resman_ctx) { - if (resman_ctx->resource_dic) - av_dict_free(&resman_ctx->resource_dic); - av_freep(&resman_ctx); - } + av_dict_free(&resman_ctx.resource_dic); ff_mutex_unlock(&mutex); } @@ -158,14 +129,11 @@ void ff_resman_uninit(void) char *ff_resman_get_string(FFResourceId resource_id) { - ResourceManagerContext *ctx = get_resman_context(); + ResourceManagerContext *ctx = &resman_ctx; FFResourceDefinition resource_definition = { 0 }; AVDictionaryEntry *dic_entry; char *res = NULL; - if (!ctx) - return NULL; - for (unsigned i = 0; i < FF_ARRAY_ELEMS(resource_definitions); ++i) { FFResourceDefinition def = resource_definitions[i]; if (def.resource_id == resource_id) { @@ -174,10 +142,7 @@ char *ff_resman_get_string(FFResourceId resource_id) } } - if (!resource_definition.name) { - av_log(ctx, AV_LOG_ERROR, "Unable to find resource with ID %d\n", resource_id); - return NULL; - } + av_assert1(resource_definition.name); ff_mutex_lock(&mutex); @@ -194,13 +159,13 @@ char *ff_resman_get_string(FFResourceId resource_id) int ret = decompress_gzip(ctx, (uint8_t *)resource_definition.data, *resource_definition.data_len, &out, &out_len); if (ret) { - av_log(NULL, AV_LOG_ERROR, "Unable to decompress the resource with ID %d\n", resource_id); + av_log(ctx, AV_LOG_ERROR, "Unable to decompress the resource with ID %d\n", resource_id); goto end; } dict_ret = av_dict_set(&ctx->resource_dic, resource_definition.name, out, 0); if (dict_ret < 0) { - av_log(NULL, AV_LOG_ERROR, "Failed to store decompressed resource in dictionary: %d\n", dict_ret); + av_log(ctx, AV_LOG_ERROR, "Failed to store decompressed resource in dictionary: %d\n", dict_ret); av_freep(&out); goto end; } 
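The resman.c hunks above drop the lazily heap-allocated singleton in favour of a statically initialized context, and keep caching each (possibly gzip-compressed) resource string in a dictionary guarded by a mutex. A minimal standalone sketch of that lookup-or-populate pattern follows; it is an illustration, not FFmpeg code: plain C with POSIX threads, and res_get_string(), res_uninit(), expand_resource() plus the fixed-size array are hypothetical stand-ins for ff_resman_get_string(), ff_resman_uninit(), decompress_gzip() and the AVDictionary.

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    /* Statically initialized context: no lazy allocation, nothing to NULL-check. */
    typedef struct ResCache {
        pthread_mutex_t lock;
        char *entries[4];           /* cached strings, indexed by resource id */
    } ResCache;

    static ResCache cache = { .lock = PTHREAD_MUTEX_INITIALIZER };

    /* Stand-in for the real per-resource expansion step (gzip in the patch). */
    static char *expand_resource(int id)
    {
        static const char *raw[4] = { "graph.css", "graph.html", "style.min", "page.min" };
        return strdup(raw[id]);
    }

    /* Return the cached string for id, populating the cache on first use. */
    const char *res_get_string(int id)
    {
        const char *ret;

        if (id < 0 || id >= 4)
            return NULL;

        pthread_mutex_lock(&cache.lock);
        if (!cache.entries[id])
            cache.entries[id] = expand_resource(id);   /* expand once, keep it */
        ret = cache.entries[id];
        pthread_mutex_unlock(&cache.lock);

        return ret;
    }

    /* Called once at shutdown, analogous to ff_resman_uninit() in the patch. */
    void res_uninit(void)
    {
        pthread_mutex_lock(&cache.lock);
        for (int i = 0; i < 4; i++) {
            free(cache.entries[i]);
            cache.entries[i] = NULL;
        }
        pthread_mutex_unlock(&cache.lock);
    }

Static initialization removes the allocation-failure path the old get_resman_context() had to handle, and the uninit routine stays safe to call even if no resource was ever requested, which is why the patch can invoke ff_resman_uninit() unconditionally from print_filtergraphs().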
@@ -210,7 +175,7 @@ char *ff_resman_get_string(FFResourceId resource_id) dict_ret = av_dict_set(&ctx->resource_dic, resource_definition.name, (const char *)resource_definition.data, 0); if (dict_ret < 0) { - av_log(NULL, AV_LOG_ERROR, "Failed to store resource in dictionary: %d\n", dict_ret); + av_log(ctx, AV_LOG_ERROR, "Failed to store resource in dictionary: %d\n", dict_ret); goto end; } @@ -218,7 +183,7 @@ char *ff_resman_get_string(FFResourceId resource_id) dic_entry = av_dict_get(ctx->resource_dic, resource_definition.name, NULL, 0); if (!dic_entry) { - av_log(NULL, AV_LOG_ERROR, "Failed to retrieve resource from dictionary after storing it\n"); + av_log(ctx, AV_LOG_ERROR, "Failed to retrieve resource from dictionary after storing it\n"); goto end; } } diff --git a/fftools/textformat/avtextformat.c b/fftools/textformat/avtextformat.c index bb90e66918ef4..14779e6f0cb26 100644 --- a/fftools/textformat/avtextformat.c +++ b/fftools/textformat/avtextformat.c @@ -43,8 +43,8 @@ static const struct { double bin_val; double dec_val; - const char *bin_str; - const char *dec_str; + char bin_str[4]; + char dec_str[4]; } si_prefixes[] = { { 1.0, 1.0, "", "" }, { 1.024e3, 1e3, "Ki", "K" }, @@ -681,34 +681,28 @@ int avtextwriter_context_open(AVTextWriterContext **pwctx, const AVTextWriter *w return ret; } -static const AVTextFormatter *registered_formatters[9 + 1]; - -static void formatters_register_all(void) +static const AVTextFormatter *const registered_formatters[] = { - static int initialized; - - if (initialized) - return; - initialized = 1; - - registered_formatters[0] = &avtextformatter_default; - registered_formatters[1] = &avtextformatter_compact; - registered_formatters[2] = &avtextformatter_csv; - registered_formatters[3] = &avtextformatter_flat; - registered_formatters[4] = &avtextformatter_ini; - registered_formatters[5] = &avtextformatter_json; - registered_formatters[6] = &avtextformatter_xml; - registered_formatters[7] = &avtextformatter_mermaid; - registered_formatters[8] = &avtextformatter_mermaidhtml; -} + &avtextformatter_default, + &avtextformatter_compact, + &avtextformatter_csv, + &avtextformatter_flat, + &avtextformatter_ini, + &avtextformatter_json, + &avtextformatter_xml, + &avtextformatter_mermaid, + &avtextformatter_mermaidhtml, + NULL +}; const AVTextFormatter *avtext_get_formatter_by_name(const char *name) { - formatters_register_all(); - - for (int i = 0; registered_formatters[i]; i++) - if (!strcmp(registered_formatters[i]->name, name)) + for (int i = 0; registered_formatters[i]; i++) { + const char *end; + if (av_strstart(name, registered_formatters[i]->name, &end) && + (*end == '\0' || *end == '=')) return registered_formatters[i]; + } return NULL; } diff --git a/fftools/textformat/tf_mermaid.c b/fftools/textformat/tf_mermaid.c index 6147cf6eeabb8..d3b9131adad93 100644 --- a/fftools/textformat/tf_mermaid.c +++ b/fftools/textformat/tf_mermaid.c @@ -153,7 +153,6 @@ typedef struct MermaidContext { } section_data[SECTION_MAX_NB_LEVELS]; unsigned nb_link_captions[SECTION_MAX_NB_LEVELS]; ///< generic print buffer dedicated to each section, - AVBPrint section_pbuf[SECTION_MAX_NB_LEVELS]; ///< generic print buffer dedicated to each section, AVBPrint link_buf; ///< print buffer for writing diagram links AVDictionary *link_dict; } MermaidContext; @@ -216,6 +215,32 @@ static av_cold int mermaid_init_html(AVTextFormatContext *tfc) return 0; } +static av_cold int mermaid_uninit(AVTextFormatContext *tfc) +{ + MermaidContext *mmc = tfc->priv; + + 
av_bprint_finalize(&mmc->link_buf, NULL); + av_dict_free(&mmc->link_dict); + + for (unsigned i = 0; i < SECTION_MAX_NB_LEVELS; i++) { + av_freep(&mmc->section_data[i].dest_id); + av_freep(&mmc->section_data[i].section_id); + av_freep(&mmc->section_data[i].src_id); + av_freep(&mmc->section_data[i].section_type); + } + + return 0; +} + +static void set_str(const char **dst, const char *src) +{ + if (*dst) + av_freep(dst); + + if (src) + *dst = av_strdup(src); +} + #define MM_INDENT() writer_printf(tfc, "%*c", mmc->indent_level * 2, ' ') static void mermaid_print_section_header(AVTextFormatContext *tfc, const void *data) @@ -266,6 +291,8 @@ static void mermaid_print_section_header(AVTextFormatContext *tfc, const void *d break; } + av_bprint_finalize(&css_buf, NULL); + av_freep(&directive); return; } @@ -310,7 +337,7 @@ static void mermaid_print_section_header(AVTextFormatContext *tfc, const void *d } mmc->section_data[tfc->level].subgraph_start_incomplete = 1; - mmc->section_data[tfc->level].section_id = av_strdup(sec_ctx->context_id); + set_str(&mmc->section_data[tfc->level].section_id, sec_ctx->context_id); } if (section->flags & AV_TEXTFORMAT_SECTION_FLAG_IS_SHAPE) { @@ -322,7 +349,7 @@ static void mermaid_print_section_header(AVTextFormatContext *tfc, const void *d if (sec_ctx->context_id) { - mmc->section_data[tfc->level].section_id = av_strdup(sec_ctx->context_id); + set_str(&mmc->section_data[tfc->level].section_id, sec_ctx->context_id); switch (mmc->diagram_config->diagram_type) { case AV_DIAGRAMTYPE_GRAPH: @@ -352,7 +379,7 @@ static void mermaid_print_section_header(AVTextFormatContext *tfc, const void *d av_log(tfc, AV_LOG_ERROR, "Unable to write shape start. Missing id field. Section: %s", section->name); } - mmc->section_data[tfc->level].section_id = av_strdup(sec_ctx->context_id); + set_str(&mmc->section_data[tfc->level].section_id, sec_ctx->context_id); } @@ -371,7 +398,7 @@ static void mermaid_print_section_header(AVTextFormatContext *tfc, const void *d mmc->nb_link_captions[tfc->level] = 0; if (sec_ctx && sec_ctx->context_type) - mmc->section_data[tfc->level].section_type = av_strdup(sec_ctx->context_type); + set_str(&mmc->section_data[tfc->level].section_type, sec_ctx->context_type); ////if (section->flags & AV_TEXTFORMAT_SECTION_FLAG_HAS_TYPE) { //// AVBPrint buf; @@ -533,17 +560,17 @@ static void mermaid_print_value(AVTextFormatContext *tfc, const char *key, int exit = 0; if (section->id_key && !strcmp(section->id_key, key)) { - mmc->section_data[tfc->level].section_id = av_strdup(str); + set_str(&mmc->section_data[tfc->level].section_id, str); exit = 1; } if (section->dest_id_key && !strcmp(section->dest_id_key, key)) { - mmc->section_data[tfc->level].dest_id = av_strdup(str); + set_str(&mmc->section_data[tfc->level].dest_id, str); exit = 1; } if (section->src_id_key && !strcmp(section->src_id_key, key)) { - mmc->section_data[tfc->level].src_id = av_strdup(str); + set_str(&mmc->section_data[tfc->level].src_id, str); exit = 1; } @@ -636,6 +663,7 @@ const AVTextFormatter avtextformatter_mermaid = { .name = "mermaid", .priv_size = sizeof(MermaidContext), .init = mermaid_init, + .uninit = mermaid_uninit, .print_section_header = mermaid_print_section_header, .print_section_footer = mermaid_print_section_footer, .print_integer = mermaid_print_int, @@ -649,6 +677,7 @@ const AVTextFormatter avtextformatter_mermaidhtml = { .name = "mermaidhtml", .priv_size = sizeof(MermaidContext), .init = mermaid_init_html, + .uninit = mermaid_uninit, .print_section_header = 
mermaid_print_section_header, .print_section_footer = mermaid_print_section_footer, .print_integer = mermaid_print_int, diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c index 94f42681272e3..c6b2ce1230476 100644 --- a/libavcodec/4xm.c +++ b/libavcodec/4xm.c @@ -337,7 +337,8 @@ static inline void mcdc(uint16_t *dst, const uint16_t *src, int log2w, } break; default: - av_assert0(0); + av_unreachable("log2w starts at 3 and gets only decremented during " + "recursive calls to decode_p_block"); } } diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 77734dff24585..fb3fb7f7f72de 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -42,6 +42,7 @@ OBJS = ac3_parser.o \ dv_profile.o \ encode.o \ get_buffer.o \ + hashtable.o \ imgconvert.o \ jni.o \ lcevcdec.o \ @@ -811,6 +812,7 @@ OBJS-$(CONFIG_VP9_V4L2M2M_DECODER) += v4l2_m2m_dec.o OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o OBJS-$(CONFIG_VQC_DECODER) += vqcdec.o OBJS-$(CONFIG_VVC_DECODER) += executor.o h2645data.o +OBJS-$(CONFIG_VVC_SEI) += h2645_sei.o aom_film_grain.o h274.o OBJS-$(CONFIG_WADY_DPCM_DECODER) += dpcm.o OBJS-$(CONFIG_WAVARC_DECODER) += wavarc.o OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o wavpackdata.o dsd.o @@ -1325,6 +1327,7 @@ TESTPROGS = avcodec \ bitstream_le \ celp_math \ codec_desc \ + hashtable \ htmlsubtitles \ jpeg2000dwt \ mathops \ diff --git a/libavcodec/aac/aacdec_ac.c b/libavcodec/aac/aacdec_ac.c index 7e5077cd19d8e..5104604fa580a 100644 --- a/libavcodec/aac/aacdec_ac.c +++ b/libavcodec/aac/aacdec_ac.c @@ -91,10 +91,7 @@ uint32_t ff_aac_ac_get_pk(uint32_t c) void ff_aac_ac_update_context(AACArithState *state, int idx, uint16_t a, uint16_t b) { - state->cur[0] = a + b + 1; - if (state->cur[0] > 0xF) - state->cur[0] = 0xF; - + state->cur[0] = FFMIN(a + b + 1, 0xF); state->cur[3] = state->cur[2]; state->cur[2] = state->cur[1]; state->cur[1] = state->cur[0]; diff --git a/libavcodec/aarch64/pixblockdsp_init_aarch64.c b/libavcodec/aarch64/pixblockdsp_init_aarch64.c index e4bac722f88f3..404f3680a619d 100644 --- a/libavcodec/aarch64/pixblockdsp_init_aarch64.c +++ b/libavcodec/aarch64/pixblockdsp_init_aarch64.c @@ -21,7 +21,6 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/aarch64/cpu.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" void ff_get_pixels_neon(int16_t *block, const uint8_t *pixels, @@ -30,7 +29,6 @@ void ff_diff_pixels_neon(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h index 2386c15ad00a3..ccd437f700ab4 100644 --- a/libavcodec/ac3.h +++ b/libavcodec/ac3.h @@ -81,17 +81,6 @@ typedef float SHORTFLOAT; #define AC3_LEVEL(x) ROUND15((x) * FIXR15(M_SQRT1_2)) -/* pre-defined gain values */ -#define LEVEL_PLUS_3DB M_SQRT2 -#define LEVEL_PLUS_1POINT5DB 1.1892071150027209 -#define LEVEL_MINUS_1POINT5DB 0.8408964152537145 -#define LEVEL_MINUS_3DB M_SQRT1_2 -#define LEVEL_MINUS_4POINT5DB 0.5946035575013605 -#define LEVEL_MINUS_6DB 0.5000000000000000 -#define LEVEL_MINUS_9DB 0.3535533905932738 -#define LEVEL_ZERO 0.0000000000000000 -#define LEVEL_ONE 1.0000000000000000 - typedef struct AC3BitAllocParameters { int sr_code; int sr_shift; diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index 49b170c235084..5eacab44751ad 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -46,142 +46,32 @@ #include "decode.h" #include 
"kbdwin.h" -/** - * table for ungrouping 3 values in 7 bits. - * used for exponents and bap=2 mantissas - */ -static uint8_t ungroup_3_in_7_bits_tab[128][3]; - -/** tables for ungrouping mantissas */ -static int b1_mantissas[32][3]; -static int b2_mantissas[128][3]; -static int b3_mantissas[8]; -static int b4_mantissas[128][2]; -static int b5_mantissas[16]; - -/** - * Quantization table: levels for symmetric. bits for asymmetric. - * reference: Table 7.18 Mapping of bap to Quantizer - */ -static const uint8_t quantization_tab[16] = { - 0, 3, 5, 7, 11, 15, - 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 -}; - #if (!USE_FIXED) /** dynamic range table. converts codes to scale factors. */ static float dynamic_range_tab[256]; float ff_ac3_heavy_dynamic_range_tab[256]; -#endif - -/** Adjustments in dB gain */ -static const float gain_levels[9] = { - LEVEL_PLUS_3DB, - LEVEL_PLUS_1POINT5DB, - LEVEL_ONE, - LEVEL_MINUS_1POINT5DB, - LEVEL_MINUS_3DB, - LEVEL_MINUS_4POINT5DB, - LEVEL_MINUS_6DB, - LEVEL_ZERO, - LEVEL_MINUS_9DB -}; - -/** Adjustments in dB gain (LFE, +10 to -21 dB) */ -static const float gain_levels_lfe[32] = { - 3.162275, 2.818382, 2.511886, 2.238719, 1.995261, 1.778278, 1.584893, - 1.412536, 1.258924, 1.122018, 1.000000, 0.891251, 0.794328, 0.707946, - 0.630957, 0.562341, 0.501187, 0.446683, 0.398107, 0.354813, 0.316227, - 0.281838, 0.251188, 0.223872, 0.199526, 0.177828, 0.158489, 0.141253, - 0.125892, 0.112201, 0.100000, 0.089125 -}; - -/** - * Table for default stereo downmixing coefficients - * reference: Section 7.8.2 Downmixing Into Two Channels - */ -static const uint8_t ac3_default_coeffs[8][5][2] = { - { { 2, 7 }, { 7, 2 }, }, - { { 4, 4 }, }, - { { 2, 7 }, { 7, 2 }, }, - { { 2, 7 }, { 5, 5 }, { 7, 2 }, }, - { { 2, 7 }, { 7, 2 }, { 6, 6 }, }, - { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 8, 8 }, }, - { { 2, 7 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, }, - { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, }, -}; - -/** - * Symmetrical Dequantization - * reference: Section 7.3.3 Expansion of Mantissas for Symmetrical Quantization - * Tables 7.19 to 7.23 - */ -static inline int -symmetric_dequant(int code, int levels) -{ - return ((code - (levels >> 1)) * (1 << 24)) / levels; -} /* * Initialize tables at runtime. 
*/ -static av_cold void ac3_tables_init(void) +static av_cold void ac3_float_tables_init(void) { - int i; - - /* generate table for ungrouping 3 values in 7 bits - reference: Section 7.1.3 Exponent Decoding */ - for (i = 0; i < 128; i++) { - ungroup_3_in_7_bits_tab[i][0] = i / 25; - ungroup_3_in_7_bits_tab[i][1] = (i % 25) / 5; - ungroup_3_in_7_bits_tab[i][2] = (i % 25) % 5; - } - - /* generate grouped mantissa tables - reference: Section 7.3.5 Ungrouping of Mantissas */ - for (i = 0; i < 32; i++) { - /* bap=1 mantissas */ - b1_mantissas[i][0] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][0], 3); - b1_mantissas[i][1] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][1], 3); - b1_mantissas[i][2] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][2], 3); - } - for (i = 0; i < 128; i++) { - /* bap=2 mantissas */ - b2_mantissas[i][0] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][0], 5); - b2_mantissas[i][1] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][1], 5); - b2_mantissas[i][2] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][2], 5); - - /* bap=4 mantissas */ - b4_mantissas[i][0] = symmetric_dequant(i / 11, 11); - b4_mantissas[i][1] = symmetric_dequant(i % 11, 11); - } - /* generate ungrouped mantissa tables - reference: Tables 7.21 and 7.23 */ - for (i = 0; i < 7; i++) { - /* bap=3 mantissas */ - b3_mantissas[i] = symmetric_dequant(i, 7); - } - for (i = 0; i < 15; i++) { - /* bap=5 mantissas */ - b5_mantissas[i] = symmetric_dequant(i, 15); - } - -#if (!USE_FIXED) /* generate dynamic range table reference: Section 7.7.1 Dynamic Range Control */ - for (i = 0; i < 256; i++) { + for (int i = 0; i < 256; i++) { int v = (i >> 5) - ((i >> 7) << 3) - 5; dynamic_range_tab[i] = powf(2.0f, v) * ((i & 0x1F) | 0x20); } /* generate compr dynamic range table reference: Section 7.7.2 Heavy Compression */ - for (i = 0; i < 256; i++) { + for (int i = 0; i < 256; i++) { int v = (i >> 4) - ((i >> 7) << 4) - 4; ff_ac3_heavy_dynamic_range_tab[i] = powf(2.0f, v) * ((i & 0xF) | 0x10); } -#endif + ff_ac3_init_static(); } +#endif static void ac3_downmix(AVCodecContext *avctx) { @@ -206,7 +96,6 @@ static void ac3_downmix(AVCodecContext *avctx) */ static av_cold int ac3_decode_init(AVCodecContext *avctx) { - static AVOnce init_static_once = AV_ONCE_INIT; AC3DecodeContext *s = avctx->priv_data; const float scale = 1.0f; int i, ret; @@ -247,7 +136,12 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx) s->dlyptr[i] = s->delay[i]; } - ff_thread_once(&init_static_once, ac3_tables_init); +#if USE_FIXED + ff_ac3_init_static(); +#else + static AVOnce init_static_once = AV_ONCE_INIT; + ff_thread_once(&init_static_once, ac3_float_tables_init); +#endif return 0; } @@ -404,8 +298,8 @@ static int parse_frame_header(AC3DecodeContext *s) static int set_downmix_coeffs(AC3DecodeContext *s) { int i; - float cmix = gain_levels[s-> center_mix_level]; - float smix = gain_levels[s->surround_mix_level]; + float cmix = ff_ac3_gain_levels[s-> center_mix_level]; + float smix = ff_ac3_gain_levels[s->surround_mix_level]; float norm0, norm1; float downmix_coeffs[2][AC3_MAX_CHANNELS]; @@ -418,8 +312,8 @@ static int set_downmix_coeffs(AC3DecodeContext *s) } for (i = 0; i < s->fbw_channels; i++) { - downmix_coeffs[0][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]]; - downmix_coeffs[1][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]]; + downmix_coeffs[0][i] = ff_ac3_gain_levels[ff_ac3_default_coeffs[s->channel_mode][i][0]]; + downmix_coeffs[1][i] = 
ff_ac3_gain_levels[ff_ac3_default_coeffs[s->channel_mode][i][1]]; } if (s->channel_mode > 1 && s->channel_mode & 1) { downmix_coeffs[0][1] = downmix_coeffs[1][1] = cmix; @@ -479,9 +373,9 @@ static int decode_exponents(AC3DecodeContext *s, av_log(s->avctx, AV_LOG_ERROR, "expacc %d is out-of-range\n", expacc); return AVERROR_INVALIDDATA; } - dexp[i++] = ungroup_3_in_7_bits_tab[expacc][0]; - dexp[i++] = ungroup_3_in_7_bits_tab[expacc][1]; - dexp[i++] = ungroup_3_in_7_bits_tab[expacc][2]; + dexp[i++] = ff_ac3_ungroup_3_in_7_bits_tab[expacc][0]; + dexp[i++] = ff_ac3_ungroup_3_in_7_bits_tab[expacc][1]; + dexp[i++] = ff_ac3_ungroup_3_in_7_bits_tab[expacc][2]; } /* convert to absolute exps and expand groups */ @@ -576,9 +470,9 @@ static void ac3_decode_transform_coeffs_ch(AC3DecodeContext *s, int ch_index, ma mantissa = m->b1_mant[m->b1]; } else { int bits = get_bits(gbc, 5); - mantissa = b1_mantissas[bits][0]; - m->b1_mant[1] = b1_mantissas[bits][1]; - m->b1_mant[0] = b1_mantissas[bits][2]; + mantissa = ff_ac3_bap1_mantissas[bits][0]; + m->b1_mant[1] = ff_ac3_bap1_mantissas[bits][1]; + m->b1_mant[0] = ff_ac3_bap1_mantissas[bits][2]; m->b1 = 2; } break; @@ -588,14 +482,14 @@ static void ac3_decode_transform_coeffs_ch(AC3DecodeContext *s, int ch_index, ma mantissa = m->b2_mant[m->b2]; } else { int bits = get_bits(gbc, 7); - mantissa = b2_mantissas[bits][0]; - m->b2_mant[1] = b2_mantissas[bits][1]; - m->b2_mant[0] = b2_mantissas[bits][2]; + mantissa = ff_ac3_bap2_mantissas[bits][0]; + m->b2_mant[1] = ff_ac3_bap2_mantissas[bits][1]; + m->b2_mant[0] = ff_ac3_bap2_mantissas[bits][2]; m->b2 = 2; } break; case 3: - mantissa = b3_mantissas[get_bits(gbc, 3)]; + mantissa = ff_ac3_bap3_mantissas[get_bits(gbc, 3)]; break; case 4: if (m->b4) { @@ -603,13 +497,13 @@ static void ac3_decode_transform_coeffs_ch(AC3DecodeContext *s, int ch_index, ma mantissa = m->b4_mant; } else { int bits = get_bits(gbc, 7); - mantissa = b4_mantissas[bits][0]; - m->b4_mant = b4_mantissas[bits][1]; + mantissa = ff_ac3_bap4_mantissas[bits][0]; + m->b4_mant = ff_ac3_bap4_mantissas[bits][1]; m->b4 = 1; } break; case 5: - mantissa = b5_mantissas[get_bits(gbc, 4)]; + mantissa = ff_ac3_bap5_mantissas[get_bits(gbc, 4)]; break; default: /* 6 to 15 */ /* Shift mantissa and sign-extend it. 
*/ @@ -617,7 +511,7 @@ static void ac3_decode_transform_coeffs_ch(AC3DecodeContext *s, int ch_index, ma av_log(s->avctx, AV_LOG_ERROR, "bap %d is invalid in plain AC-3\n", bap); bap = 15; } - mantissa = (unsigned)get_sbits(gbc, quantization_tab[bap]) << (24 - quantization_tab[bap]); + mantissa = (unsigned)get_sbits(gbc, ff_ac3_quantization_tab[bap]) << (24 - ff_ac3_quantization_tab[bap]); break; } coeffs[freq] = mantissa >> exps[freq]; @@ -1620,10 +1514,10 @@ static int ac3_decode_frame(AVCodecContext *avctx, AVFrame *frame, s->output_mode = AC3_CHMODE_STEREO; } - s->loro_center_mix_level = gain_levels[s-> center_mix_level]; - s->loro_surround_mix_level = gain_levels[s->surround_mix_level]; - s->ltrt_center_mix_level = gain_levels[s-> center_mix_level_ltrt]; - s->ltrt_surround_mix_level = gain_levels[s->surround_mix_level_ltrt]; + s->loro_center_mix_level = ff_ac3_gain_levels[s-> center_mix_level]; + s->loro_surround_mix_level = ff_ac3_gain_levels[s->surround_mix_level]; + s->ltrt_center_mix_level = ff_ac3_gain_levels[s-> center_mix_level_ltrt]; + s->ltrt_surround_mix_level = ff_ac3_gain_levels[s->surround_mix_level_ltrt]; switch (s->preferred_downmix) { case AC3_DMIXMOD_LTRT: s->preferred_stereo_downmix = AV_DOWNMIX_TYPE_LTRT; @@ -1862,12 +1756,12 @@ static int ac3_decode_frame(AVCodecContext *avctx, AVFrame *frame, downmix_info->preferred_downmix_type = AV_DOWNMIX_TYPE_UNKNOWN; break; } - downmix_info->center_mix_level = gain_levels[s-> center_mix_level]; - downmix_info->center_mix_level_ltrt = gain_levels[s-> center_mix_level_ltrt]; - downmix_info->surround_mix_level = gain_levels[s-> surround_mix_level]; - downmix_info->surround_mix_level_ltrt = gain_levels[s->surround_mix_level_ltrt]; + downmix_info->center_mix_level = ff_ac3_gain_levels[s-> center_mix_level]; + downmix_info->center_mix_level_ltrt = ff_ac3_gain_levels[s-> center_mix_level_ltrt]; + downmix_info->surround_mix_level = ff_ac3_gain_levels[s-> surround_mix_level]; + downmix_info->surround_mix_level_ltrt = ff_ac3_gain_levels[s->surround_mix_level_ltrt]; if (s->lfe_mix_level_exists) - downmix_info->lfe_mix_level = gain_levels_lfe[s->lfe_mix_level]; + downmix_info->lfe_mix_level = ff_eac3_gain_levels_lfe[s->lfe_mix_level]; else downmix_info->lfe_mix_level = 0.0; // -inf dB } diff --git a/libavcodec/ac3dec_data.c b/libavcodec/ac3dec_data.c index a3794ab223d12..0f5402c335670 100644 --- a/libavcodec/ac3dec_data.c +++ b/libavcodec/ac3dec_data.c @@ -21,10 +21,11 @@ /** * @file - * Tables taken directly from the AC-3 spec. + * Tables taken directly from the AC-3 spec or derived from it. */ #include "ac3dec_data.h" +#include "libavutil/thread.h" /** * Table used to ungroup 3 values stored in 5 bits. @@ -42,6 +43,124 @@ const uint8_t ff_ac3_ungroup_3_in_5_bits_tab[32][3] = { { 3, 0, 1 }, { 3, 0, 2 }, { 3, 1, 0 }, { 3, 1, 1 } }; +/** + * table for ungrouping 3 values in 7 bits. 
+ * used for exponents and bap=2 mantissas + */ +uint8_t ff_ac3_ungroup_3_in_7_bits_tab[128][3]; + +/** + * Symmetrical Dequantization + * reference: Section 7.3.3 Expansion of Mantissas for Symmetrical Quantization + * Tables 7.19 to 7.23 + */ +#define SYMMETRIC_DEQUANT(code, levels) (((code - (levels >> 1)) * (1 << 24)) / levels) +/** + * Ungrouped mantissa tables; the extra entry is padding to avoid range checks + */ +/** + * Table 7.21 + */ +const int ff_ac3_bap3_mantissas[7 + 1] = { + SYMMETRIC_DEQUANT(0, 7), + SYMMETRIC_DEQUANT(1, 7), + SYMMETRIC_DEQUANT(2, 7), + SYMMETRIC_DEQUANT(3, 7), + SYMMETRIC_DEQUANT(4, 7), + SYMMETRIC_DEQUANT(5, 7), + SYMMETRIC_DEQUANT(6, 7), +}; +/** + * Table 7.23 + */ +const int ff_ac3_bap5_mantissas[15 + 1] = { + SYMMETRIC_DEQUANT(0, 15), + SYMMETRIC_DEQUANT(1, 15), + SYMMETRIC_DEQUANT(2, 15), + SYMMETRIC_DEQUANT(3, 15), + SYMMETRIC_DEQUANT(4, 15), + SYMMETRIC_DEQUANT(5, 15), + SYMMETRIC_DEQUANT(6, 15), + SYMMETRIC_DEQUANT(7, 15), + SYMMETRIC_DEQUANT(8, 15), + SYMMETRIC_DEQUANT(9, 15), + SYMMETRIC_DEQUANT(10, 15), + SYMMETRIC_DEQUANT(11, 15), + SYMMETRIC_DEQUANT(12, 15), + SYMMETRIC_DEQUANT(13, 15), + SYMMETRIC_DEQUANT(14, 15), +}; + +int ff_ac3_bap1_mantissas[32][3]; +int ff_ac3_bap2_mantissas[128][3]; +int ff_ac3_bap4_mantissas[128][2]; + +static inline int +symmetric_dequant(int code, int levels) +{ + return SYMMETRIC_DEQUANT(code, levels); +} + +static av_cold void ac3_init_static(void) +{ + /* generate table for ungrouping 3 values in 7 bits + reference: Section 7.1.3 Exponent Decoding */ + for (int i = 0; i < 128; ++i) { + ff_ac3_ungroup_3_in_7_bits_tab[i][0] = i / 25; + ff_ac3_ungroup_3_in_7_bits_tab[i][1] = (i % 25) / 5; + ff_ac3_ungroup_3_in_7_bits_tab[i][2] = (i % 25) % 5; + } + + /* generate grouped mantissa tables + reference: Section 7.3.5 Ungrouping of Mantissas */ + for (int i = 0; i < 32; ++i) { + /* bap=1 mantissas */ + ff_ac3_bap1_mantissas[i][0] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][0], 3); + ff_ac3_bap1_mantissas[i][1] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][1], 3); + ff_ac3_bap1_mantissas[i][2] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][2], 3); + } + for (int i = 0; i < 128; ++i) { + /* bap=2 mantissas */ + ff_ac3_bap2_mantissas[i][0] = symmetric_dequant(ff_ac3_ungroup_3_in_7_bits_tab[i][0], 5); + ff_ac3_bap2_mantissas[i][1] = symmetric_dequant(ff_ac3_ungroup_3_in_7_bits_tab[i][1], 5); + ff_ac3_bap2_mantissas[i][2] = symmetric_dequant(ff_ac3_ungroup_3_in_7_bits_tab[i][2], 5); + + /* bap=4 mantissas */ + ff_ac3_bap4_mantissas[i][0] = symmetric_dequant(i / 11, 11); + ff_ac3_bap4_mantissas[i][1] = symmetric_dequant(i % 11, 11); + } +} + +av_cold void ff_ac3_init_static(void) +{ + static AVOnce ac3_init_static_once = AV_ONCE_INIT; + ff_thread_once(&ac3_init_static_once, ac3_init_static); +} + +/** + * Quantization table: levels for symmetric. bits for asymmetric. 
+ * reference: Table 7.18 Mapping of bap to Quantizer
+ */
+const uint8_t ff_ac3_quantization_tab[16] = {
+    0, 3, 5, 7, 11, 15,
+    5, 6, 7, 8, 9, 10, 11, 12, 14, 16
+};
+
+/**
+ * Table for default stereo downmixing coefficients
+ * reference: Section 7.8.2 Downmixing Into Two Channels
+ */
+const uint8_t ff_ac3_default_coeffs[8][5][2] = {
+    { { 2, 7 }, { 7, 2 }, },
+    { { 4, 4 }, },
+    { { 2, 7 }, { 7, 2 }, },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 }, },
+    { { 2, 7 }, { 7, 2 }, { 6, 6 }, },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 8, 8 }, },
+    { { 2, 7 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, },
+};
+
 const uint8_t ff_eac3_hebap_tab[64] = {
     0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
     8, 8, 9, 9, 9, 10, 10, 10, 10, 11,
@@ -57,3 +176,12 @@ const uint8_t ff_eac3_hebap_tab[64] = {
  */
 const uint8_t ff_eac3_default_spx_band_struct[17] =
     { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+
+/** Adjustments in dB gain (LFE, +10 to -21 dB) */
+const float ff_eac3_gain_levels_lfe[32] = {
+    3.162275, 2.818382, 2.511886, 2.238719, 1.995261, 1.778278, 1.584893,
+    1.412536, 1.258924, 1.122018, 1.000000, 0.891251, 0.794328, 0.707946,
+    0.630957, 0.562341, 0.501187, 0.446683, 0.398107, 0.354813, 0.316227,
+    0.281838, 0.251188, 0.223872, 0.199526, 0.177828, 0.158489, 0.141253,
+    0.125892, 0.112201, 0.100000, 0.089125
+};
diff --git a/libavcodec/ac3dec_data.h b/libavcodec/ac3dec_data.h
index 975b52ef2cb48..613871627bea0 100644
--- a/libavcodec/ac3dec_data.h
+++ b/libavcodec/ac3dec_data.h
@@ -24,9 +24,31 @@
 
 #include <stdint.h>
 
+#include "libavutil/attributes_internal.h"
+
+FF_VISIBILITY_PUSH_HIDDEN
+
 extern const uint8_t ff_ac3_ungroup_3_in_5_bits_tab[32][3];
+extern uint8_t ff_ac3_ungroup_3_in_7_bits_tab[128][3];
+
+extern const int ff_ac3_bap3_mantissas[ 7 + 1];
+extern const int ff_ac3_bap5_mantissas[15 + 1];
+
+/** tables for ungrouping mantissas */
+extern int ff_ac3_bap1_mantissas[32][3];
+extern int ff_ac3_bap2_mantissas[128][3];
+extern int ff_ac3_bap4_mantissas[128][2];
+
+extern const uint8_t ff_ac3_quantization_tab[16];
+
+extern const uint8_t ff_ac3_default_coeffs[8][5][2];
 
 extern const uint8_t ff_eac3_hebap_tab[64];
 extern const uint8_t ff_eac3_default_spx_band_struct[17];
+extern const float ff_eac3_gain_levels_lfe[32];
+
+void ff_ac3_init_static(void);
+
+FF_VISIBILITY_POP_HIDDEN
 
 #endif /* AVCODEC_AC3DEC_DATA_H */
diff --git a/libavcodec/ac3defs.h b/libavcodec/ac3defs.h
index ff92f0ac4ab99..f9b1be059faa4 100644
--- a/libavcodec/ac3defs.h
+++ b/libavcodec/ac3defs.h
@@ -34,6 +34,17 @@
 #define AC3_CRITICAL_BANDS 50
 #define AC3_MAX_CPL_BANDS  18
 
+/* pre-defined gain values */
+#define LEVEL_PLUS_3DB          M_SQRT2
+#define LEVEL_PLUS_1POINT5DB    1.1892071150027209
+#define LEVEL_MINUS_1POINT5DB   0.8408964152537145
+#define LEVEL_MINUS_3DB         M_SQRT1_2
+#define LEVEL_MINUS_4POINT5DB   0.5946035575013605
+#define LEVEL_MINUS_6DB         0.5000000000000000
+#define LEVEL_MINUS_9DB         0.3535533905932738
+#define LEVEL_ZERO              0.0000000000000000
+#define LEVEL_ONE               1.0000000000000000
+
 /* exponent encoding strategy */
 #define EXP_REUSE 0
 #define EXP_NEW   1
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 3649289865213..a316d4e4d745e 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -71,10 +71,7 @@ static const float surmixlev_options[SURMIXLEV_NUM_OPTIONS] = {
 };
 
 #define EXTMIXLEV_NUM_OPTIONS 8
-static const float extmixlev_options[EXTMIXLEV_NUM_OPTIONS] = {
-    LEVEL_PLUS_3DB, LEVEL_PLUS_1POINT5DB, LEVEL_ONE, LEVEL_MINUS_1POINT5DB,
-    LEVEL_MINUS_3DB, LEVEL_MINUS_4POINT5DB, LEVEL_MINUS_6DB,
LEVEL_ZERO -}; +#define extmixlev_options ff_ac3_gain_levels /* The first two options apply only to the AC-3 encoders; * the rest is also valid for EAC-3. When modifying it, @@ -1638,6 +1635,8 @@ static void ac3_output_frame_header(AC3EncodeContext *s, PutBitContext *pb) { AC3EncOptions *opt = &s->options; + put_bits_assume_flushed(pb); + put_bits(pb, 16, 0x0b77); /* frame header */ put_bits(pb, 16, 0); /* crc1: will be filled later */ put_bits(pb, 2, s->bit_alloc.sr_code); diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c index 48c89a8ba007d..b38e7237b3479 100644 --- a/libavcodec/ac3tab.c +++ b/libavcodec/ac3tab.c @@ -25,6 +25,7 @@ */ #include "libavutil/channel_layout.h" +#include "libavutil/mathematics.h" #include "ac3tab.h" @@ -147,6 +148,19 @@ const uint16_t ff_ac3_fast_gain_tab[8]= { 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400, }; +/** Adjustments in dB gain */ +const float ff_ac3_gain_levels[9] = { + LEVEL_PLUS_3DB, + LEVEL_PLUS_1POINT5DB, + LEVEL_ONE, + LEVEL_MINUS_1POINT5DB, + LEVEL_MINUS_3DB, + LEVEL_MINUS_4POINT5DB, + LEVEL_MINUS_6DB, + LEVEL_ZERO, + LEVEL_MINUS_9DB +}; + const uint64_t ff_eac3_custom_channel_map_locations[16][2] = { { 1, AV_CH_FRONT_LEFT }, { 1, AV_CH_FRONT_CENTER }, diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h index dcef643acb8d1..3f83ce7b8c5c4 100644 --- a/libavcodec/ac3tab.h +++ b/libavcodec/ac3tab.h @@ -26,6 +26,9 @@ #include "ac3defs.h" +#include "libavutil/attributes_internal.h" + +FF_VISIBILITY_PUSH_HIDDEN extern const uint16_t ff_ac3_frame_size_tab[38][3]; extern const uint8_t ff_ac3_channels_tab[8]; extern const uint16_t ff_ac3_channel_layout_tab[8]; @@ -43,7 +46,9 @@ extern const int16_t ff_ac3_floor_tab[8]; extern const uint16_t ff_ac3_fast_gain_tab[8]; extern const uint8_t ff_ac3_band_start_tab[AC3_CRITICAL_BANDS+1]; extern const uint8_t ff_ac3_bin_to_band_tab[253]; +extern const float ff_ac3_gain_levels[9]; extern const uint64_t ff_eac3_custom_channel_map_locations[16][2]; +FF_VISIBILITY_POP_HIDDEN #define COMMON_CHANNEL_MAP \ { { 0, 1, }, { 0, 1, 2, } },\ diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c index e20b60e05fef4..622cf54b40e94 100644 --- a/libavcodec/adpcm.c +++ b/libavcodec/adpcm.c @@ -2319,7 +2319,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, AVFrame *frame, } ) /* End of CASE */ default: - av_assert0(0); // unsupported codec_id should not happen + av_unreachable("There are cases for all codec ids using adpcm_decode_frame"); } if (avpkt->size && bytestream2_tell(&gb) == 0) { diff --git a/libavcodec/amfenc_h264.c b/libavcodec/amfenc_h264.c index cfcc5482f06a9..260139f14fad5 100644 --- a/libavcodec/amfenc_h264.c +++ b/libavcodec/amfenc_h264.c @@ -468,26 +468,61 @@ static av_cold int amf_encode_init_h264(AVCodecContext *avctx) } // B-Frames - if (ctx->max_consecutive_b_frames != -1) { - AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_CONSECUTIVE_BPICTURES, ctx->max_consecutive_b_frames); - if (ctx->max_b_frames != -1) { - AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, ctx->max_b_frames); - if (res != AMF_OK) { - res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, &var); - av_log(ctx, AV_LOG_WARNING, "B-frames=%d is not supported by this GPU, switched to %d\n", - ctx->max_b_frames, (int)var.int64Value); - ctx->max_b_frames = (int)var.int64Value; + AMFVariantStruct is_adaptive_b_frames = { 0 }; + res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_ADAPTIVE_MINIGOP, &is_adaptive_b_frames); + if 
(ctx->max_consecutive_b_frames != -1 || ctx->max_b_frames != -1 || is_adaptive_b_frames.boolValue == true) { + + //Get the capability of encoder + AMFCaps *encoder_caps = NULL; + ctx->encoder->pVtbl->GetCaps(ctx->encoder, &encoder_caps); + if (encoder_caps != NULL) + { + res = encoder_caps->pVtbl->GetProperty(encoder_caps, AMF_VIDEO_ENCODER_CAP_BFRAMES, &var); + if (res == AMF_OK) { + + //encoder supports H.264 B-frame + if(var.boolValue == true){ + //adaptive b-frames is higher priority than max_b_frames + if (is_adaptive_b_frames.boolValue == true) + { + //force AMF_VIDEO_ENCODER_MAX_CONSECUTIVE_BPICTURES to 3 + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_CONSECUTIVE_BPICTURES, 3); + + if(ctx->pa_lookahead_buffer_depth < 1) + { + //force AMF_PA_LOOKAHEAD_BUFFER_DEPTH to 1 if not set or smaller than 1 + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_LOOKAHEAD_BUFFER_DEPTH, 1); + } + } + else { + if (ctx->max_b_frames != -1) { + //in case user sets B-frames + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, ctx->max_b_frames); + if (res != AMF_OK) { + res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, &var); + av_log(ctx, AV_LOG_WARNING, "B-frames=%d is not supported by this GPU, switched to %d\n", ctx->max_b_frames, (int)var.int64Value); + ctx->max_b_frames = (int)var.int64Value; + } + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_CONSECUTIVE_BPICTURES, ctx->max_b_frames); + } + } + + } + //encoder doesn't support H.264 B-frame + else { + av_log(ctx, AV_LOG_WARNING, "The current GPU in use does not support H.264 B-frame encoding, there will be no B-frame in bitstream.\n"); + } + } else { + //Can't get the capability of encoder + av_log(ctx, AV_LOG_WARNING, "Unable to get H.264 B-frame capability.\n"); + av_log(ctx, AV_LOG_WARNING, "There will be no B-frame in bitstream.\n"); } - if (ctx->max_consecutive_b_frames < ctx->max_b_frames) { - av_log(ctx, AVERROR_BUG, "Maxium B frames needs to be greater than the specified B frame count.\n"); - } - } - } - else { - if (ctx->max_b_frames != -1) { - av_log(ctx, AVERROR_BUG, "Maxium number of B frames needs to be specified.\n"); + + encoder_caps->pVtbl->Release(encoder_caps); + encoder_caps = NULL; } } + res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, &var); if ((int)var.int64Value) { AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_DELTA_QP, ctx->b_frame_delta_qp); diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c index 929fc30a3ce07..91fb870a6408d 100644 --- a/libavcodec/amrwbdec.c +++ b/libavcodec/amrwbdec.c @@ -556,7 +556,8 @@ static void decode_fixed_vector(float *fixed_vector, const uint16_t *pulse_hi, ((int) pulse_hi[i] << 11), 4, 1); break; default: - av_assert2(0); + av_unreachable("Everything >= MODE_SID is impossible: MODE_SID is patchwelcome," + "> MODE_SID is invalid"); } memset(fixed_vector, 0, sizeof(float) * AMRWB_SFR_SIZE); diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c index 5481c0178c03e..121338ad0ce7c 100644 --- a/libavcodec/arm/pixblockdsp_init_arm.c +++ b/libavcodec/arm/pixblockdsp_init_arm.c @@ -21,7 +21,6 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/arm/cpu.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, @@ -39,7 +38,6 @@ void ff_diff_pixels_unaligned_neon(int16_t *block, const 
uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index 52666ee5473f3..883edd046860a 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -26,6 +26,7 @@ #include "config_components.h" #include "libavutil/attributes.h" +#include "libavutil/intreadwrite.h" #include "libavutil/mem.h" #include "libavutil/mem_internal.h" @@ -44,6 +45,10 @@ typedef struct ASVEncContext { PutBitContext pb; + void (*get_pixels)(int16_t *restrict block, + const uint8_t *pixels, + ptrdiff_t stride); + PixblockDSPContext pdsp; FDCTDSPContext fdsp; DECLARE_ALIGNED(32, int16_t, block)[6][64]; @@ -61,40 +66,43 @@ enum { static inline void asv1_put_level(PutBitContext *pb, int level) { unsigned int index = level + 3; + unsigned n, code; if (index <= 6) { - put_bits(pb, ff_asv_level_tab[index][1], ff_asv_level_tab[index][0]); + n = ff_asv_level_tab[index][1]; + code = ff_asv_level_tab[index][0]; } else { - put_bits(pb, 3, 0); /* Escape code */ - put_sbits(pb, 8, level); + n = 3 + 8; + code = (0 /* Escape code */ << 8) | (level & 0xFF); } + put_bits(pb, n, code); } static inline void asv2_put_level(ASVEncContext *a, PutBitContext *pb, int level) { unsigned int index = level + 31; + unsigned n, code; if (index <= 62) { - put_bits_le(pb, ff_asv2_level_tab[index][1], ff_asv2_level_tab[index][0]); + n = ff_asv2_level_tab[index][1]; + code = ff_asv2_level_tab[index][0]; } else { - put_bits_le(pb, 5, 0); /* Escape code */ if (level < -128 || level > 127) { av_log(a->c.avctx, AV_LOG_WARNING, "Clipping level %d, increase qscale\n", level); level = av_clip_int8(level); } - put_bits_le(pb, 8, level & 0xFF); + n = 5 + 8; + code = (level & 0xFF) << 5 | /* Escape code */ 0; } + put_bits_le(pb, n, code); } static inline void asv1_encode_block(ASVEncContext *a, int16_t block[64]) { - int i; - int nc_count = 0; - put_bits(&a->pb, 8, (block[0] + 32) >> 6); block[0] = 0; - for (i = 0; i < 10; i++) { + for (unsigned i = 0, nc_bits = 0, nc_val = 0; i < 10; i++) { const int index = ff_asv_scantab[4 * i]; int ccp = 0; @@ -112,10 +120,11 @@ static inline void asv1_encode_block(ASVEncContext *a, int16_t block[64]) ccp |= 1; if (ccp) { - for (; nc_count; nc_count--) - put_bits(&a->pb, 2, 2); /* Skip */ - - put_bits(&a->pb, ff_asv_ccp_tab[ccp][1], ff_asv_ccp_tab[ccp][0]); + put_bits(&a->pb, nc_bits + ff_asv_ccp_tab[ccp][1], + nc_val << ff_asv_ccp_tab[ccp][1] /* Skip */ | + ff_asv_ccp_tab[ccp][0]); + nc_bits = 0; + nc_val = 0; if (ccp & 8) asv1_put_level(&a->pb, block[index + 0]); @@ -126,7 +135,8 @@ static inline void asv1_encode_block(ASVEncContext *a, int16_t block[64]) if (ccp & 1) asv1_put_level(&a->pb, block[index + 9]); } else { - nc_count++; + nc_bits += 2; + nc_val = (nc_val << 2) | 2; } } put_bits(&a->pb, 5, 0xF); /* End of block */ @@ -145,8 +155,8 @@ static inline void asv2_encode_block(ASVEncContext *a, int16_t block[64]) count >>= 2; - put_bits_le(&a->pb, 4, count); - put_bits_le(&a->pb, 8, (block[0] + 32) >> 6); + put_bits_le(&a->pb, 4 + 8, count /* 4 bits */ | + (/* DC */(block[0] + 32) >> 6) << 4); block[0] = 0; for (i = 0; i <= count; i++) { @@ -213,74 +223,92 @@ static inline void dct_get(ASVEncContext *a, const AVFrame *frame, const uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; const uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; - 
a->pdsp.get_pixels(block[0], ptr_y, linesize); - a->pdsp.get_pixels(block[1], ptr_y + 8, linesize); - a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize); - a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); + a->get_pixels(block[0], ptr_y, linesize); + a->get_pixels(block[1], ptr_y + 8, linesize); + a->get_pixels(block[2], ptr_y + 8 * linesize, linesize); + a->get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); for (i = 0; i < 4; i++) a->fdsp.fdct(block[i]); if (!(a->c.avctx->flags & AV_CODEC_FLAG_GRAY)) { - a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); - a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); + a->get_pixels(block[4], ptr_cb, frame->linesize[1]); + a->get_pixels(block[5], ptr_cr, frame->linesize[2]); for (i = 4; i < 6; i++) a->fdsp.fdct(block[i]); } } -static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, - const AVFrame *pict, int *got_packet) +static void handle_partial_mb(ASVEncContext *a, const uint8_t *const data[3], + const int linesizes[3], + int valid_width, int valid_height) { - ASVEncContext *const a = avctx->priv_data; - const ASVCommonContext *const c = &a->c; - int size, ret; - - if (pict->width % 16 || pict->height % 16) { - AVFrame *clone = av_frame_alloc(); - int i; - - if (!clone) - return AVERROR(ENOMEM); - clone->format = pict->format; - clone->width = FFALIGN(pict->width, 16); - clone->height = FFALIGN(pict->height, 16); - ret = av_frame_get_buffer(clone, 0); - if (ret < 0) { - av_frame_free(&clone); - return ret; + const int nb_blocks = a->c.avctx->flags & AV_CODEC_FLAG_GRAY ? 4 : 6; + static const struct Descriptor { + uint8_t x_offset, y_offset; + uint8_t component, subsampling; + } block_descriptor[] = { + { 0, 0, 0, 0 }, { 8, 0, 0, 0 }, { 0, 8, 0, 0 }, { 8, 8, 0, 0 }, + { 0, 0, 1, 1 }, { 0, 0, 2, 1 }, + }; + + for (int i = 0; i < nb_blocks; ++i) { + const struct Descriptor *const desc = block_descriptor + i; + int width_avail = AV_CEIL_RSHIFT(valid_width, desc->subsampling) - desc->x_offset; + int height_avail = AV_CEIL_RSHIFT(valid_height, desc->subsampling) - desc->y_offset; + + if (width_avail <= 0 || height_avail <= 0) { + // This block is outside of the visible part; don't replicate pixels, + // just zero the block, so that only the dc value will be coded. 
+            memset(a->block[i], 0, sizeof(a->block[i]));
+            continue;
         }
-
-        ret = av_frame_copy(clone, pict);
-        if (ret < 0) {
-            av_frame_free(&clone);
-            return ret;
+        width_avail = FFMIN(width_avail, 8);
+        height_avail = FFMIN(height_avail, 8);
+
+        ptrdiff_t linesize = linesizes[desc->component];
+        const uint8_t *src = data[desc->component] + desc->y_offset * linesize + desc->x_offset;
+        int16_t *block = a->block[i];
+
+        for (int h = 0;; block += 8, src += linesize) {
+            int16_t last;
+            for (int w = 0; w < width_avail; ++w)
+                last = block[w] = src[w];
+            for (int w = width_avail; w < 8; ++w)
+                block[w] = last;
+            if (++h == height_avail)
+                break;
         }
-
-        for (i = 0; i<3; i++) {
-            int x, y;
-            int w = AV_CEIL_RSHIFT(pict->width, !!i);
-            int h = AV_CEIL_RSHIFT(pict->height, !!i);
-            int w2 = AV_CEIL_RSHIFT(clone->width, !!i);
-            int h2 = AV_CEIL_RSHIFT(clone->height, !!i);
-            for (y=0; y<h; y++)
-                for (x=w; x<w2; x++)
-                    clone->data[i][x + y*clone->linesize[i]] =
-                        clone->data[i][w - 1 + y*clone->linesize[i]];
-            for (y=h; y<h2; y++)
-                for (x=0; x<w2; x++)
-                    clone->data[i][x + y*clone->linesize[i]] =
-                        clone->data[i][x + (h-1)*clone->linesize[i]];
+        const int16_t *const last_row = block;
+        for (int h = height_avail; h < 8; ++h) {
+            block += 8;
+            AV_COPY128(block, last_row);
         }
-        ret = encode_frame(avctx, pkt, clone, got_packet);
-        av_frame_free(&clone);
-        return ret;
+        a->fdsp.fdct(a->block[i]);
     }
+    encode_mb(a, a->block);
+}
+
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                        const AVFrame *pict, int *got_packet)
+{
+    ASVEncContext *const a = avctx->priv_data;
+    const ASVCommonContext *const c = &a->c;
+    int size, ret;
+
     ret = ff_alloc_packet(avctx, pkt, c->mb_height * c->mb_width * MAX_MB_SIZE + 3);
     if (ret < 0)
         return ret;
 
+    if (!PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED &&
+        ((uintptr_t)pict->data[0] & 7 || pict->linesize[0] & 7 ||
+         (uintptr_t)pict->data[1] & 7 || pict->linesize[1] & 7 ||
+         (uintptr_t)pict->data[2] & 7 || pict->linesize[2] & 7))
+        a->get_pixels = a->pdsp.get_pixels_unaligned;
+    else
+        a->get_pixels = a->pdsp.get_pixels;
+
     init_put_bits(&a->pb, pkt->data, pkt->size);
 
     for (int mb_y = 0; mb_y < c->mb_height2; mb_y++) {
@@ -290,19 +318,37 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
     }
 
-    if (c->mb_width2 != c->mb_width) {
-        int mb_x = c->mb_width2;
+    if (avctx->width & 15) {
+        const uint8_t *src[3] = {
+            pict->data[0] + c->mb_width2 * 16,
+            pict->data[1] + c->mb_width2 * 8,
+            pict->data[2] + c->mb_width2 * 8,
+        };
+        int available_width = avctx->width & 15;
+
         for (int mb_y = 0; mb_y < c->mb_height2; mb_y++) {
-            dct_get(a, pict, mb_x, mb_y);
-            encode_mb(a, a->block);
+            handle_partial_mb(a, src, pict->linesize, available_width, 16);
+            src[0] += 16 * pict->linesize[0];
+            src[1] += 8 * pict->linesize[1];
+            src[2] += 8 * pict->linesize[2];
         }
     }
 
-    if (c->mb_height2 != c->mb_height) {
-        int mb_y = c->mb_height2;
-        for (int mb_x = 0; mb_x < c->mb_width; mb_x++) {
-            dct_get(a, pict, mb_x, mb_y);
-            encode_mb(a, a->block);
+    if (avctx->height & 15) {
+        const uint8_t *src[3] = {
+            pict->data[0] + c->mb_height2 * 16 * pict->linesize[0],
+            pict->data[1] + c->mb_height2 * 8 * pict->linesize[1],
+            pict->data[2] + c->mb_height2 * 8 * pict->linesize[2],
+        };
+        int available_height = avctx->height & 15;
+
+        for (int remaining = avctx->width;; remaining -= 16) {
+            handle_partial_mb(a, src, pict->linesize, remaining, available_height);
+            if (remaining <= 16)
+                break;
+            src[0] += 16;
+            src[1] += 8;
+            src[2] += 8;
         }
     }
 
@@ -333,7 +379,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     ff_asv_common_init(avctx);
 
     ff_fdctdsp_init(&a->fdsp, avctx);
-
ff_pixblockdsp_init(&a->pdsp, avctx); + ff_pixblockdsp_init(&a->pdsp, 8); if (avctx->global_quality <= 0) avctx->global_quality = 4 * FF_QUALITY_SCALE; @@ -345,8 +391,8 @@ static av_cold int encode_init(AVCodecContext *avctx) if (!avctx->extradata) return AVERROR(ENOMEM); avctx->extradata_size = 8; - AV_WLA(32, avctx->extradata, inv_qscale); - ((uint32_t *) avctx->extradata)[1] = av_le2ne32(AV_RL32("ASUS")); + AV_WL32A(avctx->extradata, inv_qscale); + AV_WL32A(avctx->extradata + 4, MKTAG('A', 'S', 'U', 'S')); for (i = 0; i < 64; i++) { if (a->fdsp.fdct == ff_fdct_ifast) { diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c index faa3daa9e6cd8..fe156fa4821e1 100644 --- a/libavcodec/atrac3.c +++ b/libavcodec/atrac3.c @@ -526,7 +526,7 @@ static void reverse_matrixing(float *su1, float *su2, int *prev_code, } break; default: - av_assert1(0); + av_unreachable("curr_code/matrix_coeff_index_* values are stored in two bits"); } } } diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c index f995e73eab44d..5322b181bcf21 100644 --- a/libavcodec/avdct.c +++ b/libavcodec/avdct.c @@ -119,7 +119,7 @@ int avcodec_dct_init(AVDCT *dsp) #if CONFIG_PIXBLOCKDSP { PixblockDSPContext pdsp; - ff_pixblockdsp_init(&pdsp, avctx); + ff_pixblockdsp_init(&pdsp, dsp->bits_per_sample); COPY(pdsp, get_pixels); COPY(pdsp, get_pixels_unaligned); } diff --git a/libavcodec/bsf/dovi_rpu.c b/libavcodec/bsf/dovi_rpu.c index 5dccd4bc7e76e..84b271f736b93 100644 --- a/libavcodec/bsf/dovi_rpu.c +++ b/libavcodec/bsf/dovi_rpu.c @@ -228,8 +228,8 @@ static int dovi_rpu_init(AVBSFContext *bsf) } else { av_log(bsf, AV_LOG_WARNING, "No Dolby Vision configuration record " "found? Generating one, but results may be invalid.\n"); - ret = ff_dovi_configure_ext(&s->enc, bsf->par_out, NULL, s->compression, - FF_COMPLIANCE_NORMAL); + ret = ff_dovi_configure_from_codedpar(&s->enc, bsf->par_out, NULL, s->compression, + FF_COMPLIANCE_NORMAL); if (ret < 0) return ret; /* Be conservative in accepting all compressed RPUs */ diff --git a/libavcodec/cbs_apv_syntax_template.c b/libavcodec/cbs_apv_syntax_template.c index ca66349141e54..fc8a08ff31dd0 100644 --- a/libavcodec/cbs_apv_syntax_template.c +++ b/libavcodec/cbs_apv_syntax_template.c @@ -543,11 +543,11 @@ static int FUNC(metadata)(CodedBitstreamContext *ctx, RWContext *rw, return AVERROR_INVALIDDATA; } + current->metadata_count = p + 1; + CHECK(FUNC(metadata_payload)(ctx, rw, pl)); metadata_bytes_left -= pl->payload_size; - - current->metadata_count = p + 1; if (metadata_bytes_left == 0) break; } diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c index 369e3ac876994..fa70a8fb7b97e 100644 --- a/libavcodec/cbs_h2645.c +++ b/libavcodec/cbs_h2645.c @@ -2310,6 +2310,28 @@ static const SEIMessageTypeDescriptor cbs_sei_h266_types[] = { SEI_MESSAGE_TYPE_END }; +static const SEIMessageTypeDescriptor cbs_sei_h274_types[] = { + { + SEI_TYPE_FILM_GRAIN_CHARACTERISTICS, + 1, 0, + sizeof(SEIRawFilmGrainCharacteristics), + SEI_MESSAGE_RW(sei, film_grain_characteristics), + }, + { + SEI_TYPE_DISPLAY_ORIENTATION, + 1, 0, + sizeof(SEIRawDisplayOrientation), + SEI_MESSAGE_RW(sei, display_orientation) + }, + { + SEI_TYPE_FRAME_FIELD_INFO, + 1, 0, + sizeof(SEIRawFrameFieldInformation), + SEI_MESSAGE_RW(sei, frame_field_information) + }, + SEI_MESSAGE_TYPE_END, +}; + const SEIMessageTypeDescriptor *ff_cbs_sei_find_type(CodedBitstreamContext *ctx, int payload_type) { @@ -2335,6 +2357,13 @@ const SEIMessageTypeDescriptor *ff_cbs_sei_find_type(CodedBitstreamContext *ctx, return &codec_list[i]; } + if 
(ctx->codec->codec_id == AV_CODEC_ID_H266) { + for (i = 0; cbs_sei_h274_types[i].type >= 0; i++) { + if (cbs_sei_h274_types[i].type == payload_type) + return &cbs_sei_h274_types[i]; + } + } + for (i = 0; cbs_sei_common_types[i].type >= 0; i++) { if (cbs_sei_common_types[i].type == payload_type) return &cbs_sei_common_types[i]; diff --git a/libavcodec/cbs_sei.h b/libavcodec/cbs_sei.h index 15ef3415aba84..81867b79a7e06 100644 --- a/libavcodec/cbs_sei.h +++ b/libavcodec/cbs_sei.h @@ -97,6 +97,46 @@ typedef struct SEIRawAmbientViewingEnvironment { uint16_t ambient_light_y; } SEIRawAmbientViewingEnvironment; +typedef struct SEIRawFilmGrainCharacteristics { + uint8_t fg_characteristics_cancel_flag; + uint8_t fg_model_id; + uint8_t fg_separate_colour_description_present_flag; + uint8_t fg_bit_depth_luma_minus8; + uint8_t fg_bit_depth_chroma_minus8; + uint8_t fg_full_range_flag; + uint8_t fg_colour_primaries; + uint8_t fg_transfer_characteristics; + uint8_t fg_matrix_coeffs; + uint8_t fg_blending_mode_id; + uint8_t fg_log2_scale_factor; + uint8_t fg_comp_model_present_flag[3]; + uint8_t fg_num_intensity_intervals_minus1[3]; + uint8_t fg_num_model_values_minus1[3]; + uint8_t fg_intensity_interval_lower_bound[3][256]; + uint8_t fg_intensity_interval_upper_bound[3][256]; + int16_t fg_comp_model_value[3][256][6]; + uint8_t fg_characteristics_persistence_flag; +} SEIRawFilmGrainCharacteristics; + +typedef struct SEIRawDisplayOrientation { + uint8_t display_orientation_cancel_flag; + uint8_t display_orientation_persistence_flag; + uint8_t display_orientation_transform_type; + uint8_t display_orientation_reserved_zero_3bits; +} SEIRawDisplayOrientation; + +typedef struct SEIRawFrameFieldInformation { + uint8_t ffi_field_pic_flag; + uint8_t ffi_bottom_field_flag; + uint8_t ffi_pairing_indicated_flag; + uint8_t ffi_paired_with_next_field_flag; + uint8_t ffi_display_fields_from_frame_flag; + uint8_t ffi_top_field_first_flag; + uint8_t ffi_display_elemental_periods_minus1; + uint8_t ffi_source_scan_type; + uint8_t ffi_duplicate_flag; +} SEIRawFrameFieldInformation; + typedef struct SEIRawMessage { uint32_t payload_type; uint32_t payload_size; diff --git a/libavcodec/cbs_sei_syntax_template.c b/libavcodec/cbs_sei_syntax_template.c index 0205bb47aa3f4..e6863a0fd7833 100644 --- a/libavcodec/cbs_sei_syntax_template.c +++ b/libavcodec/cbs_sei_syntax_template.c @@ -224,6 +224,103 @@ SEI_FUNC(ambient_viewing_environment, return 0; } +SEI_FUNC(film_grain_characteristics, + (CodedBitstreamContext *ctx, RWContext *rw, + SEIRawFilmGrainCharacteristics *current, + SEIMessageState *state)) +{ + int err, c, i, j; + + HEADER("Film Grain Characteristics"); + + flag(fg_characteristics_cancel_flag); + if (!current->fg_characteristics_cancel_flag) { + int filmGrainBitDepth[3]; + + u(2, fg_model_id, 0, 1); + flag(fg_separate_colour_description_present_flag); + if (current->fg_separate_colour_description_present_flag) { + ub(3, fg_bit_depth_luma_minus8); + ub(3, fg_bit_depth_chroma_minus8); + flag(fg_full_range_flag); + ub(8, fg_colour_primaries); + ub(8, fg_transfer_characteristics); + ub(8, fg_matrix_coeffs); + } + + filmGrainBitDepth[0] = current->fg_bit_depth_luma_minus8 + 8; + filmGrainBitDepth[1] = + filmGrainBitDepth[2] = current->fg_bit_depth_chroma_minus8 + 8; + + u(2, fg_blending_mode_id, 0, 1); + ub(4, fg_log2_scale_factor); + for (c = 0; c < 3; c++) + flags(fg_comp_model_present_flag[c], 1, c); + + for (c = 0; c < 3; c++) { + if (current->fg_comp_model_present_flag[c]) { + ubs(8, fg_num_intensity_intervals_minus1[c], 
1, c); + us(3, fg_num_model_values_minus1[c], 0, 5, 1, c); + for (i = 0; i <= current->fg_num_intensity_intervals_minus1[c]; i++) { + ubs(8, fg_intensity_interval_lower_bound[c][i], 2, c, i); + ubs(8, fg_intensity_interval_upper_bound[c][i], 2, c, i); + for (j = 0; j <= current->fg_num_model_values_minus1[c]; j++) + ses(fg_comp_model_value[c][i][j], 0 - current->fg_model_id * (1 << (filmGrainBitDepth[c] - 1)), + ((1 << filmGrainBitDepth[c]) - 1) - current->fg_model_id * (1 << (filmGrainBitDepth[c] - 1)), + 3, c, i, j); + } + } + } + flag(fg_characteristics_persistence_flag); + } + + return 0; +} + +SEI_FUNC(display_orientation, (CodedBitstreamContext *ctx, RWContext *rw, + SEIRawDisplayOrientation *current, + SEIMessageState *state)) +{ + int err; + + HEADER("Display Orientation"); + + flag(display_orientation_cancel_flag); + if (!current->display_orientation_cancel_flag) { + flag(display_orientation_persistence_flag); + u(3, display_orientation_transform_type, 0, 7); + ub(3, display_orientation_reserved_zero_3bits); + } + + return 0; +} + +SEI_FUNC(frame_field_information, (CodedBitstreamContext *ctx, RWContext *rw, + SEIRawFrameFieldInformation *current, + SEIMessageState *state)) +{ + int err; + + HEADER("Frame-field information"); + + flag(ffi_field_pic_flag); + if (current->ffi_field_pic_flag) { + flag(ffi_bottom_field_flag); + flag(ffi_pairing_indicated_flag); + if (current->ffi_pairing_indicated_flag) + flag(ffi_paired_with_next_field_flag); + } else { + flag(ffi_display_fields_from_frame_flag); + if (current->ffi_display_fields_from_frame_flag) + flag(ffi_top_field_first_flag); + u(8, ffi_display_elemental_periods_minus1, 0, 0xff); + } + u(2, ffi_source_scan_type, 0, 3); + flag(ffi_duplicate_flag); + + return 0; +} + static int FUNC(message)(CodedBitstreamContext *ctx, RWContext *rw, SEIRawMessage *current) { diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index 6575f0f6b14a9..3437ee2109c23 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -424,6 +424,7 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF AVCodecContext *avctx = opaque; CuvidContext *ctx = avctx->priv_data; CuvidParsedFrame parsed_frame = { { 0 } }; + int ret; parsed_frame.dispinfo = *dispinfo; ctx->internal_error = 0; @@ -432,13 +433,20 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence; if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) { - av_fifo_write(ctx->frame_queue, &parsed_frame, 1); + ret = av_fifo_write(ctx->frame_queue, &parsed_frame, 1); + if (ret < 0) + av_log(avctx, AV_LOG_ERROR, "Writing frame to fifo failed!\n"); } else { parsed_frame.is_deinterlacing = 1; - av_fifo_write(ctx->frame_queue, &parsed_frame, 1); + ret = av_fifo_write(ctx->frame_queue, &parsed_frame, 1); + if (ret < 0) + av_log(avctx, AV_LOG_ERROR, "Writing first frame to fifo failed!\n"); + if (!ctx->drop_second_field) { parsed_frame.second_field = 1; - av_fifo_write(ctx->frame_queue, &parsed_frame, 1); + ret = av_fifo_write(ctx->frame_queue, &parsed_frame, 1); + if (ret < 0) + av_log(avctx, AV_LOG_ERROR, "Writing second frame to fifo failed!\n"); } } @@ -497,7 +505,12 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) ctx->decoder_flushing = 1; } - ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt)); + // When flushing, only actually flush cuvid when the output buffer has been fully emptied. 
+ // CUVID happily dumps out a ton of frames with no regard for its own available surfaces. + if (!ctx->decoder_flushing || (ctx->decoder_flushing && !av_fifo_can_read(ctx->frame_queue))) + ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt)); + else + ret = 0; if (ret < 0) goto error; diff --git a/libavcodec/decode.c b/libavcodec/decode.c index c2b2dd6e3b6e6..ef0956838137f 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1590,22 +1590,49 @@ static void update_frame_props(AVCodecContext *avctx, AVFrame *frame) } } -static void attach_post_process_data(AVCodecContext *avctx, AVFrame *frame) +static int attach_post_process_data(AVCodecContext *avctx, AVFrame *frame) { AVCodecInternal *avci = avctx->internal; DecodeContext *dc = decode_ctx(avci); if (dc->lcevc_frame) { FrameDecodeData *fdd = frame->private_ref; + FFLCEVCFrame *frame_ctx; + int ret; - fdd->post_process_opaque = av_refstruct_ref(dc->lcevc); - fdd->post_process_opaque_free = ff_lcevc_unref; - fdd->post_process = ff_lcevc_process; + frame_ctx = av_mallocz(sizeof(*frame_ctx)); + if (!frame_ctx) + return AVERROR(ENOMEM); + + frame_ctx->frame = av_frame_alloc(); + if (!frame_ctx->frame) { + av_free(frame_ctx); + return AVERROR(ENOMEM); + } + + frame_ctx->lcevc = av_refstruct_ref(dc->lcevc); + frame_ctx->frame->width = frame->width; + frame_ctx->frame->height = frame->height; + frame_ctx->frame->format = frame->format; frame->width = dc->width; frame->height = dc->height; + + ret = avctx->get_buffer2(avctx, frame_ctx->frame, 0); + if (ret < 0) { + ff_lcevc_unref(frame_ctx); + return ret; + } + + validate_avframe_allocation(avctx, frame_ctx->frame); + + fdd->post_process_opaque = frame_ctx; + fdd->post_process_opaque_free = ff_lcevc_unref; + fdd->post_process = ff_lcevc_process; } dc->lcevc_frame = 0; + + return 0; } int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags) @@ -1666,7 +1693,9 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags) if (ret < 0) goto fail; - attach_post_process_data(avctx, frame); + ret = attach_post_process_data(avctx, frame); + if (ret < 0) + goto fail; end: if (avctx->codec_type == AVMEDIA_TYPE_VIDEO && !override_dimensions && diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c index a8f8ab3cd925a..7a5978c137dfb 100644 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@ -423,7 +423,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) ff_fdctdsp_init(&ctx->m.fdsp, avctx); ff_mpv_idct_init(&ctx->m.c); ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); - ff_pixblockdsp_init(&ctx->m.pdsp, avctx); + ff_pixblockdsp_init(&ctx->m.pdsp, ctx->bit_depth); ff_dct_encode_init(&ctx->m); if (ctx->profile != AV_PROFILE_DNXHD) diff --git a/libavcodec/dolby_e_parse.c b/libavcodec/dolby_e_parse.c index ffedcd99a44c0..fc20eae5b4a06 100644 --- a/libavcodec/dolby_e_parse.c +++ b/libavcodec/dolby_e_parse.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/avassert.h" #include "get_bits.h" #include "put_bits.h" #include "dolby_e.h" @@ -88,7 +89,7 @@ int ff_dolby_e_convert_input(DBEContext *s, int nb_words, int key) AV_WB24(dst, AV_RB24(src) ^ key); break; default: - av_assert0(0); + av_unreachable("ff_dolby_e_parse_header() only sets 16, 20, 24 and errors out otherwise"); } return init_get_bits(&s->gb, s->buffer, nb_words * s->word_bits); diff --git a/libavcodec/dovi_rpu.h b/libavcodec/dovi_rpu.h index f3ccc27ae87e7..1b74983205dba 100644 --- a/libavcodec/dovi_rpu.h +++ 
b/libavcodec/dovi_rpu.h @@ -133,9 +133,10 @@ int ff_dovi_attach_side_data(DOVIContext *s, AVFrame *frame); /** * Configure the encoder for Dolby Vision encoding. Generates a configuration - * record in s->cfg, and attaches it to avctx->coded_side_data. Sets the correct - * profile and compatibility ID based on the tagged AVCodecParameters colorspace - * metadata, and the correct level based on the resolution and tagged framerate. + * record in s->cfg, and attaches it to codecpar->coded_side_data. Sets the + * correct profile and compatibility ID based on the tagged AVCodecParameters + * colorspace metadata, and the correct level based on the resolution and + * tagged framerate. * * `metadata` should point to the first frame's RPU, if available. If absent, * auto-detection will be performed, but this can sometimes lead to inaccurate @@ -143,13 +144,13 @@ int ff_dovi_attach_side_data(DOVIContext *s, AVFrame *frame); * * Returns 0 or a negative error code. */ -int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, - const AVDOVIMetadata *metadata, - enum AVDOVICompression compression, - int strict_std_compliance); +int ff_dovi_configure_from_codedpar(DOVIContext *s, AVCodecParameters *codecpar, + const AVDOVIMetadata *metadata, + enum AVDOVICompression compression, + int strict_std_compliance); /** - * Helper wrapper around `ff_dovi_configure_ext` which infers the codec + * Variant of `ff_dovi_configure_from_codedpar` which infers the codec * parameters from an AVCodecContext. */ int ff_dovi_configure(DOVIContext *s, AVCodecContext *avctx); diff --git a/libavcodec/dovi_rpuenc.c b/libavcodec/dovi_rpuenc.c index 2e1f8be08ee16..b05ad0a358818 100644 --- a/libavcodec/dovi_rpuenc.c +++ b/libavcodec/dovi_rpuenc.c @@ -20,16 +20,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/attributes.h" #include "libavutil/avassert.h" #include "libavutil/crc.h" #include "libavutil/mem.h" +#include "libavutil/refstruct.h" #include "avcodec.h" #include "dovi_rpu.h" #include "itut35.h" #include "put_bits.h" #include "put_golomb.h" -#include "libavutil/refstruct.h" static const struct { uint64_t pps; // maximum pixels per second @@ -52,10 +53,18 @@ static const struct { [13] = {7680*4320*120u, 7680, 240, 800}, }; -int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, - const AVDOVIMetadata *metadata, - enum AVDOVICompression compression, - int strict_std_compliance) +static av_cold int dovi_configure_ext(DOVIContext *s, enum AVCodecID codec_id, + const AVDOVIMetadata *metadata, + enum AVDOVICompression compression, + int strict_std_compliance, + int width, int height, + AVRational framerate, + enum AVPixelFormat pix_format, + enum AVColorSpace color_space, + enum AVColorPrimaries color_primaries, + enum AVColorTransferCharacteristic color_trc, + AVPacketSideData **coded_side_data, + int *nb_coded_side_data) { AVDOVIDecoderConfigurationRecord *cfg; const AVDOVIRpuDataHeader *hdr = NULL; @@ -76,7 +85,7 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, compression > AV_DOVI_COMPRESSION_EXTENDED) return AVERROR(EINVAL); - switch (codecpar->codec_id) { + switch (codec_id) { case AV_CODEC_ID_AV1: dv_profile = 10; break; case AV_CODEC_ID_H264: dv_profile = 9; break; case AV_CODEC_ID_HEVC: @@ -86,25 +95,23 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, } /* This is likely to be proprietary IPTPQc2 */ - if (codecpar->color_space == AVCOL_SPC_IPT_C2 || - (codecpar->color_space == 
AVCOL_SPC_UNSPECIFIED && - codecpar->color_trc == AVCOL_TRC_UNSPECIFIED)) + if (color_space == AVCOL_SPC_IPT_C2 || + (color_space == AVCOL_SPC_UNSPECIFIED && + color_trc == AVCOL_TRC_UNSPECIFIED)) dv_profile = 5; else dv_profile = 8; break; default: - /* No other encoder should be calling this! */ - av_assert0(0); - return AVERROR_BUG; + av_unreachable("ff_dovi_configure only used with AV1, H.264 and HEVC"); } if (strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) { if (dv_profile == 9) { - if (codecpar->format != AV_PIX_FMT_YUV420P) + if (pix_format != AV_PIX_FMT_YUV420P) dv_profile = 0; } else { - if (codecpar->format != AV_PIX_FMT_YUV420P10) + if (pix_format != AV_PIX_FMT_YUV420P10) dv_profile = 0; } } @@ -131,17 +138,17 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, } /* fall through */ case 8: /* HEVC (or AV1) with BL compatibility */ - if (codecpar->color_space == AVCOL_SPC_BT2020_NCL && - codecpar->color_primaries == AVCOL_PRI_BT2020 && - codecpar->color_trc == AVCOL_TRC_SMPTE2084) { + if (color_space == AVCOL_SPC_BT2020_NCL && + color_primaries == AVCOL_PRI_BT2020 && + color_trc == AVCOL_TRC_SMPTE2084) { bl_compat_id = 1; - } else if (codecpar->color_space == AVCOL_SPC_BT2020_NCL && - codecpar->color_primaries == AVCOL_PRI_BT2020 && - codecpar->color_trc == AVCOL_TRC_ARIB_STD_B67) { + } else if (color_space == AVCOL_SPC_BT2020_NCL && + color_primaries == AVCOL_PRI_BT2020 && + color_trc == AVCOL_TRC_ARIB_STD_B67) { bl_compat_id = 4; - } else if (codecpar->color_space == AVCOL_SPC_BT709 && - codecpar->color_primaries == AVCOL_PRI_BT709 && - codecpar->color_trc == AVCOL_TRC_BT709) { + } else if (color_space == AVCOL_SPC_BT709 && + color_primaries == AVCOL_PRI_BT709 && + color_trc == AVCOL_TRC_BT709) { bl_compat_id = 2; } } @@ -175,9 +182,9 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, } } - pps = codecpar->width * codecpar->height; - if (codecpar->framerate.num) { - pps = pps * codecpar->framerate.num / codecpar->framerate.den; + pps = width * height; + if (framerate.num) { + pps = pps * framerate.num / framerate.den; } else { pps *= 25; /* sanity fallback */ } @@ -186,7 +193,7 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, for (int i = 1; i < FF_ARRAY_ELEMS(dv_levels); i++) { if (pps > dv_levels[i].pps) continue; - if (codecpar->width > dv_levels[i].width) + if (width > dv_levels[i].width) continue; /* In theory, we should also test the bitrate when known, and * distinguish between main and high tier. In practice, just ignore @@ -199,12 +206,12 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, if (!dv_level) { if (strict_std_compliance >= FF_COMPLIANCE_STRICT) { av_log(s->logctx, AV_LOG_ERROR, "Coded PPS (%"PRIu64") and width (%d) " - "exceed Dolby Vision limitations\n", pps, codecpar->width); + "exceed Dolby Vision limitations\n", pps, width); return AVERROR(EINVAL); } else { av_log(s->logctx, AV_LOG_WARNING, "Coded PPS (%"PRIu64") and width (%d) " "exceed Dolby Vision limitations. 
Ignoring, resulting file " - "may be non-conforming.\n", pps, codecpar->width); + "may be non-conforming.\n", pps, width); dv_level = FF_ARRAY_ELEMS(dv_levels) - 1; } } @@ -213,8 +220,8 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, if (!cfg) return AVERROR(ENOMEM); - if (!av_packet_side_data_add(&codecpar->coded_side_data, - &codecpar->nb_coded_side_data, + if (!av_packet_side_data_add(coded_side_data, + nb_coded_side_data, AV_PKT_DATA_DOVI_CONF, cfg, cfg_size, 0)) { av_free(cfg); return AVERROR(ENOMEM); @@ -238,19 +245,22 @@ int ff_dovi_configure_ext(DOVIContext *s, AVCodecParameters *codecpar, return 0; } -int ff_dovi_configure(DOVIContext *s, AVCodecContext *avctx) +av_cold int ff_dovi_configure_from_codedpar(DOVIContext *s, AVCodecParameters *par, + const AVDOVIMetadata *metadata, + enum AVDOVICompression compression, + int strict_std_compliance) { - int ret; - const AVFrameSideData *sd; - const AVDOVIMetadata *metadata = NULL; - AVCodecParameters *codecpar = avcodec_parameters_alloc(); - if (!codecpar) - return AVERROR(ENOMEM); - - ret = avcodec_parameters_from_context(codecpar, avctx); - if (ret < 0) - goto fail; + return dovi_configure_ext(s, par->codec_id, metadata, compression, + strict_std_compliance, par->width, par->height, + par->framerate, par->format, par->color_space, + par->color_primaries, par->color_trc, + &par->coded_side_data, &par->nb_coded_side_data); +} +av_cold int ff_dovi_configure(DOVIContext *s, AVCodecContext *avctx) +{ + const AVDOVIMetadata *metadata = NULL; + const AVFrameSideData *sd; sd = av_frame_side_data_get(avctx->decoded_side_data, avctx->nb_decoded_side_data, AV_FRAME_DATA_DOVI_METADATA); @@ -258,16 +268,11 @@ int ff_dovi_configure(DOVIContext *s, AVCodecContext *avctx) metadata = (const AVDOVIMetadata *) sd->data; /* Current encoders cannot handle metadata compression during encoding */ - ret = ff_dovi_configure_ext(s, codecpar, metadata, AV_DOVI_COMPRESSION_NONE, - avctx->strict_std_compliance); - if (ret < 0) - goto fail; - - ret = avcodec_parameters_to_context(avctx, codecpar); - -fail: - avcodec_parameters_free(&codecpar); - return ret; + return dovi_configure_ext(s, avctx->codec_id, metadata, AV_DOVI_COMPRESSION_NONE, + avctx->strict_std_compliance, avctx->width, + avctx->height, avctx->framerate, avctx->pix_fmt, + avctx->colorspace, avctx->color_primaries, avctx->color_trc, + &avctx->coded_side_data, &avctx->nb_coded_side_data); } /* Compares only the static DM metadata parts of AVDOVIColorMetadata (excluding diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c index c7fc930b4b1db..a477b84261bfa 100644 --- a/libavcodec/dvenc.c +++ b/libavcodec/dvenc.c @@ -63,6 +63,8 @@ typedef struct DVEncContext { DVwork_chunk work_chunks[4 * 12 * 27]; int quant_deadzone; + + PixblockDSPContext pdsp; } DVEncContext; @@ -70,7 +72,6 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) { DVEncContext *s = avctx->priv_data; FDCTDSPContext fdsp; - PixblockDSPContext pdsp; int ret; s->avctx = avctx; @@ -108,12 +109,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) } memset(&fdsp,0, sizeof(fdsp)); - memset(&pdsp,0, sizeof(pdsp)); ff_fdctdsp_init(&fdsp, avctx); - ff_pixblockdsp_init(&pdsp, avctx); - s->get_pixels = pdsp.get_pixels; s->fdct[0] = fdsp.fdct; s->fdct[1] = fdsp.fdct248; + ff_pixblockdsp_init(&s->pdsp, 8); #if !CONFIG_HARDCODED_TABLES { @@ -1201,6 +1200,14 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt, DVEncContext *s = c->priv_data; int ret; + if 
(!PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED &&
+        ((uintptr_t)frame->data[0] & 7 || frame->linesize[0] & 7 ||
+         (uintptr_t)frame->data[1] & 7 || frame->linesize[1] & 7 ||
+         (uintptr_t)frame->data[2] & 7 || frame->linesize[2] & 7))
+        s->get_pixels = s->pdsp.get_pixels_unaligned;
+    else
+        s->get_pixels = s->pdsp.get_pixels;
+
     if ((ret = ff_get_encode_buffer(c, pkt, s->sys->frame_size, 0)) < 0)
         return ret;
     /* Fixme: Only zero the part that is not overwritten later. */
diff --git a/libavcodec/dxv.h b/libavcodec/dxv.h
index 71cfddec858de..184813e427b73 100644
--- a/libavcodec/dxv.h
+++ b/libavcodec/dxv.h
@@ -1,6 +1,6 @@
 /*
  * Resolume DXV common
- * Copyright (C) 2024 Connor Worley
+ * Copyright (C) 2024 Emma Worley
  *
  * This file is part of FFmpeg.
  *
diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
index 808d8daedb529..ee6a0a5b367e4 100644
--- a/libavcodec/dxvenc.c
+++ b/libavcodec/dxvenc.c
@@ -1,6 +1,6 @@
 /*
  * Resolume DXV encoder
- * Copyright (C) 2024 Connor Worley
+ * Copyright (C) 2024 Emma Worley
  *
  * This file is part of FFmpeg.
  *
@@ -21,7 +21,7 @@
 
 #include <stddef.h>
 
-#include "libavutil/crc.h"
+#include "libavcodec/hashtable.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
@@ -34,77 +34,19 @@
 #define DXV_HEADER_LENGTH 12
 
+/*
+ * Resolume will refuse to display frames that are not padded to 16x16 pixels.
+ */
+#define DXV_ALIGN(x) FFALIGN(x, 16)
+
 /*
  * DXV uses LZ-like back-references to avoid copying words that have already
  * appeared in the decompressed stream. Using a simple hash table (HT)
  * significantly speeds up the lookback process while encoding.
  */
-#define LOOKBACK_HT_ELEMS 0x40000
+#define LOOKBACK_HT_ELEMS 0x20202
 #define LOOKBACK_WORDS 0x20202
 
-typedef struct HTEntry {
-    uint32_t key;
-    uint32_t pos;
-} HTEntry;
-
-static void ht_init(HTEntry *ht)
-{
-    for (size_t i = 0; i < LOOKBACK_HT_ELEMS; i++) {
-        ht[i].pos = -1;
-    }
-}
-
-static uint32_t ht_lookup_and_upsert(HTEntry *ht, const AVCRC *hash_ctx,
-                                     uint32_t key, uint32_t pos)
-{
-    uint32_t ret = -1;
-    size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
-    for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
-        size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
-        HTEntry *entry = &ht[wrapped_index];
-        if (entry->key == key || entry->pos == -1) {
-            ret = entry->pos;
-            entry->key = key;
-            entry->pos = pos;
-            break;
-        }
-    }
-    return ret;
-}
-
-static void ht_delete(HTEntry *ht, const AVCRC *hash_ctx,
-                      uint32_t key, uint32_t pos)
-{
-    HTEntry *removed_entry = NULL;
-    size_t removed_hash;
-    size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
-
-    for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
-        size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
-        HTEntry *entry = &ht[wrapped_index];
-        if (entry->pos == -1)
-            return;
-        if (removed_entry) {
-            size_t candidate_hash = av_crc(hash_ctx, 0, (uint8_t*)&entry->key, 4) % LOOKBACK_HT_ELEMS;
-            if ((wrapped_index > removed_hash && (candidate_hash <= removed_hash || candidate_hash > wrapped_index)) ||
-                (wrapped_index < removed_hash && (candidate_hash <= removed_hash && candidate_hash > wrapped_index))) {
-                *removed_entry = *entry;
-                entry->pos = -1;
-                removed_entry = entry;
-                removed_hash = wrapped_index;
-            }
-        } else if (entry->key == key) {
-            if (entry->pos <= pos) {
-                entry->pos = -1;
-                removed_entry = entry;
-                removed_hash = wrapped_index;
-            } else {
-                return;
-            }
-        }
-    }
-}
-
 typedef struct DXVEncContext {
     AVClass *class;
 
@@ -121,10 +63,9 @@ typedef struct DXVEncContext {
     DXVTextureFormat tex_fmt;
int (*compress_tex)(AVCodecContext *avctx); - const AVCRC *crc_ctx; - - HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS]; - HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS]; + FFHashtableContext *color_ht; + FFHashtableContext *lut_ht; + FFHashtableContext *combo_ht; } DXVEncContext; /* Converts an index offset value to a 2-bit opcode and pushes it to a stream. @@ -159,58 +100,63 @@ static int dxv_compress_dxt1(AVCodecContext *avctx) DXVEncContext *ctx = avctx->priv_data; PutByteContext *pbc = &ctx->pbc; void *value; - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0; + uint64_t combo; + uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16, pos = 0, op = 0; + + ff_hashtable_clear(ctx->color_ht); + ff_hashtable_clear(ctx->lut_ht); + ff_hashtable_clear(ctx->combo_ht); - ht_init(ctx->color_lookback_ht); - ht_init(ctx->lut_lookback_ht); + ff_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos); bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data)); + ff_hashtable_set(ctx->color_ht, ctx->tex_data, &pos); + pos++; bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4)); - - ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0); - ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1); + ff_hashtable_set(ctx->lut_ht, ctx->tex_data + 4, &pos); + pos++; while (pos + 2 <= ctx->tex_size / 4) { - idx = 0; - - color = AV_RL32(ctx->tex_data + pos * 4); - prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos); - color_idx = prev_pos != -1 ? pos - prev_pos : 0; + combo = AV_RL64(ctx->tex_data + pos * 4); + combo_idx = ff_hashtable_get(ctx->combo_ht, &combo, &prev_pos) ? pos - prev_pos : 0; + idx = combo_idx; + PUSH_OP(2); if (pos >= LOOKBACK_WORDS) { - uint32_t old_pos = pos - LOOKBACK_WORDS; - uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4); - ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos); + old_pos = pos - LOOKBACK_WORDS; + if (ff_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) + ff_hashtable_delete(ctx->combo_ht, ctx->tex_data + old_pos * 4); } - pos++; + ff_hashtable_set(ctx->combo_ht, &combo, &pos); - lut = AV_RL32(ctx->tex_data + pos * 4); - if (color_idx && lut == AV_RL32(ctx->tex_data + (pos - color_idx) * 4)) { - idx = color_idx; - } else { - idx = 0; - prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos); - lut_idx = prev_pos != -1 ? pos - prev_pos : 0; + color = AV_RL32(ctx->tex_data + pos * 4); + if (!combo_idx) { + idx = ff_hashtable_get(ctx->color_ht, &color, &prev_pos) ? pos - prev_pos : 0; + PUSH_OP(2); + if (!idx) + bytestream2_put_le32(pbc, color); } if (pos >= LOOKBACK_WORDS) { - uint32_t old_pos = pos - LOOKBACK_WORDS; - uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4); - ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos); + old_pos = pos - LOOKBACK_WORDS; + if (ff_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) + ff_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4); } + ff_hashtable_set(ctx->color_ht, &color, &pos); pos++; - PUSH_OP(2); - - if (!idx) { - idx = color_idx; - PUSH_OP(2); - if (!idx) - bytestream2_put_le32(pbc, color); - - idx = lut_idx; + lut = AV_RL32(ctx->tex_data + pos * 4); + if (!combo_idx) { + idx = ff_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? 
pos - prev_pos : 0; PUSH_OP(2); if (!idx) - bytestream2_put_le32(pbc, lut); + bytestream2_put_le32(pbc, lut); + } + if (pos >= LOOKBACK_WORDS) { + old_pos = pos - LOOKBACK_WORDS; + if (ff_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) + ff_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4); } + ff_hashtable_set(ctx->lut_ht, &lut, &pos); + pos++; } return 0; @@ -231,12 +177,50 @@ static int dxv_encode(AVCodecContext *avctx, AVPacket *pkt, return ret; if (ctx->enc.tex_funct) { + uint8_t *safe_data[4] = {frame->data[0], 0, 0, 0}; + int safe_linesize[4] = {frame->linesize[0], 0, 0, 0}; + + if (avctx->width != DXV_ALIGN(avctx->width) || avctx->height != DXV_ALIGN(avctx->height)) { + ret = av_image_alloc( + safe_data, + safe_linesize, + DXV_ALIGN(avctx->width), + DXV_ALIGN(avctx->height), + avctx->pix_fmt, + 1); + if (ret < 0) + return ret; + + av_image_copy2( + safe_data, + safe_linesize, + frame->data, + frame->linesize, + avctx->pix_fmt, + avctx->width, + avctx->height); + + if (avctx->width != DXV_ALIGN(avctx->width)) { + for (int y = 0; y < avctx->height; y++) { + memset(safe_data[0] + y * safe_linesize[0] + frame->linesize[0], 0, safe_linesize[0] - frame->linesize[0]); + } + } + if (avctx->height != DXV_ALIGN(avctx->height)) { + for (int y = avctx->height; y < DXV_ALIGN(avctx->height); y++) { + memset(safe_data[0] + y * safe_linesize[0], 0, safe_linesize[0]); + } + } + } + ctx->enc.tex_data.out = ctx->tex_data; - ctx->enc.frame_data.in = frame->data[0]; - ctx->enc.stride = frame->linesize[0]; - ctx->enc.width = avctx->width; - ctx->enc.height = avctx->height; + ctx->enc.frame_data.in = safe_data[0]; + ctx->enc.stride = safe_linesize[0]; + ctx->enc.width = DXV_ALIGN(avctx->width); + ctx->enc.height = DXV_ALIGN(avctx->height); ff_texturedsp_exec_compress_threads(avctx, &ctx->enc); + + if (safe_data[0] != frame->data[0]) + av_freep(&safe_data[0]); } else { /* unimplemented: YCoCg formats */ return AVERROR_INVALIDDATA; @@ -275,14 +259,6 @@ static av_cold int dxv_init(AVCodecContext *avctx) return ret; } - if (avctx->width % TEXTURE_BLOCK_W || avctx->height % TEXTURE_BLOCK_H) { - av_log(avctx, - AV_LOG_ERROR, - "Video size %dx%d is not multiple of "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n", - avctx->width, avctx->height); - return AVERROR_INVALIDDATA; - } - ff_texturedspenc_init(&texdsp); switch (ctx->tex_fmt) { @@ -296,21 +272,25 @@ static av_cold int dxv_init(AVCodecContext *avctx) return AVERROR_INVALIDDATA; } ctx->enc.raw_ratio = 16; - ctx->tex_size = avctx->width / TEXTURE_BLOCK_W * - avctx->height / TEXTURE_BLOCK_H * + ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W * + DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H * ctx->enc.tex_ratio; - ctx->enc.slice_count = av_clip(avctx->thread_count, 1, avctx->height / TEXTURE_BLOCK_H); + ctx->enc.slice_count = av_clip(avctx->thread_count, 1, DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H); ctx->tex_data = av_malloc(ctx->tex_size); if (!ctx->tex_data) { return AVERROR(ENOMEM); } - ctx->crc_ctx = av_crc_get_table(AV_CRC_32_IEEE); - if (!ctx->crc_ctx) { - av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n"); - return AVERROR_BUG; - } + ret = ff_hashtable_alloc(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); + if (ret < 0) + return ret; + ret = ff_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); + if (ret < 0) + return ret; + ret = ff_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t), sizeof(uint32_t), 
LOOKBACK_HT_ELEMS); + if (ret < 0) + return ret; return 0; } @@ -321,6 +301,10 @@ static av_cold int dxv_close(AVCodecContext *avctx) av_freep(&ctx->tex_data); + ff_hashtable_freep(&ctx->color_ht); + ff_hashtable_freep(&ctx->lut_ht); + ff_hashtable_freep(&ctx->combo_ht); + return 0; } diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c index 3590b821a33ee..10b1ab337c648 100644 --- a/libavcodec/eac3enc.c +++ b/libavcodec/eac3enc.c @@ -135,6 +135,8 @@ static void eac3_output_frame_header(AC3EncodeContext *s, PutBitContext *pb) int blk, ch; AC3EncOptions *opt = &s->options; + put_bits_assume_flushed(pb); + put_bits(pb, 16, 0x0b77); /* sync word */ /* BSI header */ diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c index 40209f99359ab..463f46e091ef1 100644 --- a/libavcodec/ffv1enc.c +++ b/libavcodec/ffv1enc.c @@ -629,7 +629,6 @@ av_cold int ff_ffv1_encode_init(AVCodecContext *avctx) if (s->ec < 0) { if (s->version >= 4) { s->ec = 2; - s->crcref = 0x7a8c4079; } else if (s->version >= 3) { s->ec = 1; } else @@ -639,8 +638,10 @@ av_cold int ff_ffv1_encode_init(AVCodecContext *avctx) // CRC requires version 3+ if (s->ec == 1) s->version = FFMAX(s->version, 3); - if (s->ec == 2) + if (s->ec == 2) { s->version = FFMAX(s->version, 4); + s->crcref = 0x7a8c4079; + } if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { av_log(avctx, AV_LOG_ERROR, "Version 2 or 4 needed for requested features but version 2 or 4 is experimental and not enabled\n"); diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c index 64f3c420c51a7..aaf82159eef81 100644 --- a/libavcodec/ffv1enc_template.c +++ b/libavcodec/ffv1enc_template.c @@ -38,19 +38,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc, if (bits == 0) return 0; - if (ac != AC_GOLOMB_RICE) { - if (c->bytestream_end - c->bytestream < w * 35) { + if (sc->slice_coding_mode == 1) { + av_assert0(ac != AC_GOLOMB_RICE); + if (c->bytestream_end - c->bytestream < (w * bits + 7LL)>>3) { av_log(logctx, AV_LOG_ERROR, "encoded Range Coder frame too large\n"); return AVERROR_INVALIDDATA; } - } else { - if (put_bytes_left(&sc->pb, 0) < w * 4) { - av_log(logctx, AV_LOG_ERROR, "encoded Golomb Rice frame too large\n"); - return AVERROR_INVALIDDATA; - } - } - if (sc->slice_coding_mode == 1) { for (x = 0; x < w; x++) { int i; int v = sample[0][x]; @@ -62,6 +56,18 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc, return 0; } + if (ac != AC_GOLOMB_RICE) { + if (c->bytestream_end - c->bytestream < w * 35) { + av_log(logctx, AV_LOG_ERROR, "encoded Range Coder frame too large\n"); + return AVERROR_INVALIDDATA; + } + } else { + if (put_bytes_left(&sc->pb, 0) < w * 4) { + av_log(logctx, AV_LOG_ERROR, "encoded Golomb Rice frame too large\n"); + return AVERROR_INVALIDDATA; + } + } + for (x = 0; x < w; x++) { int diff, context; diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 42a98a5efa244..259bc75d4c8ff 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -37,6 +37,9 @@ #define LG_ALIGN_W 32 #define LG_ALIGN_H 32 +/* Unlike the decoder, we need 4 lines (but really only 3) */ +#define RGB_LINECACHE 4 + typedef struct VulkanEncodeFFv1FrameData { /* Output data */ AVBufferRef *out_data_ref; @@ -71,8 +74,8 @@ typedef struct VulkanEncodeFFv1Context { size_t max_heap_size; FFVulkanShader setup; + FFVulkanShader rct_search; FFVulkanShader reset; - FFVulkanShader rct; FFVulkanShader enc; /* Constant read-only buffers */ @@ -86,10 +89,6 @@ typedef 
struct VulkanEncodeFFv1Context { /* Output data buffer */ AVBufferPool *out_data_pool; - AVBufferPool *pkt_data_pool; - - /* Temporary data buffer */ - AVBufferPool *tmp_data_pool; /* Slice results buffer */ AVBufferPool *results_data_pool; @@ -103,6 +102,7 @@ typedef struct VulkanEncodeFFv1Context { int num_h_slices; int num_v_slices; int force_pcm; + int optimize_rct; int is_rgb; int ppi; @@ -114,19 +114,16 @@ extern const char *ff_source_rangecoder_comp; extern const char *ff_source_ffv1_vlc_comp; extern const char *ff_source_ffv1_common_comp; extern const char *ff_source_ffv1_reset_comp; -extern const char *ff_source_ffv1_enc_common_comp; -extern const char *ff_source_ffv1_enc_rct_comp; -extern const char *ff_source_ffv1_enc_vlc_comp; -extern const char *ff_source_ffv1_enc_ac_comp; +extern const char *ff_source_ffv1_rct_search_comp; extern const char *ff_source_ffv1_enc_setup_comp; extern const char *ff_source_ffv1_enc_comp; -extern const char *ff_source_ffv1_enc_rgb_comp; typedef struct FFv1VkParameters { VkDeviceAddress slice_state; VkDeviceAddress scratch_data; VkDeviceAddress out_data; + int32_t fmt_lut[4]; int32_t sar[2]; uint32_t chroma_shift[2]; @@ -134,7 +131,9 @@ typedef struct FFv1VkParameters { uint32_t context_count; uint32_t crcref; uint32_t slice_size_max; + int rct_offset; + uint8_t extend_lookup[8]; uint8_t bits_per_raw_sample; uint8_t context_model; uint8_t version; @@ -144,13 +143,15 @@ typedef struct FFv1VkParameters { uint8_t components; uint8_t planes; uint8_t codec_planes; + uint8_t planar_rgb; uint8_t transparency; uint8_t colorspace; uint8_t pic_mode; uint8_t ec; uint8_t ppi; uint8_t chunks; - uint8_t padding[1]; + uint8_t rct_search; + uint8_t padding[3]; } FFv1VkParameters; static void add_push_data(FFVulkanShader *shd) @@ -160,6 +161,7 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, u8buf scratch_data; ); GLSLC(1, u8buf out_data; ); GLSLC(0, ); + GLSLC(1, ivec4 fmt_lut; ); GLSLC(1, ivec2 sar; ); GLSLC(1, uvec2 chroma_shift; ); GLSLC(0, ); @@ -167,7 +169,9 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, uint context_count; ); GLSLC(1, uint32_t crcref; ); GLSLC(1, uint32_t slice_size_max; ); + GLSLC(1, int rct_offset; ); GLSLC(0, ); + GLSLC(1, uint8_t extend_lookup[8]; ); GLSLC(1, uint8_t bits_per_raw_sample; ); GLSLC(1, uint8_t context_model; ); GLSLC(1, uint8_t version; ); @@ -177,120 +181,81 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, uint8_t components; ); GLSLC(1, uint8_t planes; ); GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t planar_rgb; ); GLSLC(1, uint8_t transparency; ); GLSLC(1, uint8_t colorspace; ); GLSLC(1, uint8_t pic_mode; ); GLSLC(1, uint8_t ec; ); GLSLC(1, uint8_t ppi; ); GLSLC(1, uint8_t chunks; ); - GLSLC(1, uint8_t padding[1]; ); + GLSLC(1, uint8_t rct_search; ); + GLSLC(1, uint8_t padding[3]; ); GLSLC(0, }; ); ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), VK_SHADER_STAGE_COMPUTE_BIT); } -static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec, - AVFrame *enc_in, VkImageView *enc_in_views, - AVFrame **intermediate_frame, VkImageView *intermediate_views, - VkImageMemoryBarrier2 *img_bar, int *nb_img_bar, - VkBufferMemoryBarrier2 *buf_bar, int *nb_buf_bar, - FFVkBuffer *slice_data_buf, uint32_t slice_data_size) +typedef struct FFv1VkRCTSearchParameters { + int fmt_lut[4]; + int rct_offset; + uint8_t planar_rgb; + uint8_t transparency; + uint8_t key_frame; + uint8_t force_pcm; + uint8_t version; + uint8_t micro_version; + uint8_t padding[2]; +} FFv1VkRCTSearchParameters; + 
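Not part of the patch, but worth keeping in mind when reading the struct above: it is pushed verbatim into the shader's scalar-layout push_constant block, so size and member offsets must match on both sides. A purely illustrative compile-time sanity check (names taken from the struct above) could look like:

#include <assert.h>  /* static_assert (C11) */
#include <stddef.h>  /* offsetof */

/* Illustrative only: catches accidental size/offset drift between the C
 * struct and the mirrored GLSL push_constant block. With 4-byte ints the
 * struct is 28 bytes, satisfying Vulkan's rule that push-constant ranges
 * be a multiple of 4 bytes. */
static_assert(sizeof(FFv1VkRCTSearchParameters) % 4 == 0,
              "push-constant struct size must be a multiple of 4 bytes");
static_assert(offsetof(FFv1VkRCTSearchParameters, rct_offset) == 4 * sizeof(int),
              "fmt_lut must occupy the first 16 bytes");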
+static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec, + AVFrame *enc_in, VkImageView *enc_in_views, + FFVkBuffer *slice_data_buf, uint32_t slice_data_size) { - int err; VulkanEncodeFFv1Context *fv = avctx->priv_data; FFV1Context *f = &fv->ctx; FFVulkanFunctions *vk = &fv->s.vkfn; AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data; - FFv1VkRCTParameters pd; - - /* Create a temporaty frame */ - *intermediate_frame = av_frame_alloc(); - if (!(*intermediate_frame)) - return AVERROR(ENOMEM); - - RET(av_hwframe_get_buffer(fv->intermediate_frames_ref, - *intermediate_frame, 0)); - - RET(ff_vk_exec_add_dep_frame(&fv->s, exec, *intermediate_frame, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(&fv->s, exec, intermediate_views, - *intermediate_frame, - fv->rep_fmt)); + FFv1VkRCTSearchParameters pd; /* Update descriptors */ - ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct, - 1, 0, 0, + ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct_search, + 0, 0, 0, slice_data_buf, 0, slice_data_size*f->slice_count, VK_FORMAT_UNDEFINED); - ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct, + ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct_search, enc_in, enc_in_views, - 1, 1, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct, - *intermediate_frame, intermediate_views, - 1, 2, + 0, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - /* Prep the input/output images */ - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = *nb_img_bar, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = *nb_buf_bar, - }); - *nb_img_bar = 0; - if (*nb_buf_bar) { - slice_data_buf->stage = buf_bar[0].dstStageMask; - slice_data_buf->access = buf_bar[0].dstAccessMask; - *nb_buf_bar = 0; - } + ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct_search); - /* Run the shader */ - ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct); - pd = (FFv1VkRCTParameters) { - .offset = 1 << f->bits_per_raw_sample, - .bits = f->bits_per_raw_sample, + pd = (FFv1VkRCTSearchParameters) { + .rct_offset = 1 << f->bits_per_raw_sample, .planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) && (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1), .transparency = f->transparency, + .key_frame = f->key_frame, + .force_pcm = fv->force_pcm, + .version = f->version, + .micro_version = f->micro_version, }; - /* For some reason the C FFv1 encoder/decoder treats these differently */ - if (src_hwfc->sw_format == AV_PIX_FMT_GBRP10 || - src_hwfc->sw_format == AV_PIX_FMT_GBRP12 || - src_hwfc->sw_format == AV_PIX_FMT_GBRP14) + if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14) memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); else - ff_vk_set_perm(src_hwfc->sw_format, pd.fmt_lut, 1); + ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1); - ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct, + ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct_search, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pd), &pd); vk->CmdDispatch(exec->buf, 
fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); - /* Add a post-dispatch barrier before encoding */ - ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - -fail: - return err; + return 0; } static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, @@ -305,13 +270,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, VulkanEncodeFFv1FrameData *fd = exec->opaque; FFv1VkParameters pd; - AVFrame *intermediate_frame = NULL; - - /* Temporary data */ - size_t tmp_data_size; - AVBufferRef *tmp_data_ref; - FFVkBuffer *tmp_data_buf; - /* Slice data */ AVBufferRef *slice_data_ref; FFVkBuffer *slice_data_buf; @@ -330,11 +288,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, uint32_t context_count = f->context_count[f->context_model]; const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); - VkImageView in_views[AV_NUM_DATA_POINTERS]; - VkImageView intermediate_views[AV_NUM_DATA_POINTERS]; + AVFrame *src = (AVFrame *)pict; + VkImageView src_views[AV_NUM_DATA_POINTERS]; - AVFrame *enc_in = (AVFrame *)pict; - VkImageView *enc_in_views = in_views; + AVFrame *tmp = NULL; + VkImageView tmp_views[AV_NUM_DATA_POINTERS]; VkImageMemoryBarrier2 img_bar[37]; int nb_img_bar = 0; @@ -356,17 +314,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, f->slice_count = f->max_slice_count; - /* Allocate temporary data buffer */ - tmp_data_size = f->slice_count*CONTEXT_SIZE; - RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool, - &tmp_data_ref, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, tmp_data_size, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); - tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data; - ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0); - /* Allocate slice buffer data */ if (f->ac == AC_GOLOMB_RICE) plane_state_size = 8; @@ -419,33 +366,53 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, NULL, maxsize, - maxsize < fv->max_heap_size ? - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0)); + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + (maxsize < fv->max_heap_size ? + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0) | + (!(fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) ? 
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0x0))); out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1); /* Prepare input frame */ - RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in, + RET(ff_vk_exec_add_dep_frame(&fv->s, exec, src, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(&fv->s, exec, enc_in_views, enc_in, + RET(ff_vk_create_imageviews(&fv->s, exec, src_views, src, fv->rep_fmt)); - ff_vk_frame_barrier(&fv->s, exec, enc_in, img_bar, &nb_img_bar, + ff_vk_frame_barrier(&fv->s, exec, src, img_bar, &nb_img_bar, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_IGNORED); - /* Setup shader needs the original input */ + if (fv->is_rgb) { + /* Create a temporaty frame */ + tmp = av_frame_alloc(); + if (!(tmp)) + return AVERROR(ENOMEM); + + RET(av_hwframe_get_buffer(fv->intermediate_frames_ref, + tmp, 0)); + + RET(ff_vk_exec_add_dep_frame(&fv->s, exec, tmp, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(&fv->s, exec, tmp_views, + tmp, + fv->rep_fmt)); + } + + /* Setup shader */ ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup, 1, 0, 0, slice_data_buf, 0, slice_data_size*f->slice_count, VK_FORMAT_UNDEFINED); ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup, - enc_in, enc_in_views, + src, src_views, 1, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); @@ -467,6 +434,25 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, }; } + if (fv->optimize_rct) { + RET(run_rct_search(avctx, exec, + src, src_views, + slice_data_buf, slice_data_size)); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_data_buf->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_data_buf->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_data_buf->buf, + .size = slice_data_size*f->slice_count, + .offset = 0, + }; + } + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .pImageMemoryBarriers = img_bar, @@ -485,7 +471,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup); pd = (FFv1VkParameters) { .slice_state = slice_data_buf->address + f->slice_count*256, - .scratch_data = tmp_data_buf->address, .out_data = out_data_buf->address, .bits_per_raw_sample = f->bits_per_raw_sample, .sar[0] = pict->sample_aspect_ratio.num, @@ -495,6 +480,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .plane_state_size = plane_state_size, .context_count = context_count, .crcref = f->crcref, + .rct_offset = 1 << f->bits_per_raw_sample, .slice_size_max = out_data_buf->size / f->slice_count, .context_model = fv->ctx.context_model, .version = f->version, @@ -504,6 +490,8 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .components = fmt_desc->nb_components, .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt), .codec_planes = f->plane_count, + .planar_rgb = ff_vk_mt_is_np_rgb(avctx->sw_pix_fmt) && + (ff_vk_count_images((AVVkFrame *)src->data[0]) > 1), .transparency = f->transparency, .colorspace = f->colorspace, .pic_mode = !(pict->flags & 
AV_FRAME_FLAG_INTERLACED) ? 3 : @@ -511,12 +499,37 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .ec = f->ec, .ppi = fv->ppi, .chunks = fv->chunks, + .rct_search = fv->optimize_rct, }; + + /* For some reason the C FFv1 encoder/decoder treats these differently */ + if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14) + memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); + else + ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1); + + for (int i = 0; i < f->quant_table_count; i++) + pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) || + (f->quant_tables[i][4][127] != 0); ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pd), &pd); vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); + /* Clean up temporary image */ + if (fv->is_rgb) { + AVVkFrame *vkf = (AVVkFrame *)tmp->data[0]; + vk->CmdClearColorImage(exec->buf, vkf->img[0], VK_IMAGE_LAYOUT_GENERAL, + &((VkClearColorValue) { 0 }), + 1, &((VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + })); + } + /* Setup shader modified the slice data buffer */ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, @@ -570,19 +583,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, f->plane_count); } - /* Run RCT shader */ - if (fv->is_rgb) { - RET(run_rct(avctx, exec, - enc_in, enc_in_views, - &intermediate_frame, intermediate_views, - img_bar, &nb_img_bar, buf_bar, &nb_buf_bar, - slice_data_buf, slice_data_size)); - - /* Use the new frame */ - enc_in = intermediate_frame; - enc_in_views = intermediate_views; - } - /* If the reset shader ran, insert a barrier now. */ if (f->key_frame || f->version > 3) { /* Reset shader modified the slice data buffer */ @@ -601,6 +601,15 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, }; } + if (fv->is_rgb) { + ff_vk_frame_barrier(&fv->s, exec, tmp, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + } + /* Final barrier before encoding */ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, @@ -623,7 +632,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, 0, slice_data_size*f->slice_count, VK_FORMAT_UNDEFINED); ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc, - enc_in, enc_in_views, + src, src_views, 1, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); @@ -632,6 +641,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, results_data_buf, 0, results_data_buf->size, VK_FORMAT_UNDEFINED); + if (fv->is_rgb) + ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc, + tmp, tmp_views, + 1, 3, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc); ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc, @@ -648,20 +663,20 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, /* This, if needed, was referenced by the execution context * as it was declared as a dependency. 
*/ - av_frame_free(&intermediate_frame); + av_frame_free(&tmp); return 0; fail: - av_frame_free(&intermediate_frame); + av_frame_free(&tmp); ff_vk_exec_discard_deps(&fv->s, exec); return err; } -static int download_slices(AVCodecContext *avctx, +static int transfer_slices(AVCodecContext *avctx, VkBufferCopy *buf_regions, int nb_regions, VulkanEncodeFFv1FrameData *fd, - AVBufferRef *pkt_data_ref) + uint8_t *dst, AVBufferRef *dst_ref) { int err; VulkanEncodeFFv1Context *fv = avctx->priv_data; @@ -669,11 +684,20 @@ static int download_slices(AVCodecContext *avctx, FFVkExecContext *exec; FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; - FFVkBuffer *pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data; + + AVBufferRef *mapped_ref; + FFVkBuffer *mapped_buf; VkBufferMemoryBarrier2 buf_bar[8]; int nb_buf_bar = 0; + err = ff_vk_host_map_buffer(&fv->s, &mapped_ref, dst, dst_ref, + VK_BUFFER_USAGE_TRANSFER_DST_BIT); + if (err < 0) + return err; + + mapped_buf = (FFVkBuffer *)mapped_ref->data; + /* Transfer the slices */ exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool); ff_vk_exec_start(&fv->s, exec); @@ -681,7 +705,8 @@ static int download_slices(AVCodecContext *avctx, ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 0); fd->out_data_ref = NULL; /* Ownership passed */ - ff_vk_exec_add_dep_buf(&fv->s, exec, &pkt_data_ref, 1, 1); + ff_vk_exec_add_dep_buf(&fv->s, exec, &mapped_ref, 1, 0); + mapped_ref = NULL; /* Ownership passed */ /* Ensure the output buffer is finished */ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { @@ -705,8 +730,11 @@ static int download_slices(AVCodecContext *avctx, out_data_buf->access = buf_bar[0].dstAccessMask; nb_buf_bar = 0; + for (int i = 0; i < nb_regions; i++) + buf_regions[i].dstOffset += mapped_buf->virtual_offset; + vk->CmdCopyBuffer(exec->buf, - out_data_buf->buf, pkt_data_buf->buf, + out_data_buf->buf, mapped_buf->buf, nb_regions, buf_regions); /* Submit */ @@ -717,18 +745,6 @@ static int download_slices(AVCodecContext *avctx, /* We need the encoded data immediately */ ff_vk_exec_wait(&fv->s, exec); - /* Invalidate slice/output data if needed */ - if (!(pkt_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkMappedMemoryRange invalidate_data = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = pkt_data_buf->mem, - .offset = 0, - .size = VK_WHOLE_SIZE, - }; - vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev, - 1, &invalidate_data); - } - return 0; } @@ -739,13 +755,9 @@ static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec, VulkanEncodeFFv1Context *fv = avctx->priv_data; FFV1Context *f = &fv->ctx; FFVulkanFunctions *vk = &fv->s.vkfn; - - /* Packet data */ - AVBufferRef *pkt_data_ref; - FFVkBuffer *pkt_data_buf; - VulkanEncodeFFv1FrameData *fd = exec->opaque; + FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; FFVkBuffer *results_data_buf = (FFVkBuffer *)fd->results_data_ref->data; uint64_t *sc; @@ -782,20 +794,9 @@ static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec, av_log(avctx, AV_LOG_VERBOSE, "Encoded data: %iMiB\n", pkt->size / (1024*1024)); av_buffer_unref(&fd->results_data_ref); /* No need for this buffer anymore */ - /* Allocate packet buffer */ - err = ff_vk_get_pooled_buffer(&fv->s, &fv->pkt_data_pool, - &pkt_data_ref, - VK_BUFFER_USAGE_TRANSFER_DST_BIT, - NULL, pkt->size, - VK_MEMORY_PROPERTY_HOST_CACHED_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (err < 0) + /* Allocate packet */ + if ((err = ff_get_encode_buffer(avctx, pkt, pkt->size, 0)) < 0) 
return err; - pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data; - - /* Setup packet data */ - pkt->data = pkt_data_buf->mapped_mem; - pkt->buf = pkt_data_ref; pkt->pts = fd->pts; pkt->dts = fd->pts; @@ -808,8 +809,37 @@ static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec, fd->frame_opaque_ref = NULL; } - return download_slices(avctx, fv->buf_regions, f->slice_count, fd, - pkt_data_ref); + /* Try using host mapped memory transfers first */ + if (fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) { + err = transfer_slices(avctx, fv->buf_regions, f->slice_count, fd, + pkt->data, pkt->buf); + if (err >= 0) + return err; + } + + /* Invalidate slice/output data if needed */ + if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange invalidate_data = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = out_data_buf->mem, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev, + 1, &invalidate_data); + } + + /* Copy each slice */ + for (int i = 0; i < f->slice_count; i++) { + VkBufferCopy *region = &fv->buf_regions[i]; + memcpy(pkt->data + region->dstOffset, + out_data_buf->mapped_mem + region->srcOffset, + region->size); + } + + av_buffer_unref(&fd->out_data_ref); + + return 0; } static int vulkan_encode_ffv1_receive_packet(AVCodecContext *avctx, @@ -870,6 +900,7 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format) { int err; VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; AVHWFramesContext *frames_ctx; AVVulkanFramesContext *vk_frames; @@ -880,12 +911,13 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format) frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; frames_ctx->format = AV_PIX_FMT_VULKAN; frames_ctx->sw_format = sw_format; - frames_ctx->width = FFALIGN(fv->s.frames->width, 32); - frames_ctx->height = FFALIGN(fv->s.frames->height, 32); + frames_ctx->width = fv->s.frames->width; + frames_ctx->height = f->num_v_slices*RGB_LINECACHE; vk_frames = frames_ctx->hwctx; vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; - vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT; + vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; err = av_hwframe_ctx_init(fv->intermediate_frames_ref); @@ -953,6 +985,7 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) FFV1Context *f = &fv->ctx; int smp_bits = fv->ctx.use32bit ? 
32 : 16; + av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE); av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK); @@ -961,6 +994,9 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) av_bprintf(&shd->src, "#define GOLOMB\n" ); } + if (fv->is_rgb) + av_bprintf(&shd->src, "#define RGB\n"); + GLSLF(0, #define TYPE int%i_t ,smp_bits); GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); @@ -972,32 +1008,48 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) GLSLD(ff_source_ffv1_common_comp); } -static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) +static int init_rct_search_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) { int err; VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFVulkanShader *shd = &fv->setup; + FFV1Context *f = &fv->ctx; + FFVulkanShader *shd = &fv->rct_search; FFVulkanDescriptorSetBinding *desc_set; uint8_t *spv_data; size_t spv_len; void *spv_opaque = NULL; - RET(ff_vk_shader_init(&fv->s, shd, "ffv1_setup", + RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct_search", VK_SHADER_STAGE_COMPUTE_BIT, (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 1, 1, 1, + "GL_EXT_buffer_reference2", + "GL_EXT_null_initializer" }, 3, + 32, 32, 1, 0)); /* Common codec header */ GLSLD(ff_source_common_comp); - add_push_data(shd); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, ivec4 fmt_lut; ); + GLSLC(1, int rct_offset; ); + GLSLC(1, uint8_t planar_rgb; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t force_pcm; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t padding[3]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), + VK_SHADER_STAGE_COMPUTE_BIT); av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + /* Never used */ desc_set = (FFVulkanDescriptorSetBinding []) { { .name = "rangecoder_static_buf", @@ -1006,7 +1058,7 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) .mem_layout = "scalar", .buf_content = "uint8_t zero_one_state[512];", }, - { /* This descriptor is never used */ + { .name = "quant_buf", .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, @@ -1015,7 +1067,7 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", }, }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 1)); define_shared_code(avctx, shd); @@ -1024,7 +1076,8 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) .name = "slice_data_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx[1024];", + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, }, { .name = "src", @@ -1039,7 +1092,7 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); - GLSLD(ff_source_ffv1_enc_setup_comp); + 
GLSLD(ff_source_ffv1_rct_search_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); @@ -1054,44 +1107,33 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) return err; } -static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) +static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) { int err; VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFVulkanShader *shd = &fv->reset; + FFV1Context *f = &fv->ctx; + FFVulkanShader *shd = &fv->setup; FFVulkanDescriptorSetBinding *desc_set; uint8_t *spv_data; size_t spv_len; void *spv_opaque = NULL; - int wg_dim = FFMIN(fv->s.props.properties.limits.maxComputeWorkGroupSize[0], 1024); - RET(ff_vk_shader_init(&fv->s, shd, "ffv1_reset", + RET(ff_vk_shader_init(&fv->s, shd, "ffv1_setup", VK_SHADER_STAGE_COMPUTE_BIT, (const char *[]) { "GL_EXT_buffer_reference", "GL_EXT_buffer_reference2" }, 2, - wg_dim, 1, 1, + 1, 1, 1, 0)); /* Common codec header */ GLSLD(ff_source_common_comp); - - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); - GLSLC(1, u8buf slice_state; ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t padding[1]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), - VK_SHADER_STAGE_COMPUTE_BIT); + add_push_data(shd); av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + av_bprintf(&shd->src, "#define FULL_RENORM\n"); desc_set = (FFVulkanDescriptorSetBinding []) { { @@ -1101,7 +1143,7 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) .mem_layout = "scalar", .buf_content = "uint8_t zero_one_state[512];", }, - { + { /* This descriptor is never used */ .name = "quant_buf", .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, @@ -1118,14 +1160,24 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) { .name = "slice_data_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx[1024];", + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + { + .name = "src", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, + fv->rep_fmt), + .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0)); + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); - GLSLD(ff_source_ffv1_reset_comp); + GLSLD(ff_source_ffv1_enc_setup_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); @@ -1140,49 +1192,40 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) return err; } -static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) +static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) { int err; VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFVulkanShader *shd = &fv->rct; + FFV1Context *f = 
&fv->ctx; + FFVulkanShader *shd = &fv->reset; FFVulkanDescriptorSetBinding *desc_set; uint8_t *spv_data; size_t spv_len; void *spv_opaque = NULL; - int wg_count = sqrt(fv->s.props.properties.limits.maxComputeWorkGroupInvocations); - - enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx); - if (intermediate_fmt == AV_PIX_FMT_NONE) { - av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible " - "pixel format for RCT buffer!\n"); - return AVERROR(ENOTSUP); - } - - RET(init_indirect(avctx, intermediate_fmt)); + int wg_dim = FFMIN(fv->s.props.properties.limits.maxComputeWorkGroupSize[0], 1024); - RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct", + RET(ff_vk_shader_init(&fv->s, shd, "ffv1_reset", VK_SHADER_STAGE_COMPUTE_BIT, (const char *[]) { "GL_EXT_buffer_reference", "GL_EXT_buffer_reference2" }, 2, - wg_count, wg_count, 1, + wg_dim, 1, 1, 0)); /* Common codec header */ GLSLD(ff_source_common_comp); GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, ivec4 fmt_lut; ); - GLSLC(1, int offset; ); - GLSLC(1, uint8_t bits; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t color_planes; ); - GLSLC(1, uint8_t transparency; ); + GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t key_frame; ); GLSLC(1, uint8_t version; ); GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t padding[2]; ); + GLSLC(1, uint8_t padding[1]; ); GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters), + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), VK_SHADER_STAGE_COMPUTE_BIT); av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); @@ -1216,32 +1259,13 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .mem_quali = "readonly", .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx[1024];", - }, - { - .name = "src", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, - fv->rep_fmt), - .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "dst", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(intermediate_fmt, - fv->rep_fmt), - .elems = av_pix_fmt_count_planes(intermediate_fmt), - .mem_quali = "writeonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, }, }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0)); - GLSLD(ff_source_ffv1_enc_rct_comp); + GLSLD(ff_source_ffv1_reset_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); @@ -1264,19 +1288,16 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) FFVulkanShader *shd = &fv->enc; FFVulkanDescriptorSetBinding *desc_set; - AVHWFramesContext *frames_ctx = fv->intermediate_frames_ref ? 
- (AVHWFramesContext *)fv->intermediate_frames_ref->data : - fv->s.frames; - uint8_t *spv_data; size_t spv_len; void *spv_opaque = NULL; + int use_cached_reader = fv->ctx.ac != AC_GOLOMB_RICE; RET(ff_vk_shader_init(&fv->s, shd, "ffv1_enc", VK_SHADER_STAGE_COMPUTE_BIT, (const char *[]) { "GL_EXT_buffer_reference", "GL_EXT_buffer_reference2" }, 2, - 1, 1, 1, + use_cached_reader ? CONTEXT_SIZE : 1, 1, 1, 0)); /* Common codec header */ @@ -1288,6 +1309,9 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + if (use_cached_reader) + av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n"); + desc_set = (FFVulkanDescriptorSetBinding []) { { .name = "rangecoder_static_buf", @@ -1322,15 +1346,16 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) .name = "slice_data_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx[1024];", + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, }, { .name = "src", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format, + .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, fv->rep_fmt), - .elems = av_pix_fmt_count_planes(frames_ctx->sw_format), + .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), .mem_quali = "readonly", .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, @@ -1341,21 +1366,24 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) .mem_quali = "writeonly", .buf_content = "uint64_t slice_results[2048];", }, + { /* place holder for desc_set[3] */ + }, }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); - - /* Assemble the shader body */ - GLSLD(ff_source_ffv1_enc_common_comp); - - if (f->ac == AC_GOLOMB_RICE) - GLSLD(ff_source_ffv1_enc_vlc_comp); - else - GLSLD(ff_source_ffv1_enc_ac_comp); + if (fv->is_rgb) { + AVHWFramesContext *intermediate_frames_ctx; + intermediate_frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; + desc_set[3] = (FFVulkanDescriptorSetBinding) { + .name = "tmp", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(intermediate_frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }; + } + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3 + fv->is_rgb, 0, 0)); - if (fv->is_rgb) - GLSLD(ff_source_ffv1_enc_rgb_comp); - else - GLSLD(ff_source_ffv1_enc_comp); + GLSLD(ff_source_ffv1_enc_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); @@ -1463,22 +1491,24 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) f->num_v_slices = 32; } } else if (f->num_h_slices && f->num_v_slices <= 0) { - f->num_v_slices = 1024 / f->num_h_slices; + f->num_v_slices = MAX_SLICES / f->num_h_slices; } else if (f->num_v_slices && f->num_h_slices <= 0) { - f->num_h_slices = 1024 / f->num_v_slices; + f->num_h_slices = MAX_SLICES / f->num_v_slices; } f->num_h_slices = FFMIN(f->num_h_slices, avctx->width); f->num_v_slices = FFMIN(f->num_v_slices, avctx->height); - if (f->num_h_slices * f->num_v_slices > 1024) { + if (f->num_h_slices * f->num_v_slices > MAX_SLICES) { av_log(avctx, AV_LOG_ERROR, "Too many slices (%i), maximum supported " - "by the standard is 1024\n", - 
f->num_h_slices * f->num_v_slices); + "by the standard is %i\n", + f->num_h_slices * f->num_v_slices, MAX_SLICES); return AVERROR_PATCHWELCOME; } } + f->max_slice_count = f->num_h_slices * f->num_v_slices; + if ((err = ff_ffv1_write_extradata(avctx)) < 0) return err; @@ -1584,6 +1614,17 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) if (!fv->is_rgb && f->bits_per_raw_sample > 8) fv->rep_fmt = FF_VK_REP_INT; + /* Init rct search shader */ + fv->optimize_rct = fv->is_rgb && f->version >= 4 && + !fv->force_pcm && fv->optimize_rct; + if (fv->optimize_rct) { + err = init_rct_search_shader(avctx, spv); + if (err < 0) { + spv->uninit(&spv); + return err; + } + } + /* Init setup shader */ err = init_setup_shader(avctx, spv); if (err < 0) { @@ -1598,13 +1639,15 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) return err; } - /* Init RCT shader */ if (fv->is_rgb) { - err = init_rct_shader(avctx, spv); - if (err < 0) { - spv->uninit(&spv); - return err; + enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx); + if (intermediate_fmt == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible " + "pixel format for RCT buffer!\n"); + return AVERROR(ENOTSUP); } + + RET(init_indirect(avctx, intermediate_fmt)); } /* Encode shader */ @@ -1674,7 +1717,6 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) for (int i = 0; i < fv->async_depth; i++) fv->exec_pool.contexts[i].opaque = &fv->exec_ctx_info[i]; - f->max_slice_count = f->num_h_slices * f->num_v_slices; fv->buf_regions = av_malloc_array(f->max_slice_count, sizeof(*fv->buf_regions)); if (!fv->buf_regions) return AVERROR(ENOMEM); @@ -1691,9 +1733,9 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool); ff_vk_shader_free(&fv->s, &fv->enc); - ff_vk_shader_free(&fv->s, &fv->rct); ff_vk_shader_free(&fv->s, &fv->reset); ff_vk_shader_free(&fv->s, &fv->setup); + ff_vk_shader_free(&fv->s, &fv->rct_search); if (fv->exec_ctx_info) { for (int i = 0; i < fv->async_depth; i++) { @@ -1710,8 +1752,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) av_buffer_pool_uninit(&fv->results_data_pool); av_buffer_pool_uninit(&fv->out_data_pool); - av_buffer_pool_uninit(&fv->pkt_data_pool); - av_buffer_pool_uninit(&fv->tmp_data_pool); av_buffer_unref(&fv->keyframe_slice_data_ref); av_buffer_pool_uninit(&fv->slice_data_pool); @@ -1730,8 +1770,8 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) #define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x) #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM static const AVOption vulkan_encode_ffv1_options[] = { - { "slicecrc", "Protect slices with CRCs", OFFSET(ctx.ec), AV_OPT_TYPE_BOOL, - { .i64 = -1 }, -1, 1, VE }, + { "slicecrc", "Protect slices with CRCs", OFFSET(ctx.ec), AV_OPT_TYPE_INT, + { .i64 = -1 }, -1, 2, VE }, { "context", "Context model", OFFSET(ctx.context_model), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "coder", "Coder type", OFFSET(ctx.ac), AV_OPT_TYPE_INT, @@ -1752,13 +1792,16 @@ static const AVOption vulkan_encode_ffv1_options[] = { { .i64 = QTABLE_GT8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" }, { "slices_h", "Number of horizontal slices", OFFSET(num_h_slices), AV_OPT_TYPE_INT, - { .i64 = -1 }, -1, 1024, VE }, + { .i64 = -1 }, -1, MAX_SLICES, VE }, { "slices_v", "Number of vertical slices", OFFSET(num_v_slices), AV_OPT_TYPE_INT, - { .i64 = -1 }, -1, 1024, VE }, + { .i64 = -1 }, -1, 
MAX_SLICES, VE }, { "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, + { "rct_search", "Run a search for RCT parameters (level 4 only)", OFFSET(optimize_rct), AV_OPT_TYPE_BOOL, + { .i64 = 1 }, 0, 1, VE }, + { "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, INT_MAX, VE }, diff --git a/libavcodec/flvdec.c b/libavcodec/flvdec.c index 627538ef83d42..e8c159563967a 100644 --- a/libavcodec/flvdec.c +++ b/libavcodec/flvdec.c @@ -89,8 +89,6 @@ int ff_flv_decode_picture_header(MpegEncContext *s) skip_bits1(&s->gb); /* deblocking flag */ s->chroma_qscale = s->qscale = get_bits(&s->gb, 5); - s->h263_plus = 0; - s->h263_long_vectors = 0; /* PEI */ diff --git a/libavcodec/flvenc.c b/libavcodec/flvenc.c index df1a650222edb..8f07c3c778119 100644 --- a/libavcodec/flvenc.c +++ b/libavcodec/flvenc.c @@ -22,13 +22,14 @@ #include "flvenc.h" #include "mpegvideo.h" #include "mpegvideoenc.h" +#include "put_bits.h" int ff_flv_encode_picture_header(MPVMainEncContext *const m) { MPVEncContext *const s = &m->s; int format; - align_put_bits(&s->pb); + put_bits_assume_flushed(&s->pb); put_bits(&s->pb, 17, 1); /* 0: H.263 escape codes 1: 11-bit escape codes */ diff --git a/libavcodec/h261enc.c b/libavcodec/h261enc.c index 70f5f2b09c984..c217fb6233d1c 100644 --- a/libavcodec/h261enc.c +++ b/libavcodec/h261enc.c @@ -35,6 +35,7 @@ #include "h261.h" #include "h261enc.h" #include "mpegvideoenc.h" +#include "put_bits.h" #define H261_MAX_RUN 26 #define H261_MAX_LEVEL 15 @@ -72,7 +73,7 @@ static int h261_encode_picture_header(MPVMainEncContext *const m) MPVEncContext *const s = &h->s.s; int temp_ref; - align_put_bits(&s->pb); + put_bits_assume_flushed(&s->pb); put_bits(&s->pb, 20, 0x10); /* PSC */ diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index 2f8bd73665e91..37abf3382ebfa 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -110,7 +110,8 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) ff_mpv_unquantize_init(&unquant_dsp_ctx, avctx->flags & AV_CODEC_FLAG_BITEXACT, 0); // dct_unquantize defaults for H.263; - // they might change on a per-frame basis for MPEG-4. + // they might change on a per-frame basis for MPEG-4; + // dct_unquantize_inter will be unset for MSMPEG4 codecs later. 
s->dct_unquantize_intra = unquant_dsp_ctx.dct_unquantize_h263_intra; s->dct_unquantize_inter = unquant_dsp_ctx.dct_unquantize_h263_inter; @@ -150,9 +151,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) s->h263_flv = 1; break; default: - av_log(avctx, AV_LOG_ERROR, "Unsupported codec %d\n", - avctx->codec->id); - return AVERROR(ENOSYS); + av_unreachable("Switch contains a case for every codec using ff_h263_decode_init()"); } if (avctx->codec_tag == AV_RL32("L263") || avctx->codec_tag == AV_RL32("S263")) @@ -174,6 +173,12 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) return 0; } +static void report_decode_progress(MpegEncContext *s) +{ + if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame && !s->er.error_occurred) + ff_thread_progress_report(&s->cur_pic.ptr->progress, s->mb_y); +} + static int decode_slice(MpegEncContext *s) { const int part_mask = s->partitioned_frame @@ -278,8 +283,8 @@ static int decode_slice(MpegEncContext *s) if (++s->mb_x >= s->mb_width) { s->mb_x = 0; + report_decode_progress(s); ff_mpeg_draw_horiz_band(s, s->mb_y * mb_size, mb_size); - ff_mpv_report_decode_progress(s); s->mb_y++; } return 0; @@ -305,8 +310,8 @@ static int decode_slice(MpegEncContext *s) ff_h263_loop_filter(s); } + report_decode_progress(s); ff_mpeg_draw_horiz_band(s, s->mb_y * mb_size, mb_size); - ff_mpv_report_decode_progress(s); s->mb_x = 0; } @@ -531,11 +536,6 @@ int ff_h263_decode_frame(AVCodecContext *avctx, AVFrame *pict, } } - if (s->codec_id == AV_CODEC_ID_H263 || - s->codec_id == AV_CODEC_ID_H263P || - s->codec_id == AV_CODEC_ID_H263I) - s->gob_index = H263_GOB_HEIGHT(s->height); - /* skip B-frames if we don't have reference frames */ if (!s->last_pic.ptr && (s->pict_type == AV_PICTURE_TYPE_B || s->droppable)) diff --git a/libavcodec/h263dec.h b/libavcodec/h263dec.h index 633d4aa577453..c1306c7ec5a15 100644 --- a/libavcodec/h263dec.h +++ b/libavcodec/h263dec.h @@ -55,7 +55,7 @@ int ff_h263_decode_mba(MpegEncContext *s); /** * Print picture info if FF_DEBUG_PICT_INFO is set. */ -void ff_h263_show_pict_info(MpegEncContext *s); +void ff_h263_show_pict_info(MpegEncContext *s, int h263_plus); int ff_intel_h263_decode_picture_header(MpegEncContext *s); int ff_h263_decode_mb(MpegEncContext *s, diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c index 82816999e842c..fa57911c08bbb 100644 --- a/libavcodec/h2645_parse.c +++ b/libavcodec/h2645_parse.c @@ -22,6 +22,7 @@ #include "config.h" +#include "libavutil/error.h" #include "libavutil/intmath.h" #include "libavutil/intreadwrite.h" #include "libavutil/mem.h" @@ -588,8 +589,9 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length, } else ret = h264_parse_nal_header(nal, logctx); if (ret < 0) { - av_log(logctx, AV_LOG_WARNING, "Invalid NAL unit %d, skipping.\n", - nal->type); + av_log(logctx, AV_LOG_WARNING, + "Failed to parse header of NALU (type %d): \"%s\". Skipping NALU.\n", + nal->type, av_err2str(ret)); continue; } diff --git a/libavcodec/h2645_sei.c b/libavcodec/h2645_sei.c index c7950a4a45ae7..d17c4fb5f9bae 100644 --- a/libavcodec/h2645_sei.c +++ b/libavcodec/h2645_sei.c @@ -44,8 +44,9 @@ #include "h2645_sei.h" #include "itut35.h" -#define IS_H264(codec_id) (CONFIG_H264_SEI && CONFIG_HEVC_SEI ? codec_id == AV_CODEC_ID_H264 : CONFIG_H264_SEI) -#define IS_HEVC(codec_id) (CONFIG_H264_SEI && CONFIG_HEVC_SEI ? codec_id == AV_CODEC_ID_HEVC : CONFIG_HEVC_SEI) +#define IS_H264(codec_id) (CONFIG_H264_SEI && (CONFIG_HEVC_SEI || CONFIG_VVC_SEI ) ? 
codec_id == AV_CODEC_ID_H264 : CONFIG_H264_SEI) +#define IS_HEVC(codec_id) (CONFIG_HEVC_SEI && (CONFIG_H264_SEI || CONFIG_VVC_SEI ) ? codec_id == AV_CODEC_ID_HEVC : CONFIG_HEVC_SEI) +#define IS_VVC(codec_id) (CONFIG_VVC_SEI && (CONFIG_H264_SEI || CONFIG_HEVC_SEI) ? codec_id == AV_CODEC_ID_VVC : CONFIG_VVC_SEI ) #if CONFIG_HEVC_SEI static int decode_registered_user_data_dynamic_hdr_plus(HEVCSEIDynamicHDRPlus *s, @@ -427,7 +428,7 @@ static int decode_film_grain_characteristics(H2645SEIFilmGrainCharacteristics *h } } } - if (IS_HEVC(codec_id)) + if (!IS_H264(codec_id)) h->persistence_flag = get_bits1(gb); else h->repetition_period = get_ue_golomb_long(gb); @@ -854,7 +855,7 @@ FF_ENABLE_DEPRECATION_WARNINGS fgp->subsampling_x = fgp->subsampling_y = 0; h274->model_id = fgc->model_id; - if (fgc->separate_colour_description_present_flag) { + if (IS_VVC(codec_id) || fgc->separate_colour_description_present_flag) { fgp->bit_depth_luma = fgc->bit_depth_luma; fgp->bit_depth_chroma = fgc->bit_depth_chroma; fgp->color_range = fgc->full_range + 1; diff --git a/libavcodec/h2645_sei.h b/libavcodec/h2645_sei.h index abc49760d9f1b..f2ad7147c6059 100644 --- a/libavcodec/h2645_sei.h +++ b/libavcodec/h2645_sei.h @@ -108,7 +108,7 @@ typedef struct H2645SEIFilmGrainCharacteristics { uint8_t intensity_interval_upper_bound[3][256]; int16_t comp_model_value[3][256][6]; int repetition_period; //< H.264 only - int persistence_flag; //< HEVC only + int persistence_flag; //< HEVC/VVC } H2645SEIFilmGrainCharacteristics; typedef struct H2645SEIMasteringDisplay { diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c index e5c7bf46f9b3d..0e576c15632a3 100644 --- a/libavcodec/h2645_vui.c +++ b/libavcodec/h2645_vui.c @@ -67,11 +67,16 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *l vui->matrix_coeffs = get_bits(gb, 8); // Set invalid values to "unspecified" - if (!av_color_primaries_name(vui->colour_primaries)) + if (vui->colour_primaries == AVCOL_PRI_RESERVED0 || + vui->colour_primaries == AVCOL_PRI_RESERVED || + !av_color_primaries_name(vui->colour_primaries)) vui->colour_primaries = AVCOL_PRI_UNSPECIFIED; - if (!av_color_transfer_name(vui->transfer_characteristics)) + if (vui->transfer_characteristics == AVCOL_TRC_RESERVED0 || + vui->transfer_characteristics == AVCOL_TRC_RESERVED || + !av_color_transfer_name(vui->transfer_characteristics)) vui->transfer_characteristics = AVCOL_TRC_UNSPECIFIED; - if (!av_color_space_name(vui->matrix_coeffs)) + if (vui->matrix_coeffs == AVCOL_SPC_RESERVED || + !av_color_space_name(vui->matrix_coeffs)) vui->matrix_coeffs = AVCOL_SPC_UNSPECIFIED; } } diff --git a/libavcodec/h274.c b/libavcodec/h274.c index 5709200322e61..e46926e4cc625 100644 --- a/libavcodec/h274.c +++ b/libavcodec/h274.c @@ -26,7 +26,11 @@ */ #include "libavutil/avassert.h" +#include "libavutil/bswap.h" +#include "libavutil/crc.h" #include "libavutil/imgutils.h" +#include "libavutil/md5.h" +#include "libavutil/mem.h" #include "h274.h" @@ -790,3 +794,154 @@ static const int8_t R64T[64][64] = { 17, -16, 15, -14, 13, -12, 11, -10, 9, -8, 7, -6, 4, -3, 2, -1, } }; + +static int verify_plane_md5(struct AVMD5 *ctx, + const uint8_t *src, const int w, const int h, const int stride, + const uint8_t *expected) +{ +#define MD5_SIZE 16 + uint8_t md5[MD5_SIZE]; + av_md5_init(ctx); + for (int j = 0; j < h; j++) { + av_md5_update(ctx, src, w); + src += stride; + } + av_md5_final(ctx, md5); + + if (memcmp(md5, expected, MD5_SIZE)) + return AVERROR_INVALIDDATA; + + return 0; +} + +static int 
verify_plane_crc(const uint8_t *src, const int w, const int h, const int stride, + uint16_t expected) +{ + uint32_t crc = 0x0F1D; // CRC-16-CCITT-AUG + const AVCRC *ctx = av_crc_get_table(AV_CRC_16_CCITT); + + expected = av_le2ne32(expected); + for (int j = 0; j < h; j++) { + crc = av_crc(ctx, crc, src, w); + src += stride; + } + crc = av_bswap16(crc); + + if (crc != expected) + return AVERROR_INVALIDDATA; + + return 0; +} + +#define CAL_CHECKSUM(pixel) ((pixel) ^ xor_mask) +static int verify_plane_checksum(const uint8_t *src, const int w, const int h, const int stride, const int ps, + uint32_t expected) +{ + uint32_t checksum = 0; + expected = av_le2ne32(expected); + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + const int xor_mask = (x & 0xFF) ^ (y & 0xFF) ^ (x >> 8) ^ (y >> 8); + checksum += CAL_CHECKSUM(src[x << ps]); + if (ps) + checksum += CAL_CHECKSUM(src[(x << ps) + 1]); + } + src += stride; + } + + if (checksum != expected) + return AVERROR_INVALIDDATA; + + return 0; +} + +enum { + HASH_MD5SUM, + HASH_CRC, + HASH_CHECKSUM, + HASH_LAST = HASH_CHECKSUM, +}; + +struct H274HashContext { + int type; + struct AVMD5 *ctx; +}; + +void ff_h274_hash_freep(H274HashContext **ctx) +{ + if (*ctx) { + H274HashContext *c = *ctx; + if (c->ctx) + av_free(c->ctx); + av_freep(ctx); + } +} + +int ff_h274_hash_init(H274HashContext **ctx, const int type) +{ + H274HashContext *c; + + if (type > HASH_LAST || !ctx) + return AVERROR(EINVAL); + + c = *ctx; + if (c) { + if (c->type != type) { + if (c->type == HASH_MD5SUM) + av_freep(&c->ctx); + c->type = type; + } + } else { + c = av_mallocz(sizeof(H274HashContext)); + if (!c) + return AVERROR(ENOMEM); + c->type = type; + *ctx = c; + } + + if (type == HASH_MD5SUM && !c->ctx) { + c->ctx = av_md5_alloc(); + if (!c->ctx) + return AVERROR(ENOMEM); + } + + return 0; +} + +int ff_h274_hash_verify(H274HashContext *c, const H274SEIPictureHash *hash, + const AVFrame *frame, const int coded_width, const int coded_height) +{ + const AVPixFmtDescriptor *desc; + int err = 0; + + if (!c || !hash || !frame) + return AVERROR(EINVAL); + + if (c->type != hash->hash_type) + return AVERROR(EINVAL); + + desc = av_pix_fmt_desc_get(frame->format); + if (!desc) + return AVERROR(EINVAL); + + for (int i = 0; i < desc->nb_components; i++) { + const int w = i ? (coded_width >> desc->log2_chroma_w) : coded_width; + const int h = i ? 
(coded_height >> desc->log2_chroma_h) : coded_height; + const int ps = desc->comp[i].step - 1; + const uint8_t *src = frame->data[i]; + const int stride = frame->linesize[i]; + + if (c->type == HASH_MD5SUM) + err = verify_plane_md5(c->ctx, src, w << ps, h, stride, hash->md5[i]); + else if (c->type == HASH_CRC) + err = verify_plane_crc(src, w << ps, h, stride, hash->crc[i]); + else if (c->type == HASH_CHECKSUM) + err = verify_plane_checksum(src, w, h, stride, ps, hash->checksum[i]); + if (err < 0) + goto fail; + } + +fail: + return err; +} diff --git a/libavcodec/h274.h b/libavcodec/h274.h index cebc8becb37ba..055dd591d2966 100644 --- a/libavcodec/h274.h +++ b/libavcodec/h274.h @@ -64,4 +64,29 @@ int ff_h274_apply_film_grain(AVFrame *out, const AVFrame *in, H274FilmGrainDatabase *db, const AVFilmGrainParams *params); +typedef struct H274HashContext H274HashContext; + +typedef struct H274SEIPictureHash { + int present; + union { + uint8_t md5[3][16]; + uint16_t crc[3]; + uint32_t checksum[3]; + }; + uint8_t hash_type; +} H274SEIPictureHash; + +int ff_h274_hash_init(H274HashContext **c, int type); +int ff_h274_hash_verify(H274HashContext *c, const H274SEIPictureHash *hash, + const AVFrame *frame, int coded_width, int coded_height); +void ff_h274_hash_freep(H274HashContext **c); + +typedef struct H274SEIFrameFieldInfo { + int present; + int picture_struct; + uint8_t display_elemental_periods; + uint8_t source_scan_type; + uint8_t duplicate_flag; +} H274SEIFrameFieldInfo; + #endif /* AVCODEC_H274_H */ diff --git a/libavcodec/hashtable.c b/libavcodec/hashtable.c new file mode 100644 index 0000000000000..151476176ba10 --- /dev/null +++ b/libavcodec/hashtable.c @@ -0,0 +1,214 @@ +/* + * Generic hashtable + * Copyright (C) 2025 Emma Worley + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include <string.h> + +#include "libavutil/crc.h" +#include "libavutil/error.h" +#include "libavutil/mem.h" +#include "hashtable.h" + +#define ALIGN _Alignof(size_t) + +struct FFHashtableContext { + size_t key_size; + size_t key_size_aligned; + size_t val_size; + size_t val_size_aligned; + size_t entry_size; + size_t max_entries; + size_t nb_entries; + const AVCRC *crc; + uint8_t *table; + uint8_t *swapbuf; +}; + +/* + * Hash table entries are comprised of a probe sequence length (PSL), key, and + * value. When the PSL of an entry is zero, it means it is not occupied by a + * key/value pair. When the PSL is non-zero, it represents the "distance" of + * the entry from its "home" location plus one, where the "home" location is + * hash(key) % max_entries.
+ */ + +#define ENTRY_PSL_VAL(entry) (*(size_t*)(entry)) +#define ENTRY_KEY_PTR(entry) ((entry) + FFALIGN(sizeof(size_t), ALIGN)) +#define ENTRY_VAL_PTR(entry) (ENTRY_KEY_PTR(entry) + ctx->key_size_aligned) + +#define KEYS_EQUAL(k1, k2) (!memcmp((k1), (k2), ctx->key_size)) + +int ff_hashtable_alloc(struct FFHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries) +{ + struct FFHashtableContext *res = av_malloc(sizeof(struct FFHashtableContext)); + if (!res) + return AVERROR(ENOMEM); + res->key_size = key_size; + res->key_size_aligned = FFALIGN(key_size, ALIGN); + res->val_size = val_size; + res->val_size_aligned = FFALIGN(val_size, ALIGN); + res->entry_size = FFALIGN(sizeof(size_t), ALIGN) + + res->key_size_aligned + + res->val_size_aligned; + res->max_entries = max_entries; + res->nb_entries = 0; + res->crc = av_crc_get_table(AV_CRC_32_IEEE); + if (!res->crc) { + ff_hashtable_freep(&res); + return AVERROR_BUG; + } + res->table = av_calloc(res->max_entries, res->entry_size); + if (!res->table) { + ff_hashtable_freep(&res); + return AVERROR(ENOMEM); + } + + res->swapbuf = av_calloc(2, res->key_size_aligned + res->val_size_aligned); + if (!res->swapbuf) { + ff_hashtable_freep(&res); + return AVERROR(ENOMEM); + } + *ctx = res; + return 0; +} + +static size_t hash_key(const struct FFHashtableContext *ctx, const void *key) +{ + return av_crc(ctx->crc, 0, key, ctx->key_size) % ctx->max_entries; +} + +int ff_hashtable_get(const struct FFHashtableContext *ctx, const void *key, void *val) +{ + if (!ctx->nb_entries) + return 0; + + size_t hash = hash_key(ctx, key); + + for (size_t psl = 1; psl <= ctx->max_entries; psl++) { + size_t wrapped_index = (hash + psl) % ctx->max_entries; + uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size; + if (ENTRY_PSL_VAL(entry) < psl) + // When PSL stops increasing it means there are no further entries + // with the same key hash. + return 0; + if (KEYS_EQUAL(ENTRY_KEY_PTR(entry), key)) { + memcpy(val, ENTRY_VAL_PTR(entry), ctx->val_size); + return 1; + } + } + return 0; +} + +int ff_hashtable_set(struct FFHashtableContext *ctx, const void *key, const void *val) +{ + int swapping = 0; + size_t psl = 1; + size_t hash = hash_key(ctx, key); + size_t wrapped_index = hash % ctx->max_entries; + uint8_t *set = ctx->swapbuf; + uint8_t *tmp = ctx->swapbuf + ctx->key_size_aligned + ctx->val_size_aligned; + + memcpy(set, key, ctx->key_size); + memcpy(set + ctx->key_size_aligned, val, ctx->val_size); + + for (size_t i = 0; i < ctx->max_entries; i++) { + if (++wrapped_index == ctx->max_entries) + wrapped_index = 0; + uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size; + if (!ENTRY_PSL_VAL(entry) || (!swapping && KEYS_EQUAL(ENTRY_KEY_PTR(entry), set))) { + if (!ENTRY_PSL_VAL(entry)) + ctx->nb_entries++; + ENTRY_PSL_VAL(entry) = psl; + memcpy(ENTRY_KEY_PTR(entry), set, ctx->key_size_aligned + ctx->val_size); + return 1; + } + if (ENTRY_PSL_VAL(entry) < psl) { + // When PSL stops increasing it means there are no further entries + // with the same key hash. We can only hope to find an unoccupied + // entry. + if (ctx->nb_entries == ctx->max_entries) + // The table is full so inserts are impossible. + return 0; + // Robin Hood hash tables "steal from the rich" by minimizing the + // PSL of the inserted entry. 
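            // Worked example (illustrative, not from the original source):
            // with max_entries = 8, suppose an incoming key hashes to slot 2
            // while slots 3, 4 and 5 hold entries with PSLs 3, 2 and 1.
            // Probing reaches slot 5 with psl == 3, which beats the resident
            // PSL of 1 (that entry is "rich", sitting right next to its home
            // slot), so the incoming key/value is written there with PSL 3
            // and the evicted entry keeps probing from slot 6 with its PSL
            // continuing at 2. Keeping probe lengths this even is what lets
            // ff_hashtable_get() stop as soon as it sees a PSL smaller than
            // its own probe count.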
+ swapping = 1; + // set needs to swap with entry + memcpy(tmp, ENTRY_KEY_PTR(entry), ctx->key_size_aligned + ctx->val_size_aligned); + memcpy(ENTRY_KEY_PTR(entry), set, ctx->key_size_aligned + ctx->val_size_aligned); + FFSWAP(uint8_t*, set, tmp); + FFSWAP(size_t, psl, ENTRY_PSL_VAL(entry)); + } + psl++; + } + return 0; +} + +int ff_hashtable_delete(struct FFHashtableContext *ctx, const void *key) +{ + if (!ctx->nb_entries) + return 0; + + uint8_t *next_entry; + size_t hash = hash_key(ctx, key); + size_t wrapped_index = hash % ctx->max_entries; + + for (size_t psl = 1; psl <= ctx->max_entries; psl++) { + if (++wrapped_index == ctx->max_entries) + wrapped_index = 0; + uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size; + if (ENTRY_PSL_VAL(entry) < psl) + // When PSL stops increasing it means there are no further entries + // with the same key hash. + return 0; + if (KEYS_EQUAL(ENTRY_KEY_PTR(entry), key)) { + ENTRY_PSL_VAL(entry) = 0; + // Shift each following entry that will benefit from a reduced PSL. + for (psl++; psl <= ctx->max_entries; psl++) { + if (++wrapped_index == ctx->max_entries) + wrapped_index = 0; + next_entry = ctx->table + wrapped_index * ctx->entry_size; + if (ENTRY_PSL_VAL(next_entry) <= 1) { + ctx->nb_entries--; + return 1; + } + memcpy(entry, next_entry, ctx->entry_size); + ENTRY_PSL_VAL(entry)--; + ENTRY_PSL_VAL(next_entry) = 0; + entry = next_entry; + } + } + }; + return 0; +} + +void ff_hashtable_clear(struct FFHashtableContext *ctx) +{ + memset(ctx->table, 0, ctx->entry_size * ctx->max_entries); +} + +void ff_hashtable_freep(struct FFHashtableContext **ctx) +{ + if (*ctx) { + av_freep(&(*ctx)->table); + av_freep(&(*ctx)->swapbuf); + } + av_freep(ctx); +} diff --git a/libavcodec/hashtable.h b/libavcodec/hashtable.h new file mode 100644 index 0000000000000..f81b4bb93f478 --- /dev/null +++ b/libavcodec/hashtable.h @@ -0,0 +1,94 @@ +/* + * Generic hashtable + * Copyright (C) 2024 Emma Worley + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_HASHTABLE_H +#define AVCODEC_HASHTABLE_H + +#include + +/* Implements a hash table using Robin Hood open addressing. + * See: https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf + * + * Keys are placed in the table based on their CRC value and are considered + * equal when they are bytewise-identical. + */ + +typedef struct FFHashtableContext FFHashtableContext; + +/** + * Create a fixed-sized Robin Hood hash table. 
+ * + * @param ctx context to allocate and initialize + * @param key_size size of key type in bytes + * @param val_size size of value type in bytes + * @param max_entries maximum number of key-value pairs to store + * + * @return zero on success, nonzero on error + */ +int ff_hashtable_alloc(struct FFHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries); + +/** + * Look up a value from a hash table given a key. + * + * @param ctx hash table context + * @param key pointer to key data + * @param val destination pointer for value data + * + * @return 1 if the key is found, zero if the key is not found + */ +int ff_hashtable_get(const struct FFHashtableContext *ctx, const void *key, void *val); + +/** + * Store a value in a hash table given a key. + * + * @param ctx hash table context + * @param key pointer to key data + * @param val pointer for value data + * + * @return 1 if the key is written, zero if the key is not written due to the hash table reaching max capacity + */ +int ff_hashtable_set(struct FFHashtableContext *ctx, const void *key, const void *val); + +/** + * Delete a value from a hash table given a key. + * + * @param ctx hash table context + * @param key pointer to key data + * + * @return 1 if the key is deleted, zero if the key is not deleted due to not being found + */ +int ff_hashtable_delete(struct FFHashtableContext *ctx, const void *key); + +/** + * Delete all values from a hash table. + * + * @param ctx hash table context + */ +void ff_hashtable_clear(struct FFHashtableContext *ctx); + +/** + * Free a hash table. + * + * @param ctx hash table context + */ +void ff_hashtable_freep(struct FFHashtableContext **ctx); + +#endif diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c index a7a91769fec94..636df5a4e9e1c 100644 --- a/libavcodec/hevc/hevcdec.c +++ b/libavcodec/hevc/hevcdec.c @@ -1110,7 +1110,7 @@ static int hls_slice_header(SliceHeader *sh, const HEVCContext *s, GetBitContext if (pps->tiles_enabled_flag || pps->entropy_coding_sync_enabled_flag) { unsigned num_entry_point_offsets = get_ue_golomb_long(gb); // It would be possible to bound this tighter but this here is simpler - if (num_entry_point_offsets > get_bits_left(gb)) { + if (num_entry_point_offsets > get_bits_left(gb) || num_entry_point_offsets > UINT16_MAX) { av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets); return AVERROR_INVALIDDATA; } diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c index 80494c9749d07..db0e02ee934f3 100644 --- a/libavcodec/hpeldsp.c +++ b/libavcodec/hpeldsp.c @@ -314,9 +314,6 @@ CALL_2X_PIXELS(OPNAME ## _pixels16_y2_8_c, \ CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \ OPNAME ## _pixels8_xy2_8_c, \ 8) \ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_8_c, \ - OPNAME ## _pixels8_8_c, \ - 8) \ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_8_c, \ OPNAME ## _no_rnd_pixels8_x2_8_c, \ 8) \ @@ -330,6 +327,8 @@ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_8_c, \ #define op_avg(a, b) a = rnd_avg32(a, b) #define op_put(a, b) a = b #define put_no_rnd_pixels8_8_c put_pixels8_8_c +#define put_no_rnd_pixels16_8_c put_pixels16_8_c +#define avg_no_rnd_pixels16_8_c avg_pixels16_8_c PIXOP2(avg, op_avg) PIXOP2(put, op_put) #undef op_avg diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c index 4259a117dc27b..8a71c7ef7733b 100644 --- a/libavcodec/idctdsp.c +++ b/libavcodec/idctdsp.c @@ -276,6 +276,10 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) c->idct = ff_faanidct; c->perm_type = 
FF_IDCT_PERM_NONE; #endif /* CONFIG_FAANIDCT */ +#if CONFIG_MPEG4_DECODER + } else if (avctx->idct_algo == FF_IDCT_XVID) { + ff_xvid_idct_init(c); +#endif } else { // accurate/default c->idct_put = ff_simple_idct_put_int16_8bit; c->idct_add = ff_simple_idct_add_int16_8bit; @@ -289,9 +293,6 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; c->add_pixels_clamped = ff_add_pixels_clamped_c; - if (CONFIG_MPEG4_DECODER && avctx->idct_algo == FF_IDCT_XVID) - ff_xvid_idct_init(c, avctx); - #if ARCH_AARCH64 ff_idctdsp_init_aarch64(c, avctx, high_bit_depth); #elif ARCH_ARM diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c index 374dfdc0de1f3..02016e93bfef2 100644 --- a/libavcodec/intelh263dec.c +++ b/libavcodec/intelh263dec.c @@ -19,6 +19,7 @@ */ #include "codec_internal.h" +#include "h263.h" #include "mpegvideo.h" #include "mpegvideodec.h" #include "h263data.h" @@ -56,7 +57,6 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s) av_log(s->avctx, AV_LOG_ERROR, "Intel H.263 free format not supported\n"); return -1; } - s->h263_plus = 0; s->pict_type = AV_PICTURE_TYPE_I + get_bits1(&s->gb); @@ -119,7 +119,9 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s) if (skip_1stop_8data_bits(&s->gb) < 0) return AVERROR_INVALIDDATA; - ff_h263_show_pict_info(s); + s->gob_index = H263_GOB_HEIGHT(s->height); + + ff_h263_show_pict_info(s, 0); return 0; } diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c index d19bdc4dab3cd..21c78f3cb5bd6 100644 --- a/libavcodec/ituh263dec.c +++ b/libavcodec/ituh263dec.c @@ -77,7 +77,8 @@ static const int16_t h263_mb_type_b_map[15]= { MB_TYPE_INTRA4x4 | MB_TYPE_CBP | MB_TYPE_QUANT, }; -void ff_h263_show_pict_info(MpegEncContext *s){ +void ff_h263_show_pict_info(MpegEncContext *s, int h263_plus) +{ if(s->avctx->debug&FF_DEBUG_PICT_INFO){ av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s %d/%d\n", s->qscale, av_get_picture_type_char(s->pict_type), @@ -85,7 +86,7 @@ void ff_h263_show_pict_info(MpegEncContext *s){ s->obmc ? " AP" : "", s->umvplus ? " UMV" : "", s->h263_long_vectors ? " LONG" : "", - s->h263_plus ? " +" : "", + h263_plus ? " +" : "", s->h263_aic ? " AIC" : "", s->alt_inter_vlc ? " AIV" : "", s->modified_quant ? 
" MQ" : "", @@ -1089,6 +1090,7 @@ int ff_h263_decode_picture_header(MpegEncContext *s) { int format, width, height, i, ret; uint32_t startcode; + int h263_plus; align_get_bits(&s->gb); @@ -1137,7 +1139,7 @@ int ff_h263_decode_picture_header(MpegEncContext *s) */ if (format != 7 && format != 6) { - s->h263_plus = 0; + h263_plus = 0; /* H.263v1 */ width = ff_h263_format[format][0]; height = ff_h263_format[format][1]; @@ -1166,7 +1168,7 @@ int ff_h263_decode_picture_header(MpegEncContext *s) int ufep; /* H.263v2 */ - s->h263_plus = 1; + h263_plus = 1; ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */ /* ufep other than 0 and 1 are reserved */ @@ -1314,6 +1316,8 @@ int ff_h263_decode_picture_header(MpegEncContext *s) s->mb_height = (s->height + 15) / 16; s->mb_num = s->mb_width * s->mb_height; + s->gob_index = H263_GOB_HEIGHT(s->height); + if (s->pb_frame) { skip_bits(&s->gb, 3); /* Temporal reference for B-pictures */ if (s->custom_pcf) @@ -1364,7 +1368,8 @@ int ff_h263_decode_picture_header(MpegEncContext *s) s->c_dc_scale_table= ff_mpeg1_dc_scale_table; } - ff_h263_show_pict_info(s); + ff_h263_show_pict_info(s, h263_plus); + if (s->pict_type == AV_PICTURE_TYPE_I && s->codec_tag == AV_RL32("ZYGO") && get_bits_left(&s->gb) >= 85 + 13*3*16 + 50){ int i,j; for(i=0; i<85; i++) av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb)); diff --git a/libavcodec/ituh263enc.c b/libavcodec/ituh263enc.c index 8be7ee4636fdf..4fdf9cf40281c 100644 --- a/libavcodec/ituh263enc.c +++ b/libavcodec/ituh263enc.c @@ -46,6 +46,7 @@ #include "mathops.h" #include "mpegutils.h" #include "internal.h" +#include "put_bits.h" /** * Table of number of bits a motion vector component needs. @@ -230,7 +231,9 @@ static int h263_encode_picture_header(MPVMainEncContext *const m) int best_error= INT_MAX; int custom_pcf; - if(s->c.h263_plus){ + put_bits_assume_flushed(&s->pb); + + if (s->c.codec_id == AV_CODEC_ID_H263P) { for(i=0; i<2; i++){ int div, error; div= (s->c.avctx->time_base.num*1800000LL + 500LL*s->c.avctx->time_base.den) / ((1000LL+i)*s->c.avctx->time_base.den); @@ -247,8 +250,6 @@ static int h263_encode_picture_header(MPVMainEncContext *const m) coded_frame_rate= 1800000; coded_frame_rate_base= (1000+best_clock_code)*best_divisor; - align_put_bits(&s->pb); - put_bits(&s->pb, 22, 0x20); /* PSC */ temp_ref= s->c.picture_number * (int64_t)coded_frame_rate * s->c.avctx->time_base.num / //FIXME use timestamp (coded_frame_rate_base * (int64_t)s->c.avctx->time_base.den); @@ -261,7 +262,7 @@ static int h263_encode_picture_header(MPVMainEncContext *const m) put_bits(&s->pb, 1, 0); /* freeze picture release off */ format = ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format), s->c.width, s->c.height); - if (!s->c.h263_plus) { + if (s->c.codec_id != AV_CODEC_ID_H263P) { /* H.263v1 */ put_bits(&s->pb, 3, format); put_bits(&s->pb, 1, (s->c.pict_type == AV_PICTURE_TYPE_P)); @@ -841,6 +842,9 @@ av_cold void ff_h263_encode_init(MPVMainEncContext *const m) if (s->c.modified_quant) s->c.chroma_qscale_table = ff_h263_chroma_qscale_table; + // Only used for H.263 and H.263+ + s->c.gob_index = H263_GOB_HEIGHT(s->c.height); + // use fcodes >1 only for MPEG-4 & H.263 & H.263+ FIXME switch(s->c.codec_id){ case AV_CODEC_ID_H263P: diff --git a/libavcodec/lcevcdec.c b/libavcodec/lcevcdec.c index 2fe06b8800be8..102f6f32e9513 100644 --- a/libavcodec/lcevcdec.c +++ b/libavcodec/lcevcdec.c @@ -47,7 +47,7 @@ static LCEVC_ColorFormat map_format(int format) return LCEVC_ColorFormat_Unknown; } -static int alloc_base_frame(void 
*logctx, LCEVC_DecoderHandle decoder, +static int alloc_base_frame(void *logctx, FFLCEVCContext *lcevc, const AVFrame *frame, LCEVC_PictureHandle *picture) { LCEVC_PictureDesc desc; @@ -70,22 +70,22 @@ static int alloc_base_frame(void *logctx, LCEVC_DecoderHandle decoder, desc.sampleAspectRatioDen = frame->sample_aspect_ratio.den; /* Allocate LCEVC Picture */ - res = LCEVC_AllocPicture(decoder, &desc, picture); + res = LCEVC_AllocPicture(lcevc->decoder, &desc, picture); if (res != LCEVC_Success) { return AVERROR_EXTERNAL; } - res = LCEVC_LockPicture(decoder, *picture, LCEVC_Access_Write, &lock); + res = LCEVC_LockPicture(lcevc->decoder, *picture, LCEVC_Access_Write, &lock); if (res != LCEVC_Success) return AVERROR_EXTERNAL; - res = LCEVC_GetPicturePlaneCount(decoder, *picture, &planes); + res = LCEVC_GetPicturePlaneCount(lcevc->decoder, *picture, &planes); if (res != LCEVC_Success) return AVERROR_EXTERNAL; for (unsigned i = 0; i < planes; i++) { LCEVC_PicturePlaneDesc plane; - res = LCEVC_GetPictureLockPlaneDesc(decoder, lock, i, &plane); + res = LCEVC_GetPictureLockPlaneDesc(lcevc->decoder, lock, i, &plane); if (res != LCEVC_Success) return AVERROR_EXTERNAL; @@ -96,43 +96,43 @@ static int alloc_base_frame(void *logctx, LCEVC_DecoderHandle decoder, av_image_copy2(data, linesizes, frame->data, frame->linesize, frame->format, frame->width, frame->height); - res = LCEVC_UnlockPicture(decoder, lock); + res = LCEVC_UnlockPicture(lcevc->decoder, lock); if (res != LCEVC_Success) return AVERROR_EXTERNAL; return 0; } -static int alloc_enhanced_frame(void *logctx, LCEVC_DecoderHandle decoder, - const AVFrame *frame, LCEVC_PictureHandle *picture) +static int alloc_enhanced_frame(void *logctx, FFLCEVCFrame *frame_ctx, + LCEVC_PictureHandle *picture) { + FFLCEVCContext *lcevc = frame_ctx->lcevc; LCEVC_PictureDesc desc ; - LCEVC_ColorFormat fmt = map_format(frame->format); + LCEVC_ColorFormat fmt = map_format(frame_ctx->frame->format); LCEVC_PicturePlaneDesc planes[4] = { 0 }; - int width = frame->width * 2 / FFMAX(frame->sample_aspect_ratio.den, 1); - int height = frame->height * 2 / FFMAX(frame->sample_aspect_ratio.num, 1); LCEVC_ReturnCode res; - res = LCEVC_DefaultPictureDesc(&desc, fmt, width, height); + res = LCEVC_DefaultPictureDesc(&desc, fmt, frame_ctx->frame->width, frame_ctx->frame->height); if (res != LCEVC_Success) return AVERROR_EXTERNAL; /* Set plane description */ for (int i = 0; i < 4; i++) { - planes[i].firstSample = frame->data[i]; - planes[i].rowByteStride = frame->linesize[i]; + planes[i].firstSample = frame_ctx->frame->data[i]; + planes[i].rowByteStride = frame_ctx->frame->linesize[i]; } /* Allocate LCEVC Picture */ - res = LCEVC_AllocPictureExternal(decoder, &desc, NULL, planes, picture); + res = LCEVC_AllocPictureExternal(lcevc->decoder, &desc, NULL, planes, picture); if (res != LCEVC_Success) { return AVERROR_EXTERNAL; } return 0; } -static int lcevc_send_frame(void *logctx, FFLCEVCContext *lcevc, const AVFrame *in) +static int lcevc_send_frame(void *logctx, FFLCEVCFrame *frame_ctx, const AVFrame *in) { + FFLCEVCContext *lcevc = frame_ctx->lcevc; const AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_LCEVC); LCEVC_PictureHandle picture; LCEVC_ReturnCode res; @@ -145,7 +145,7 @@ static int lcevc_send_frame(void *logctx, FFLCEVCContext *lcevc, const AVFrame * if (res != LCEVC_Success) return AVERROR_EXTERNAL; - ret = alloc_base_frame(logctx, lcevc->decoder, in, &picture); + ret = alloc_base_frame(logctx, lcevc, in, &picture); if (ret < 0) return ret; @@ -154,7 +154,7 
@@ static int lcevc_send_frame(void *logctx, FFLCEVCContext *lcevc, const AVFrame * return AVERROR_EXTERNAL; memset(&picture, 0, sizeof(picture)); - ret = alloc_enhanced_frame(logctx, lcevc->decoder, in, &picture); + ret = alloc_enhanced_frame(logctx, frame_ctx, &picture); if (ret < 0) return ret; @@ -165,8 +165,9 @@ static int lcevc_send_frame(void *logctx, FFLCEVCContext *lcevc, const AVFrame * return 0; } -static int generate_output(void *logctx, FFLCEVCContext *lcevc, AVFrame *out) +static int generate_output(void *logctx, FFLCEVCFrame *frame_ctx, AVFrame *out) { + FFLCEVCContext *lcevc = frame_ctx->lcevc; LCEVC_PictureDesc desc; LCEVC_DecodeInformation info; LCEVC_PictureHandle picture; @@ -186,6 +187,11 @@ static int generate_output(void *logctx, FFLCEVCContext *lcevc, AVFrame *out) out->crop_right = desc.cropRight; out->sample_aspect_ratio.num = desc.sampleAspectRatioNum; out->sample_aspect_ratio.den = desc.sampleAspectRatioDen; + + av_frame_copy_props(frame_ctx->frame, out); + av_frame_unref(out); + av_frame_move_ref(out, frame_ctx->frame); + out->width = desc.width + out->crop_left + out->crop_right; out->height = desc.height + out->crop_top + out->crop_bottom; @@ -196,13 +202,14 @@ static int generate_output(void *logctx, FFLCEVCContext *lcevc, AVFrame *out) return 0; } -static int lcevc_receive_frame(void *logctx, FFLCEVCContext *lcevc, AVFrame *out) +static int lcevc_receive_frame(void *logctx, FFLCEVCFrame *frame_ctx, AVFrame *out) { + FFLCEVCContext *lcevc = frame_ctx->lcevc; LCEVC_PictureHandle picture; LCEVC_ReturnCode res; int ret; - ret = generate_output(logctx, lcevc, out); + ret = generate_output(logctx, frame_ctx, out); if (ret < 0) return ret; @@ -249,12 +256,7 @@ static int lcevc_init(FFLCEVCContext *lcevc, void *logctx) #if CONFIG_LIBLCEVC_DEC LCEVC_AccelContextHandle dummy = { 0 }; const int32_t event = LCEVC_Log; -#endif - if (lcevc->initialized) - return 0; - -#if CONFIG_LIBLCEVC_DEC if (LCEVC_CreateDecoder(&lcevc->decoder, dummy) != LCEVC_Success) { av_log(logctx, AV_LOG_ERROR, "Failed to create LCEVC decoder\n"); return AVERROR_EXTERNAL; @@ -279,7 +281,8 @@ static int lcevc_init(FFLCEVCContext *lcevc, void *logctx) int ff_lcevc_process(void *logctx, AVFrame *frame) { FrameDecodeData *fdd = frame->private_ref; - FFLCEVCContext *lcevc = fdd->post_process_opaque; + FFLCEVCFrame *frame_ctx = fdd->post_process_opaque; + FFLCEVCContext *lcevc = frame_ctx->lcevc; int ret; if (!lcevc->initialized) { @@ -289,11 +292,14 @@ int ff_lcevc_process(void *logctx, AVFrame *frame) } #if CONFIG_LIBLCEVC_DEC - ret = lcevc_send_frame(logctx, lcevc, frame); + av_assert0(frame_ctx->frame); + + + ret = lcevc_send_frame(logctx, frame_ctx, frame); if (ret) return ret < 0 ? 
ret : 0; - lcevc_receive_frame(logctx, lcevc, frame); + lcevc_receive_frame(logctx, frame_ctx, frame); if (ret < 0) return ret; @@ -317,5 +323,8 @@ int ff_lcevc_alloc(FFLCEVCContext **plcevc) void ff_lcevc_unref(void *opaque) { - av_refstruct_unref(&opaque); + FFLCEVCFrame *lcevc = opaque; + av_refstruct_unref(&lcevc->lcevc); + av_frame_free(&lcevc->frame); + av_free(opaque); } diff --git a/libavcodec/lcevcdec.h b/libavcodec/lcevcdec.h index b21d1073c4af1..62014132d92a8 100644 --- a/libavcodec/lcevcdec.h +++ b/libavcodec/lcevcdec.h @@ -35,6 +35,11 @@ typedef struct FFLCEVCContext { struct AVFrame; +typedef struct FFLCEVCFrame { + FFLCEVCContext *lcevc; + struct AVFrame *frame; +} FFLCEVCFrame; + int ff_lcevc_alloc(FFLCEVCContext **plcevc); int ff_lcevc_process(void *logctx, struct AVFrame *frame); void ff_lcevc_unref(void *opaque); diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c index 9a384fcc39ccc..903292d164397 100644 --- a/libavcodec/libaomenc.c +++ b/libavcodec/libaomenc.c @@ -681,7 +681,6 @@ static av_cold int aom_init(AVCodecContext *avctx, struct aom_codec_enc_cfg enccfg = { 0 }; aom_codec_flags_t flags = (avctx->flags & AV_CODEC_FLAG_PSNR) ? AOM_CODEC_USE_PSNR : 0; - AVCPBProperties *cpb_props; int res; aom_img_fmt_t img_fmt; aom_codec_caps_t codec_caps = aom_codec_get_caps(iface); @@ -989,10 +988,6 @@ static av_cold int aom_init(AVCodecContext *avctx, if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) ctx->rawimg.bit_depth = enccfg.g_bit_depth; - cpb_props = ff_encode_add_cpb_side_data(avctx); - if (!cpb_props) - return AVERROR(ENOMEM); - ctx->dovi.logctx = avctx; if ((res = ff_dovi_configure(&ctx->dovi, avctx)) < 0) return res; @@ -1019,6 +1014,10 @@ static av_cold int aom_init(AVCodecContext *avctx, return ret; } + AVCPBProperties *cpb_props = ff_encode_add_cpb_side_data(avctx); + if (!cpb_props) + return AVERROR(ENOMEM); + if (enccfg.rc_end_usage == AOM_CBR || enccfg.g_pass != AOM_RC_ONE_PASS) { cpb_props->max_bitrate = avctx->rc_max_rate; diff --git a/libavcodec/libaribb24.c b/libavcodec/libaribb24.c index 6e062cbffd699..a26e456295418 100644 --- a/libavcodec/libaribb24.c +++ b/libavcodec/libaribb24.c @@ -96,13 +96,13 @@ static int libaribb24_generate_ass_header(AVCodecContext *avctx) font_size = get_profile_font_size(avctx); avctx->subtitle_header = av_asprintf( - "[Script Info]\r\n" - "; Script generated by FFmpeg/Lavc%s\r\n" - "ScriptType: v4.00+\r\n" - "PlayResX: %d\r\n" - "PlayResY: %d\r\n" - "\r\n" - "[V4+ Styles]\r\n" + "[Script Info]\n" + "; Script generated by FFmpeg/Lavc%s\n" + "ScriptType: v4.00+\n" + "PlayResX: %d\n" + "PlayResY: %d\n" + "\n" + "[V4+ Styles]\n" /* ASSv4 header */ "Format: Name, " @@ -113,7 +113,7 @@ static int libaribb24_generate_ass_header(AVCodecContext *avctx) "Spacing, Angle, " "BorderStyle, Outline, Shadow, " "Alignment, MarginL, MarginR, MarginV, " - "Encoding\r\n" + "Encoding\n" "Style: " "Default," /* Name */ @@ -124,11 +124,11 @@ static int libaribb24_generate_ass_header(AVCodecContext *avctx) "0,0," /* Spacing, Angle */ "%d,1,0," /* BorderStyle, Outline, Shadow */ "%d,10,10,10," /* Alignment, Margin[LRV] */ - "0\r\n" /* Encoding */ + "0\n" /* Encoding */ - "\r\n" - "[Events]\r\n" - "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n", + "\n" + "[Events]\n" + "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n", !(avctx->flags & AV_CODEC_FLAG_BITEXACT) ? 
AV_STRINGIFY(LIBAVCODEC_VERSION) : "", plane_width, plane_height, ASS_DEFAULT_FONT, font_size, ASS_DEFAULT_COLOR, diff --git a/libavcodec/libaribcaption.c b/libavcodec/libaribcaption.c index 91691f67787df..53d334803fe41 100644 --- a/libavcodec/libaribcaption.c +++ b/libavcodec/libaribcaption.c @@ -522,14 +522,14 @@ static int set_ass_header(ARIBCaptionContext *ctx) av_freep(&avctx->subtitle_header); avctx->subtitle_header = av_asprintf( - "[Script Info]\r\n" - "ScriptType: v4.00+\r\n" - "PlayResX: %d\r\n" - "PlayResY: %d\r\n" - "WrapStyle: 2\r\n" /* 2: no word wrapping */ - "\r\n" - - "[V4+ Styles]\r\n" + "[Script Info]\n" + "ScriptType: v4.00+\n" + "PlayResX: %d\n" + "PlayResY: %d\n" + "WrapStyle: 2\n" /* 2: no word wrapping */ + "\n" + + "[V4+ Styles]\n" "Format: Name, " "Fontname, Fontsize, " "PrimaryColour, SecondaryColour, OutlineColour, BackColour, " @@ -538,7 +538,7 @@ static int set_ass_header(ARIBCaptionContext *ctx) "Spacing, Angle, " "BorderStyle, Outline, Shadow, " "Alignment, MarginL, MarginR, MarginV, " - "Encoding\r\n" + "Encoding\n" "Style: " "Default," /* Name */ @@ -549,11 +549,11 @@ static int set_ass_header(ARIBCaptionContext *ctx) "0,0," /* Spacing, Angle */ "%d,%d,%d," /* BorderStyle, Outline, Shadow */ "%d,10,10,10," /* Alignment, Margin[LRV] */ - "0\r\n" /* Encoding */ - "\r\n" + "0\n" /* Encoding */ + "\n" - "[Events]\r\n" - "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n", + "[Events]\n" + "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n", ctx->plane_width, ctx->plane_height, font_name, ctx->font_size, ASS_DEFAULT_COLOR, ASS_DEFAULT_COLOR, diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c index 68ffe1f76ce1d..e02ecb8b3a3f9 100644 --- a/libavcodec/libzvbi-teletextdec.c +++ b/libavcodec/libzvbi-teletextdec.c @@ -91,7 +91,7 @@ static int my_ass_subtitle_header(AVCodecContext *avctx) if (ret < 0) return ret; - event_pos = strstr(avctx->subtitle_header, "\r\n[Events]\r\n"); + event_pos = strstr(avctx->subtitle_header, "\n[Events]\n"); if (!event_pos) return AVERROR_BUG; @@ -106,7 +106,7 @@ static int my_ass_subtitle_header(AVCodecContext *avctx) "0,0," /* Spacing, Angle */ "3,0.1,0," /* BorderStyle, Outline, Shadow */ "5,1,1,1," /* Alignment, Margin[LRV] */ - "0\r\n" /* Encoding */ + "0\n" /* Encoding */ "Style: " "Subtitle," /* Name */ "Monospace,16," /* Font{name,size} */ @@ -116,7 +116,7 @@ static int my_ass_subtitle_header(AVCodecContext *avctx) "0,0," /* Spacing, Angle */ "1,1,1," /* BorderStyle, Outline, Shadow */ "8,48,48,20," /* Alignment, Margin[LRV] */ - "0\r\n" /* Encoding */ + "0\n" /* Encoding */ , event_pos); if (!new_header) diff --git a/libavcodec/mips/pixblockdsp_init_mips.c b/libavcodec/mips/pixblockdsp_init_mips.c index 00f189d558d7f..acea95d36e74e 100644 --- a/libavcodec/mips/pixblockdsp_init_mips.c +++ b/libavcodec/mips/pixblockdsp_init_mips.c @@ -23,7 +23,7 @@ #include "libavcodec/pixblockdsp.h" #include "pixblockdsp_mips.h" -void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); @@ -31,27 +31,13 @@ void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, if (have_mmi(cpu_flags)) { c->diff_pixels = ff_diff_pixels_mmi; - if (!high_bit_depth || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { + if (!high_bit_depth) c->get_pixels = ff_get_pixels_8_mmi; - } } if (have_msa(cpu_flags)) { c->diff_pixels = 
ff_diff_pixels_msa; - switch (avctx->bits_per_raw_sample) { - case 9: - case 10: - case 12: - case 14: - c->get_pixels = ff_get_pixels_16_msa; - break; - default: - if (avctx->bits_per_raw_sample <= 8 || avctx->codec_type != - AVMEDIA_TYPE_VIDEO) { - c->get_pixels = ff_get_pixels_8_msa; - } - break; - } + c->get_pixels = high_bit_depth ? ff_get_pixels_16_msa : ff_get_pixels_8_msa; } } diff --git a/libavcodec/mjpegenc_common.c b/libavcodec/mjpegenc_common.c index e7a4f8f16a67a..21b3b19b9362b 100644 --- a/libavcodec/mjpegenc_common.c +++ b/libavcodec/mjpegenc_common.c @@ -304,7 +304,8 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb, switch (avctx->codec_id) { case AV_CODEC_ID_MJPEG: put_marker(pb, SOF0 ); break; case AV_CODEC_ID_LJPEG: put_marker(pb, SOF3 ); break; - default: av_assert0(0); + default: av_unreachable("ff_mjpeg_encode_picture_header only called by " + "AMV, LJPEG, MJPEG and the former has been ruled out"); } put_bits(pb, 16, 8 + 3 * components); @@ -375,7 +376,7 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb, switch (avctx->codec_id) { case AV_CODEC_ID_MJPEG: put_bits(pb, 8, 63); break; /* Se (not used) */ case AV_CODEC_ID_LJPEG: put_bits(pb, 8, 0); break; /* not used */ - default: av_assert0(0); + default: av_unreachable("Only LJPEG, MJPEG possible here"); } put_bits(pb, 8, 0); /* Ah/Al (not used) */ diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index fc41c548e204c..9cf1bb9b28eb4 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -80,7 +80,7 @@ typedef struct Mpeg1Context { int has_afd; int slice_count; unsigned aspect_ratio_info; - int save_width, save_height, save_progressive_seq, save_chroma_format; + int save_progressive_seq, save_chroma_format; AVRational frame_rate_ext; /* MPEG-2 specific framerate modificator */ unsigned frame_rate_index; int sync; /* Did we reach a sync point like a GOP/SEQ/KEYFrame? 
*/ @@ -381,9 +381,6 @@ static inline int mpeg2_decode_block_intra(MpegEncContext *s, return 0; } -/******************************************/ -/* decoding */ - static inline int get_dmv(MpegEncContext *s) { if (get_bits1(&s->gb)) @@ -915,8 +912,6 @@ static int mpeg_decode_postinit(AVCodecContext *avctx) if (!s->context_initialized || avctx->coded_width != s->width || avctx->coded_height != s->height || - s1->save_width != s->width || - s1->save_height != s->height || s1->save_chroma_format != s->chroma_format || (s1->save_progressive_seq != s->progressive_sequence && FFALIGN(s->height, 16) != FFALIGN(s->height, 32)) || 0) { @@ -934,8 +929,6 @@ static int mpeg_decode_postinit(AVCodecContext *avctx) (s1->bit_rate != 0x3FFFF*400 || s1->vbv_delay != 0xFFFF)) { avctx->bit_rate = s1->bit_rate; } - s1->save_width = s->width; - s1->save_height = s->height; s1->save_progressive_seq = s->progressive_sequence; s1->save_chroma_format = s->chroma_format; @@ -1863,9 +1856,8 @@ static int vcr2_init_sequence(AVCodecContext *avctx) } else { s->codec_id = s->avctx->codec_id = AV_CODEC_ID_MPEG2VIDEO; } - s1->save_width = s->width; - s1->save_height = s->height; s1->save_progressive_seq = s->progressive_sequence; + s1->save_chroma_format = s->chroma_format; return 0; } diff --git a/libavcodec/mpeg12enc.c b/libavcodec/mpeg12enc.c index 9d0a8e41704f4..96957235e9e60 100644 --- a/libavcodec/mpeg12enc.c +++ b/libavcodec/mpeg12enc.c @@ -49,6 +49,7 @@ #include "mpegvideo.h" #include "mpegvideoenc.h" #include "profiles.h" +#include "put_bits.h" #include "rl.h" #if CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER @@ -155,6 +156,8 @@ static void mpeg1_encode_sequence_header(MPEG12EncContext *mpeg12) AVRational aspect_ratio = s->c.avctx->sample_aspect_ratio; int aspect_ratio_info; + put_bits_assume_flushed(&s->pb); + if (!(s->c.cur_pic.ptr->f->flags & AV_FRAME_FLAG_KEY)) return; @@ -339,6 +342,8 @@ static int mpeg1_encode_picture_header(MPVMainEncContext *const m) MPVEncContext *const s = &m->s; const AVFrameSideData *side_data; + put_bits_assume_flushed(&s->pb); + mpeg1_encode_sequence_header(mpeg12); /* MPEG-1 picture header */ @@ -454,8 +459,7 @@ static int mpeg1_encode_picture_header(MPVMainEncContext *const m) put_bits(&s->pb, 1, 1); // reserved_bit put_bits(&s->pb, 7, fpa_type); // S3D_video_format_type - put_bits(&s->pb, 8, 0x04); // reserved_data[0] - put_bits(&s->pb, 8, 0xFF); // reserved_data[1] + put_bits(&s->pb, 16, 0x04FF); // reserved_data } } @@ -1121,7 +1125,7 @@ static av_cold int encode_init(AVCodecContext *avctx) } else { s->min_qcoeff = -2047; s->max_qcoeff = 2047; - s->c.mpeg_quant = 1; + s->mpeg_quant = 1; } if (s->c.intra_vlc_format) { s->intra_ac_vlc_length = diff --git a/libavcodec/mpeg4video.c b/libavcodec/mpeg4video.c index 2c0c1044f2014..3980a3930586c 100644 --- a/libavcodec/mpeg4video.c +++ b/libavcodec/mpeg4video.c @@ -20,25 +20,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/thread.h" - #include "mpegutils.h" #include "mpegvideo.h" #include "mpeg4video.h" #include "mpeg4data.h" -static av_cold void mpeg4_init_rl_intra(void) -{ - static uint8_t mpeg4_rl_intra_table[2][2 * MAX_RUN + MAX_LEVEL + 3]; - ff_rl_init(&ff_mpeg4_rl_intra, mpeg4_rl_intra_table); -} - -av_cold void ff_mpeg4_init_rl_intra(void) -{ - static AVOnce init_static_once = AV_ONCE_INIT; - ff_thread_once(&init_static_once, mpeg4_init_rl_intra); -} - int ff_mpeg4_get_video_packet_prefix_length(enum AVPictureType pict_type, int f_code, int b_code) { diff --git 
a/libavcodec/mpeg4videodata.h b/libavcodec/mpeg4videodata.h index 8aac8a225587c..baca8a0b9a499 100644 --- a/libavcodec/mpeg4videodata.h +++ b/libavcodec/mpeg4videodata.h @@ -35,7 +35,6 @@ extern const int8_t ff_mpeg4_intra_level[102]; extern const int8_t ff_mpeg4_intra_run[102]; extern RLTable ff_mpeg4_rl_intra; -void ff_mpeg4_init_rl_intra(void); /* Note this is identical to the intra rvlc except that it is reordered. */ extern RLTable ff_rvlc_rl_inter; diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c index d5979096ae2d7..313d73157f73b 100644 --- a/libavcodec/mpeg4videodec.c +++ b/libavcodec/mpeg4videodec.c @@ -24,6 +24,7 @@ #include "config_components.h" +#include "libavutil/avassert.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/thread.h" @@ -47,7 +48,6 @@ #include "profiles.h" #include "qpeldsp.h" #include "threadprogress.h" -#include "xvididct.h" #include "unary.h" #if 0 //3IV1 is quite rare and it slows things down a tiny bit @@ -605,7 +605,8 @@ static int mpeg4_decode_sprite_trajectory(Mpeg4DecContext *ctx, GetBitContext *g ctx->sprite_shift[1] = alpha + beta + rho - min_ab + 2; break; default: - av_assert0(0); + av_unreachable("num_sprite_warping_points outside of 0..3 results in an error" + "in which num_sprite_warping_points is reset to zero"); } /* try to simplify the situation */ if (sprite_delta[0][0] == a << ctx->sprite_shift[0] && @@ -1398,7 +1399,7 @@ static inline int mpeg4_decode_block(Mpeg4DecContext *ctx, int16_t *block, scan_table = s->intra_scantable.permutated; - if (s->mpeg_quant) { + if (ctx->mpeg_quant) { qmul = 1; qadd = 0; if (rvlc) @@ -2154,7 +2155,7 @@ static int mpeg4_decode_studio_block(MpegEncContext *s, int32_t block[64], int n s->last_dc[cc] += dct_diff; - if (s->mpeg_quant) + if (ctx->mpeg_quant) block[0] = s->last_dc[cc] * (8 >> s->intra_dc_precision); else block[0] = s->last_dc[cc] * (8 >> s->intra_dc_precision) * (8 >> s->dct_precision); @@ -2584,7 +2585,7 @@ static int decode_studio_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb) skip_bits(gb, 15); /* latter_half_vbv_occupancy */ check_marker(s->avctx, gb, "after latter_half_vbv_occupancy"); s->low_delay = get_bits1(gb); - s->mpeg_quant = get_bits1(gb); /* mpeg2_stream */ + ctx->mpeg_quant = get_bits1(gb); /* mpeg2_stream */ next_start_code_studio(gb); extension_and_user_data(s, gb, 2); @@ -2766,7 +2767,7 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb) // FIXME a bunch of grayscale shape things - if ((s->mpeg_quant = get_bits1(gb))) { /* vol_quant_type */ + if ((ctx->mpeg_quant = get_bits1(gb))) { /* vol_quant_type */ int i, v; mpeg4_load_default_matrices(s); @@ -3414,8 +3415,10 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb, } } - s->dct_unquantize_intra = s->mpeg_quant ? ctx->dct_unquantize_mpeg2_intra - : ctx->dct_unquantize_h263_intra; + s->dct_unquantize_intra = ctx->mpeg_quant ? ctx->dct_unquantize_mpeg2_intra + : ctx->dct_unquantize_h263_intra; + // The following tells ff_mpv_reconstruct_mb() to unquantize iff mpeg_quant + s->dct_unquantize_inter = ctx->mpeg_quant ? 
ctx->dct_unquantize_mpeg2_inter : NULL; end: /* detect buggy encoders which don't set the low_delay flag @@ -3854,6 +3857,7 @@ static int mpeg4_update_thread_context(AVCodecContext *dst, s->sprite_warping_accuracy = s1->sprite_warping_accuracy; s->num_sprite_warping_points = s1->num_sprite_warping_points; s->m.data_partitioning = s1->m.data_partitioning; + s->mpeg_quant = s1->mpeg_quant; s->rvlc = s1->rvlc; s->resync_marker = s1->resync_marker; s->t_frame = s1->t_frame; @@ -3878,9 +3882,6 @@ static int mpeg4_update_thread_context(AVCodecContext *dst, memcpy(s->sprite_shift, s1->sprite_shift, sizeof(s1->sprite_shift)); memcpy(s->sprite_traj, s1->sprite_traj, sizeof(s1->sprite_traj)); - if (!init && s1->xvid_build >= 0) - ff_xvid_idct_init(&s->m.idsp, dst); - return av_buffer_replace(&s->bitstream_buffer, s1->bitstream_buffer); } @@ -3899,7 +3900,6 @@ static int mpeg4_update_thread_context_for_user(AVCodecContext *dst, static av_cold void mpeg4_init_static(void) { - static uint8_t mpeg4_rvlc_rl_tables[2][2][2 * MAX_RUN + MAX_LEVEL + 3]; static VLCElem vlc_buf[6498]; VLCInitState state = VLC_INIT_STATE(vlc_buf); @@ -3921,9 +3921,9 @@ static av_cold void mpeg4_init_static(void) 0, 0); } - ff_mpeg4_init_rl_intra(); - ff_rl_init(&ff_rvlc_rl_inter, mpeg4_rvlc_rl_tables[0]); - ff_rl_init(&ff_rvlc_rl_intra, mpeg4_rvlc_rl_tables[1]); + static uint8_t mpeg4_rl_intra_table[2][2 * MAX_RUN + MAX_LEVEL + 3]; + ff_rl_init(&ff_mpeg4_rl_intra, mpeg4_rl_intra_table); + INIT_FIRST_VLC_RL(ff_mpeg4_rl_intra, 554); VLC_INIT_RL(ff_rvlc_rl_inter, 1072); INIT_FIRST_VLC_RL(ff_rvlc_rl_intra, 1072); @@ -3964,8 +3964,8 @@ static av_cold int decode_init(AVCodecContext *avctx) ctx->dct_unquantize_h263_intra = unquant_dsp_ctx.dct_unquantize_h263_intra; ctx->dct_unquantize_mpeg2_intra = unquant_dsp_ctx.dct_unquantize_mpeg2_intra; // dct_unquantize_inter is only used with MPEG-2 quantizers, - // so we can already set dct_unquantize_inter here once and for all. - s->dct_unquantize_inter = unquant_dsp_ctx.dct_unquantize_mpeg2_inter; + // so that is all we keep. 
+ ctx->dct_unquantize_mpeg2_inter = unquant_dsp_ctx.dct_unquantize_mpeg2_inter; s->y_dc_scale_table = ff_mpeg4_y_dc_scale_table; s->c_dc_scale_table = ff_mpeg4_c_dc_scale_table; diff --git a/libavcodec/mpeg4videodec.h b/libavcodec/mpeg4videodec.h index 57a2f81816a18..ae8428fd2bdbc 100644 --- a/libavcodec/mpeg4videodec.h +++ b/libavcodec/mpeg4videodec.h @@ -52,6 +52,7 @@ typedef struct Mpeg4DecContext { /// sprite shift [isChroma] int sprite_shift[2]; + int mpeg_quant; // reversible vlc int rvlc; /// could this stream contain resync markers @@ -91,15 +92,19 @@ typedef struct Mpeg4DecContext { Mpeg4VideoDSPContext mdsp; + void (*dct_unquantize_mpeg2_inter)(MpegEncContext *s, + int16_t *block, int n, int qscale); void (*dct_unquantize_mpeg2_intra)(MpegEncContext *s, int16_t *block, int n, int qscale); void (*dct_unquantize_h263_intra)(MpegEncContext *s, int16_t *block, int n, int qscale); - DECLARE_ALIGNED(8, int32_t, block32)[12][64]; + union { + DECLARE_ALIGNED(8, int32_t, block32)[12][64]; + int16_t dpcm_macroblock[3][256]; + }; // 0 = DCT, 1 = DPCM top to bottom scan, -1 = DPCM bottom to top scan int dpcm_direction; - int16_t dpcm_macroblock[3][256]; } Mpeg4DecContext; int ff_mpeg4_decode_picture_header(MpegEncContext *s); diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c index 01d5076547f21..0fa8159f181ce 100644 --- a/libavcodec/mpeg4videoenc.c +++ b/libavcodec/mpeg4videoenc.c @@ -35,6 +35,7 @@ #include "mpeg4videoenc.h" #include "mpegvideoenc.h" #include "profiles.h" +#include "put_bits.h" #include "version.h" /** @@ -237,11 +238,10 @@ static inline int decide_ac_pred(MPVEncContext *const s, int16_t block[6][64], */ void ff_clean_mpeg4_qscales(MPVEncContext *const s) { - int8_t *const qscale_table = s->c.cur_pic.qscale_table; - ff_clean_h263_qscales(s); if (s->c.pict_type == AV_PICTURE_TYPE_B) { + int8_t *const qscale_table = s->c.cur_pic.qscale_table; int odd = 0; /* ok, come on, this isn't funny anymore, there's more code for * handling this MPEG-4 mess than for the actual adaptive quantization */ @@ -291,46 +291,19 @@ static inline void mpeg4_encode_dc(PutBitContext *s, int level, int n) } } -static inline int mpeg4_get_dc_length(int level, int n) -{ - if (n < 4) - return uni_DCtab_lum_len[level + 256]; - else - return uni_DCtab_chrom_len[level + 256]; -} - /** - * Encode an 8x8 block. - * @param n block index (0-3 are luma, 4-5 are chroma) + * Encode the AC coefficients of an 8x8 block. 
*/ -static inline void mpeg4_encode_block(const MPVEncContext *const s, - const int16_t *block, int n, int intra_dc, - const uint8_t *scan_table, PutBitContext *dc_pb, - PutBitContext *ac_pb) +static inline void mpeg4_encode_ac_coeffs(const int16_t block[64], + const int last_index, int i, + const uint8_t *const scan_table, + PutBitContext *const ac_pb, + const uint32_t *const bits_tab, + const uint8_t *const len_tab) { - int i, last_non_zero; - const uint32_t *bits_tab; - const uint8_t *len_tab; - const int last_index = s->c.block_last_index[n]; - - if (s->c.mb_intra) { // Note gcc (3.2.1 at least) will optimize this away - /* MPEG-4 based DC predictor */ - mpeg4_encode_dc(dc_pb, intra_dc, n); - if (last_index < 1) - return; - i = 1; - bits_tab = uni_mpeg4_intra_rl_bits; - len_tab = uni_mpeg4_intra_rl_len; - } else { - if (last_index < 0) - return; - i = 0; - bits_tab = uni_mpeg4_inter_rl_bits; - len_tab = uni_mpeg4_inter_rl_len; - } + int last_non_zero = i - 1; /* AC coefs */ - last_non_zero = i - 1; for (; i < last_index; i++) { int level = block[scan_table[i]]; if (level) { @@ -364,93 +337,40 @@ static inline void mpeg4_encode_block(const MPVEncContext *const s, } } -static int mpeg4_get_block_length(MPVEncContext *const s, - const int16_t *block, int n, - int intra_dc, const uint8_t *scan_table) +static void mpeg4_encode_blocks_inter(MPVEncContext *const s, + const int16_t block[6][64], + PutBitContext *ac_pb) { - int i, last_non_zero; - const uint8_t *len_tab; - const int last_index = s->c.block_last_index[n]; - int len = 0; - - if (s->c.mb_intra) { // Note gcc (3.2.1 at least) will optimize this away - /* MPEG-4 based DC predictor */ - len += mpeg4_get_dc_length(intra_dc, n); - if (last_index < 1) - return len; - i = 1; - len_tab = uni_mpeg4_intra_rl_len; - } else { + /* encode each block */ + for (int n = 0; n < 6; ++n) { + const int last_index = s->c.block_last_index[n]; if (last_index < 0) - return 0; - i = 0; - len_tab = uni_mpeg4_inter_rl_len; - } + continue; - /* AC coefs */ - last_non_zero = i - 1; - for (; i < last_index; i++) { - int level = block[scan_table[i]]; - if (level) { - int run = i - last_non_zero - 1; - level += 64; - if ((level & (~127)) == 0) { - const int index = UNI_MPEG4_ENC_INDEX(0, run, level); - len += len_tab[index]; - } else { // ESC3 - len += 7 + 2 + 1 + 6 + 1 + 12 + 1; - } - last_non_zero = i; - } - } - /* if (i <= last_index) */ { - int level = block[scan_table[i]]; - int run = i - last_non_zero - 1; - level += 64; - if ((level & (~127)) == 0) { - const int index = UNI_MPEG4_ENC_INDEX(1, run, level); - len += len_tab[index]; - } else { // ESC3 - len += 7 + 2 + 1 + 6 + 1 + 12 + 1; - } + mpeg4_encode_ac_coeffs(block[n], last_index, 0, + s->c.intra_scantable.permutated, ac_pb, + uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len); } - - return len; } -static inline void mpeg4_encode_blocks(MPVEncContext *const s, - const int16_t block[6][64], - const int intra_dc[6], - const uint8_t * const *scan_table, - PutBitContext *dc_pb, - PutBitContext *ac_pb) +static void mpeg4_encode_blocks_intra(MPVEncContext *const s, + const int16_t block[6][64], + const int intra_dc[6], + const uint8_t * const *scan_table, + PutBitContext *dc_pb, + PutBitContext *ac_pb) { - int i; + /* encode each block */ + for (int n = 0; n < 6; ++n) { + mpeg4_encode_dc(dc_pb, intra_dc[n], n); - if (scan_table) { - if (s->c.avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT) { - for (i = 0; i < 6; i++) - skip_put_bits(&s->pb, - mpeg4_get_block_length(s, block[i], i, - intra_dc[i], scan_table[i])); - } 
else { - /* encode each block */ - for (i = 0; i < 6; i++) - mpeg4_encode_block(s, block[i], i, - intra_dc[i], scan_table[i], dc_pb, ac_pb); - } - } else { - if (s->c.avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT) { - for (i = 0; i < 6; i++) - skip_put_bits(&s->pb, - mpeg4_get_block_length(s, block[i], i, 0, - s->c.intra_scantable.permutated)); - } else { - /* encode each block */ - for (i = 0; i < 6; i++) - mpeg4_encode_block(s, block[i], i, 0, - s->c.intra_scantable.permutated, dc_pb, ac_pb); - } + const int last_index = s->c.block_last_index[n]; + if (last_index <= 0) + continue; + + mpeg4_encode_ac_coeffs(block[n], last_index, 1, + scan_table[n], ac_pb, + uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len); } } @@ -640,7 +560,7 @@ static void mpeg4_encode_mb(MPVEncContext *const s, int16_t block[][64], if (interleaved_stats) s->mv_bits += get_bits_diff(s); - mpeg4_encode_blocks(s, block, NULL, NULL, NULL, &s->pb); + mpeg4_encode_blocks_inter(s, block, &s->pb); if (interleaved_stats) s->p_tex_bits += get_bits_diff(s); @@ -803,7 +723,7 @@ static void mpeg4_encode_mb(MPVEncContext *const s, int16_t block[][64], if (interleaved_stats) s->mv_bits += get_bits_diff(s); - mpeg4_encode_blocks(s, block, NULL, NULL, NULL, tex_pb); + mpeg4_encode_blocks_inter(s, block, tex_pb); if (interleaved_stats) s->p_tex_bits += get_bits_diff(s); @@ -865,7 +785,7 @@ static void mpeg4_encode_mb(MPVEncContext *const s, int16_t block[][64], if (interleaved_stats) s->misc_bits += get_bits_diff(s); - mpeg4_encode_blocks(s, block, dc_diff, scan_table, dc_pb, tex_pb); + mpeg4_encode_blocks_intra(s, block, dc_diff, scan_table, dc_pb, tex_pb); if (interleaved_stats) s->i_tex_bits += get_bits_diff(s); @@ -1032,9 +952,9 @@ static void mpeg4_encode_vol_header(Mpeg4EncContext *const m4, put_bits(&s->pb, 2, 0); /* sprite enable */ put_bits(&s->pb, 1, 0); /* not 8 bit == false */ - put_bits(&s->pb, 1, s->c.mpeg_quant); /* quant type = (0 = H.263 style) */ + put_bits(&s->pb, 1, s->mpeg_quant); /* quant type = (0 = H.263 style) */ - if (s->c.mpeg_quant) { + if (s->mpeg_quant) { ff_write_quant_matrix(&s->pb, s->c.avctx->intra_matrix); ff_write_quant_matrix(&s->pb, s->c.avctx->inter_matrix); } @@ -1070,6 +990,8 @@ static int mpeg4_encode_picture_header(MPVMainEncContext *const m) uint64_t time_incr; int64_t time_div, time_mod; + put_bits_assume_flushed(&s->pb); + if (s->c.pict_type == AV_PICTURE_TYPE_I) { if (!(s->c.avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) { if (s->c.avctx->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT) // HACK, the reference sw is buggy @@ -1181,95 +1103,78 @@ static av_cold void init_uni_dc_tab(void) static av_cold void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab) { - int slevel, run, last; - - av_assert0(MAX_LEVEL >= 64); - av_assert0(MAX_RUN >= 63); + // Type 3 escape method. The escape code is the same for both VLCs + // (0x3, seven bits), so it is hardcoded. 
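For reference, a self-contained sketch (commentary, not patch content) of the 30-bit type-3 escape word that the hardcoded constants and the len_tab value of 30 below correspond to; the helper name is made up, and the field breakdown follows the 7 + 2 + 1 + 6 + 1 + 12 + 1 = 30 bit lengths visible in the removed code above:

/* Illustration only: assembling one complete type-3 escape code word. */
static unsigned mpeg4_esc3_word(unsigned last, unsigned run, int slevel)
{
    return (3u << 23)              |  /* 7-bit VLC escape code 0000011      */
           (3u << 21)              |  /* 2-bit escape mode "11" (type 3)    */
           (last << 20)            |  /* LAST: 1 = final coefficient        */
           (run  << 14)            |  /* RUN, 6 bits                        */
           (1u << 13)              |  /* marker bit                         */
           ((slevel & 0xfff) << 1) |  /* LEVEL, 12 bits, two's complement   */
           1u;                        /* marker bit                         */
}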
+ memset(len_tab, 30, 2 * 2 * 64 * 64); + len_tab += 64; + bits_tab += 64; + for (int run = 0; run < 64; ++run) { + for (int level = 1;; ++level) { + // Escape code type 3 not last run (6 bits) marker marker + unsigned code = (3 << 23) | (3 << 21) | (0 << 20) | (run << 14) | (1 << 13) | 1; + // first the negative levels + bits_tab[UNI_MPEG4_ENC_INDEX(0, run, -level)] = code | (-level & 0xfff) << 1; + bits_tab[UNI_MPEG4_ENC_INDEX(1, run, -level)] = + bits_tab[UNI_MPEG4_ENC_INDEX(0, run, -level)] | (1 << 20) /* last */; + + if (level == 64) // positive levels have a range of 1..63 + break; + bits_tab[UNI_MPEG4_ENC_INDEX(0, run, level)] = code | level << 1; + bits_tab[UNI_MPEG4_ENC_INDEX(1, run, level)] = + bits_tab[UNI_MPEG4_ENC_INDEX(0, run, level)] | (1 << 20) /* last */; + } + // Is this needed at all? + len_tab[UNI_MPEG4_ENC_INDEX(0, run, 0)] = + len_tab[UNI_MPEG4_ENC_INDEX(1, run, 0)] = 0; + } - for (slevel = -64; slevel < 64; slevel++) { - if (slevel == 0) - continue; - for (run = 0; run < 64; run++) { - for (last = 0; last <= 1; last++) { - const int index = UNI_MPEG4_ENC_INDEX(last, run, slevel + 64); - int level = slevel < 0 ? -slevel : slevel; - int sign = slevel < 0 ? 1 : 0; - int bits, len, code; - int level1, run1; - - len_tab[index] = 100; - - /* ESC0 */ - code = get_rl_index(rl, last, run, level); - bits = rl->table_vlc[code][0]; - len = rl->table_vlc[code][1]; - bits = bits * 2 + sign; - len++; - - if (code != rl->n && len < len_tab[index]) { - bits_tab[index] = bits; - len_tab[index] = len; - } - /* ESC1 */ - bits = rl->table_vlc[rl->n][0]; - len = rl->table_vlc[rl->n][1]; - bits = bits * 2; - len++; // esc1 - level1 = level - rl->max_level[last][run]; - if (level1 > 0) { - code = get_rl_index(rl, last, run, level1); - bits <<= rl->table_vlc[code][1]; - len += rl->table_vlc[code][1]; - bits += rl->table_vlc[code][0]; - bits = bits * 2 + sign; - len++; - - if (code != rl->n && len < len_tab[index]) { - bits_tab[index] = bits; - len_tab[index] = len; - } - } - /* ESC2 */ - bits = rl->table_vlc[rl->n][0]; - len = rl->table_vlc[rl->n][1]; - bits = bits * 4 + 2; - len += 2; // esc2 - run1 = run - rl->max_run[last][level] - 1; - if (run1 >= 0) { - code = get_rl_index(rl, last, run1, level); - bits <<= rl->table_vlc[code][1]; - len += rl->table_vlc[code][1]; - bits += rl->table_vlc[code][0]; - bits = bits * 2 + sign; - len++; - - if (code != rl->n && len < len_tab[index]) { - bits_tab[index] = bits; - len_tab[index] = len; - } - } - /* ESC3 */ - bits = rl->table_vlc[rl->n][0]; - len = rl->table_vlc[rl->n][1]; - bits = bits * 4 + 3; - len += 2; // esc3 - bits = bits * 2 + last; - len++; - bits = bits * 64 + run; - len += 6; - bits = bits * 2 + 1; - len++; // marker - bits = bits * 4096 + (slevel & 0xfff); - len += 12; - bits = bits * 2 + 1; - len++; // marker - - if (len < len_tab[index]) { - bits_tab[index] = bits; - len_tab[index] = len; - } - } + uint8_t max_run[2][32] = { 0 }; + +#define VLC_NUM_CODES 102 // excluding the escape + av_assert2(rl->n == VLC_NUM_CODES); + for (int i = VLC_NUM_CODES - 1, max_level, cur_run = 0; i >= 0; --i) { + int run = rl->table_run[i], level = rl->table_level[i]; + int last = i >= rl->last; + unsigned code = rl->table_vlc[i][0] << 1; + int len = rl->table_vlc[i][1] + 1; + + bits_tab[UNI_MPEG4_ENC_INDEX(last, run, level)] = code; + len_tab [UNI_MPEG4_ENC_INDEX(last, run, level)] = len; + bits_tab[UNI_MPEG4_ENC_INDEX(last, run, -level)] = code | 1; + len_tab [UNI_MPEG4_ENC_INDEX(last, run, -level)] = len; + + if (!max_run[last][level]) + 
max_run[last][level] = run + 1; + av_assert2(run + 1 <= max_run[last][level]); + + int run3 = run + max_run[last][level]; + int len3 = len + 7 + 2; + + if (run3 < 64 && len3 < len_tab[UNI_MPEG4_ENC_INDEX(last, run3, level)]) { + unsigned code3 = code | (0x3 << 2 | 0x2) << len; + bits_tab[UNI_MPEG4_ENC_INDEX(last, run3, level)] = code3; + len_tab [UNI_MPEG4_ENC_INDEX(last, run3, level)] = len3; + bits_tab[UNI_MPEG4_ENC_INDEX(last, run3, -level)] = code3 | 1; + len_tab [UNI_MPEG4_ENC_INDEX(last, run3, -level)] = len3; } + // table_run and table_level are ordered so that all the entries + // with the same last and run are consecutive and level is ascending + // among these entries. By traversing downwards we therefore automatically + // encounter max_level of a given run first, needed for escape method 1. + if (run != cur_run) { + max_level = level; + cur_run = run; + } else + av_assert2(max_level > level); + + code |= 0x3 << (len + 1); + len += 7 + 1; + level += max_level; + av_assert2(len_tab [UNI_MPEG4_ENC_INDEX(last, run, level)] >= len); + bits_tab[UNI_MPEG4_ENC_INDEX(last, run, level)] = code; + len_tab [UNI_MPEG4_ENC_INDEX(last, run, level)] = len; + bits_tab[UNI_MPEG4_ENC_INDEX(last, run, -level)] = code | 1; + len_tab [UNI_MPEG4_ENC_INDEX(last, run, -level)] = len; } } @@ -1277,8 +1182,6 @@ static av_cold void mpeg4_encode_init_static(void) { init_uni_dc_tab(); - ff_mpeg4_init_rl_intra(); - init_uni_mpeg4_rl_tab(&ff_mpeg4_rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len); init_uni_mpeg4_rl_tab(&ff_h263_rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len); @@ -1401,11 +1304,11 @@ void ff_mpeg4_encode_video_packet_header(MPVEncContext *const s) put_bits(&s->pb, 1, 0); /* no HEC */ } -#define OFFSET(x) offsetof(MPVEncContext, c.x) +#define OFFSET(x) offsetof(MPVEncContext, x) #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM static const AVOption options[] = { - { "data_partitioning", "Use data partitioning.", OFFSET(data_partitioning), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, - { "alternate_scan", "Enable alternate scantable.", OFFSET(alternate_scan), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, + { "data_partitioning", "Use data partitioning.", OFFSET(c.data_partitioning), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, + { "alternate_scan", "Enable alternate scantable.", OFFSET(c.alternate_scan), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "mpeg_quant", "Use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, VE }, FF_MPV_COMMON_BFRAME_OPTS diff --git a/libavcodec/mpeg4videoenc.h b/libavcodec/mpeg4videoenc.h index 815f16f07379c..4e20b8aaa0d89 100644 --- a/libavcodec/mpeg4videoenc.h +++ b/libavcodec/mpeg4videoenc.h @@ -27,6 +27,18 @@ #include "put_bits.h" +enum { + MAX_PB2_INTRA_SIZE = 1 /* ac_pred */ + 5 /* max cbpy len */ + + 2 /* dquant */ + 1 /* interlaced dct */ + + 4 * (8 /* longest luma dct_dc_size */ + + 9 /* longest dc diff */ + 1 /* marker */) + + 2 * (9 + 9 + 1), + MAX_PB2_INTER_SIZE = 5 /* max cbpy len */ + + 2 /* dquant */ + 1 /* interlaced_dct */ + 1, + MAX_PB2_MB_SIZE = (FFMAX(MAX_PB2_INTER_SIZE, MAX_PB2_INTRA_SIZE) + 7) / 8, + MAX_AC_TEX_MB_SIZE = 64 * 6 * 30 /* longest escape code */ / 8, +}; + typedef struct MPVEncContext MPVEncContext; void ff_set_mpeg4_time(MPVEncContext *s); diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 55f7178bed2f3..f3e4d4c386a0d 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -335,9 +335,11 @@ av_cold int 
ff_mpv_init_context_frame(MpegEncContext *s) s->coded_block = s->coded_block_base + s->b8_stride + 1; } - if (s->h263_pred || s->h263_plus || !s->encoding) { + if (s->h263_pred || s->h263_aic || !s->encoding) { /* dc values */ // MN: we need these for error resilience of intra-frames + // Allocating them unconditionally for decoders also means + // that we don't need to reinitialize when e.g. h263_aic changes. if (!FF_ALLOCZ_TYPED_ARRAY(s->dc_val_base, yc_size)) return AVERROR(ENOMEM); s->dc_val[0] = s->dc_val_base + s->b8_stride + 1; diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 20a5759958d94..68d70cc0e36e6 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -90,7 +90,6 @@ typedef struct MpegEncContext { int pb_frame; ///< PB-frame mode (0 = none, 1 = base, 2 = improved) /* the following codec id fields are deprecated in favor of codec_id */ - int h263_plus; ///< H.263+ headers int h263_flv; ///< use flv H.263 header enum AVCodecID codec_id; /* see AV_CODEC_ID_xxx */ @@ -254,7 +253,6 @@ typedef struct MpegEncContext { int data_partitioning; ///< data partitioning flag from header int partitioned_frame; ///< is current frame partitioned int low_delay; ///< no reordering needed / has no B-frames - int mpeg_quant; int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG-4 /* divx specific, used to workaround (many) bugs in divx5 */ diff --git a/libavcodec/mpegvideo_dec.c b/libavcodec/mpegvideo_dec.c index b8b84ffd8da18..f8551b93c8890 100644 --- a/libavcodec/mpegvideo_dec.c +++ b/libavcodec/mpegvideo_dec.c @@ -424,13 +424,6 @@ av_cold void ff_mpeg_flush(AVCodecContext *avctx) s->pp_time = 0; } -void ff_mpv_report_decode_progress(MpegEncContext *s) -{ - if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame && !s->er.error_occurred) - ff_thread_progress_report(&s->cur_pic.ptr->progress, s->mb_y); -} - - static inline int hpel_motion_lowres(MpegEncContext *s, uint8_t *dest, const uint8_t *src, int field_based, int field_select, @@ -817,7 +810,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s, } break; default: - av_assert2(0); + av_unreachable("No other mpegvideo MV types exist"); } } @@ -967,8 +960,8 @@ void mpv_reconstruct_mb_internal(MpegEncContext *s, int16_t block[12][64], } /* add dct residue */ - if (!(IS_MPEG12_H261(s) || s->msmpeg4_version != MSMP4_UNUSED || - (s->codec_id == AV_CODEC_ID_MPEG4 && !s->mpeg_quant))) { + if (is_mpeg12 != DEFINITELY_MPEG12_H261 && s->dct_unquantize_inter) { + // H.263, H.263+, H.263I, FLV, RV10, RV20 and MPEG-4 with MPEG-2 quantization add_dequant_dct(s, block[0], 0, dest_y , dct_linesize, s->qscale); add_dequant_dct(s, block[1], 1, dest_y + block_size, dct_linesize, s->qscale); add_dequant_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize, s->qscale); @@ -980,6 +973,10 @@ void mpv_reconstruct_mb_internal(MpegEncContext *s, int16_t block[12][64], add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); } } else if (is_mpeg12 == DEFINITELY_MPEG12_H261 || lowres_flag || (s->codec_id != AV_CODEC_ID_WMV2)) { + // H.261, MPEG-1, MPEG-2, MPEG-4 with H.263 quantization, + // MSMP4V1-3 and WMV1. + // Also RV30, RV40 and the VC-1 family when performing error resilience, + // but all blocks are skipped in this case. 
add_dct(s, block[0], 0, dest_y , dct_linesize); add_dct(s, block[1], 1, dest_y + block_size, dct_linesize); add_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize); diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 6e9533ebc92b4..46901fc506254 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -313,14 +313,15 @@ av_cold void ff_dct_encode_init(MPVEncContext *const s) s->dct_quantize = dct_quantize_trellis_c; } -static av_cold void init_unquantize(MpegEncContext *const s, AVCodecContext *avctx) +static av_cold void init_unquantize(MPVEncContext *const s2, AVCodecContext *avctx) { + MpegEncContext *const s = &s2->c; MPVUnquantDSPContext unquant_dsp_ctx; ff_mpv_unquantize_init(&unquant_dsp_ctx, avctx->flags & AV_CODEC_FLAG_BITEXACT, s->q_scale_type); - if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) { + if (s2->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) { s->dct_unquantize_intra = unquant_dsp_ctx.dct_unquantize_mpeg2_intra; s->dct_unquantize_inter = unquant_dsp_ctx.dct_unquantize_mpeg2_inter; } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) { @@ -403,7 +404,7 @@ static av_cold int init_matrices(MPVMainEncContext *const m, AVCodecContext *avc } if (CONFIG_MPEG4_ENCODER && s->c.codec_id == AV_CODEC_ID_MPEG4 && - s->c.mpeg_quant) { + s->mpeg_quant) { intra_matrix = ff_mpeg4_default_intra_matrix; inter_matrix = ff_mpeg4_default_non_intra_matrix; } else if (s->c.out_format == FMT_H263 || s->c.out_format == FMT_H261) { @@ -559,9 +560,10 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) case AV_PIX_FMT_YUV422P: s->c.chroma_format = CHROMA_422; break; + default: + av_unreachable("Already checked via CODEC_PIXFMTS"); case AV_PIX_FMT_YUVJ420P: case AV_PIX_FMT_YUV420P: - default: s->c.chroma_format = CHROMA_420; break; } @@ -838,7 +840,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) //return -1; } - if (s->c.mpeg_quant || s->c.codec_id == AV_CODEC_ID_MPEG1VIDEO || s->c.codec_id == AV_CODEC_ID_MPEG2VIDEO || s->c.codec_id == AV_CODEC_ID_MJPEG || s->c.codec_id == AV_CODEC_ID_AMV || s->c.codec_id == AV_CODEC_ID_SPEEDHQ) { + if (s->mpeg_quant || s->c.codec_id == AV_CODEC_ID_MPEG1VIDEO || s->c.codec_id == AV_CODEC_ID_MPEG2VIDEO || s->c.codec_id == AV_CODEC_ID_MJPEG || s->c.codec_id == AV_CODEC_ID_AMV || s->c.codec_id == AV_CODEC_ID_SPEEDHQ) { // (a + x * 3 / 8) / x s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3); s->inter_quant_bias = 0; @@ -906,7 +908,6 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) break; case AV_CODEC_ID_H263P: s->c.out_format = FMT_H263; - s->c.h263_plus = 1; /* Fx */ s->c.h263_aic = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0; s->c.modified_quant = s->c.h263_aic; @@ -942,8 +943,9 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) avctx->delay = 0; s->c.low_delay = 1; s->c.modified_quant = 1; + // Set here to force allocation of dc_val; + // will be set later on a per-frame basis. s->c.h263_aic = 1; - s->c.h263_plus = 1; s->c.loop_filter = 1; s->c.unrestricted_mv = 0; break; @@ -992,7 +994,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) s->c.low_delay = 1; break; default: - return AVERROR(EINVAL); + av_unreachable("List contains all codecs using ff_mpv_encode_init()"); } avctx->has_b_frames = !s->c.low_delay; @@ -1026,10 +1028,10 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) * before calling ff_mpv_common_init(). 
*/ s->parent = m; ff_mpv_idct_init(&s->c); - init_unquantize(&s->c, avctx); + init_unquantize(s, avctx); ff_fdctdsp_init(&s->fdsp, avctx); ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); - ff_pixblockdsp_init(&s->pdsp, avctx); + ff_pixblockdsp_init(&s->pdsp, 8); ret = me_cmp_init(m, avctx); if (ret < 0) return ret; @@ -2978,14 +2980,15 @@ static int encode_thread(AVCodecContext *c, void *arg){ int i; MBBackup best_s = { 0 }, backup_s; uint8_t bit_buf[2][MAX_MB_BYTES]; - uint8_t bit_buf2[2][MAX_MB_BYTES]; - uint8_t bit_buf_tex[2][MAX_MB_BYTES]; + // + 2 because ff_copy_bits() overreads + uint8_t bit_buf2[2][MAX_PB2_MB_SIZE + 2]; + uint8_t bit_buf_tex[2][MAX_AC_TEX_MB_SIZE + 2]; PutBitContext pb[2], pb2[2], tex_pb[2]; for(i=0; i<2; i++){ init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES); - init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES); - init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES); + init_put_bits(&pb2 [i], bit_buf2 [i], MAX_PB2_MB_SIZE); + init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_AC_TEX_MB_SIZE); } s->last_bits= put_bits_count(&s->pb); @@ -3006,25 +3009,17 @@ static int encode_thread(AVCodecContext *c, void *arg){ s->c.last_dc[0] = 128 * 8 / 13; s->c.last_dc[1] = 128 * 8 / 14; s->c.last_dc[2] = 128 * 8 / 14; +#if CONFIG_MPEG4_ENCODER + } else if (s->c.partitioned_frame) { + av_assert1(s->c.codec_id == AV_CODEC_ID_MPEG4); + ff_mpeg4_init_partitions(s); +#endif } s->c.mb_skip_run = 0; memset(s->c.last_mv, 0, sizeof(s->c.last_mv)); s->last_mv_dir = 0; - switch (s->c.codec_id) { - case AV_CODEC_ID_H263: - case AV_CODEC_ID_H263P: - case AV_CODEC_ID_FLV1: - if (CONFIG_H263_ENCODER) - s->c.gob_index = H263_GOB_HEIGHT(s->c.height); - break; - case AV_CODEC_ID_MPEG4: - if (CONFIG_MPEG4_ENCODER && s->c.partitioned_frame) - ff_mpeg4_init_partitions(s); - break; - } - s->c.resync_mb_x = 0; s->c.resync_mb_y = 0; s->c.first_slice_line = 1; @@ -3541,7 +3536,10 @@ static int encode_thread(AVCodecContext *c, void *arg){ } break; default: - av_log(s->c.avctx, AV_LOG_ERROR, "illegal MB type\n"); + av_unreachable("There is a case for every CANDIDATE_MB_TYPE_* " + "except CANDIDATE_MB_TYPE_SKIPPED which is never " + "the only candidate (always coupled with INTER) " + "so that it never reaches this switch"); } encode_mb(s, motion_x, motion_y); @@ -4019,7 +4017,7 @@ static int dct_quantize_trellis_c(MPVEncContext *const s, last_non_zero = 0; qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale]; matrix = n < 4 ? s->c.intra_matrix : s->c.chroma_intra_matrix; - if (s->c.mpeg_quant || s->c.out_format == FMT_MPEG1 || s->c.out_format == FMT_MJPEG) + if (s->mpeg_quant || s->c.out_format == FMT_MPEG1 || s->c.out_format == FMT_MJPEG) bias= 1<<(QMAT_SHIFT-1); if (n > 3 && s->intra_chroma_ac_vlc_length) { @@ -4334,7 +4332,7 @@ static int dct_quantize_refine(MPVEncContext *const s, //FIXME breaks denoise? 
dc= block[0]*q; // block[0] = (block[0] + (q >> 1)) / q; start_i = 1; -// if (s->c.mpeg_quant || s->c.out_format == FMT_MPEG1) +// if (s->mpeg_quant || s->c.out_format == FMT_MPEG1) // bias= 1<<(QMAT_SHIFT-1); if (n > 3 && s->intra_chroma_ac_vlc_length) { length = s->intra_chroma_ac_vlc_length; diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c index edc49310929c6..a48b898dac1b1 100644 --- a/libavcodec/mpegvideo_motion.c +++ b/libavcodec/mpegvideo_motion.c @@ -813,7 +813,8 @@ static av_always_inline void mpv_motion_internal(MpegEncContext *s, } break; } - default: av_assert2(0); + default: + av_unreachable("No other mpegvideo MV types exist"); } } diff --git a/libavcodec/mpegvideodec.h b/libavcodec/mpegvideodec.h index bc4bc905908bb..8bc70b02c0490 100644 --- a/libavcodec/mpegvideodec.h +++ b/libavcodec/mpegvideodec.h @@ -57,7 +57,6 @@ int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx); */ int ff_mpv_alloc_dummy_frames(MpegEncContext *s); void ff_mpv_reconstruct_mb(MpegEncContext *s, int16_t block[12][64]); -void ff_mpv_report_decode_progress(MpegEncContext *s); void ff_mpv_frame_end(MpegEncContext *s); int ff_mpv_export_qp_table(const MpegEncContext *s, AVFrame *f, diff --git a/libavcodec/mpegvideoenc.h b/libavcodec/mpegvideoenc.h index ec0304c4a0fba..5510b43f86ce5 100644 --- a/libavcodec/mpegvideoenc.h +++ b/libavcodec/mpegvideoenc.h @@ -147,6 +147,7 @@ typedef struct MPVEncContext { int last_mv_dir; ///< last mv_dir, used for B-frame encoding /* MPEG-4 specific */ + int mpeg_quant; PutBitContext tex_pb; ///< used for data partitioned VOPs PutBitContext pb2; ///< used for data partitioned VOPs diff --git a/libavcodec/msmpeg4dec.c b/libavcodec/msmpeg4dec.c index df67d435421b6..df5ab5186eba3 100644 --- a/libavcodec/msmpeg4dec.c +++ b/libavcodec/msmpeg4dec.c @@ -366,6 +366,9 @@ av_cold int ff_msmpeg4_decode_init(AVCodecContext *avctx) if (ff_h263_decode_init(avctx) < 0) return -1; + // We unquantize inter blocks as we parse them. 
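+    // This makes mpv_reconstruct_mb_internal() (which now tests
+    // s->dct_unquantize_inter) take the plain add_dct() path instead of
+    // add_dequant_dct() for these blocks.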
+ s->dct_unquantize_inter = NULL; + ff_msmpeg4_common_init(s); switch (s->msmpeg4_version) { @@ -379,6 +382,8 @@ av_cold int ff_msmpeg4_decode_init(AVCodecContext *avctx) break; case MSMP4_WMV2: break; + default: + av_unreachable("List contains all cases using ff_msmpeg4_decode_init()"); } s->slice_height= s->mb_height; //to avoid 1/0 if the first frame is not a keyframe @@ -472,6 +477,8 @@ int ff_msmpeg4_decode_picture_header(MpegEncContext * s) ms->dc_table_index = get_bits1(&s->gb); s->inter_intra_pred= 0; break; + default: + av_unreachable("ff_msmpeg4_decode_picture_header() only used by MSMP4V1-3, WMV1"); } s->no_rounding = 1; if(s->avctx->debug&FF_DEBUG_PICT_INFO) @@ -523,6 +530,8 @@ int ff_msmpeg4_decode_picture_header(MpegEncContext * s) s->inter_intra_pred = s->width*s->height < 320*240 && ms->bit_rate <= II_BITRATE; break; + default: + av_unreachable("ff_msmpeg4_decode_picture_header() only used by MSMP4V1-3, WMV1"); } if(s->avctx->debug&FF_DEBUG_PICT_INFO) diff --git a/libavcodec/msmpeg4enc.c b/libavcodec/msmpeg4enc.c index 795db6e4de9d4..3449328b3c768 100644 --- a/libavcodec/msmpeg4enc.c +++ b/libavcodec/msmpeg4enc.c @@ -221,7 +221,8 @@ static int msmpeg4_encode_picture_header(MPVMainEncContext *const m) find_best_tables(ms); - align_put_bits(&s->pb); + put_bits_assume_flushed(&s->pb); + put_bits(&s->pb, 2, s->c.pict_type - 1); put_bits(&s->pb, 5, s->c.qscale); diff --git a/libavcodec/nvdec_mpeg4.c b/libavcodec/nvdec_mpeg4.c index 7d158321aec24..827243903104e 100644 --- a/libavcodec/nvdec_mpeg4.c +++ b/libavcodec/nvdec_mpeg4.c @@ -70,7 +70,7 @@ static int nvdec_mpeg4_start_frame(AVCodecContext *avctx, .vop_time_increment_bitcount = m->time_increment_bits, .top_field_first = s->top_field_first, .resync_marker_disable = !m->resync_marker, - .quant_type = s->mpeg_quant, + .quant_type = m->mpeg_quant, .quarter_sample = s->quarter_sample, .short_video_header = avctx->codec->id == AV_CODEC_ID_H263, .divx_flags = s->divx_packed ? 
5 : 0, diff --git a/libavcodec/pcm-dvdenc.c b/libavcodec/pcm-dvdenc.c index b1f01ee323be3..a740f0e381b07 100644 --- a/libavcodec/pcm-dvdenc.c +++ b/libavcodec/pcm-dvdenc.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/avassert.h" #include "libavutil/channel_layout.h" #include "avcodec.h" #include "bytestream.h" @@ -45,7 +46,7 @@ static av_cold int pcm_dvd_encode_init(AVCodecContext *avctx) freq = 1; break; default: - av_assert1(0); + av_unreachable("Already checked via CODEC_SAMPLERATES"); } switch (avctx->sample_fmt) { @@ -58,7 +59,7 @@ static av_cold int pcm_dvd_encode_init(AVCodecContext *avctx) quant = 2; break; default: - av_assert1(0); + av_unreachable("Already checked via CODEC_SAMPLEFMTS"); } avctx->bits_per_coded_sample = 16 + quant * 4; diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c index bff61f2195a0b..68b19451942cc 100644 --- a/libavcodec/pcm.c +++ b/libavcodec/pcm.c @@ -327,6 +327,8 @@ static av_cold av_unused int pcm_lut_decode_init(AVCodecContext *avctx) PCMLUTDecode *s = avctx->priv_data; switch (avctx->codec_id) { + default: + av_unreachable("pcm_lut_decode_init() only used with alaw, mulaw and vidc"); case AV_CODEC_ID_PCM_ALAW: for (int i = 0; i < 256; i++) s->table[i] = alaw2linear(i); diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c index 1fff244511ad8..110a374260d35 100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@ -21,7 +21,6 @@ #include "config.h" #include "libavutil/attributes.h" #include "libavutil/intreadwrite.h" -#include "avcodec.h" #include "pixblockdsp.h" static void get_pixels_16_c(int16_t *restrict block, const uint8_t *pixels, @@ -85,40 +84,33 @@ static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1, } } -av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) +av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, int bits_per_raw_sample) { - av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + const unsigned high_bit_depth = bits_per_raw_sample > 8 && + bits_per_raw_sample <= 16; c->diff_pixels_unaligned = c->diff_pixels = diff_pixels_c; - switch (avctx->bits_per_raw_sample) { - case 9: - case 10: - case 12: - case 14: + if (high_bit_depth) { c->get_pixels_unaligned = get_pixels_unaligned_16_c; - c->get_pixels = get_pixels_16_c; - break; - default: - if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { - c->get_pixels_unaligned = - c->get_pixels = get_pixels_8_c; - } - break; + c->get_pixels = get_pixels_16_c; + } else { + c->get_pixels_unaligned = + c->get_pixels = get_pixels_8_c; } #if ARCH_AARCH64 - ff_pixblockdsp_init_aarch64(c, avctx, high_bit_depth); + ff_pixblockdsp_init_aarch64(c, high_bit_depth); #elif ARCH_ARM - ff_pixblockdsp_init_arm(c, avctx, high_bit_depth); + ff_pixblockdsp_init_arm(c, high_bit_depth); #elif ARCH_PPC - ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth); + ff_pixblockdsp_init_ppc(c, high_bit_depth); #elif ARCH_RISCV - ff_pixblockdsp_init_riscv(c, avctx, high_bit_depth); + ff_pixblockdsp_init_riscv(c, high_bit_depth); #elif ARCH_X86 - ff_pixblockdsp_init_x86(c, avctx, high_bit_depth); + ff_pixblockdsp_init_x86(c, high_bit_depth); #elif ARCH_MIPS - ff_pixblockdsp_init_mips(c, avctx, high_bit_depth); + ff_pixblockdsp_init_mips(c, high_bit_depth); #endif } diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index 215b0905d7f07..d493d0e22b74c 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ 
-19,13 +19,17 @@ #ifndef AVCODEC_PIXBLOCKDSP_H #define AVCODEC_PIXBLOCKDSP_H +#include <stddef.h> #include <stdint.h> -#include "avcodec.h" +#define PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED \ + !(ARCH_ARM || ARCH_MIPS || ARCH_PPC || ARCH_RISCV) typedef struct PixblockDSPContext { void (*get_pixels)(int16_t *restrict block /* align 16 */, - const uint8_t *pixels /* align 8 */, + /* align 16 for > 8 bits; align 8 for <= 8 bits + * (or 1 if PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED is set) */ + const uint8_t *pixels, ptrdiff_t stride); void (*get_pixels_unaligned)(int16_t *restrict block /* align 16 */, const uint8_t *pixels, @@ -41,18 +45,18 @@ typedef struct PixblockDSPContext { } PixblockDSPContext; -void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); -void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init(PixblockDSPContext *c, int bits_per_raw_sample); +void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_arm(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_x86(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, unsigned high_bit_depth); #endif /* AVCODEC_PIXBLOCKDSP_H */ diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c index 01d14b4124170..75287b1e85373 100644 --- a/libavcodec/ppc/pixblockdsp.c +++ b/libavcodec/ppc/pixblockdsp.c @@ -27,7 +27,6 @@ #include "libavutil/ppc/cpu.h" #include "libavutil/ppc/util_altivec.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" #if HAVE_ALTIVEC @@ -263,7 +262,6 @@ static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1, #endif /* HAVE_VSX */ av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { #if HAVE_ALTIVEC diff --git a/libavcodec/proresenc_anatoliy.c b/libavcodec/proresenc_anatoliy.c index fc69b94780d4c..4fc40abaac8a8 100644 --- a/libavcodec/proresenc_anatoliy.c +++ b/libavcodec/proresenc_anatoliy.c @@ -27,6 +27,7 @@ * Known FOURCCs: 'ap4h' (444), 'apch' (HQ), 'apcn' (422), 'apcs' (LT), 'acpo' (Proxy) */ +#include "libavutil/avassert.h" #include "libavutil/mem.h" #include "libavutil/mem_internal.h" #include "libavutil/opt.h" @@ -845,20 +846,25 @@ static av_cold int prores_encode_init(AVCodecContext *avctx) } if (avctx->profile == AV_PROFILE_UNKNOWN) { - if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10) { + switch (avctx->pix_fmt) { + case AV_PIX_FMT_YUV422P10: avctx->profile = AV_PROFILE_PRORES_STANDARD; av_log(avctx, AV_LOG_INFO, "encoding with ProRes standard (apcn) profile\n"); - } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) { + break; + case AV_PIX_FMT_YUV444P10: avctx->profile = AV_PROFILE_PRORES_4444; av_log(avctx, AV_LOG_INFO, "encoding with ProRes 4444 (ap4h) profile\n"); - } else if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) { + break; + case AV_PIX_FMT_YUVA444P10: avctx->profile =
AV_PROFILE_PRORES_4444; av_log(avctx, AV_LOG_INFO, "encoding with ProRes 4444+ (ap4h) profile\n"); - } else - av_assert0(0); + break; + default: + av_unreachable("Already checked via CODEC_PIXFMTS"); + } } else if (avctx->profile < AV_PROFILE_PRORES_PROXY || avctx->profile > AV_PROFILE_PRORES_XQ) { av_log( diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h index 56c3f4cc6d17b..c3eee622d41ee 100644 --- a/libavcodec/put_bits.h +++ b/libavcodec/put_bits.h @@ -74,6 +74,16 @@ static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, s->bit_buf = 0; } +/** + * Inform the compiler that a PutBitContext is flushed (i.e. if it has just + * been initialized or flushed). Undefined behaviour occurs if this is used + * with a PutBitContext for which this is not true. + */ +static inline void put_bits_assume_flushed(const PutBitContext *s) +{ + av_assume(s->bit_left == BUF_BITS); +} + /** * @return the total number of bits written to the bitstream. */ diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c index 06d998efcb61d..3dc44683d01cc 100644 --- a/libavcodec/ratecontrol.c +++ b/libavcodec/ratecontrol.c @@ -699,7 +699,7 @@ av_cold int ff_rate_control_init(MPVMainEncContext *const m) if (s->adaptive_quant) { unsigned mb_array_size = s->c.mb_stride * s->c.mb_height; - rcc->cplx_tab = av_malloc_array(mb_array_size, 2 * sizeof(rcc->cplx_tab)); + rcc->cplx_tab = av_malloc_array(mb_array_size, 2 * sizeof(*rcc->cplx_tab)); if (!rcc->cplx_tab) return AVERROR(ENOMEM); rcc->bits_tab = rcc->cplx_tab + mb_array_size; diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c index 28caa99dfff0c..e59fba63cc4e3 100644 --- a/libavcodec/riscv/pixblockdsp_init.c +++ b/libavcodec/riscv/pixblockdsp_init.c @@ -24,7 +24,6 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/riscv/cpu.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" void ff_get_pixels_8_rvi(int16_t *block, const uint8_t *pixels, @@ -42,7 +41,6 @@ void ff_diff_pixels_unaligned_rvv(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { #if HAVE_RV diff --git a/libavcodec/rv10enc.c b/libavcodec/rv10enc.c index 984fe3379dd81..534b93fd81e7d 100644 --- a/libavcodec/rv10enc.c +++ b/libavcodec/rv10enc.c @@ -36,7 +36,7 @@ int ff_rv10_encode_picture_header(MPVMainEncContext *const m) MPVEncContext *const s = &m->s; int full_frame= 0; - align_put_bits(&s->pb); + put_bits_assume_flushed(&s->pb); put_bits(&s->pb, 1, 1); /* marker */ diff --git a/libavcodec/rv20enc.c b/libavcodec/rv20enc.c index ce0d435dcb209..5c3850c12fa24 100644 --- a/libavcodec/rv20enc.c +++ b/libavcodec/rv20enc.c @@ -38,6 +38,8 @@ int ff_rv20_encode_picture_header(MPVMainEncContext *const m) { MPVEncContext *const s = &m->s; + put_bits_assume_flushed(&s->pb); + put_bits(&s->pb, 2, s->c.pict_type); //I 0 vs. 1 ? 
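/* A minimal usage sketch for the new put_bits_assume_flushed() helper, following
 * the pattern of the picture-header writers converted in this patch (buffer name
 * and field values below are illustrative only):
 *
 *     PutBitContext pb;
 *     uint8_t buf[64];
 *
 *     init_put_bits(&pb, buf, sizeof(buf));
 *     put_bits_assume_flushed(&pb);     // undefined behaviour unless pb really is flushed
 *     put_bits(&pb, 2, pict_type - 1);  // first writes may then skip the spill path
 *
 * Unlike the align_put_bits() calls it replaces, this emits no bits at all: it
 * only hands the "no pending bits" invariant to the optimizer via av_assume().
 */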
put_bits(&s->pb, 1, 0); /* unknown bit */ put_bits(&s->pb, 5, s->c.qscale); @@ -48,12 +50,12 @@ int ff_rv20_encode_picture_header(MPVMainEncContext *const m) put_bits(&s->pb, 1, s->c.no_rounding); - av_assert0(s->f_code == 1); - av_assert0(!s->c.unrestricted_mv); - av_assert0(!s->c.alt_inter_vlc); - av_assert0(!s->c.umvplus); - av_assert0(s->c.modified_quant==1); - av_assert0(s->c.loop_filter==1); + av_assert1(s->f_code == 1); + av_assert1(!s->c.unrestricted_mv); + av_assert1(!s->c.alt_inter_vlc); + av_assert1(!s->c.umvplus); + av_assert1(s->c.modified_quant == 1); + av_assert1(s->c.loop_filter == 1); s->c.h263_aic = s->c.pict_type == AV_PICTURE_TYPE_I; if (s->c.h263_aic) { diff --git a/libavcodec/rv60dec.c b/libavcodec/rv60dec.c index d704ae512c2a2..2bbcb1d62093d 100644 --- a/libavcodec/rv60dec.c +++ b/libavcodec/rv60dec.c @@ -82,7 +82,7 @@ enum { }; static const VLCElem * cbp8_vlc[7][4]; -static const VLCElem * cbp16_vlc[7][3][4]; +static const VLCElem * cbp16_vlc[7][4][4]; typedef struct { const VLCElem * l0[2]; @@ -137,12 +137,12 @@ static av_cold void rv60_init_static_data(void) for (int i = 0; i < 7; i++) for (int j = 0; j < 4; j++) - cbp8_vlc[i][j] = gen_vlc(rv60_cbp8_lens[i][j], 64, &state); + cbp16_vlc[i][0][j] = cbp8_vlc[i][j] = gen_vlc(rv60_cbp8_lens[i][j], 64, &state); for (int i = 0; i < 7; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 4; k++) - cbp16_vlc[i][j][k] = gen_vlc(rv60_cbp16_lens[i][j][k], 64, &state); + cbp16_vlc[i][j + 1][k] = gen_vlc(rv60_cbp16_lens[i][j][k], 64, &state); build_coeff_vlc(rv60_intra_lens, intra_coeff_vlc, 5, &state); build_coeff_vlc(rv60_inter_lens, inter_coeff_vlc, 7, &state); @@ -1650,10 +1650,7 @@ static int decode_super_cbp(GetBitContext * gb, const VLCElem * vlc[4]) static int decode_cbp16(GetBitContext * gb, int subset, int qp) { int cb_set = rv60_qp_to_idx[qp]; - if (!subset) - return decode_super_cbp(gb, cbp8_vlc[cb_set]); - else - return decode_super_cbp(gb, cbp16_vlc[cb_set][subset - 1]); + return decode_super_cbp(gb, cbp16_vlc[cb_set][subset]); } static int decode_cu_r(RV60Context * s, AVFrame * frame, ThreadContext * thread, GetBitContext * gb, int xpos, int ypos, int log_size, int qp, int sel_qp) diff --git a/libavcodec/speedhqenc.c b/libavcodec/speedhqenc.c index ecba2cd840886..23ab86e8e2a92 100644 --- a/libavcodec/speedhqenc.c +++ b/libavcodec/speedhqenc.c @@ -27,6 +27,7 @@ * SpeedHQ encoder. 
*/ +#include "libavutil/avassert.h" #include "libavutil/thread.h" #include "avcodec.h" @@ -36,6 +37,7 @@ #include "mpegvideo.h" #include "mpegvideodata.h" #include "mpegvideoenc.h" +#include "put_bits.h" #include "rl.h" #include "speedhq.h" #include "speedhqenc.h" @@ -100,6 +102,8 @@ static int speedhq_encode_picture_header(MPVMainEncContext *const m) SpeedHQEncContext *const ctx = (SpeedHQEncContext*)m; MPVEncContext *const s = &m->s; + put_bits_assume_flushed(&s->pb); + put_bits_le(&s->pb, 8, 100 - s->c.qscale * 2); /* FIXME why doubled */ put_bits_le(&s->pb, 24, 4); /* no second field */ @@ -259,7 +263,7 @@ static av_cold int speedhq_encode_init(AVCodecContext *avctx) avctx->codec_tag = MKTAG('S','H','Q','4'); break; default: - av_assert0(0); + av_unreachable("Already checked via CODEC_PIXFMTS"); } m->encode_picture_header = speedhq_encode_picture_header; diff --git a/libavcodec/speexdec.c b/libavcodec/speexdec.c index 60daab3b01563..94dce5420cc46 100644 --- a/libavcodec/speexdec.c +++ b/libavcodec/speexdec.c @@ -169,7 +169,7 @@ typedef struct SpeexSubmode { typedef struct SpeexMode { int modeID; /**< ID of the mode */ - int (*decode)(AVCodecContext *avctx, void *dec, GetBitContext *gb, float *out); + int (*decode)(AVCodecContext *avctx, void *dec, GetBitContext *gb, float *out, int packets_left); int frame_size; /**< Size of frames used for decoding */ int subframe_size; /**< Size of sub-frames used for decoding */ int lpc_size; /**< Order of LPC filter */ @@ -521,8 +521,8 @@ static const SpeexSubmode wb_submode4 = { split_cb_shape_sign_unquant, &split_cb_high, -1.f }; -static int nb_decode(AVCodecContext *, void *, GetBitContext *, float *); -static int sb_decode(AVCodecContext *, void *, GetBitContext *, float *); +static int nb_decode(AVCodecContext *, void *, GetBitContext *, float *, int packets_left); +static int sb_decode(AVCodecContext *, void *, GetBitContext *, float *, int packets_left); static const SpeexMode speex_modes[SPEEX_NB_MODES] = { { @@ -867,7 +867,7 @@ static void lsp_to_lpc(const float *freq, float *ak, int lpcrdr) } static int nb_decode(AVCodecContext *avctx, void *ptr_st, - GetBitContext *gb, float *out) + GetBitContext *gb, float *out, int packets_left) { DecoderState *st = ptr_st; float ol_gain = 0, ol_pitch_coef = 0, best_pitch_gain = 0, pitch_average = 0; @@ -1218,7 +1218,7 @@ static void qmf_synth(const float *x1, const float *x2, const float *a, float *y } static int sb_decode(AVCodecContext *avctx, void *ptr_st, - GetBitContext *gb, float *out) + GetBitContext *gb, float *out, int packets_left) { SpeexContext *s = avctx->priv_data; DecoderState *st = ptr_st; @@ -1234,9 +1234,11 @@ static int sb_decode(AVCodecContext *avctx, void *ptr_st, mode = st->mode; if (st->modeID > 0) { + if (packets_left <= 1) + return AVERROR_INVALIDDATA; low_innov_alias = out + st->frame_size; s->st[st->modeID - 1].innov_save = low_innov_alias; - ret = speex_modes[st->modeID - 1].decode(avctx, &s->st[st->modeID - 1], gb, out); + ret = speex_modes[st->modeID - 1].decode(avctx, &s->st[st->modeID - 1], gb, out, packets_left); if (ret < 0) return ret; } @@ -1559,7 +1561,7 @@ static int speex_decode_frame(AVCodecContext *avctx, AVFrame *frame, dst = (float *)frame->extended_data[0]; for (int i = 0; i < frames_per_packet; i++) { - ret = speex_modes[s->mode].decode(avctx, &s->st[s->mode], &s->gb, dst + i * s->frame_size); + ret = speex_modes[s->mode].decode(avctx, &s->st[s->mode], &s->gb, dst + i * s->frame_size, frames_per_packet - i); if (ret < 0) return ret; if 
(avctx->ch_layout.nb_channels == 2) diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index 6319e9b0216e9..4c4f3018c518c 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -71,13 +71,14 @@ * svq3 decoder. */ +#define NUM_PICS 3 + typedef struct SVQ3Frame { AVFrame *f; - int16_t (*motion_val_buf[2])[2]; int16_t (*motion_val[2])[2]; - uint32_t *mb_type_buf, *mb_type; + uint32_t *mb_type; } SVQ3Frame; typedef struct SVQ3Context { @@ -103,7 +104,6 @@ typedef struct SVQ3Context { int adaptive_quant; int h_edge_pos; int v_edge_pos; - int last_frame_output; int slice_num; int qscale; int cbp; @@ -142,7 +142,10 @@ typedef struct SVQ3Context { DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8]; uint32_t dequant4_coeff[QP_MAX_NUM + 1][16]; int block_offset[2 * (16 * 3)]; - SVQ3Frame frames[3]; + SVQ3Frame frames[NUM_PICS]; + + uint32_t *mb_type_buf; + int16_t (*motion_val_buf)[2]; } SVQ3Context; #define FULLPEL_MODE 1 @@ -1114,14 +1117,139 @@ static void init_dequant4_coeff_table(SVQ3Context *s) } } +static av_cold int svq3_decode_extradata(AVCodecContext *avctx, SVQ3Context *s, + int seqh_offset) +{ + const uint8_t *extradata = avctx->extradata + seqh_offset; + unsigned int size = AV_RB32(extradata + 4); + GetBitContext gb; + int ret; + + if (size > avctx->extradata_size - seqh_offset - 8) + return AVERROR_INVALIDDATA; + extradata += 8; + init_get_bits(&gb, extradata, size * 8); + + /* 'frame size code' and optional 'width, height' */ + int frame_size_code = get_bits(&gb, 3); + int w, h; + switch (frame_size_code) { + case 0: + w = 160; + h = 120; + break; + case 1: + w = 128; + h = 96; + break; + case 2: + w = 176; + h = 144; + break; + case 3: + w = 352; + h = 288; + break; + case 4: + w = 704; + h = 576; + break; + case 5: + w = 240; + h = 180; + break; + case 6: + w = 320; + h = 240; + break; + case 7: + w = get_bits(&gb, 12); + h = get_bits(&gb, 12); + break; + } + ret = ff_set_dimensions(avctx, w, h); + if (ret < 0) + return ret; + + s->halfpel_flag = get_bits1(&gb); + s->thirdpel_flag = get_bits1(&gb); + + /* unknown fields */ + int unk0 = get_bits1(&gb); + int unk1 = get_bits1(&gb); + int unk2 = get_bits1(&gb); + int unk3 = get_bits1(&gb); + + s->low_delay = get_bits1(&gb); + avctx->has_b_frames = !s->low_delay; + + /* unknown field */ + int unk4 = get_bits1(&gb); + + av_log(avctx, AV_LOG_DEBUG, "Unknown fields %d %d %d %d %d\n", + unk0, unk1, unk2, unk3, unk4); + + if (skip_1stop_8data_bits(&gb) < 0) + return AVERROR_INVALIDDATA; + + s->has_watermark = get_bits1(&gb); + + if (!s->has_watermark) + return 0; + +#if CONFIG_ZLIB + unsigned watermark_width = get_interleaved_ue_golomb(&gb); + unsigned watermark_height = get_interleaved_ue_golomb(&gb); + int u1 = get_interleaved_ue_golomb(&gb); + int u2 = get_bits(&gb, 8); + int u3 = get_bits(&gb, 2); + int u4 = get_interleaved_ue_golomb(&gb); + unsigned long buf_len = watermark_width * + watermark_height * 4; + int offset = get_bits_count(&gb) + 7 >> 3; + + if (watermark_height <= 0 || + get_bits_left(&gb) <= 0 || + (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height) + return AVERROR_INVALIDDATA; + + av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n", + watermark_width, watermark_height); + av_log(avctx, AV_LOG_DEBUG, + "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n", + u1, u2, u3, u4, offset); + + uint8_t *buf = av_malloc(buf_len); + if (!buf) + return AVERROR(ENOMEM); + + if (uncompress(buf, &buf_len, extradata + offset, + size - offset) != Z_OK) { + av_log(avctx, AV_LOG_ERROR, + "could not uncompress 
watermark logo\n"); + av_free(buf); + return AVERROR_EXTERNAL; + } + s->watermark_key = av_bswap16(av_crc(av_crc_get_table(AV_CRC_16_CCITT), 0, buf, buf_len)); + + s->watermark_key = s->watermark_key << 16 | s->watermark_key; + av_log(avctx, AV_LOG_DEBUG, + "watermark key %#"PRIx32"\n", s->watermark_key); + av_free(buf); + + return 0; +#else + av_log(avctx, AV_LOG_ERROR, + "this svq3 file contains watermark which need zlib support compiled in\n"); + return AVERROR(ENOSYS); +#endif +} + static av_cold int svq3_decode_init(AVCodecContext *avctx) { SVQ3Context *s = avctx->priv_data; int m, x, y; unsigned char *extradata; - unsigned char *extradata_end; - unsigned int size; - int marker_found = 0; int ret; s->cur_pic = &s->frames[0]; @@ -1154,147 +1282,55 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx) /* prowl for the "SEQH" marker in the extradata */ extradata = (unsigned char *)avctx->extradata; - extradata_end = avctx->extradata + avctx->extradata_size; if (extradata) { for (m = 0; m + 8 < avctx->extradata_size; m++) { if (!memcmp(extradata, "SEQH", 4)) { - marker_found = 1; + /* if a match was found, parse the extra data */ + ret = svq3_decode_extradata(avctx, s, m); + if (ret < 0) + return ret; break; } extradata++; } } - /* if a match was found, parse the extra data */ - if (marker_found) { - GetBitContext gb; - int frame_size_code; - int unk0, unk1, unk2, unk3, unk4; - int w,h; - - size = AV_RB32(&extradata[4]); - if (size > extradata_end - extradata - 8) - return AVERROR_INVALIDDATA; - init_get_bits(&gb, extradata + 8, size * 8); - - /* 'frame size code' and optional 'width, height' */ - frame_size_code = get_bits(&gb, 3); - switch (frame_size_code) { - case 0: - w = 160; - h = 120; - break; - case 1: - w = 128; - h = 96; - break; - case 2: - w = 176; - h = 144; - break; - case 3: - w = 352; - h = 288; - break; - case 4: - w = 704; - h = 576; - break; - case 5: - w = 240; - h = 180; - break; - case 6: - w = 320; - h = 240; - break; - case 7: - w = get_bits(&gb, 12); - h = get_bits(&gb, 12); - break; - } - ret = ff_set_dimensions(avctx, w, h); - if (ret < 0) - return ret; - - s->halfpel_flag = get_bits1(&gb); - s->thirdpel_flag = get_bits1(&gb); - - /* unknown fields */ - unk0 = get_bits1(&gb); - unk1 = get_bits1(&gb); - unk2 = get_bits1(&gb); - unk3 = get_bits1(&gb); + s->mb_width = (avctx->width + 15) / 16; + s->mb_height = (avctx->height + 15) / 16; + s->mb_stride = s->mb_width + 1; + s->mb_num = s->mb_width * s->mb_height; + s->b_stride = 4 * s->mb_width; + s->h_edge_pos = s->mb_width * 16; + s->v_edge_pos = s->mb_height * 16; - s->low_delay = get_bits1(&gb); + const unsigned big_mb_num = s->mb_stride * (s->mb_height + 2) + 1; - /* unknown field */ - unk4 = get_bits1(&gb); + s->mb_type_buf = av_calloc(big_mb_num, NUM_PICS * sizeof(*s->mb_type_buf)); + if (!s->mb_type_buf) + return AVERROR(ENOMEM); + uint32_t *mb_type_buf = s->mb_type_buf + 2 * s->mb_stride + 1; - av_log(avctx, AV_LOG_DEBUG, "Unknown fields %d %d %d %d %d\n", - unk0, unk1, unk2, unk3, unk4); + const unsigned b4_stride = s->mb_width * 4 + 1; + const unsigned b4_array_size = b4_stride * s->mb_height * 4; + const unsigned motion_val_buf_size = b4_array_size + 4; - if (skip_1stop_8data_bits(&gb) < 0) - return AVERROR_INVALIDDATA; + s->motion_val_buf = av_calloc(motion_val_buf_size, + NUM_PICS * 2 * sizeof(*s->motion_val_buf)); + if (!s->motion_val_buf) + return AVERROR(ENOMEM); + int16_t (*motion_val_buf)[2] = s->motion_val_buf + 4; - s->has_watermark = get_bits1(&gb); - avctx->has_b_frames = 
!s->low_delay; - if (s->has_watermark) { -#if CONFIG_ZLIB - unsigned watermark_width = get_interleaved_ue_golomb(&gb); - unsigned watermark_height = get_interleaved_ue_golomb(&gb); - int u1 = get_interleaved_ue_golomb(&gb); - int u2 = get_bits(&gb, 8); - int u3 = get_bits(&gb, 2); - int u4 = get_interleaved_ue_golomb(&gb); - unsigned long buf_len = watermark_width * - watermark_height * 4; - int offset = get_bits_count(&gb) + 7 >> 3; - uint8_t *buf; - - if (watermark_height <= 0 || - get_bits_left(&gb) <= 0 || - (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height) - return AVERROR_INVALIDDATA; - - buf = av_malloc(buf_len); - if (!buf) - return AVERROR(ENOMEM); - - av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n", - watermark_width, watermark_height); - av_log(avctx, AV_LOG_DEBUG, - "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n", - u1, u2, u3, u4, offset); - if (uncompress(buf, &buf_len, extradata + 8 + offset, - size - offset) != Z_OK) { - av_log(avctx, AV_LOG_ERROR, - "could not uncompress watermark logo\n"); - av_free(buf); - return -1; - } - s->watermark_key = av_bswap16(av_crc(av_crc_get_table(AV_CRC_16_CCITT), 0, buf, buf_len)); + for (size_t i = 0; i < NUM_PICS; ++i) { + SVQ3Frame *const pic = &s->frames[i]; - s->watermark_key = s->watermark_key << 16 | s->watermark_key; - av_log(avctx, AV_LOG_DEBUG, - "watermark key %#"PRIx32"\n", s->watermark_key); - av_free(buf); -#else - av_log(avctx, AV_LOG_ERROR, - "this svq3 file contains watermark which need zlib support compiled in\n"); - return AVERROR(ENOSYS); -#endif + pic->mb_type = mb_type_buf; + mb_type_buf += big_mb_num; + for (size_t j = 0; j < FF_ARRAY_ELEMS(pic->motion_val); ++j) { + pic->motion_val[j] = motion_val_buf; + motion_val_buf += motion_val_buf_size; } } - s->mb_width = (avctx->width + 15) / 16; - s->mb_height = (avctx->height + 15) / 16; - s->mb_stride = s->mb_width + 1; - s->mb_num = s->mb_width * s->mb_height; - s->b_stride = 4 * s->mb_width; - s->h_edge_pos = s->mb_width * 16; - s->v_edge_pos = s->mb_height * 16; - s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8); if (!s->intra4x4_pred_mode) return AVERROR(ENOMEM); @@ -1316,49 +1352,14 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx) return 0; } -static void free_picture(SVQ3Frame *pic) -{ - int i; - for (i = 0; i < 2; i++) { - av_freep(&pic->motion_val_buf[i]); - } - av_freep(&pic->mb_type_buf); - - av_frame_unref(pic->f); -} - static int get_buffer(AVCodecContext *avctx, SVQ3Frame *pic) { SVQ3Context *s = avctx->priv_data; - const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1; - const int b4_stride = s->mb_width * 4 + 1; - const int b4_array_size = b4_stride * s->mb_height * 4; - int ret; - - if (!pic->motion_val_buf[0]) { - int i; - - pic->mb_type_buf = av_calloc(big_mb_num + s->mb_stride, sizeof(uint32_t)); - if (!pic->mb_type_buf) - return AVERROR(ENOMEM); - pic->mb_type = pic->mb_type_buf + 2 * s->mb_stride + 1; - - for (i = 0; i < 2; i++) { - pic->motion_val_buf[i] = av_calloc(b4_array_size + 4, 2 * sizeof(int16_t)); - if (!pic->motion_val_buf[i]) { - ret = AVERROR(ENOMEM); - goto fail; - } - - pic->motion_val[i] = pic->motion_val_buf[i] + 4; - } - } - - ret = ff_get_buffer(avctx, pic->f, - (s->pict_type != AV_PICTURE_TYPE_B) ? - AV_GET_BUFFER_FLAG_REF : 0); + int ret = ff_get_buffer(avctx, pic->f, + (s->pict_type != AV_PICTURE_TYPE_B) ? 
+ AV_GET_BUFFER_FLAG_REF : 0); if (ret < 0) - goto fail; + return ret; if (!s->edge_emu_buffer) { s->edge_emu_buffer = av_calloc(pic->f->linesize[0], 17); @@ -1367,9 +1368,23 @@ static int get_buffer(AVCodecContext *avctx, SVQ3Frame *pic) } return 0; -fail: - free_picture(pic); - return ret; +} + +static av_cold int alloc_dummy_frame(AVCodecContext *avctx, SVQ3Frame *pic) +{ + av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n"); + av_frame_unref(pic->f); + int ret = get_buffer(avctx, pic); + if (ret < 0) + return ret; + + memset(pic->f->data[0], 0, avctx->height * pic->f->linesize[0]); + memset(pic->f->data[1], 0x80, (avctx->height / 2) * + pic->f->linesize[1]); + memset(pic->f->data[2], 0x80, (avctx->height / 2) * + pic->f->linesize[2]); + + return 0; } static int svq3_decode_frame(AVCodecContext *avctx, AVFrame *rframe, @@ -1382,11 +1397,8 @@ static int svq3_decode_frame(AVCodecContext *avctx, AVFrame *rframe, /* special case for last picture */ if (buf_size == 0) { - if (s->next_pic->f->data[0] && !s->low_delay && !s->last_frame_output) { - ret = av_frame_ref(rframe, s->next_pic->f); - if (ret < 0) - return ret; - s->last_frame_output = 1; + if (s->next_pic->f->data[0] && !s->low_delay) { + av_frame_move_ref(rframe, s->next_pic->f); *got_frame = 1; } return 0; @@ -1398,8 +1410,9 @@ static int svq3_decode_frame(AVCodecContext *avctx, AVFrame *rframe, if (ret < 0) return ret; - if (svq3_decode_slice_header(avctx)) - return -1; + ret = svq3_decode_slice_header(avctx); + if (ret < 0) + return ret; if (avpkt->size < s->mb_width * s->mb_height / 8) return AVERROR_INVALIDDATA; @@ -1435,29 +1448,15 @@ static int svq3_decode_frame(AVCodecContext *avctx, AVFrame *rframe, if (s->pict_type != AV_PICTURE_TYPE_I) { if (!s->last_pic->f->data[0]) { - av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n"); - av_frame_unref(s->last_pic->f); - ret = get_buffer(avctx, s->last_pic); + ret = alloc_dummy_frame(avctx, s->last_pic); if (ret < 0) return ret; - memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]); - memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) * - s->last_pic->f->linesize[1]); - memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) * - s->last_pic->f->linesize[2]); } if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) { - av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n"); - av_frame_unref(s->next_pic->f); - ret = get_buffer(avctx, s->next_pic); + ret = alloc_dummy_frame(avctx, s->next_pic); if (ret < 0) return ret; - memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]); - memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) * - s->next_pic->f->linesize[1]); - memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) * - s->next_pic->f->linesize[2]); } } @@ -1512,8 +1511,9 @@ static int svq3_decode_frame(AVCodecContext *avctx, AVFrame *rframe, if (((get_bits_count(&s->gb_slice) & 7) == 0 || show_bits(&s->gb_slice, get_bits_left(&s->gb_slice) & 7) == 0)) { - if (svq3_decode_slice_header(avctx)) - return -1; + ret = svq3_decode_slice_header(avctx); + if (ret < 0) + return ret; } if (s->slice_type != s->pict_type) { avpriv_request_sample(avctx, "non constant slice type"); @@ -1583,10 +1583,10 @@ static av_cold int svq3_decode_end(AVCodecContext *avctx) { SVQ3Context *s = avctx->priv_data; - for (int i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) { - free_picture(&s->frames[i]); + for (int i = 0; i < NUM_PICS; i++) av_frame_free(&s->frames[i].f); - } + av_freep(&s->motion_val_buf); + 
av_freep(&s->mb_type_buf); av_freep(&s->slice_buf); av_freep(&s->intra4x4_pred_mode); av_freep(&s->edge_emu_buffer); diff --git a/libavcodec/tests/.gitignore b/libavcodec/tests/.gitignore index 0df4ae10a0285..2c5bbec7f9d77 100644 --- a/libavcodec/tests/.gitignore +++ b/libavcodec/tests/.gitignore @@ -1,3 +1,4 @@ +/apv /av1_levels /avcodec /avpacket diff --git a/libavcodec/tests/avcodec.c b/libavcodec/tests/avcodec.c index 83a5d0531636e..dde8226384082 100644 --- a/libavcodec/tests/avcodec.c +++ b/libavcodec/tests/avcodec.c @@ -167,6 +167,9 @@ FF_ENABLE_DEPRECATION_WARNINGS !(codec->capabilities & AV_CODEC_CAP_DELAY)) ERR("EOF_FLUSH encoder %s is not marked as having delay\n"); } else { + if ((codec2->update_thread_context || codec2->update_thread_context_for_user) && + !(codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)) + ERR("Non-frame-threaded decoder %s has update_thread_context set"); if ((codec->type == AVMEDIA_TYPE_SUBTITLE) != (codec2->cb_type == FF_CODEC_CB_TYPE_DECODE_SUB)) ERR("Subtitle decoder %s does not implement decode_sub callback\n"); if (codec->type == AVMEDIA_TYPE_SUBTITLE && codec2->bsfs) diff --git a/libavcodec/tests/hashtable.c b/libavcodec/tests/hashtable.c new file mode 100644 index 0000000000000..02c0ac8afa025 --- /dev/null +++ b/libavcodec/tests/hashtable.c @@ -0,0 +1,110 @@ +/* + * Generic hashtable tests + * Copyright (C) 2024 Emma Worley + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/avassert.h" +#include "libavcodec/hashtable.h" + +int main(void) +{ + struct FFHashtableContext *ctx; + uint8_t k; + uint64_t v; + + // impossibly large allocation should fail gracefully + av_assert0(ff_hashtable_alloc(&ctx, -1, -1, -1) < 0); + + // hashtable can store up to 3 uint8_t->uint64_t entries + av_assert0(!ff_hashtable_alloc(&ctx, sizeof(k), sizeof(v), 3)); + + // unsuccessful deletes return 0 + k = 1; + av_assert0(!ff_hashtable_delete(ctx, &k)); + + // unsuccessful gets return 0 + k = 1; + av_assert0(!ff_hashtable_get(ctx, &k, &v)); + + // successful sets returns 1 + k = 1; + v = 1; + av_assert0(ff_hashtable_set(ctx, &k, &v)); + + // get should now contain 1 + k = 1; + v = 0; + av_assert0(ff_hashtable_get(ctx, &k, &v)); + av_assert0(v == 1); + + // updating sets should return 1 + k = 1; + v = 2; + av_assert0(ff_hashtable_set(ctx, &k, &v)); + + // get should now contain 2 + k = 1; + v = 0; + av_assert0(ff_hashtable_get(ctx, &k, &v)); + av_assert0(v == 2); + + // fill the table + k = 2; + v = 2; + av_assert0(ff_hashtable_set(ctx, &k, &v)); + k = 3; + v = 3; + av_assert0(ff_hashtable_set(ctx, &k, &v)); + + // inserting sets on a full table should return 0 + k = 4; + v = 4; + av_assert0(!ff_hashtable_set(ctx, &k, &v)); + + // updating sets on a full table should return 1 + k = 1; + v = 4; + av_assert0(ff_hashtable_set(ctx, &k, &v)); + v = 0; + av_assert0(ff_hashtable_get(ctx, &k, &v)); + av_assert0(v == 4); + + // successful deletes should return 1 + k = 1; + av_assert0(ff_hashtable_delete(ctx, &k)); + + // get should now return 0 + av_assert0(!ff_hashtable_get(ctx, &k, &v)); + + // sanity check remaining keys + k = 2; + v = 0; + av_assert0(ff_hashtable_get(ctx, &k, &v)); + av_assert0(v == 2); + k = 3; + v = 0; + av_assert0(ff_hashtable_get(ctx, &k, &v)); + av_assert0(v == 3); + + ff_hashtable_freep(&ctx); + + return 0; +} diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c index be503d78c6a3c..7cefca79bc6fd 100644 --- a/libavcodec/utvideoenc.c +++ b/libavcodec/utvideoenc.c @@ -24,6 +24,7 @@ * Ut Video encoder */ +#include "libavutil/avassert.h" #include "libavutil/imgutils.h" #include "libavutil/intreadwrite.h" #include "libavutil/mem.h" @@ -143,9 +144,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx) original_format = UTVIDEO_444; break; default: - av_log(avctx, AV_LOG_ERROR, "Unknown pixel format: %d\n", - avctx->pix_fmt); - return AVERROR_INVALIDDATA; + av_unreachable("Already checked via CODEC_PIXFMTS"); } ff_bswapdsp_init(&c->bdsp); @@ -153,7 +152,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx) if (c->frame_pred == PRED_GRADIENT) { av_log(avctx, AV_LOG_ERROR, "Gradient prediction is not supported.\n"); - return AVERROR_OPTION_NOT_FOUND; + return AVERROR_PATCHWELCOME; } /* @@ -646,7 +645,6 @@ static const AVOption options[] = { { "pred", "Prediction method", OFFSET(frame_pred), AV_OPT_TYPE_INT, { .i64 = PRED_LEFT }, PRED_NONE, PRED_MEDIAN, VE, .unit = "pred" }, { "none", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_NONE }, INT_MIN, INT_MAX, VE, .unit = "pred" }, { "left", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_LEFT }, INT_MIN, INT_MAX, VE, .unit = "pred" }, - { "gradient", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_GRADIENT }, INT_MIN, INT_MAX, VE, .unit = "pred" }, { "median",
NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_MEDIAN }, INT_MIN, INT_MAX, VE, .unit = "pred" }, { NULL}, diff --git a/libavcodec/vaapi_mpeg4.c b/libavcodec/vaapi_mpeg4.c index 8338c0732d333..533e6750a1c55 100644 --- a/libavcodec/vaapi_mpeg4.c +++ b/libavcodec/vaapi_mpeg4.c @@ -70,7 +70,7 @@ static int vaapi_mpeg4_start_frame(AVCodecContext *avctx, .obmc_disable = 1, .sprite_enable = ctx->vol_sprite_usage, .sprite_warping_accuracy = ctx->sprite_warping_accuracy, - .quant_type = s->mpeg_quant, + .quant_type = ctx->mpeg_quant, .quarter_sample = s->quarter_sample, .data_partitioned = s->data_partitioning, .reversible_vlc = ctx->rvlc, diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h index e3b90d2b62601..b018537af3387 100644 --- a/libavcodec/vc1dsp.h +++ b/libavcodec/vc1dsp.h @@ -30,7 +30,9 @@ #include "hpeldsp.h" #include "h264chroma.h" -typedef void (*vc1op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int h); +typedef void (*vc1op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, + const uint8_t *pixels/*align 1*/, + ptrdiff_t line_size, int round); typedef struct VC1DSPContext { /* vc1 functions */ diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 99ca95c40a099..b0588f6b58426 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -193,6 +193,8 @@ static uint16_t interleaved_ue_golomb_tab[256]; static uint16_t top_interleaved_ue_golomb_tab[256]; /// 1 x_{k-1} ... x_0 -> 2 * k static uint8_t golomb_len_tab[256]; +/// quant -> av_log2(ff_dirac_qscale_tab[quant]) + 32 +static uint8_t qscale_len_tab[FF_ARRAY_ELEMS(ff_dirac_qscale_tab)]; static av_cold void vc2_init_static_data(void) { @@ -202,6 +204,8 @@ static av_cold void vc2_init_static_data(void) interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); } + for (size_t i = 0; i < FF_ARRAY_ELEMS(qscale_len_tab); ++i) + qscale_len_tab[i] = av_log2(ff_dirac_qscale_tab[i]) + 32; } static av_always_inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) @@ -545,7 +549,7 @@ static void encode_subband(const VC2EncContext *s, PutBitContext *pb, dwtcoef *coeff = b->buf + top * b->stride; const uint64_t q_m = ((uint64_t)(s->qmagic_lut[quant][0])) << 2; const uint64_t q_a = s->qmagic_lut[quant][1]; - const int q_s = av_log2(ff_dirac_qscale_tab[quant]) + 32; + const int q_s = qscale_len_tab[quant]; for (y = top; y < bottom; y++) { for (x = left; x < right; x++) { @@ -586,7 +590,7 @@ static int count_hq_slice(SliceArgs *slice, int quant_idx) const int q_idx = quants[level][orientation]; const uint64_t q_m = ((uint64_t)s->qmagic_lut[q_idx][0]) << 2; const uint64_t q_a = s->qmagic_lut[q_idx][1]; - const int q_s = av_log2(ff_dirac_qscale_tab[q_idx]) + 32; + const int q_s = qscale_len_tab[q_idx]; const int left = b->width * slice->x / s->num_x; const int right = b->width *(slice->x+1) / s->num_x; diff --git a/libavcodec/vdpau_mpeg4.c b/libavcodec/vdpau_mpeg4.c index 91981935f5ba9..7ec7a74ad1d40 100644 --- a/libavcodec/vdpau_mpeg4.c +++ b/libavcodec/vdpau_mpeg4.c @@ -68,7 +68,7 @@ static int vdpau_mpeg4_start_frame(AVCodecContext *avctx, info->vop_fcode_backward = ctx->b_code; info->resync_marker_disable = !ctx->resync_marker; info->interlaced = !s->progressive_sequence; - info->quant_type = s->mpeg_quant; + info->quant_type = ctx->mpeg_quant; info->quarter_sample = s->quarter_sample; info->short_video_header = avctx->codec->id == AV_CODEC_ID_H263; 
info->rounding_control = s->no_rounding; diff --git a/libavcodec/vlc.c b/libavcodec/vlc.c index c49c801181979..3aa198a2778e0 100644 --- a/libavcodec/vlc.c +++ b/libavcodec/vlc.c @@ -42,6 +42,8 @@ { \ const uint8_t *ptr = (const uint8_t *)table + i * wrap; \ switch(size) { \ + default: \ + av_unreachable("Only uint8/16/32_t are used"); \ case 1: \ v = *(const uint8_t *)ptr; \ break; \ @@ -49,8 +51,6 @@ v = *(const uint16_t *)ptr; \ break; \ case 4: \ - default: \ - av_assert1(size == 4); \ v = *(const uint32_t *)ptr; \ break; \ } \ @@ -260,7 +260,7 @@ int ff_vlc_init_sparse(VLC *vlc, int nb_bits, int nb_codes, if (ret < 0) return ret; - av_assert0(symbols_size <= 2 || !symbols); + av_assert0(symbols_size <= 2U); j = 0; #define COPY(condition)\ for (int i = 0; i < nb_codes; i++) { \ diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index fd416eed3a5ab..141f0941b402f 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -1140,7 +1140,8 @@ static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl, uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); break; default: - av_assert0(0); + av_unreachable("ff_vp9_partition_tree only has " + "the four PARTITION_* terminal codes"); } } else if (vpx_rac_get_prob_branchy(td->c, p[1])) { bp = PARTITION_SPLIT; diff --git a/libavcodec/vp9dec.h b/libavcodec/vp9dec.h index 851ee9f6dde1b..e41f47a82a524 100644 --- a/libavcodec/vp9dec.h +++ b/libavcodec/vp9dec.h @@ -220,8 +220,8 @@ struct VP9TileData { DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8]; DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8]; // block reconstruction intermediates - DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2]; - DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2]; + DECLARE_ALIGNED(64, uint8_t, tmp_y)[64 * 64 * 2]; + DECLARE_ALIGNED(64, uint8_t, tmp_uv)[2][64 * 64 * 2]; struct { int x, y; } min_mv, max_mv; int16_t *block_base, *block, *uvblock_base[2], *uvblock[2]; uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2]; diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index feb5d2ea5136c..729cb4f15c5da 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -6,10 +6,8 @@ clean:: OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \ vulkan/rangecoder.o vulkan/ffv1_vlc.o \ vulkan/ffv1_common.o vulkan/ffv1_reset.o \ - vulkan/ffv1_enc_common.o \ vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \ - vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \ - vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o + vulkan/ffv1_rct_search.o vulkan/ffv1_enc.o OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ vulkan/rangecoder.o vulkan/ffv1_vlc.o \ diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp index 64c1c2ce801be..3d40592739b5c 100644 --- a/libavcodec/vulkan/ffv1_common.comp +++ b/libavcodec/vulkan/ffv1_common.comp @@ -92,3 +92,90 @@ uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) return sx; } + +#ifdef RGB +#define RGB_LBUF (RGB_LINECACHE - 1) +#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF))) + +ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, + int comp, int sw, uint8_t quant_table_idx, bool extend_lookup) +{ + const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? 
off + ivec2(1, -1) : off; + + /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */ + VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, -1)))[comp]), + TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]), + TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp])); + + /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must + * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous + * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */ + TYPE cur = TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, 0)))[comp]); + + int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; + + if (expectEXT(extend_lookup, false)) { + TYPE cur2 = TYPE(0); + if (expectEXT(off.x > 0, true)) { + const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0); + cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]); + } + base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; + + /* top-2 became current upon swap */ + TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]); + base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; + } + + /* context, prediction */ + return ivec2(base, predict(cur, VTYPE2(top))); +} + +#else /* RGB */ + +#define LADDR(p) (p) + +ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, + int comp, int sw, uint8_t quant_table_idx, bool extend_lookup) +{ + const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); + sp += off; + + VTYPE3 top = VTYPE3(TYPE(0), + TYPE(0), + TYPE(0)); + if (off.y > 0 && off != ivec2(0, 1)) + top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]); + if (off.y > 0) { + top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]); + top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]); + } + + TYPE cur = TYPE(0); + if (off != ivec2(0, 0)) + cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]); + + int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; + + if (expectEXT(extend_lookup, false)) { + TYPE cur2 = TYPE(0); + if (off.x > 0 && off != ivec2(1, 0)) { + const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); + cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]); + } + base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; + + TYPE top2 = TYPE(0); + if (off.y > 1) + top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]); + base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; + } + + /* context, prediction */ + return ivec2(base, predict(cur, VTYPE2(top))); +} +#endif diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp index fc0175c715723..eb795dcba4526 100644 --- a/libavcodec/vulkan/ffv1_dec.comp +++ b/libavcodec/vulkan/ffv1_dec.comp @@ -20,93 +20,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef RGB -#define LADDR(p) (p) -#else -#define RGB_LINECACHE 2 -#define RGB_LBUF (RGB_LINECACHE - 1) -#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF))) -#endif - -#ifdef RGB -ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) -{ - const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0); - - /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */ - VTYPE3 top = VTYPE3(TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[0]), - TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(0, -1)))[0]), - TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[0])); - - /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must - * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous - * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */ - TYPE cur = TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[0]); - - int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; - - if (expectEXT(extend_lookup[quant_table_idx] > 0, false)) { - TYPE cur2 = TYPE(0); - if (expectEXT(off.x > 0, true)) { - const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0); - cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]); - } - base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; - - /* top-2 became current upon swap */ - TYPE top2 = TYPE(imageLoad(dec[p], sp + LADDR(off))[0]); - base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; - } - - /* context, prediction */ - return ivec2(base, predict(cur, VTYPE2(top))); -} -#else -ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) -{ - const ivec2 yoff_border1 = off.x == 0 ? 
ivec2(1, -1) : ivec2(0, 0); - sp += off; - - VTYPE3 top = VTYPE3(TYPE(0), - TYPE(0), - TYPE(0)); - if (off.y > 0 && off != ivec2(0, 1)) - top[0] = TYPE(imageLoad(dec[p], sp + ivec2(-1, -1) + yoff_border1)[0]); - if (off.y > 0) { - top[1] = TYPE(imageLoad(dec[p], sp + ivec2(0, -1))[0]); - top[2] = TYPE(imageLoad(dec[p], sp + ivec2(min(1, sw - off.x - 1), -1))[0]); - } - - TYPE cur = TYPE(0); - if (off != ivec2(0, 0)) - cur = TYPE(imageLoad(dec[p], sp + ivec2(-1, 0) + yoff_border1)[0]); - - int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; - - if ((quant_table[quant_table_idx][3][127] != 0) || - (quant_table[quant_table_idx][4][127] != 0)) { - TYPE cur2 = TYPE(0); - if (off.x > 0 && off != ivec2(1, 0)) { - const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0); - cur2 = TYPE(imageLoad(dec[p], sp + ivec2(-2, 0) + yoff_border2)[0]); - } - base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; - - TYPE top2 = TYPE(0); - if (off.y > 1) - top2 = TYPE(imageLoad(dec[p], sp + ivec2(0, -2))[0]); - base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; - } - - /* context, prediction */ - return ivec2(base, predict(cur, VTYPE2(top))); -} -#endif - #ifndef GOLOMB #ifdef CACHED_SYMBOL_READER shared uint8_t state[CONTEXT_SIZE]; @@ -143,6 +56,11 @@ int get_isymbol(inout RangeCoder c, uint state_off) void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits) { +#ifdef CACHED_SYMBOL_READER + if (gl_LocalInvocationID.x > 0) + return; +#endif + #ifndef RGB if (p > 0 && p < 3) { w >>= chroma_shift.x; @@ -171,8 +89,8 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, #endif for (int x = 0; x < w; x++) { - ivec2 pr = get_pred(sp, ivec2(x, y), p, w, - quant_table_idx); + ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, + quant_table_idx, extend_lookup[quant_table_idx] > 0); uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]); #ifdef CACHED_SYMBOL_READER @@ -192,6 +110,8 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, #ifdef CACHED_SYMBOL_READER } + + barrier(); sb.v = state[gl_LocalInvocationID.x]; #endif } @@ -216,10 +136,11 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, for (int x = 0; x < w; x++) { ivec2 pos = sp + ivec2(x, y); int diff; - ivec2 pr = get_pred(sp, ivec2(x, y), p, w, - quant_table_idx); + ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, + quant_table_idx, extend_lookup[quant_table_idx] > 0); - VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*abs(pr[0])); + uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]); + VlcState sb = VlcState(uint64_t(slice_state) + context_off); if (pr[0] == 0 && run_mode == 0) run_mode = 1; @@ -305,7 +226,6 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) void decode_slice(inout SliceContext sc, const uint slice_idx) { - int run_index = 0; int w = sc.slice_dim.x; ivec2 sp = sc.slice_pos; @@ -322,8 +242,6 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) /* PCM coding */ #ifndef GOLOMB if (sc.slice_coding_mode == 1) { - if (gl_LocalInvocationID.x > 0) - return; #ifndef RGB for (int p = 0; p < planes; p++) { int h = sc.slice_dim.y; @@ -355,11 +273,13 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) if (p > 0 && p < 3) h >>= chroma_shift.y; + int run_index = 0; for (int 
y = 0; y < h; y++) decode_line(sc, sp, w, y, p, bits, slice_state_off[p], quant_table_idx[p], run_index); } #else + int run_index = 0; for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) decode_line(sc, sp, w, y, p, bits, @@ -375,4 +295,8 @@ void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; decode_slice(slice_ctx[slice_idx], slice_idx); + + uint32_t status = corrupt ? uint32_t(corrupt) : overread; + if (status != 0) + slice_status[2*slice_idx + 1] = status; } diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp index a27a878927a72..671f28e7e75d5 100644 --- a/libavcodec/vulkan/ffv1_dec_setup.comp +++ b/libavcodec/vulkan/ffv1_dec_setup.comp @@ -133,6 +133,8 @@ void main(void) for (int i = 0; i < slice_size; i++) crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8); - slice_crc_mismatch[slice_idx] = crc; + slice_status[2*slice_idx + 0] = crc; } + + slice_status[2*slice_idx + 1] = corrupt ? uint32_t(corrupt) : overread; } diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp index 4b851fd7116bf..78372f5b3a76d 100644 --- a/libavcodec/vulkan/ffv1_enc.comp +++ b/libavcodec/vulkan/ffv1_enc.comp @@ -20,12 +20,226 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef GOLOMB +#ifdef CACHED_SYMBOL_READER +shared uint8_t state[CONTEXT_SIZE]; +#define WRITE(c, off, val) put_rac_direct(c, state[off], val) +#else +#define WRITE(c, off, val) put_rac(c, uint64_t(slice_state) + (state_off + off), val) +#endif + +/* Note - only handles signed values */ +void put_symbol(inout RangeCoder c, uint state_off, int v) +{ + bool is_nil = (v == 0); + WRITE(c, 0, is_nil); + if (is_nil) + return; + + const int a = abs(v); + const int e = findMSB(a); + + for (int i = 0; i < e; i++) + WRITE(c, 1 + min(i, 9), true); + WRITE(c, 1 + min(e, 9), false); + + for (int i = e - 1; i >= 0; i--) + WRITE(c, 22 + min(i, 9), bool(bitfieldExtract(a, i, 1))); + + WRITE(c, 22 - 11 + min(e, 10), v < 0); +} + +void encode_line_pcm(inout SliceContext sc, readonly uimage2D img, + ivec2 sp, int y, int p, int comp, int bits) +{ + int w = sc.slice_dim.x; + +#ifdef CACHED_SYMBOL_READER + if (gl_LocalInvocationID.x > 0) + return; +#endif + +#ifndef RGB + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } +#endif + + for (int x = 0; x < w; x++) { + uint v = imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]; + for (int i = (bits - 1); i >= 0; i--) + put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1))); + } +} + +void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off, + ivec2 sp, int y, int p, int comp, int bits, + uint8_t quant_table_idx, const int run_index) +{ + int w = sc.slice_dim.x; + +#ifndef RGB + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } +#endif + + for (int x = 0; x < w; x++) { + ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w, + quant_table_idx, extend_lookup[quant_table_idx] > 0); + d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1]; + + if (d[0] < 0) + d = -d; + + d[1] = fold(d[1], bits); + + uint context_off = state_off + CONTEXT_SIZE*d[0]; +#ifdef CACHED_SYMBOL_READER + u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x); + state[gl_LocalInvocationID.x] = sb.v; + barrier(); + if (gl_LocalInvocationID.x == 0) +#endif + + put_symbol(sc.c, context_off, d[1]); + +#ifdef CACHED_SYMBOL_READER + barrier(); + sb.v = state[gl_LocalInvocationID.x]; 
+#endif + } +} + +#else /* GOLOMB */ + +void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off, + ivec2 sp, int y, int p, int comp, int bits, + uint8_t quant_table_idx, inout int run_index) +{ + int w = sc.slice_dim.x; + +#ifndef RGB + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } +#endif + + int run_count = 0; + bool run_mode = false; + + for (int x = 0; x < w; x++) { + ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w, + quant_table_idx, extend_lookup[quant_table_idx] > 0); + d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1]; + + if (d[0] < 0) + d = -d; + + d[1] = fold(d[1], bits); + + if (d[0] == 0) + run_mode = true; + + if (run_mode) { + if (d[1] != 0) { + /* A very unlikely loop */ + while (run_count >= 1 << log2_run[run_index]) { + run_count -= 1 << log2_run[run_index]; + run_index++; + put_bits(sc.pb, 1, 1); + } + + put_bits(sc.pb, 1 + log2_run[run_index], run_count); + if (run_index != 0) + run_index--; + run_count = 0; + run_mode = false; + if (d[1] > 0) + d[1]--; + } else { + run_count++; + } + } + + if (!run_mode) { + VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*d[0]); + Symbol sym = get_vlc_symbol(sb, d[1], bits); + put_bits(sc.pb, sym.bits, sym.val); + } + } + + if (run_mode) { + while (run_count >= (1 << log2_run[run_index])) { + run_count -= 1 << log2_run[run_index]; + run_index++; + put_bits(sc.pb, 1, 1); + } + + if (run_count > 0) + put_bits(sc.pb, 1, 1); + } +} +#endif + +#ifdef RGB +ivec4 load_components(ivec2 pos) +{ + ivec4 pix = ivec4(imageLoad(src[0], pos)); + if (planar_rgb != 0) { + for (int i = 1; i < (3 + transparency); i++) + pix[i] = int(imageLoad(src[i], pos)[0]); + } + + return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]], + pix[fmt_lut[2]], pix[fmt_lut[3]]); +} + +void transform_sample(inout ivec4 pix, ivec2 rct_coef) +{ + pix.b -= pix.g; + pix.r -= pix.g; + pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; + pix.b += rct_offset; + pix.r += rct_offset; +} + +void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) +{ + for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { + ivec2 lpos = sp + LADDR(ivec2(x, y)); + ivec2 pos = sc.slice_pos + ivec2(x, y); + + ivec4 pix = load_components(pos); + + if (expectEXT(apply_rct, true)) + transform_sample(pix, sc.slice_rct_coef); + + imageStore(tmp, lpos, pix); + } +} +#endif + void encode_slice(inout SliceContext sc, const uint slice_idx) { + ivec2 sp = sc.slice_pos; + +#ifndef RGB int bits = bits_per_raw_sample; +#else + int bits = 9; + if (bits != 8 || sc.slice_coding_mode != 0) + bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1); + + sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE; +#endif #ifndef GOLOMB if (sc.slice_coding_mode == 1) { +#ifndef RGB for (int c = 0; c < components; c++) { int h = sc.slice_dim.y; @@ -37,14 +251,26 @@ void encode_slice(inout SliceContext sc, const uint slice_idx) int comp = c - p; for (int y = 0; y < h; y++) - encode_line_pcm(sc, y, p, comp, bits); + encode_line_pcm(sc, src[p], sp, y, p, comp, bits); + } +#else + for (int y = 0; y < sc.slice_dim.y; y++) { + preload_rgb(sc, sp, sc.slice_dim.x, y, false); + + encode_line_pcm(sc, tmp, sp, y, 0, 1, bits); + encode_line_pcm(sc, tmp, sp, y, 0, 2, bits); + encode_line_pcm(sc, tmp, sp, y, 0, 0, bits); + if (transparency == 1) + encode_line_pcm(sc, tmp, sp, y, 0, 3, bits); } +#endif } else #endif { - uint64_t slice_state_off = uint64_t(slice_state) + - slice_idx*plane_state_size*codec_planes; + u8vec4 
quant_table_idx = sc.quant_table_idx.xyyz; + u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; +#ifndef RGB for (int c = 0; c < components; c++) { int run_index = 0; @@ -56,19 +282,77 @@ void encode_slice(inout SliceContext sc, const uint slice_idx) int comp = c - p; for (int y = 0; y < h; y++) - encode_line(sc, slice_state_off, y, p, comp, bits, run_index); + encode_line(sc, src[p], slice_state_off[c], sp, y, p, + comp, bits, quant_table_idx[c], run_index); + } +#else + int run_index = 0; + for (int y = 0; y < sc.slice_dim.y; y++) { + preload_rgb(sc, sp, sc.slice_dim.x, y, true); - /* For the second chroma plane, reuse the first plane's state */ - if (c != 1) - slice_state_off += plane_state_size; + encode_line(sc, tmp, slice_state_off[0], + sp, y, 0, 1, bits, quant_table_idx[0], run_index); + encode_line(sc, tmp, slice_state_off[1], + sp, y, 0, 2, bits, quant_table_idx[1], run_index); + encode_line(sc, tmp, slice_state_off[2], + sp, y, 0, 0, bits, quant_table_idx[2], run_index); + if (transparency == 1) + encode_line(sc, tmp, slice_state_off[3], + sp, y, 0, 3, bits, quant_table_idx[3], run_index); } +#endif + } +} + +void finalize_slice(inout SliceContext sc, const uint slice_idx) +{ +#ifdef CACHED_SYMBOL_READER + if (gl_LocalInvocationID.x > 0) + return; +#endif + +#ifdef GOLOMB + uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb); +#else + uint32_t enc_len = rac_terminate(sc.c); +#endif + + u8buf bs = u8buf(sc.c.bytestream_start); + + /* Append slice length */ + u8vec4 enc_len_p = unpack8(enc_len); + bs[enc_len + 0].v = enc_len_p.z; + bs[enc_len + 1].v = enc_len_p.y; + bs[enc_len + 2].v = enc_len_p.x; + enc_len += 3; + + /* Calculate and write CRC */ + if (ec != 0) { + bs[enc_len].v = uint8_t(0); + enc_len++; + + uint32_t crc = crcref; + for (int i = 0; i < enc_len; i++) + crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8); + + if (crcref != 0x00000000) + crc ^= 0x8CD88196; + + u8vec4 crc_p = unpack8(crc); + bs[enc_len + 0].v = crc_p.x; + bs[enc_len + 1].v = crc_p.y; + bs[enc_len + 2].v = crc_p.z; + bs[enc_len + 3].v = crc_p.w; + enc_len += 4; } - finalize_slice(sc, slice_idx); + slice_results[slice_idx*2 + 0] = enc_len; + slice_results[slice_idx*2 + 1] = uint64_t(bs) - uint64_t(out_data); } void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; encode_slice(slice_ctx[slice_idx], slice_idx); + finalize_slice(slice_ctx[slice_idx], slice_idx); } diff --git a/libavcodec/vulkan/ffv1_enc_ac.comp b/libavcodec/vulkan/ffv1_enc_ac.comp deleted file mode 100644 index 0bbf58c5ddd04..0000000000000 --- a/libavcodec/vulkan/ffv1_enc_ac.comp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2024 Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
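/*
 * A minimal C sketch, not part of the patch itself: the slice footer that
 * finalize_slice() emits is the coded payload followed by a 3-byte
 * big-endian payload size and, when error correction is enabled, one status
 * byte plus a little-endian CRC-32 computed over everything preceding it.
 * parse_slice_footer() is an illustrative helper, not an FFmpeg API.
 */
#include <stddef.h>
#include <stdint.h>

typedef struct SliceFooter {
    uint32_t payload_size; /* bytes of coded data, footer excluded */
    uint8_t  status;       /* written as 0 by the encoder */
    uint32_t crc;          /* only present when ec != 0 */
} SliceFooter;

static SliceFooter parse_slice_footer(const uint8_t *slice, size_t total, int ec)
{
    SliceFooter f = { 0 };
    size_t tail = total - 3 - (ec ? 5 : 0);

    f.payload_size = (uint32_t)slice[tail]     << 16 |
                     (uint32_t)slice[tail + 1] <<  8 |
                      slice[tail + 2];
    if (ec) {
        f.status = slice[tail + 3];
        f.crc    = (uint32_t)slice[tail + 4]       |
                   (uint32_t)slice[tail + 5] <<  8 |
                   (uint32_t)slice[tail + 6] << 16 |
                   (uint32_t)slice[tail + 7] << 24;
    }
    return f;
}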
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -void put_rac(inout RangeCoder c, uint64_t state, bool bit) -{ - put_rac_norenorm(c, state, bit); - if (c.range < 0x100) - renorm_encoder(c); -} - -/* Note - only handles signed values */ -void put_symbol(inout RangeCoder c, uint64_t state, int v) -{ - bool is_nil = (v == 0); - put_rac(c, state, is_nil); - if (is_nil) - return; - - const int a = abs(v); - const int e = findMSB(a); - - state += 1; - for (int i = 0; i < e; i++) - put_rac(c, state + min(i, 9), true); - put_rac(c, state + min(e, 9), false); - - state += 21; - for (int i = e - 1; i >= 0; i--) - put_rac(c, state + min(i, 9), bool(bitfieldExtract(a, i, 1))); - - put_rac(c, state - 11 + min(e, 10), v < 0); -} - -void encode_line_pcm(inout SliceContext sc, int y, int p, int comp, - int bits) -{ - ivec2 sp = sc.slice_pos; - int w = sc.slice_dim.x; - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } - - for (int x = 0; x < w; x++) { - uint v = imageLoad(src[p], (sp + ivec2(x, y)))[comp]; - for (int i = (bits - 1); i >= 0; i--) - put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1))); - } -} - -void encode_line(inout SliceContext sc, uint64_t state, - int y, int p, int comp, int bits, const int run_index) -{ - ivec2 sp = sc.slice_pos; - - int w = sc.slice_dim.x; - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } - - for (int x = 0; x < w; x++) { - const ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits); - put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]); - } -} diff --git a/libavcodec/vulkan/ffv1_enc_common.comp b/libavcodec/vulkan/ffv1_enc_common.comp deleted file mode 100644 index 62c0624b0e1f0..0000000000000 --- a/libavcodec/vulkan/ffv1_enc_common.comp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits) -{ - const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); - const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); - - TYPE top2 = TYPE(0); - if (off.y > 1) - top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]); - - VTYPE3 top = VTYPE3(TYPE(0), - TYPE(0), - TYPE(0)); - if (off.y > 0 && off != ivec2(0, 1)) - top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]); - if (off.y > 0) { - top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]); - top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); - } - - VTYPE3 cur = VTYPE3(TYPE(0), - TYPE(0), - imageLoad(src[p], pos)[comp]); - if (off.x > 0 && off != ivec2(1, 0)) - cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]); - if (off != ivec2(0, 0)) - cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]); - - /* context, diff */ - ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model), - cur[2] - predict(cur[1], VTYPE2(top))); - - if (d[0] < 0) - d = -d; - - d[1] = fold(d[1], bits); - - return d; -} - -void finalize_slice(inout SliceContext sc, const uint slice_idx) -{ -#ifdef GOLOMB - uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb); -#else - uint32_t enc_len = rac_terminate(sc.c); -#endif - - u8buf bs = u8buf(sc.c.bytestream_start); - - /* Append slice length */ - u8vec4 enc_len_p = unpack8(enc_len); - bs[enc_len + 0].v = enc_len_p.z; - bs[enc_len + 1].v = enc_len_p.y; - bs[enc_len + 2].v = enc_len_p.x; - enc_len += 3; - - /* Calculate and write CRC */ - if (ec != 0) { - bs[enc_len].v = uint8_t(0); - enc_len++; - - uint32_t crc = crcref; - for (int i = 0; i < enc_len; i++) - crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8); - - if (crcref != 0x00000000) - crc ^= 0x8CD88196; - - u8vec4 crc_p = unpack8(crc); - bs[enc_len + 0].v = crc_p.x; - bs[enc_len + 1].v = crc_p.y; - bs[enc_len + 2].v = crc_p.z; - bs[enc_len + 3].v = crc_p.w; - enc_len += 4; - } - - slice_results[slice_idx*2 + 0] = enc_len; - slice_results[slice_idx*2 + 1] = uint64_t(bs) - uint64_t(out_data); -} diff --git a/libavcodec/vulkan/ffv1_enc_rgb.comp b/libavcodec/vulkan/ffv1_enc_rgb.comp deleted file mode 100644 index c176d94e8b246..0000000000000 --- a/libavcodec/vulkan/ffv1_enc_rgb.comp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -void encode_slice_rgb(inout SliceContext sc, const uint slice_idx) -{ - int bits = 9; - if (bits != 8 || sc.slice_coding_mode != 0) - bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1); - - int run_index = 0; - -#ifndef GOLOMB - if (sc.slice_coding_mode == 1) { - if (transparency == 1) { - for (int y = 0; y < sc.slice_dim.y; y++) { - encode_line_pcm(sc, y, 0, 1, bits); - encode_line_pcm(sc, y, 0, 2, bits); - encode_line_pcm(sc, y, 0, 0, bits); - encode_line_pcm(sc, y, 0, 3, bits); - } - } else { - for (int y = 0; y < sc.slice_dim.y; y++) { - encode_line_pcm(sc, y, 0, 1, bits); - encode_line_pcm(sc, y, 0, 2, bits); - encode_line_pcm(sc, y, 0, 0, bits); - } - } - } else -#endif - { - uint64_t slice_state_off = uint64_t(slice_state) + - slice_idx*plane_state_size*codec_planes; - - if (transparency == 1) { - for (int y = 0; y < sc.slice_dim.y; y++) { - encode_line(sc, slice_state_off + plane_state_size*0, - y, 0, 1, bits, run_index); - encode_line(sc, slice_state_off + plane_state_size*1, - y, 0, 2, bits, run_index); - encode_line(sc, slice_state_off + plane_state_size*1, - y, 0, 0, bits, run_index); - encode_line(sc, slice_state_off + plane_state_size*2, - y, 0, 3, bits, run_index); - } - } else { - for (int y = 0; y < sc.slice_dim.y; y++) { - encode_line(sc, slice_state_off + plane_state_size*0, - y, 0, 1, bits, run_index); - encode_line(sc, slice_state_off + plane_state_size*1, - y, 0, 2, bits, run_index); - encode_line(sc, slice_state_off + plane_state_size*1, - y, 0, 0, bits, run_index); - } - } - } - - finalize_slice(sc, slice_idx); -} - -void main(void) -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - encode_slice_rgb(slice_ctx[slice_idx], slice_idx); -} diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp index 44c13404d8545..5f8e6704b0aaa 100644 --- a/libavcodec/vulkan/ffv1_enc_setup.comp +++ b/libavcodec/vulkan/ffv1_enc_setup.comp @@ -20,7 +20,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -void init_slice(out SliceContext sc, const uint slice_idx) +uint8_t state[CONTEXT_SIZE]; + +void init_slice(inout SliceContext sc, const uint slice_idx) { /* Set coordinates */ uvec2 img_size = imageSize(src[0]); @@ -35,77 +37,66 @@ void init_slice(out SliceContext sc, const uint slice_idx) sc.slice_pos = ivec2(sxs, sys); sc.slice_dim = ivec2(sxe - sxs, sye - sys); - sc.slice_rct_coef = ivec2(1, 1); sc.slice_coding_mode = int(force_pcm == 1); sc.slice_reset_contexts = sc.slice_coding_mode == 1; sc.quant_table_idx = u8vec3(context_model); + if ((rct_search == 0) || (sc.slice_coding_mode == 1)) + sc.slice_rct_coef = ivec2(1, 1); + rac_init(sc.c, OFFBUF(u8buf, out_data, slice_idx * slice_size_max), slice_size_max); } -void put_rac_full(inout RangeCoder c, uint64_t state, bool bit) -{ - put_rac_norenorm(c, state, bit); - if (c.range < 0x100) - renorm_encoder_full(c); -} - -void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v) +void put_usymbol(inout RangeCoder c, uint v) { bool is_nil = (v == 0); - put_rac_full(c, state, is_nil); + put_rac_direct(c, state[0], is_nil); if (is_nil) return; const int e = findMSB(v); - state += 1; for (int i = 0; i < e; i++) - put_rac_full(c, state + min(i, 9), true); - put_rac_full(c, state + min(e, 9), false); 
+ put_rac_direct(c, state[1 + min(i, 9)], true); + put_rac_direct(c, state[1 + min(e, 9)], false); - state += 21; for (int i = e - 1; i >= 0; i--) - put_rac_full(c, state + min(i, 9), bool(bitfieldExtract(v, i, 1))); + put_rac_direct(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1))); } -void write_slice_header(inout SliceContext sc, uint64_t state) +void write_slice_header(inout SliceContext sc) { - u8buf sb = u8buf(state); - [[unroll]] for (int i = 0; i < CONTEXT_SIZE; i++) - sb[i].v = uint8_t(128); + state[i] = uint8_t(128); - put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x); - put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y); - put_symbol_unsigned(sc.c, state, 0); - put_symbol_unsigned(sc.c, state, 0); + put_usymbol(sc.c, gl_WorkGroupID.x); + put_usymbol(sc.c, gl_WorkGroupID.y); + put_usymbol(sc.c, 0); + put_usymbol(sc.c, 0); for (int i = 0; i < codec_planes; i++) - put_symbol_unsigned(sc.c, state, sc.quant_table_idx[i]); + put_usymbol(sc.c, sc.quant_table_idx[i]); - put_symbol_unsigned(sc.c, state, pic_mode); - put_symbol_unsigned(sc.c, state, sar.x); - put_symbol_unsigned(sc.c, state, sar.y); + put_usymbol(sc.c, pic_mode); + put_usymbol(sc.c, sar.x); + put_usymbol(sc.c, sar.y); if (version >= 4) { - put_rac_full(sc.c, state, sc.slice_reset_contexts); - put_symbol_unsigned(sc.c, state, sc.slice_coding_mode); + put_rac_direct(sc.c, state[0], sc.slice_reset_contexts); + put_usymbol(sc.c, sc.slice_coding_mode); if (sc.slice_coding_mode != 1 && colorspace == 1) { - put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y); - put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x); + put_usymbol(sc.c, sc.slice_rct_coef.y); + put_usymbol(sc.c, sc.slice_rct_coef.x); } } } -void write_frame_header(inout SliceContext sc, uint64_t state) +void write_frame_header(inout SliceContext sc) { - u8buf sb = u8buf(state); - sb.v = uint8_t(128); - put_rac_full(sc.c, state, bool(key_frame)); + put_rac_equi(sc.c, bool(key_frame)); } #ifdef GOLOMB @@ -122,16 +113,12 @@ void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - /* Write slice data */ - uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; - u8buf sb = u8buf(scratch_state); - init_slice(slice_ctx[slice_idx], slice_idx); if (slice_idx == 0) - write_frame_header(slice_ctx[slice_idx], scratch_state); + write_frame_header(slice_ctx[slice_idx]); - write_slice_header(slice_ctx[slice_idx], scratch_state); + write_slice_header(slice_ctx[slice_idx]); #ifdef GOLOMB init_golomb(slice_ctx[slice_idx]); diff --git a/libavcodec/vulkan/ffv1_enc_vlc.comp b/libavcodec/vulkan/ffv1_enc_vlc.comp deleted file mode 100644 index 7a4d39e307f28..0000000000000 --- a/libavcodec/vulkan/ffv1_enc_vlc.comp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -struct RLEState { - int count; - int diff; - int index; - bool mode; -}; - -void calc_new_state(inout RLEState state, int context) -{ - if (context == 0) - state.mode = false; - - if (!state.mode) - return; - - if (state.diff > 0) { - while (state.count >= (1 << log2_run[state.index])) { - state.count -= 1 << log2_run[state.index]; - state.index++; - } - if (state.index > 0) - state.index--; - state.count = 0; - state.mode = false; - if (state.diff > 0) - state.diff--; - } else { - state.count++; - } -} - -void encode_line(inout SliceContext sc, uint64_t state, - int y, int p, int comp, int bits, inout int run_index) -{ - ivec2 sp = sc.slice_pos; - - int w = sc.slice_dim.x; - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } - - int run_count = 0; - bool run_mode = false; - - for (int x = 0; x < w; x++) { - ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits); - - if (d[0] == 0) - run_mode = true; - - if (run_mode) { - if (d[1] != 0) { - /* A very unlikely loop */ - while (run_count >= 1 << log2_run[run_index]) { - run_count -= 1 << log2_run[run_index]; - run_index++; - put_bits(sc.pb, 1, 1); - } - - put_bits(sc.pb, 1 + log2_run[run_index], run_count); - if (run_index != 0) - run_index--; - run_count = 0; - run_mode = false; - if (d[1] > 0) - d[1]--; - } else { - run_count++; - } - } - - if (!run_mode) { - VlcState sb = VlcState(state + VLC_STATE_SIZE*d[0]); - Symbol sym = get_vlc_symbol(sb, d[1], bits); - put_bits(sc.pb, sym.bits, sym.val); - } - } - - if (run_mode) { - while (run_count >= (1 << log2_run[run_index])) { - run_count -= 1 << log2_run[run_index]; - run_index++; - put_bits(sc.pb, 1, 1); - } - - if (run_count > 0) - put_bits(sc.pb, 1, 1); - } -} diff --git a/libavcodec/vulkan/ffv1_rct_search.comp b/libavcodec/vulkan/ffv1_rct_search.comp new file mode 100644 index 0000000000000..055bde46c40a5 --- /dev/null +++ b/libavcodec/vulkan/ffv1_rct_search.comp @@ -0,0 +1,139 @@ +/* + * FFv1 codec + * + * Copyright (c) 2024 Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +ivec3 load_components(ivec2 pos) +{ + ivec3 pix = ivec3(imageLoad(src[0], pos)); + if (planar_rgb != 0) { + for (int i = 1; i < 3; i++) + pix[i] = int(imageLoad(src[i], pos)[0]); + } + + return ivec3(pix[fmt_lut[0]], pix[fmt_lut[1]], pix[fmt_lut[2]]); +} + +#define NUM_CHECKS 15 +const ivec2 rct_y_coeff[NUM_CHECKS] = { + ivec2(0, 0), // 4G + + ivec2(0, 1), // 3G + B + ivec2(1, 0), // R + 3G + ivec2(1, 1), // R + 2G + B + + ivec2(0, 2), // 2G + 2B + ivec2(2, 0), // 2R + 2G + ivec2(2, 2), // 2R + 2B + + ivec2(0, 3), // 1G + 3B + ivec2(3, 0), // 3R + 1G + + ivec2(0, 4), // 4B + ivec2(4, 0), // 4R + + ivec2(1, 2), // R + G + 2B + ivec2(2, 1), // 2R + G + B + + ivec2(3, 1), // 3R + B + ivec2(1, 3), // R + 3B +}; + +shared ivec3 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { }; + +ivec3 transform_sample(ivec3 pix, ivec2 rct_coef) +{ + pix.b -= pix.g; + pix.r -= pix.g; + pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; + pix.b += rct_offset; + pix.r += rct_offset; + return pix; +} + +uint get_dist(ivec3 cur) +{ + ivec3 LL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 1]; + ivec3 TL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 0]; + ivec3 TT = pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 0]; + + ivec3 pred = ivec3(predict(LL.r, ivec2(TL.r, TT.r)), + predict(LL.g, ivec2(TL.g, TT.g)), + predict(LL.b, ivec2(TL.b, TT.b))); + + uvec3 c = abs(pred - cur); + return mid_pred(c.r, c.g, c.b); +} + +shared uint score_cols[gl_WorkGroupSize.y] = { }; +shared uint score_mode[16] = { }; + +void process(ivec2 pos) +{ + ivec3 pix = load_components(pos); + + for (int i = 0; i < NUM_CHECKS; i++) { + ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]); + pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] = tx_pix; + memoryBarrierShared(); + + uint dist = get_dist(tx_pix); + atomicAdd(score_mode[i], dist); + } +} + +void coeff_search(inout SliceContext sc) +{ + uvec2 img_size = imageSize(src[0]); + uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0, + gl_NumWorkGroups.x, 0); + uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1, + gl_NumWorkGroups.x, 0); + uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0, + gl_NumWorkGroups.y, 0); + uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1, + gl_NumWorkGroups.y, 0); + + for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) { + for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) { + process(ivec2(x, y)); + } + } + + if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) { + uint min_score = 0xFFFFFFFF; + uint min_idx = 3; + for (int i = 0; i < NUM_CHECKS; i++) { + if (score_mode[i] < min_score) { + min_score = score_mode[i]; + min_idx = i; + } + } + sc.slice_rct_coef = rct_y_coeff[min_idx]; + } +} + +void main(void) +{ + if (force_pcm == 1) + return; + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + coeff_search(slice_ctx[slice_idx]); +} diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 256b5f0e79742..8687b8bc3cf60 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -31,8 +31,9 @@ struct RangeCoder { uint8_t outstanding_byte; }; +#ifdef FULL_RENORM /* Full renorm version that can 
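/*
 * A minimal C sketch, not part of the patch itself: the parametrized
 * reversible colour transform that the search shader scores. With
 * coefficients (cx, cy) the luma plane becomes
 * G + ((cx*(R - G) + cy*(B - G)) >> 2); (1, 1) is the classic RCT and
 * (0, 0) keeps plain green. rct_offset is the bias that keeps the two
 * difference planes non-negative, and the search keeps the pair whose
 * median-predictor residuals are smallest.
 */
typedef struct { int r, g, b; } PixRCT;

static PixRCT rct_forward(PixRCT p, int cx, int cy, int rct_offset)
{
    p.b -= p.g;
    p.r -= p.g;
    p.g += (p.r * cx + p.b * cy) >> 2;
    p.b += rct_offset;
    p.r += rct_offset;
    return p;
}

static PixRCT rct_inverse(PixRCT p, int cx, int cy, int rct_offset)
{
    p.b -= rct_offset;
    p.r -= rct_offset;
    p.g -= (p.r * cx + p.b * cy) >> 2;
    p.r += p.g;
    p.b += p.g;
    return p;
}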
handle outstanding_byte == 0xFF */ -void renorm_encoder_full(inout RangeCoder c) +void renorm_encoder(inout RangeCoder c) { int bs_cnt = 0; u8buf bytestream = u8buf(c.bytestream); @@ -62,6 +63,8 @@ void renorm_encoder_full(inout RangeCoder c) c.low = bitfieldInsert(0, c.low, 8, 8); } +#else + /* Cannot deal with outstanding_byte == -1 in the name of speed */ void renorm_encoder(inout RangeCoder c) { @@ -90,59 +93,40 @@ void renorm_encoder(inout RangeCoder c) for (int i = 1; i < oc; i++) bs[i].v = fill; } +#endif -void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit) +void put_rac_internal(inout RangeCoder c, const int range1, bool bit) { - u8buf sb = u8buf(state); - uint val = uint(sb.v); - int range1 = uint16_t((c.range * val) >> 8); - #ifdef DEBUG - if (val == 0) - debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(sb)); if (range1 >= c.range) debugPrintfEXT("Error: range1 >= c.range"); if (range1 <= 0) debugPrintfEXT("Error: range1 <= 0"); #endif - int diff = c.range - range1; - if (bit) { - c.low += diff; - c.range = range1; - } else { - c.range = diff; - } + int ranged = c.range - range1; + c.low += bit ? ranged : 0; + c.range = bit ? range1 : ranged; - sb.v = zero_one_state[(uint(bit) << 8) + val]; + if (expectEXT(c.range < 0x100, false)) + renorm_encoder(c); +} -#ifdef DEBUG - if (sb.v == 0) - debugPrintfEXT("Error: inserted zero state from tab %i idx %i", bit, val); -#endif +void put_rac_direct(inout RangeCoder c, inout uint8_t state, bool bit) +{ + put_rac_internal(c, (c.range * state) >> 8, bit); + state = zero_one_state[(uint(bit) << 8) + state]; +} + +void put_rac(inout RangeCoder c, uint64_t state, bool bit) +{ + put_rac_direct(c, u8buf(state).v, bit); } /* Equiprobable bit */ void put_rac_equi(inout RangeCoder c, bool bit) { - int range1 = c.range >> 1; - -#ifdef DEBUG - if (range1 >= c.range) - debugPrintfEXT("Error: range1 >= c.range"); - if (range1 <= 0) - debugPrintfEXT("Error: range1 <= 0"); -#endif - - if (bit) { - c.low += c.range - range1; - c.range = range1; - } else { - c.range -= range1; - } - - if (expectEXT(c.range < 0x100, false)) - renorm_encoder(c); + put_rac_internal(c, c.range >> 1, bit); } void put_rac_terminate(inout RangeCoder c) @@ -226,11 +210,9 @@ void refill(inout RangeCoder c) } } -bool get_rac_direct(inout RangeCoder c, inout uint8_t state) +bool get_rac_internal(inout RangeCoder c, const int range1) { - int range1 = c.range * state >> 8; int ranged = c.range - range1; - bool bit = c.low >= ranged; c.low -= bit ? ranged : 0; c.range = (bit ? 0 : ranged) + (bit ? range1 : 0); @@ -238,6 +220,12 @@ bool get_rac_direct(inout RangeCoder c, inout uint8_t state) if (expectEXT(c.range < 0x100, false)) refill(c); + return bit; +} + +bool get_rac_direct(inout RangeCoder c, inout uint8_t state) +{ + bool bit = get_rac_internal(c, c.range * state >> 8); state = zero_one_state[state + (bit ? 
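/*
 * A minimal C sketch, not part of the patch itself: the binary range-coder
 * step that put_rac_internal() now shares between put_rac_direct() and
 * put_rac_equi(). The 8-bit probability state splits the current range, the
 * sub-range selected by the coded bit becomes the new range, and the coder
 * renormalizes once the range drops below one byte. The zero_one_state
 * adaptation and the bytestream handling are omitted here.
 */
#include <stdint.h>

typedef struct {
    uint32_t low;
    uint32_t range;
} RangeCoderSketch;

static void put_rac_sketch(RangeCoderSketch *c, uint8_t state, int bit)
{
    uint32_t range1 = (c->range * state) >> 8;  /* sub-range for the set bit */
    uint32_t ranged = c->range - range1;

    if (bit) {
        c->low  += ranged;
        c->range = range1;
    } else {
        c->range = ranged;
    }

    if (c->range < 0x100) {
        /* renorm_encoder() runs here: it emits the top byte of low,
         * handles carries/outstanding bytes, and scales low and range
         * back up by 256 */
    }
}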
256 : 0)]; return bit; } @@ -249,18 +237,5 @@ bool get_rac(inout RangeCoder c, uint64_t state) bool get_rac_equi(inout RangeCoder c) { - int range1 = c.range >> 1; - - c.range -= range1; - - bool bit = c.low >= c.range; - if (bit) { - c.low -= c.range; - c.range = range1; - } - - if (expectEXT(c.range < 0x100, false)) - refill(c); - - return bit; + return get_rac_internal(c, c.range >> 1); } diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index f1313c840950c..7310ba1547960 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -142,6 +142,7 @@ static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic) vkpic->destroy_image_view = vk->DestroyImageView; vkpic->wait_semaphores = vk->WaitSemaphores; + vkpic->invalidate_memory_ranges = vk->InvalidateMappedMemoryRanges; } int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h index cbd22b3591128..bf6506f280e6b 100644 --- a/libavcodec/vulkan_decode.h +++ b/libavcodec/vulkan_decode.h @@ -114,6 +114,7 @@ typedef struct FFVulkanDecodePicture { /* Vulkan functions needed for destruction, as no other context is guaranteed to exist */ PFN_vkWaitSemaphores wait_semaphores; PFN_vkDestroyImageView destroy_image_view; + PFN_vkInvalidateMappedMemoryRanges invalidate_memory_ranges; } FFVulkanDecodePicture; /** diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index cbde2f319a375..c839f4c3879e1 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -26,6 +26,8 @@ #include "libavutil/vulkan_spirv.h" #include "libavutil/mem.h" +#define RGB_LINECACHE 2 + extern const char *ff_source_common_comp; extern const char *ff_source_rangecoder_comp; extern const char *ff_source_ffv1_vlc_comp; @@ -219,7 +221,7 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx, &fp->slice_status_buf, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, f->slice_count*sizeof(uint32_t), + NULL, 2*f->slice_count*sizeof(uint32_t), VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); if (err < 0) @@ -406,7 +408,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, 1, 2, 0, slice_status, - 0, f->slice_count*sizeof(uint32_t), + 0, 2*f->slice_count*sizeof(uint32_t), VK_FORMAT_UNDEFINED); ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup); @@ -536,10 +538,15 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) 1, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, + 1, 2, 0, + slice_status, + 0, 2*f->slice_count*sizeof(uint32_t), + VK_FORMAT_UNDEFINED); if (is_rgb) ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, f->picture.f, vp->view.out, - 1, 2, + 1, 3, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); @@ -610,6 +617,7 @@ static void define_shared_code(FFVulkanShader *shd, int use32bit) GLSLC(0, #define DECODE ); + av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE); av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK); @@ -697,8 +705,8 @@ static int init_setup_shader(FFV1Context *f, FFVulkanContext *s, .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, .mem_quali = "writeonly", - .buf_content = "uint32_t slice_crc_mismatch", - .buf_elems = f->max_slice_count, + .buf_content = "uint32_t 
slice_status", + .buf_elems = 2*f->max_slice_count, }, }; RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0)); @@ -892,6 +900,14 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, + { + .name = "slice_status_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_quali = "writeonly", + .buf_content = "uint32_t slice_status", + .buf_elems = 2*f->max_slice_count, + }, { .name = "dst", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, @@ -903,7 +919,7 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2 + rgb, 0, 0)); + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3 + rgb, 0, 0)); GLSLD(ff_source_ffv1_dec_comp); @@ -936,7 +952,7 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s, frames_ctx->format = AV_PIX_FMT_VULKAN; frames_ctx->sw_format = sw_format; frames_ctx->width = s->frames->width; - frames_ctx->height = f->num_v_slices*2; + frames_ctx->height = f->num_v_slices*RGB_LINECACHE; vk_frames = frames_ctx->hwctx; vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; @@ -1111,22 +1127,35 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) { - AVHWDeviceContext *hwctx = _hwctx.nc; + AVHWDeviceContext *dev_ctx = _hwctx.nc; + AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; FFv1VulkanDecodePicture *fp = data; FFVulkanDecodePicture *vp = &fp->vp; + FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data; - ff_vk_decode_free_frame(hwctx, vp); + ff_vk_decode_free_frame(dev_ctx, vp); + + /* Invalidate slice/output data if needed */ + if (!(slice_status->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange invalidate_data = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = slice_status->mem, + .offset = 0, + .size = 2*fp->slice_num*sizeof(uint32_t), + }; + vp->invalidate_memory_ranges(hwctx->act_dev, + 1, &invalidate_data); + } - if (fp->crc_checked) { - FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data; - for (int i = 0; i < fp->slice_num; i++) { - uint32_t crc_res; - crc_res = AV_RN32(slice_status->mapped_mem + i*sizeof(uint32_t)); - if (crc_res != 0) - av_log(hwctx, AV_LOG_ERROR, "CRC mismatch in slice %i, res: 0x%x\n", - i, crc_res); - } + for (int i = 0; i < fp->slice_num; i++) { + uint32_t crc_res = 0; + if (fp->crc_checked) + crc_res = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 0); + uint32_t status = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 4); + if (status || crc_res) + av_log(dev_ctx, AV_LOG_ERROR, "Slice %i status: 0x%x, CRC 0x%x\n", + i, status, crc_res); } av_buffer_unref(&vp->slices_buf); diff --git a/libavcodec/vvc.h b/libavcodec/vvc.h index 92639779c1db4..5490ddb4c81d1 100644 --- a/libavcodec/vvc.h +++ b/libavcodec/vvc.h @@ -154,6 +154,9 @@ enum { // {sps, ph}_num_{ver, hor}_virtual_boundaries should in [0, 3] VVC_MAX_VBS = 3, + + // 8.4.5.3 Decoding process for palette mode - maxNumPalettePredictorSize + VVC_MAX_NUM_PALETTE_PREDICTOR_SIZE = 63 }; #endif /* AVCODEC_VVC_H */ diff --git a/libavcodec/vvc/Makefile b/libavcodec/vvc/Makefile index 6a28d32bc25cc..10125ffc2d3bc 100644 --- a/libavcodec/vvc/Makefile +++ b/libavcodec/vvc/Makefile @@ -14,4 +14,5 @@ OBJS-$(CONFIG_VVC_DECODER) += vvc/dec.o \ vvc/mvs.o \ vvc/ps.o 
\ vvc/refs.o \ + vvc/sei.o \ vvc/thread.o \ diff --git a/libavcodec/vvc/cabac.c b/libavcodec/vvc/cabac.c index 55101448931b2..6847ce59aff10 100644 --- a/libavcodec/vvc/cabac.c +++ b/libavcodec/vvc/cabac.c @@ -928,6 +928,27 @@ static int truncated_binary_decode(VVCLocalContext *lc, const int c_max) return v; } +// 9.3.3.5 k-th order Exp - Golomb binarization process +static int kth_order_egk_decode(CABACContext *c, int k) +{ + int bit = 1; + int value = 0; + int symbol = 0; + + while (bit) { + bit = get_cabac_bypass(c); + value += bit << k++; + } + + if (--k) { + for (int i = 0; i < k; i++) + symbol = (symbol << 1) | get_cabac_bypass(c); + value += symbol; + } + + return value; +} + // 9.3.3.6 Limited k-th order Exp-Golomb binarization process static int limited_kth_order_egk_decode(CABACContext *c, const int k, const int max_pre_ext_len, const int trunc_suffix_len) { @@ -947,6 +968,17 @@ static int limited_kth_order_egk_decode(CABACContext *c, const int k, const int return val; } +// 9.3.3.7 Fixed-length binarization process +static int fixed_length_decode(CABACContext* c, const int len) +{ + int value = 0; + + for (int i = 0; i < len; i++) + value = (value << 1) | get_cabac_bypass(c); + + return value; +} + static av_always_inline void get_left_top(const VVCLocalContext *lc, uint8_t *left, uint8_t *top, const int x0, const int y0, const uint8_t *left_ctx, const uint8_t *top_ctx) @@ -990,11 +1022,7 @@ int ff_vvc_sao_type_idx_decode(VVCLocalContext *lc) int ff_vvc_sao_band_position_decode(VVCLocalContext *lc) { - int value = get_cabac_bypass(&lc->ep->cc); - - for (int i = 0; i < 4; i++) - value = (value << 1) | get_cabac_bypass(&lc->ep->cc); - return value; + return fixed_length_decode(&lc->ep->cc, 5); } int ff_vvc_sao_offset_abs_decode(VVCLocalContext *lc) @@ -1014,9 +1042,7 @@ int ff_vvc_sao_offset_sign_decode(VVCLocalContext *lc) int ff_vvc_sao_eo_class_decode(VVCLocalContext *lc) { - int ret = get_cabac_bypass(&lc->ep->cc) << 1; - ret |= get_cabac_bypass(&lc->ep->cc); - return ret; + return (get_cabac_bypass(&lc->ep->cc) << 1) | get_cabac_bypass(&lc->ep->cc); } int ff_vvc_alf_ctb_flag(VVCLocalContext *lc, const int rx, const int ry, const int c_idx) @@ -1351,6 +1377,58 @@ int ff_vvc_intra_chroma_pred_mode(VVCLocalContext *lc) return (get_cabac_bypass(&lc->ep->cc) << 1) | get_cabac_bypass(&lc->ep->cc); } +int ff_vvc_palette_predictor_run(VVCLocalContext *lc) +{ + return kth_order_egk_decode(&lc->ep->cc, 0); +} + +int ff_vvc_num_signalled_palette_entries(VVCLocalContext *lc) +{ + return kth_order_egk_decode(&lc->ep->cc, 0); +} + +int ff_vvc_new_palette_entries(VVCLocalContext *lc, const int bit_depth) +{ + return fixed_length_decode(&lc->ep->cc, bit_depth); +} + +bool ff_vvc_palette_escape_val_present_flag(VVCLocalContext *lc) +{ + return get_cabac_bypass(&lc->ep->cc); +} + +bool ff_vvc_palette_transpose_flag(VVCLocalContext *lc) +{ + return GET_CABAC(PALETTE_TRANSPOSE_FLAG); +} + +bool ff_vvc_run_copy_flag(VVCLocalContext *lc, const int prev_run_type, const int prev_run_position, const int cur_pos) +{ + uint8_t run_left_lut[] = { 0, 1, 2, 3, 4 }; + uint8_t run_top_lut[] = { 5, 6, 6, 7, 7 }; + + int bin_dist = cur_pos - prev_run_position - 1; + uint8_t *run_lut = prev_run_type == 1 ? run_top_lut : run_left_lut; + uint8_t ctx_inc = bin_dist <= 4 ? 
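/*
 * A minimal C sketch, not part of the patch itself: the k-th order
 * Exp-Golomb bypass binarization read by kth_order_egk_decode() above
 * (clause 9.3.3.5). A unary prefix of n ones contributes (2^n - 1) << k,
 * then n + k suffix bits are appended. read_bypass_bit() stands in for
 * get_cabac_bypass() and is an assumed callback, not a real API.
 */
static int egk_decode(int k, int (*read_bypass_bit)(void *opaque), void *opaque)
{
    int value = 0;
    int bit   = 1;
    int sym   = 0;

    while (bit) {                 /* unary prefix terminated by a 0 */
        bit = read_bypass_bit(opaque);
        value += bit << k++;
    }

    if (--k) {                    /* n + k fixed suffix bits */
        for (int i = 0; i < k; i++)
            sym = (sym << 1) | read_bypass_bit(opaque);
        value += sym;
    }

    return value;
}

/* Example with k = 0: the bypass bits 1 1 0 1 0 decode to
 * (2^2 - 1) + 0b10 = 5. */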
run_lut[bin_dist] : run_lut[4]; + + return GET_CABAC(RUN_COPY_FLAG + ctx_inc); +} + +bool ff_vvc_copy_above_palette_indices_flag(VVCLocalContext *lc) +{ + return GET_CABAC(COPY_ABOVE_PALETTE_INDICES_FLAG); +} + +int ff_vvc_palette_idx_idc(VVCLocalContext *lc, const int max_palette_index, const bool adjust) +{ + return truncated_binary_decode(lc, max_palette_index - adjust); +} + +int ff_vvc_palette_escape_val(VVCLocalContext *lc) +{ + return kth_order_egk_decode(&lc->ep->cc, 5); +} + int ff_vvc_general_merge_flag(VVCLocalContext *lc) { return GET_CABAC(GENERAL_MERGE_FLAG); @@ -1458,12 +1536,7 @@ int ff_vvc_merge_idx(VVCLocalContext *lc) int ff_vvc_merge_gpm_partition_idx(VVCLocalContext *lc) { - int i = 0; - - for (int j = 0; j < 6; j++) - i = (i << 1) | get_cabac_bypass(&lc->ep->cc); - - return i; + return fixed_length_decode(&lc->ep->cc, 6); } int ff_vvc_merge_gpm_idx(VVCLocalContext *lc, const int idx) @@ -1630,6 +1703,11 @@ int ff_vvc_tu_y_coded_flag(VVCLocalContext *lc) return lc->parse.prev_tu_cbf_y; } +int ff_vvc_cu_act_enabled_flag(VVCLocalContext *lc) +{ + return GET_CABAC(CU_ACT_ENABLED_FLAG); +} + int ff_vvc_cu_qp_delta_abs(VVCLocalContext *lc) { int v, i, k; diff --git a/libavcodec/vvc/cabac.h b/libavcodec/vvc/cabac.h index e9bc98e23a510..972890317edfb 100644 --- a/libavcodec/vvc/cabac.h +++ b/libavcodec/vvc/cabac.h @@ -81,6 +81,15 @@ int ff_vvc_intra_luma_mpm_remainder(VVCLocalContext *lc); int ff_vvc_cclm_mode_flag(VVCLocalContext *lc); int ff_vvc_cclm_mode_idx(VVCLocalContext *lc); int ff_vvc_intra_chroma_pred_mode(VVCLocalContext *lc); +int ff_vvc_palette_predictor_run(VVCLocalContext *lc); +int ff_vvc_num_signalled_palette_entries(VVCLocalContext *lc); +int ff_vvc_new_palette_entries(VVCLocalContext *lc, int bit_dpeth); +bool ff_vvc_palette_escape_val_present_flag(VVCLocalContext *lc); +bool ff_vvc_palette_transpose_flag(VVCLocalContext *lc); +bool ff_vvc_run_copy_flag(VVCLocalContext *lc, int prev_run_type, int prev_run_position, int cur_pos); +bool ff_vvc_copy_above_palette_indices_flag(VVCLocalContext *lc); +int ff_vvc_palette_idx_idc(VVCLocalContext *lc, int max_palette_index, bool adjust); +int ff_vvc_palette_escape_val(VVCLocalContext *lc); //inter int ff_vvc_general_merge_flag(VVCLocalContext *lc); @@ -111,6 +120,7 @@ int ff_vvc_bcw_idx(VVCLocalContext *lc, int no_backward_pred_flag); int ff_vvc_tu_cb_coded_flag(VVCLocalContext *lc); int ff_vvc_tu_cr_coded_flag(VVCLocalContext *lc, int tu_cb_coded_flag); int ff_vvc_tu_y_coded_flag(VVCLocalContext *lc); +int ff_vvc_cu_act_enabled_flag(VVCLocalContext *lc); int ff_vvc_cu_chroma_qp_offset_flag(VVCLocalContext *lc); int ff_vvc_cu_chroma_qp_offset_idx(VVCLocalContext *lc); int ff_vvc_tu_joint_cbcr_residual_flag(VVCLocalContext *lc, int tu_cb_coded_flag, int tu_cr_coded_flag); diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c index 080b740cc68d7..ba4c89b1d1b14 100644 --- a/libavcodec/vvc/ctu.c +++ b/libavcodec/vvc/ctu.c @@ -25,6 +25,7 @@ #include "cabac.h" #include "ctu.h" #include "inter.h" +#include "intra.h" #include "mvs.h" #define PROF_TEMP_SIZE (PROF_BLOCK_SIZE) * sizeof(int16_t) @@ -391,6 +392,8 @@ static int hls_transform_unit(VVCLocalContext *lc, int x0, int y0,int tu_width, if (ret < 0) return ret; set_tb_tab(fc->tab.tu_coded_flag[tb->c_idx], tu->coded_flag[tb->c_idx], fc, tb); + } else if (cu->act_enabled_flag) { + memset(tb->coeffs, 0, tb->tb_width * tb->tb_height * sizeof(*tb->coeffs)); } if (tb->c_idx != CR) set_tb_size(fc, tb); @@ -501,13 +504,12 @@ static int skipped_transform_tree(VVCLocalContext 
*lc, int x0, int y0,int tu_wid SKIPPED_TRANSFORM_TREE(x0, y0 + trafo_height); } else { TransformUnit *tu = add_tu(fc, lc->cu, x0, y0, tu_width, tu_height); - const int has_chroma = sps->r->sps_chroma_format_idc && cu->tree_type != DUAL_TREE_LUMA; - const int c_start = cu->tree_type == DUAL_TREE_CHROMA ? CB : LUMA; - const int c_end = has_chroma ? VVC_MAX_SAMPLE_ARRAYS : CB; + int start, end; if (!tu) return AVERROR_INVALIDDATA; - for (int i = c_start; i < c_end; i++) { + ff_vvc_channel_range(&start, &end, cu->tree_type, sps->r->sps_chroma_format_idc); + for (int i = start; i < end; i++) { TransformBlock *tb = add_tb(tu, lc, x0, y0, tu_width >> sps->hshift[i], tu_height >> sps->vshift[i], i); if (i != CR) set_tb_size(fc, tb); @@ -895,7 +897,7 @@ static void derive_chroma_intra_pred_mode(VVCLocalContext *lc, enum IntraPredMode luma_intra_pred_mode = SAMPLE_CTB(fc->tab.ipm, x_cb, y_cb); if (cu->tree_type == SINGLE_TREE && sps->r->sps_chroma_format_idc == CHROMA_FORMAT_444 && - intra_chroma_pred_mode == 4 && intra_mip_flag) { + (intra_chroma_pred_mode == 4 || cu->act_enabled_flag) && intra_mip_flag) { cu->mip_chroma_direct_flag = 1; cu->intra_pred_mode_c = luma_intra_pred_mode; return; @@ -1007,34 +1009,38 @@ static void intra_luma_pred_modes(VVCLocalContext *lc) static void intra_chroma_pred_modes(VVCLocalContext *lc) { - const VVCSPS *sps = lc->fc->ps.sps; - CodingUnit *cu = lc->cu; - const int hs = sps->hshift[CHROMA]; - const int vs = sps->vshift[CHROMA]; + const VVCSPS *sps = lc->fc->ps.sps; + CodingUnit *cu = lc->cu; + const int hs = sps->hshift[CHROMA]; + const int vs = sps->vshift[CHROMA]; + int cclm_mode_flag = 0; + int cclm_mode_idx = 0; + int intra_chroma_pred_mode = 0; + + if (!cu->act_enabled_flag) { + cu->mip_chroma_direct_flag = 0; + if (sps->r->sps_bdpcm_enabled_flag && + (cu->cb_width >> hs) <= sps->max_ts_size && + (cu->cb_height >> vs) <= sps->max_ts_size) { + cu->bdpcm_flag[CB] = cu->bdpcm_flag[CR] = ff_vvc_intra_bdpcm_chroma_flag(lc); + } + if (cu->bdpcm_flag[CHROMA]) { + cu->intra_pred_mode_c = ff_vvc_intra_bdpcm_chroma_dir_flag(lc) ? INTRA_VERT : INTRA_HORZ; + } else { + const int cclm_enabled = get_cclm_enabled(lc, cu->x0, cu->y0); - cu->mip_chroma_direct_flag = 0; - if (sps->r->sps_bdpcm_enabled_flag && - (cu->cb_width >> hs) <= sps->max_ts_size && - (cu->cb_height >> vs) <= sps->max_ts_size) { - cu->bdpcm_flag[CB] = cu->bdpcm_flag[CR] = ff_vvc_intra_bdpcm_chroma_flag(lc); - } - if (cu->bdpcm_flag[CHROMA]) { - cu->intra_pred_mode_c = ff_vvc_intra_bdpcm_chroma_dir_flag(lc) ? INTRA_VERT : INTRA_HORZ; - } else { - const int cclm_enabled = get_cclm_enabled(lc, cu->x0, cu->y0); - int cclm_mode_flag = 0; - int cclm_mode_idx = 0; - int intra_chroma_pred_mode = 0; + if (cclm_enabled) + cclm_mode_flag = ff_vvc_cclm_mode_flag(lc); - if (cclm_enabled) - cclm_mode_flag = ff_vvc_cclm_mode_flag(lc); + if (cclm_mode_flag) + cclm_mode_idx = ff_vvc_cclm_mode_idx(lc); + else + intra_chroma_pred_mode = ff_vvc_intra_chroma_pred_mode(lc); + } + } - if (cclm_mode_flag) - cclm_mode_idx = ff_vvc_cclm_mode_idx(lc); - else - intra_chroma_pred_mode = ff_vvc_intra_chroma_pred_mode(lc); + if (!cu->bdpcm_flag[CHROMA]) derive_chroma_intra_pred_mode(lc, cclm_mode_flag, cclm_mode_idx, intra_chroma_pred_mode); - } } static PredMode pred_mode_decode(VVCLocalContext *lc, @@ -1047,13 +1053,15 @@ static PredMode pred_mode_decode(VVCLocalContext *lc, const H266RawSliceHeader *rsh = lc->sc->sh.r; const int ch_type = tree_type == DUAL_TREE_CHROMA ? 
1 : 0; const int is_4x4 = cu->cb_width == 4 && cu->cb_height == 4; + const int is_128 = cu->cb_width == 128 || cu->cb_height == 128; + const int hs = sps->hshift[CHROMA]; + const int vs = sps->vshift[CHROMA]; int pred_mode_flag; int pred_mode_ibc_flag; PredMode pred_mode; cu->skip_flag = 0; if (!IS_I(rsh) || sps->r->sps_ibc_enabled_flag) { - const int is_128 = cu->cb_width == 128 || cu->cb_height == 128; if (tree_type != DUAL_TREE_CHROMA && ((!is_4x4 && mode_type != MODE_TYPE_INTRA) || (sps->r->sps_ibc_enabled_flag && !is_128))) { @@ -1088,6 +1096,14 @@ static PredMode pred_mode_decode(VVCLocalContext *lc, pred_mode = MODE_INTRA; } + if (pred_mode == MODE_INTRA && sps->r->sps_palette_enabled_flag && !is_128 && !cu->skip_flag && + mode_type != MODE_TYPE_INTER && ((cu->cb_width * cu->cb_height) > + (tree_type != DUAL_TREE_CHROMA ? 16 : (16 << hs << vs))) && + (mode_type != MODE_TYPE_INTRA || tree_type != DUAL_TREE_CHROMA)) { + if (ff_vvc_pred_mode_plt_flag(lc)) + pred_mode = MODE_PLT; + } + set_cb_tab(lc, fc->tab.cpm[cu->ch_type], pred_mode); if (tree_type == SINGLE_TREE) set_cb_tab(lc, fc->tab.cpm[CHROMA], pred_mode); @@ -1756,8 +1772,8 @@ static void fill_dmvr_info(const VVCLocalContext *lc) const VVCFrameContext *fc = lc->fc; const CodingUnit *cu = lc->cu; - if (cu->pred_mode == MODE_IBC) { - ff_vvc_set_intra_mvf(lc, 1); + if (cu->pred_mode == MODE_IBC || cu->pred_mode == MODE_PLT) { + ff_vvc_set_intra_mvf(lc, true, cu->pred_mode == MODE_IBC ? PF_IBC : PF_PLT, false); } else { const VVCPPS *pps = fc->ps.pps; const int w = cu->cb_width >> MIN_PU_LOG2; @@ -1806,17 +1822,345 @@ static int inter_data(VVCLocalContext *lc) return ret; } +static TransformUnit* palette_add_tu(VVCLocalContext *lc, const int start, const int end, const VVCTreeType tree_type) +{ + CodingUnit *cu = lc->cu; + const VVCSPS *sps = lc->fc->ps.sps; + TransformUnit *tu = add_tu(lc->fc, cu, cu->x0, cu->y0, cu->cb_width, cu->cb_height); + + if (!tu) + return NULL; + + for (int c = start; c < end; c++) { + const int w = tu->width >> sps->hshift[c]; + const int h = tu->height >> sps->vshift[c]; + TransformBlock *tb = add_tb(tu, lc, tu->x0, tu->y0, w, h, c); + if (c != CR) + set_tb_size(lc->fc, tb); + } + + for (int i = 0; i < FF_ARRAY_ELEMS(cu->plt); i++) + cu->plt[i].size = 0; + + return tu; +} + +static int palette_predicted(VVCLocalContext *lc, const bool local_dual_tree, int start, int end, + bool *predictor_reused, const int predictor_size, const int max_entries) +{ + CodingUnit *cu = lc->cu; + int nb_predicted = 0; + + if (local_dual_tree) { + start = LUMA; + end = VVC_MAX_SAMPLE_ARRAYS; + } + + for (int i = 0; i < predictor_size && nb_predicted < max_entries; i++) { + const int run = ff_vvc_palette_predictor_run(lc); + if (run == 1) + break; + + if (run > 1) + i += run - 1; + + if (i >= predictor_size) + return AVERROR_INVALIDDATA; + + predictor_reused[i] = true; + for (int c = start; c < end; c++) + cu->plt[c].entries[nb_predicted] = lc->ep->pp[c].entries[i]; + nb_predicted++; + } + + for (int c = start; c < end; c++) + cu->plt[c].size = nb_predicted; + + return 0; +} + +static int palette_signaled(VVCLocalContext *lc, const bool local_dual_tree, + const int start, const int end, const int max_entries) +{ + const VVCSPS *sps = lc->fc->ps.sps; + CodingUnit *cu = lc->cu; + const int nb_predicted = cu->plt[start].size; + const int nb_signaled = nb_predicted < max_entries ? 
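/*
 * A minimal C sketch, not part of the patch itself: the predictor-reuse
 * signalling that palette_predicted() consumes. A palette_predictor_run of
 * 0 reuses the next predictor entry, 1 terminates the list, and any r > 1
 * skips r - 1 entries before reusing one. mark_reused() is an illustrative
 * helper, not decoder API, and ignores the max_entries cap handled above.
 */
static int mark_reused(const int *runs, int nb_runs,
                       unsigned char *reused, int predictor_size)
{
    int i = 0;

    for (int n = 0; n < nb_runs && i < predictor_size; n++) {
        int run = runs[n];

        if (run == 1)
            return 0;            /* end of the reuse list */
        if (run > 1)
            i += run - 1;        /* skip run - 1 predictor entries */
        if (i >= predictor_size)
            return -1;           /* invalid bitstream */

        reused[i++] = 1;         /* this predictor entry enters the palette */
    }
    return 0;
}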
ff_vvc_num_signalled_palette_entries(lc) : 0; + const int size = nb_predicted + nb_signaled; + const bool dual_tree_luma = local_dual_tree && cu->tree_type == DUAL_TREE_LUMA; + + if (size > max_entries) + return AVERROR_INVALIDDATA; + + for (int c = start; c < end; c++) { + Palette *plt = cu->plt + c; + for (int i = nb_predicted; i < size; i++) { + plt->entries[i] = ff_vvc_new_palette_entries(lc, sps->bit_depth); + if (dual_tree_luma) { + plt[CB].entries[i] = 1 << (sps->bit_depth - 1); + plt[CR].entries[i] = 1 << (sps->bit_depth - 1); + } + } + plt->size = size; + } + + return 0; +} + +static void palette_update_predictor(VVCLocalContext *lc, const bool local_dual_tree, int start, int end, + bool *predictor_reused, const int predictor_size) +{ + CodingUnit *cu = lc->cu; + const int max_predictor = VVC_MAX_NUM_PALETTE_PREDICTOR_SIZE >> (cu->tree_type != SINGLE_TREE && !local_dual_tree); + + if (local_dual_tree) { + start = LUMA; + end = VVC_MAX_SAMPLE_ARRAYS; + } + + for (int c = start; c < end; c++) { + Palette *pp = lc->ep->pp + c; + Palette *plt = cu->plt + c; + int i = cu->plt[start].size;; + + // copy unused predictors to the end of plt + for (int j = 0; j < predictor_size && i < max_predictor; j++) { + if (!predictor_reused[j]) { + plt->entries[i] = pp->entries[j]; + i++; + } + } + + memcpy(pp->entries, plt->entries, i * sizeof(pp->entries[0])); + pp->size = i; + } +} + +static void palette_qp(VVCLocalContext *lc, VVCTreeType tree_type, const bool escape_present) +{ + const VVCFrameContext *fc = lc->fc; + const VVCPPS *pps = fc->ps.pps; + const H266RawSliceHeader *rsh = lc->sc->sh.r; + const CodingUnit *cu = lc->cu; + + if (tree_type != DUAL_TREE_CHROMA) { + const bool has_qp_delta = escape_present && + pps->r->pps_cu_qp_delta_enabled_flag && !lc->parse.is_cu_qp_delta_coded; + set_qp_y(lc, cu->x0, cu->y0, has_qp_delta); + } + + if (tree_type != DUAL_TREE_LUMA) { + if (rsh->sh_cu_chroma_qp_offset_enabled_flag && !lc->parse.is_cu_chroma_qp_offset_coded) + chroma_qp_offset_decode(lc, 0, 1); + set_qp_c(lc); + } +} + +#define PALETTE_SET_PIXEL(xc, yc, pix) \ + do { \ + const int off = ((xc) >> hs) + ((yc) >> vs) * tb->tb_width; \ + if (sps->bit_depth == 8) \ + u8[off] = pix; \ + else \ + u16[off] = pix; \ + } while (0) + +#define PALETTE_INDEX(x, y) index[(y) * cu->cb_width + (x)] + +// 6.5.3 Horizontal and vertical traverse scan order array initialization process +// The hTravScan and vTravScan tables require approximately 576 KB of memory. +// To save space, we use a macro to achieve the same functionality. +#define TRAV_COL(p, wlog, mask) ((p & mask) ^ (-((p >> wlog) & 1) & mask)) +#define TRAV_ROW(p, hlog) (p >> hlog) +#define TRAV(trans, p, wlog, hlog, mask) (trans ? 
TRAV_ROW((p), hlog) : TRAV_COL((p), wlog, mask)) +#define TRAV_X(pos) TRAV(transpose, pos, wlog2, hlog2, wmask) +#define TRAV_Y(pos) TRAV(!transpose, pos, hlog2, wlog2, hmask) + +static int palette_subblock_data(VVCLocalContext *lc, + const int max_index, const int subset_id, const bool transpose, + uint8_t *run_type, uint8_t *index, int *prev_run_pos, bool *adjust) +{ + const CodingUnit *cu = lc->cu; + TransformUnit *tu = cu->tus.head; + const VVCSPS *sps = lc->fc->ps.sps; + const int min_pos = subset_id << 4; + const int max_pos = FFMIN(min_pos + 16, cu->cb_width * cu->cb_height); + const int wmask = cu->cb_width - 1; + const int hmask = cu->cb_height - 1; + const int wlog2 = av_log2(cu->cb_width); + const int hlog2 = av_log2(cu->cb_height); + const uint8_t esc = cu->plt[tu->tbs[0].c_idx].size; + uint8_t run_copy[16] = { 0 }; + + for (int i = min_pos; i < max_pos; i++) { + const int xc = TRAV_X(i); + const int yc = TRAV_Y(i); + + if (i > 0 && max_index > 0) + run_copy[i - min_pos] = ff_vvc_run_copy_flag(lc, run_type[i - 1], *prev_run_pos, i); + + run_type[i] = 0; + if (max_index > 0 && !run_copy[i - min_pos]) { + if (((!transpose && yc > 0) || (transpose && xc > 0)) + && i > 0 && !run_type[i - 1]) { + run_type[i] = ff_vvc_copy_above_palette_indices_flag(lc); + } + *prev_run_pos = i; + } else if (i > 0) { + run_type[i] = run_type[i - 1]; + } + } + + for (int i = min_pos; i < max_pos; i++) { + const int xc = TRAV_X(i); + const int yc = TRAV_Y(i); + const int prev_xc = i > 0 ? TRAV_X(i - 1) : 0; + const int prev_yc = i > 0 ? TRAV_Y(i - 1) : 0; + + int idx = 0; + if (max_index > 0 && !run_copy[i - min_pos] && !run_type[i]) { + if (max_index - *adjust > 0) + idx = ff_vvc_palette_idx_idc(lc, max_index, *adjust); + if (i > 0) { + const int ref_idx = !run_type[i - 1] ? + PALETTE_INDEX(prev_xc, prev_yc) : PALETTE_INDEX(xc - transpose, yc - !transpose); + idx += (idx >= ref_idx); + } + *adjust = true; + } else { + idx = PALETTE_INDEX(prev_xc, prev_yc); + } + + if (!run_type[i]) + PALETTE_INDEX(xc, yc) = idx; + else + PALETTE_INDEX(xc, yc) = PALETTE_INDEX(xc - transpose, yc - !transpose); + } + + for (int c = 0; c < tu->nb_tbs; c++) { + TransformBlock *tb = &tu->tbs[c]; + const Palette *plt = cu->plt + tb->c_idx; + const int scale = ff_vvc_palette_derive_scale(lc, tu, tb); + const int hs = sps->hshift[c]; + const int vs = sps->vshift[c]; + uint8_t *u8 = (uint8_t *)tb->coeffs; + uint16_t *u16 = (uint16_t *)tb->coeffs; + + for (int i = min_pos; i < max_pos; i++) { + const int xc = TRAV_X(i); + const int yc = TRAV_Y(i); + if (!(xc & hs) && !(yc & vs)) { + const int v = PALETTE_INDEX(xc, yc); + if (v == esc) { + const int coeff = ff_vvc_palette_escape_val(lc); + const int pixel = av_clip_intp2(RSHIFT(coeff * scale, 6), sps->bit_depth); + PALETTE_SET_PIXEL(xc, yc, pixel); + } else { + PALETTE_SET_PIXEL(xc, yc, plt->entries[v]); + } + } + } + } + + return 0; +} + +static int hls_palette_coding(VVCLocalContext *lc, const VVCTreeType tree_type) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const H266RawSliceHeader *rsh = lc->sc->sh.r; + CodingUnit *cu = lc->cu; + Palette *pp = lc->ep->pp; + const int max_entries = tree_type == SINGLE_TREE ? 
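/*
 * A minimal C sketch, not part of the patch itself: what the TRAV_* macros
 * compute for the non-transposed case. Within each row the x coordinate
 * runs left-to-right on even rows and is mirrored on odd rows, i.e. the
 * horizontal traverse (snake) scan, so no hTravScan/vTravScan tables are
 * needed.
 */
#include <stdio.h>

static void trav_xy(int pos, int wlog2, int *x, int *y)
{
    int wmask = (1 << wlog2) - 1;
    int row   = pos >> wlog2;

    *y = row;
    *x = (pos & wmask) ^ (-(row & 1) & wmask);  /* mirror x on odd rows */
}

int main(void)
{
    /* 4x2 block: prints (0,0)(1,0)(2,0)(3,0)(3,1)(2,1)(1,1)(0,1) */
    for (int pos = 0; pos < 8; pos++) {
        int x, y;
        trav_xy(pos, 2, &x, &y);
        printf("(%d,%d)", x, y);
    }
    putchar('\n');
    return 0;
}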
31 : 15; + const bool local_dual_tree = tree_type != SINGLE_TREE && + (!IS_I(rsh) || (IS_I(rsh) && !sps->r->sps_qtbtt_dual_tree_intra_flag)); + bool escape_present = false; + bool transpose = false; + bool adjust = false; + int max_index = 0; + int prev_run_pos = 0; + + int predictor_size, start, end, ret; + bool reused[VVC_MAX_NUM_PALETTE_PREDICTOR_SIZE]; + uint8_t run_type[MAX_PALETTE_CU_SIZE * MAX_PALETTE_CU_SIZE]; + uint8_t index[MAX_PALETTE_CU_SIZE * MAX_PALETTE_CU_SIZE]; + + ff_vvc_channel_range(&start, &end, tree_type, sps->r->sps_chroma_format_idc); + + if (!palette_add_tu(lc, start, end, tree_type)) + return AVERROR(ENOMEM); + + predictor_size = pp[start].size; + memset(reused, 0, sizeof(reused[0]) * predictor_size); + + ret = palette_predicted(lc, local_dual_tree, start, end, reused, predictor_size, max_entries); + if (ret < 0) + return ret; + + ret = palette_signaled(lc, local_dual_tree, start, end, max_entries); + if (ret < 0) + return ret; + + palette_update_predictor(lc, local_dual_tree, start, end, reused, predictor_size); + + if (cu->plt[start].size > 0) + escape_present = ff_vvc_palette_escape_val_present_flag(lc); + + max_index = cu->plt[start].size - 1 + escape_present; + if (max_index > 0) { + adjust = false; + transpose = ff_vvc_palette_transpose_flag(lc); + } + + palette_qp(lc, tree_type, escape_present); + + index[0] = 0; + for (int i = 0; i <= (cu->cb_width * cu->cb_height - 1) >> 4; i++) + palette_subblock_data(lc, max_index, i, transpose, + run_type, index, &prev_run_pos, &adjust); + + return 0; +} + +static int intra_data(VVCLocalContext *lc) +{ + const VVCSPS *sps = lc->fc->ps.sps; + const CodingUnit *cu = lc->cu; + const VVCTreeType tree_type = cu->tree_type; + const bool pred_mode_plt_flag = cu->pred_mode == MODE_PLT; + int ret = 0; + + if (tree_type == SINGLE_TREE || tree_type == DUAL_TREE_LUMA) { + if (pred_mode_plt_flag) { + if ((ret = hls_palette_coding(lc, tree_type)) < 0) + return ret; + ff_vvc_set_intra_mvf(lc, false, PF_PLT, false); + } else { + intra_luma_pred_modes(lc); + ff_vvc_set_intra_mvf(lc, false, PF_INTRA, cu->ciip_flag); + } + } + if ((tree_type == SINGLE_TREE || tree_type == DUAL_TREE_CHROMA) && sps->r->sps_chroma_format_idc) { + if (pred_mode_plt_flag && tree_type == DUAL_TREE_CHROMA) { + if ((ret = hls_palette_coding(lc, tree_type)) < 0) + return ret; + } else if (!pred_mode_plt_flag) { + intra_chroma_pred_modes(lc); + } + } + + return ret; +} + static int hls_coding_unit(VVCLocalContext *lc, int x0, int y0, int cb_width, int cb_height, int cqt_depth, const VVCTreeType tree_type, VVCModeType mode_type) { - const VVCFrameContext *fc = lc->fc; - const VVCSPS *sps = fc->ps.sps; - const H266RawSliceHeader *rsh = lc->sc->sh.r; - const int hs = sps->hshift[CHROMA]; - const int vs = sps->vshift[CHROMA]; - const int is_128 = cb_width > 64 || cb_height > 64; - int pred_mode_plt_flag = 0; - int ret; + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const H266RawSliceHeader *rsh = lc->sc->sh.r; + const int is_128 = cb_width > 64 || cb_height > 64; + int ret = 0; CodingUnit *cu = add_cu(lc, x0, y0, cb_width, cb_height, cqt_depth, tree_type); @@ -1829,54 +2173,26 @@ static int hls_coding_unit(VVCLocalContext *lc, int x0, int y0, int cb_width, in mode_type = MODE_TYPE_INTRA; cu->pred_mode = pred_mode_decode(lc, tree_type, mode_type); - if (cu->pred_mode == MODE_INTRA && sps->r->sps_palette_enabled_flag && !is_128 && !cu->skip_flag && - mode_type != MODE_TYPE_INTER && ((cb_width * cb_height) > - (tree_type != DUAL_TREE_CHROMA ? 
16 : (16 << hs << vs))) && - (mode_type != MODE_TYPE_INTRA || tree_type != DUAL_TREE_CHROMA)) { - pred_mode_plt_flag = ff_vvc_pred_mode_plt_flag(lc); - if (pred_mode_plt_flag) { - avpriv_report_missing_feature(fc->log_ctx, "Palette"); - return AVERROR_PATCHWELCOME; - } - } - if (cu->pred_mode == MODE_INTRA && sps->r->sps_act_enabled_flag && tree_type == SINGLE_TREE) { - avpriv_report_missing_feature(fc->log_ctx, "Adaptive Color Transform"); - return AVERROR_PATCHWELCOME; - } - if (cu->pred_mode == MODE_INTRA || cu->pred_mode == MODE_PLT) { - if (tree_type == SINGLE_TREE || tree_type == DUAL_TREE_LUMA) { - if (pred_mode_plt_flag) { - avpriv_report_missing_feature(fc->log_ctx, "Palette"); - return AVERROR_PATCHWELCOME; - } else { - intra_luma_pred_modes(lc); - } - ff_vvc_set_intra_mvf(lc, 0); - } - if ((tree_type == SINGLE_TREE || tree_type == DUAL_TREE_CHROMA) && sps->r->sps_chroma_format_idc) { - if (pred_mode_plt_flag && tree_type == DUAL_TREE_CHROMA) { - avpriv_report_missing_feature(fc->log_ctx, "Palette"); - return AVERROR_PATCHWELCOME; - } else if (!pred_mode_plt_flag) { - if (!cu->act_enabled_flag) - intra_chroma_pred_modes(lc); - } - } - } else if (tree_type != DUAL_TREE_CHROMA) { /* MODE_INTER or MODE_IBC */ - if ((ret = inter_data(lc)) < 0) - return ret; - } - if (cu->pred_mode != MODE_INTRA && !pred_mode_plt_flag && !lc->cu->pu.general_merge_flag) + if (cu->pred_mode == MODE_INTRA && sps->r->sps_act_enabled_flag && tree_type == SINGLE_TREE) + cu->act_enabled_flag = ff_vvc_cu_act_enabled_flag(lc); + + if (cu->pred_mode == MODE_INTRA || cu->pred_mode == MODE_PLT) + ret = intra_data(lc); + else if (tree_type != DUAL_TREE_CHROMA) /* MODE_INTER or MODE_IBC */ + ret = inter_data(lc); + + if (ret < 0) + return ret; + + if (cu->pred_mode != MODE_INTRA && cu->pred_mode != MODE_PLT && !lc->cu->pu.general_merge_flag) cu->coded_flag = ff_vvc_cu_coded_flag(lc); else - cu->coded_flag = !(cu->skip_flag || pred_mode_plt_flag); + cu->coded_flag = !(cu->skip_flag || cu->pred_mode == MODE_PLT); if (cu->coded_flag) { sbt_info(lc, sps); - if (sps->r->sps_act_enabled_flag && cu->pred_mode != MODE_INTRA && tree_type == SINGLE_TREE) { - avpriv_report_missing_feature(fc->log_ctx, "Adaptive Color Transform"); - return AVERROR_PATCHWELCOME; - } + if (sps->r->sps_act_enabled_flag && cu->pred_mode != MODE_INTRA && tree_type == SINGLE_TREE) + cu->act_enabled_flag = ff_vvc_cu_act_enabled_flag(lc); lc->parse.lfnst_dc_only = 1; lc->parse.lfnst_zero_out_sig_coeff_flag = 1; lc->parse.mts_dc_only = 1; @@ -1887,7 +2203,7 @@ static int hls_coding_unit(VVCLocalContext *lc, int x0, int y0, int cb_width, in cu->lfnst_idx = lfnst_idx_decode(lc); cu->mts_idx = mts_idx_decode(lc); set_qp_c(lc); - } else { + } else if (cu->pred_mode != MODE_PLT) { ret = skipped_transform_tree_unit(lc); if (ret < 0) return ret; @@ -2580,3 +2896,12 @@ void ff_vvc_ep_init_stat_coeff(EntryPoint *ep, persistent_rice_adaptation_enabled_flag ? 2 * (av_log2(bit_depth - 10)) : 0; } } + +void ff_vvc_channel_range(int *start, int *end, const VVCTreeType tree_type, const uint8_t chroma_format_idc) +{ + const bool has_chroma = chroma_format_idc && tree_type != DUAL_TREE_LUMA; + const bool has_luma = tree_type != DUAL_TREE_CHROMA; + + *start = has_luma ? LUMA : CB; + *end = has_chroma ? 
VVC_MAX_SAMPLE_ARRAYS : CB; +} diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h index c5533c1ad086f..e37bacf9ddb02 100644 --- a/libavcodec/vvc/ctu.h +++ b/libavcodec/vvc/ctu.h @@ -36,6 +36,7 @@ #define MIN_CU_SIZE 4 #define MIN_CU_LOG2 2 #define MAX_CU_DEPTH 7 +#define MAX_PALETTE_CU_SIZE 64 #define MAX_PARTS_IN_CTU ((MAX_CTU_SIZE >> MIN_CU_LOG2) * (MAX_CTU_SIZE >> MIN_CU_LOG2)) @@ -224,6 +225,7 @@ typedef enum PredFlag { PF_L1 = 0x2, PF_BI = 0x3, PF_IBC = PF_L0 | 0x4, + PF_PLT = 0x8, } PredFlag; typedef enum IntraPredMode { @@ -277,6 +279,11 @@ typedef struct PredictionUnit { int cb_prof_flag[2]; } PredictionUnit; +typedef struct Palette { + uint8_t size; + uint16_t entries[VVC_MAX_NUM_PALETTE_PREDICTOR_SIZE]; +} Palette; + typedef struct CodingUnit { VVCTreeType tree_type; int x0; @@ -326,6 +333,8 @@ typedef struct CodingUnit { int8_t qp[4]; ///< QpY, Qp′Cb, Qp′Cr, Qp′CbCr + Palette plt[VVC_MAX_SAMPLE_ARRAYS]; + PredictionUnit pu; struct CodingUnit *next; ///< RefStruct reference @@ -356,6 +365,8 @@ typedef struct EntryPoint { int stat_coeff[VVC_MAX_SAMPLE_ARRAYS]; ///< StatCoeff + Palette pp[VVC_MAX_SAMPLE_ARRAYS]; // PalettePredictor + VVCCabacState cabac_state[VVC_CONTEXTS]; CABACContext cc; @@ -489,5 +500,6 @@ void ff_vvc_decode_neighbour(VVCLocalContext *lc, int x_ctb, int y_ctb, int rx, void ff_vvc_ctu_free_cus(CodingUnit **cus); int ff_vvc_get_qPy(const VVCFrameContext *fc, int xc, int yc); void ff_vvc_ep_init_stat_coeff(EntryPoint *ep, int bit_depth, int persistent_rice_adaptation_enabled_flag); +void ff_vvc_channel_range(int *start, int *end, VVCTreeType tree_type, uint8_t chroma_format_idc); #endif // AVCODEC_VVC_CTU_H diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c index 0b6443a112f9c..381b42c421b3a 100644 --- a/libavcodec/vvc/dec.c +++ b/libavcodec/vvc/dec.c @@ -26,9 +26,12 @@ #include "libavcodec/hwconfig.h" #include "libavcodec/profiles.h" #include "libavutil/refstruct.h" +#include "libavcodec/aom_film_grain.h" +#include "libavcodec/thread.h" #include "libavutil/cpu.h" #include "libavutil/mem.h" #include "libavutil/thread.h" +#include "libavutil/film_grain_params.h" #include "dec.h" #include "ctu.h" @@ -506,23 +509,18 @@ static int slices_realloc(VVCFrameContext *fc) return 0; } -static int ep_init_cabac_decoder(SliceContext *sc, const int index, - const H2645NAL *nal, GetBitContext *gb, const CodedBitstreamUnit *unit) +static int get_ep_size(const H266RawSliceHeader *rsh, GetBitContext *gb, const H2645NAL *nal, const int header_size, const int ep_index) { - const H266RawSlice *slice = unit->content_ref; - const H266RawSliceHeader *rsh = sc->sh.r; - EntryPoint *ep = sc->eps + index; int size; - int ret; - if (index < rsh->num_entry_points) { + if (ep_index < rsh->num_entry_points) { int skipped = 0; int64_t start = (gb->index >> 3); - int64_t end = start + rsh->sh_entry_point_offset_minus1[index] + 1; - while (skipped < nal->skipped_bytes && nal->skipped_bytes_pos[skipped] <= start + slice->header_size) { + int64_t end = start + rsh->sh_entry_point_offset_minus1[ep_index] + 1; + while (skipped < nal->skipped_bytes && nal->skipped_bytes_pos[skipped] <= start + header_size) { skipped++; } - while (skipped < nal->skipped_bytes && nal->skipped_bytes_pos[skipped] <= end + slice->header_size) { + while (skipped < nal->skipped_bytes && nal->skipped_bytes_pos[skipped] <= end + header_size) { end--; skipped++; } @@ -531,6 +529,13 @@ static int ep_init_cabac_decoder(SliceContext *sc, const int index, } else { size = get_bits_left(gb) / 8; } + return size; +} + 
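/*
 * Illustrative sketch (assumptions noted, not part of the patch): get_ep_size()
 * above derives the number of CABAC bytes belonging to one entry point.  The
 * patch treats the offsets signalled via sh_entry_point_offset_minus1[] as
 * counting emulation-prevention bytes as well, while GetBitContext reads the
 * de-escaped payload, so every escape byte recorded in nal->skipped_bytes_pos[]
 * that falls inside the window shrinks the readable size by one byte.  The
 * helper below (ep_size_sketch) is hypothetical and only mirrors that
 * adjustment in isolation.
 */
#include <stddef.h>

static size_t ep_size_sketch(size_t start, size_t offset_minus1,
                             const size_t *skipped_pos, size_t nb_skipped,
                             size_t header_size)
{
    size_t end = start + offset_minus1 + 1;
    size_t i   = 0;

    /* ignore escape bytes located before the current entry point */
    while (i < nb_skipped && skipped_pos[i] <= start + header_size)
        i++;

    /* each escape byte inside (start, end] costs one payload byte */
    while (i < nb_skipped && skipped_pos[i] <= end + header_size) {
        end--;
        i++;
    }

    return end - start;
}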
+static int ep_init_cabac_decoder(EntryPoint *ep, GetBitContext *gb, const int size) +{ + int ret; + av_assert0(gb->buffer + get_bits_count(gb) / 8 + size <= gb->buffer_end); ret = ff_init_cabac_decoder (&ep->cc, gb->buffer + get_bits_count(gb) / 8, size); if (ret < 0) @@ -539,6 +544,22 @@ static int ep_init_cabac_decoder(SliceContext *sc, const int index, return 0; } +static int ep_init(EntryPoint *ep, const int ctu_addr, const int ctu_end, GetBitContext *gb, const int size) +{ + const int ret = ep_init_cabac_decoder(ep, gb, size); + + if (ret < 0) + return ret; + + ep->ctu_start = ctu_addr; + ep->ctu_end = ctu_end; + + for (int c_idx = LUMA; c_idx <= CR; c_idx++) + ep->pp[c_idx].size = 0; + + return 0; +} + static int slice_init_entry_points(SliceContext *sc, VVCFrameContext *fc, const H2645NAL *nal, const CodedBitstreamUnit *unit) { @@ -562,20 +583,19 @@ static int slice_init_entry_points(SliceContext *sc, return ret; for (int i = 0; i < sc->nb_eps; i++) { - EntryPoint *ep = sc->eps + i; + const int size = get_ep_size(sc->sh.r, &gb, nal, slice->header_size, i); + const int ctu_end = (i + 1 == sc->nb_eps ? sh->num_ctus_in_curr_slice : sh->entry_point_start_ctu[i]); + EntryPoint *ep = sc->eps + i; - ep->ctu_start = ctu_addr; - ep->ctu_end = (i + 1 == sc->nb_eps ? sh->num_ctus_in_curr_slice : sh->entry_point_start_ctu[i]); + ret = ep_init(ep, ctu_addr, ctu_end, &gb, size); + if (ret < 0) + return ret; for (int j = ep->ctu_start; j < ep->ctu_end; j++) { const int rs = sc->sh.ctb_addr_in_curr_slice[j]; fc->tab.slice_idx[rs] = sc->slice_idx; } - ret = ep_init_cabac_decoder(sc, i, nal, &gb, unit); - if (ret < 0) - return ret; - if (i + 1 < sc->nb_eps) ctu_addr = sh->entry_point_start_ctu[i]; } @@ -601,6 +621,14 @@ static int ref_frame(VVCFrame *dst, const VVCFrame *src) av_refstruct_replace(&dst->sps, src->sps); av_refstruct_replace(&dst->pps, src->pps); + if (src->needs_fg) { + ret = av_frame_ref(dst->frame_grain, src->frame_grain); + if (ret < 0) + return ret; + + dst->needs_fg = src->needs_fg; + } + av_refstruct_replace(&dst->progress, src->progress); av_refstruct_replace(&dst->tab_dmvr_mvf, src->tab_dmvr_mvf); @@ -634,12 +662,14 @@ static av_cold void frame_context_free(VVCFrameContext *fc) for (int i = 0; i < FF_ARRAY_ELEMS(fc->DPB); i++) { ff_vvc_unref_frame(fc, &fc->DPB[i], ~0); av_frame_free(&fc->DPB[i].frame); + av_frame_free(&fc->DPB[i].frame_grain); } ff_vvc_frame_thread_free(fc); pic_arrays_free(fc); av_frame_free(&fc->output_frame); ff_vvc_frame_ps_free(&fc->ps); + ff_vvc_sei_reset(&fc->sei); } static av_cold int frame_context_init(VVCFrameContext *fc, AVCodecContext *avctx) @@ -655,6 +685,10 @@ static av_cold int frame_context_init(VVCFrameContext *fc, AVCodecContext *avctx fc->DPB[j].frame = av_frame_alloc(); if (!fc->DPB[j].frame) return AVERROR(ENOMEM); + + fc->DPB[j].frame_grain = av_frame_alloc(); + if (!fc->DPB[j].frame_grain) + return AVERROR(ENOMEM); } fc->cu_pool = av_refstruct_pool_alloc(sizeof(CodingUnit), 0); if (!fc->cu_pool) @@ -682,6 +716,10 @@ static int frame_context_setup(VVCFrameContext *fc, VVCContext *s) return ret; } } + + ret = ff_vvc_sei_replace(&fc->sei, &prev->sei); + if (ret < 0) + return ret; } if (IS_IDR(s)) { @@ -697,6 +735,65 @@ static int frame_context_setup(VVCFrameContext *fc, VVCContext *s) return 0; } +/* SEI does not affect decoding, so we ignore the return value */ +static void decode_prefix_sei(VVCFrameContext *fc, VVCContext *s) +{ + CodedBitstreamFragment *frame = &s->current_frame; + + for (int i = 0; i < frame->nb_units; i++) { + const 
CodedBitstreamUnit *unit = frame->units + i; + + if (unit->type == VVC_PREFIX_SEI_NUT) { + int ret = ff_vvc_sei_decode(&fc->sei, unit->content_ref, fc); + if (ret < 0) + return; + } + } +} + +static int set_side_data(VVCContext *s, VVCFrameContext *fc) +{ + AVFrame *out = fc->ref->frame; + + return ff_h2645_sei_to_frame(out, &fc->sei.common, AV_CODEC_ID_VVC, s->avctx, + NULL, fc->ps.sps->bit_depth, fc->ps.sps->bit_depth, fc->ref->poc); +} + +static int check_film_grain(VVCContext *s, VVCFrameContext *fc) +{ + int ret; + + fc->ref->needs_fg = (fc->sei.common.film_grain_characteristics && + fc->sei.common.film_grain_characteristics->present || + fc->sei.common.aom_film_grain.enable) && + !(s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && + !s->avctx->hwaccel; + + if (fc->ref->needs_fg && + (fc->sei.common.film_grain_characteristics->present && + !ff_h274_film_grain_params_supported(fc->sei.common.film_grain_characteristics->model_id, + fc->ref->frame->format) || + !av_film_grain_params_select(fc->ref->frame))) { + av_log_once(s->avctx, AV_LOG_WARNING, AV_LOG_DEBUG, &s->film_grain_warning_shown, + "Unsupported film grain parameters. Ignoring film grain.\n"); + fc->ref->needs_fg = 0; + } + + if (fc->ref->needs_fg) { + fc->ref->frame_grain->format = fc->ref->frame->format; + fc->ref->frame_grain->width = fc->ref->frame->width; + fc->ref->frame_grain->height = fc->ref->frame->height; + + ret = ff_thread_get_buffer(s->avctx, fc->ref->frame_grain, 0); + if (ret < 0) + return ret; + + return av_frame_copy_props(fc->ref->frame_grain, fc->ref->frame); + } + + return 0; +} + static int frame_start(VVCContext *s, VVCFrameContext *fc, SliceContext *sc) { const VVCPH *ph = &fc->ps.ph; @@ -710,6 +807,16 @@ static int frame_start(VVCContext *s, VVCFrameContext *fc, SliceContext *sc) if ((ret = ff_vvc_set_new_ref(s, fc, &fc->frame)) < 0) goto fail; + decode_prefix_sei(fc, s); + + ret = set_side_data(s, fc); + if (ret < 0) + goto fail; + + ret = check_film_grain(s, fc); + if (ret < 0) + goto fail; + if (!IS_IDR(s)) ff_vvc_bump_frame(s, fc); @@ -914,6 +1021,15 @@ static int decode_nal_unit(VVCContext *s, VVCFrameContext *fc, AVBufferRef *buf_ if (ret < 0) return ret; break; + case VVC_PREFIX_SEI_NUT: + /* handle by decode_prefix_sei() */ + break; + + case VVC_SUFFIX_SEI_NUT: + /* SEI does not affect decoding, so we ignore the return value*/ + if (fc) + ff_vvc_sei_decode(&fc->sei, unit->content_ref, fc); + break; } return 0; @@ -958,14 +1074,60 @@ static int decode_nal_units(VVCContext *s, VVCFrameContext *fc, AVPacket *avpkt) return ret; } +static int frame_end(VVCContext *s, VVCFrameContext *fc) +{ + const AVFilmGrainParams *fgp; + int ret = 0; + + if (fc->ref->needs_fg) { + av_assert0(fc->ref->frame_grain->buf[0]); + fgp = av_film_grain_params_select(fc->ref->frame); + switch (fgp->type) { + case AV_FILM_GRAIN_PARAMS_NONE: + av_assert0(0); + return AVERROR_BUG; + case AV_FILM_GRAIN_PARAMS_H274: + ret = ff_h274_apply_film_grain(fc->ref->frame_grain, fc->ref->frame, + &s->h274db, fgp); + break; + case AV_FILM_GRAIN_PARAMS_AV1: + ret = ff_aom_apply_film_grain(fc->ref->frame_grain, fc->ref->frame, fgp); + break; + } + } + + if (!s->avctx->hwaccel && s->avctx->err_recognition & AV_EF_CRCCHECK) { + VVCSEI *sei = &fc->sei; + if (sei->picture_hash.present) { + int ret = ff_h274_hash_init(&s->hash_ctx, sei->picture_hash.hash_type); + if (ret < 0) + return ret; + + ret = ff_h274_hash_verify(s->hash_ctx, &sei->picture_hash, fc->ref->frame, fc->ps.pps->width, fc->ps.pps->height); + if (ret < 0) { + 
av_log(s->avctx, AV_LOG_ERROR, + "Verifying checksum for frame with decoder_order %d: failed\n", + (int)fc->decode_order); + if (s->avctx->err_recognition & AV_EF_EXPLODE) + return ret; + } + } + } + + return 0; +} + static int wait_delayed_frame(VVCContext *s, AVFrame *output, int *got_output) { VVCFrameContext *delayed = get_frame_context(s, s->fcs, s->nb_frames - s->nb_delayed); int ret = ff_vvc_frame_wait(s, delayed); - if (!ret && delayed->output_frame->buf[0] && output) { - av_frame_move_ref(output, delayed->output_frame); - *got_output = 1; + if (!ret) { + ret = frame_end(s, delayed); + if (ret >= 0 && delayed->output_frame->buf[0] && output) { + av_frame_move_ref(output, delayed->output_frame); + *got_output = 1; + } } s->nb_delayed--; @@ -1080,6 +1242,7 @@ static av_cold int vvc_decode_free(AVCodecContext *avctx) frame_context_free(s->fcs + i); av_free(s->fcs); } + ff_h274_hash_freep(&s->hash_ctx); ff_vvc_ps_uninit(&s->ps); ff_cbs_close(&s->cbc); diff --git a/libavcodec/vvc/dec.h b/libavcodec/vvc/dec.h index 6aa31215505cf..5f8065b38bad8 100644 --- a/libavcodec/vvc/dec.h +++ b/libavcodec/vvc/dec.h @@ -26,9 +26,11 @@ #include "libavcodec/videodsp.h" #include "libavcodec/vvc.h" +#include "libavcodec/h274.h" #include "ps.h" #include "dsp.h" +#include "sei.h" #define LUMA 0 #define CHROMA 1 @@ -70,12 +72,15 @@ typedef struct VVCWindow { typedef struct VVCFrame { struct AVFrame *frame; - + struct AVFrame *frame_grain; const VVCSPS *sps; ///< RefStruct reference const VVCPPS *pps; ///< RefStruct reference struct MvField *tab_dmvr_mvf; ///< RefStruct reference RefPicListTab **rpl_tab; ///< RefStruct reference RefPicListTab *rpl; ///< RefStruct reference + + int needs_fg; ///< 1 if grain needs to be applied by the decoder + int nb_rpl_elems; int ctb_count; @@ -124,6 +129,7 @@ typedef struct VVCFrameContext { struct AVFrame *output_frame; VVCFrameParamSets ps; + VVCSEI sei; SliceContext **slices; int nb_slices; @@ -216,6 +222,7 @@ typedef struct VVCContext { CodedBitstreamFragment current_frame; VVCParamSets ps; + H274FilmGrainDatabase h274db; int temporal_id; ///< temporal_id_plus1 - 1 int poc_tid0; @@ -226,6 +233,7 @@ typedef struct VVCContext { enum VVCNALUnitType vcl_unit_type; int no_output_before_recovery_flag; ///< NoOutputBeforeRecoveryFlag int gdr_recovery_point_poc; ///< recoveryPointPocVal + int film_grain_warning_shown; /** * Sequence counters for decoded and output frames, so that old @@ -241,6 +249,8 @@ typedef struct VVCContext { uint64_t nb_frames; ///< processed frames int nb_delayed; ///< delayed frames + + H274HashContext *hash_ctx; } VVCContext ; #endif /* AVCODEC_VVC_DEC_H */ diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h index fc4c3a679909c..ae22900931799 100644 --- a/libavcodec/vvc/dsp.h +++ b/libavcodec/vvc/dsp.h @@ -106,7 +106,7 @@ struct VVCLocalContext; typedef struct VVCIntraDSPContext { void (*intra_cclm_pred)(const struct VVCLocalContext *lc, int x0, int y0, int w, int h); - void (*lmcs_scale_chroma)(struct VVCLocalContext *lc, int *dst, const int *coeff, int w, int h, int x0_cu, int y0_cu); + void (*lmcs_scale_chroma)(struct VVCLocalContext *lc, int *coeff, int w, int h, int x0_cu, int y0_cu); void (*intra_pred)(const struct VVCLocalContext *lc, int x0, int y0, int w, int h, int c_idx); void (*pred_planar)(uint8_t *src, const uint8_t *top, const uint8_t *left, int w, int h, ptrdiff_t stride); void (*pred_mip)(uint8_t *src, const uint8_t *top, const uint8_t *left, int w, int h, ptrdiff_t stride, @@ -122,11 +122,12 @@ typedef struct VVCIntraDSPContext 
{ typedef struct VVCItxDSPContext { void (*add_residual)(uint8_t *dst, const int *res, int width, int height, ptrdiff_t stride); - void (*add_residual_joint)(uint8_t *dst, const int *res, int width, int height, ptrdiff_t stride, int c_sign, int shift); - void (*pred_residual_joint)(int *buf, int width, int height, int c_sign, int shift); + void (*pred_residual_joint)(int *dst, const int *src, int width, int height, int c_sign, int shift); void (*itx[VVC_N_TX_TYPE][VVC_N_TX_SIZE])(int *coeffs, ptrdiff_t step, size_t nz); void (*transform_bdpcm)(int *coeffs, int width, int height, int vertical, int log2_transform_range); + + void (*adaptive_color_transform)(int *y, int *u, int *v, int width, int height); } VVCItxDSPContext; typedef struct VVCLMCSDSPContext { diff --git a/libavcodec/vvc/dsp_template.c b/libavcodec/vvc/dsp_template.c index 1aa1e027bdd17..13bd8cd4a161b 100644 --- a/libavcodec/vvc/dsp_template.c +++ b/libavcodec/vvc/dsp_template.c @@ -45,32 +45,12 @@ static void FUNC(add_residual)(uint8_t *_dst, const int *res, } } -static void FUNC(add_residual_joint)(uint8_t *_dst, const int *res, - const int w, const int h, const ptrdiff_t _stride, const int c_sign, const int shift) -{ - pixel *dst = (pixel *)_dst; - - const int stride = _stride / sizeof(pixel); - - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { - const int r = ((*res) * c_sign) >> shift; - dst[x] = av_clip_pixel(dst[x] + r); - res++; - } - dst += stride; - } -} - -static void FUNC(pred_residual_joint)(int *buf, const int w, const int h, +static void FUNC(pred_residual_joint)(int *dst, const int *src, const int w, const int h, const int c_sign, const int shift) { - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { - *buf = ((*buf) * c_sign) >> shift; - buf++; - } - } + const int size = w * h; + for (int i = 0; i < size; i++) + dst[i] = (src[i] * c_sign) >> shift; } static void FUNC(transform_bdpcm)(int *coeffs, const int width, const int height, @@ -94,6 +74,24 @@ static void FUNC(transform_bdpcm)(int *coeffs, const int width, const int height } } +// 8.7.4.6 Residual modification process for blocks using colour space conversion +static void FUNC(adaptive_color_transform)(int *y, int *u, int *v, const int width, const int height) +{ + const int size = width * height; + const int bits = BIT_DEPTH + 1; + + for (int i = 0; i < size; i++) { + const int y0 = av_clip_intp2(y[i], bits); + const int cg = av_clip_intp2(u[i], bits); + const int co = av_clip_intp2(v[i], bits); + const int t = y0 - (cg >> 1); + + y[i] = cg + t; + u[i] = t - (co >> 1); + v[i] = co + u[i]; + } +} + static void FUNC(ff_vvc_itx_dsp_init)(VVCItxDSPContext *const itx) { #define VVC_ITX(TYPE, type, s) \ @@ -106,7 +104,6 @@ static void FUNC(ff_vvc_itx_dsp_init)(VVCItxDSPContext *const itx) VVC_ITX(TYPE, type, 32); itx->add_residual = FUNC(add_residual); - itx->add_residual_joint = FUNC(add_residual_joint); itx->pred_residual_joint = FUNC(pred_residual_joint); itx->transform_bdpcm = FUNC(transform_bdpcm); VVC_ITX(DCT2, dct2, 2) @@ -115,6 +112,8 @@ static void FUNC(ff_vvc_itx_dsp_init)(VVCItxDSPContext *const itx) VVC_ITX_COMMON(DCT8, dct8) VVC_ITX_COMMON(DST7, dst7) + itx->adaptive_color_transform = FUNC(adaptive_color_transform); + #undef VVC_ITX #undef VVC_ITX_COMMON } diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c index a7f102bc643a3..3815668bcf5f0 100644 --- a/libavcodec/vvc/filter.c +++ b/libavcodec/vvc/filter.c @@ -385,6 +385,9 @@ static int boundary_strength(const VVCLocalContext *lc, const MvField *curr, con { 
RefPicList *rpl = lc->sc->rpl; + if (curr->pred_flag == PF_PLT) + return 0; + if (curr->pred_flag == PF_IBC) return FFABS(neigh->mv[0].x - curr->mv[0].x) >= 8 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 8; @@ -772,17 +775,15 @@ static int get_qp(const VVCFrameContext *fc, const uint8_t *src, const int x, co static void vvc_deblock(const VVCLocalContext *lc, int x0, int y0, const int rs, const int vertical) { - VVCFrameContext *fc = lc->fc; - const VVCSPS *sps = fc->ps.sps; - const int c_end = sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; - const int ctb_size = fc->ps.sps->ctb_size_y; - const DBParams *params = fc->tab.deblock + rs; - int x_end = FFMIN(x0 + ctb_size, fc->ps.pps->width); - int y_end = FFMIN(y0 + ctb_size, fc->ps.pps->height); - - //not use this yet, may needed by plt. - const uint8_t no_p[4] = { 0 }; - const uint8_t no_q[4] = { 0 } ; + VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const int c_end = sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; + const int ctb_size = fc->ps.sps->ctb_size_y; + const DBParams *params = fc->tab.deblock + rs; + int x_end = FFMIN(x0 + ctb_size, fc->ps.pps->width); + int y_end = FFMIN(y0 + ctb_size, fc->ps.pps->height); + const int log2_min_cb_size = fc->ps.sps->min_cb_log2_size_y; + const int min_cb_width = fc->ps.pps->min_cb_width; if (!vertical) { FFSWAP(int, x_end, y_end); @@ -802,6 +803,8 @@ static void vvc_deblock(const VVCLocalContext *lc, int x0, int y0, const int rs, const uint8_t horizontal_ctu_edge = !vertical && !(x % ctb_size); int32_t bs[4], beta[4], tc[4] = { 0 }, all_zero_bs = 1; uint8_t max_len_p[4], max_len_q[4]; + uint8_t no_p[4] = { 0 }; + uint8_t no_q[4] = { 0 }; for (int i = 0; i < DEBLOCK_STEP >> (2 - vs); i++) { int tx = x; @@ -818,6 +821,13 @@ static void vvc_deblock(const VVCLocalContext *lc, int x0, int y0, const int rs, tc[i] = TC_CALC(qp, bs[i]) ; max_filter_length(fc, tx, ty, c_idx, vertical, horizontal_ctu_edge, bs[i], &max_len_p[i], &max_len_q[i]); all_zero_bs = 0; + + if (sps->r->sps_palette_enabled_flag) { + const int cu_q = (ty >> log2_min_cb_size) * min_cb_width + (tx >> log2_min_cb_size); + const int cu_p = (ty - !vertical >> log2_min_cb_size) * min_cb_width + (tx - vertical >> log2_min_cb_size); + no_q[i] = fc->tab.cpm[!!c_idx][cu_q] == MODE_PLT; + no_p[i] = cu_p >= 0 && fc->tab.cpm[!!c_idx][cu_p] == MODE_PLT; + } } } diff --git a/libavcodec/vvc/intra.c b/libavcodec/vvc/intra.c index 41ed89c94623b..f56b43be66eeb 100644 --- a/libavcodec/vvc/intra.c +++ b/libavcodec/vvc/intra.c @@ -27,6 +27,10 @@ #include "intra.h" #include "itx_1d.h" +#define POS(c_idx, x, y) \ + &fc->frame->data[c_idx][((y) >> fc->ps.sps->vshift[c_idx]) * fc->frame->linesize[c_idx] + \ + (((x) >> fc->ps.sps->hshift[c_idx]) << fc->ps.sps->pixel_shift)] + static int is_cclm(enum IntraPredMode mode) { return mode == INTRA_LT_CCLM || mode == INTRA_L_CCLM || mode == INTRA_T_CCLM; @@ -164,28 +168,6 @@ static void derive_transform_type(const VVCFrameContext *fc, const VVCLocalConte *trv = mts_to_trv[cu->mts_idx]; } -static void add_residual_for_joint_coding_chroma(VVCLocalContext *lc, - const TransformUnit *tu, TransformBlock *tb, const int chroma_scale) -{ - const VVCFrameContext *fc = lc->fc; - const CodingUnit *cu = lc->cu; - const int c_sign = 1 - 2 * fc->ps.ph.r->ph_joint_cbcr_sign_flag; - const int shift = tu->coded_flag[1] ^ tu->coded_flag[2]; - const int c_idx = 1 + tu->coded_flag[1]; - const ptrdiff_t stride = fc->frame->linesize[c_idx]; - const int hs = fc->ps.sps->hshift[c_idx]; - const int vs = 
fc->ps.sps->vshift[c_idx]; - uint8_t *dst = &fc->frame->data[c_idx][(tb->y0 >> vs) * stride + - ((tb->x0 >> hs) << fc->ps.sps->pixel_shift)]; - if (chroma_scale) { - fc->vvcdsp.itx.pred_residual_joint(tb->coeffs, tb->tb_width, tb->tb_height, c_sign, shift); - fc->vvcdsp.intra.lmcs_scale_chroma(lc, tb->coeffs, tb->coeffs, tb->tb_width, tb->tb_height, cu->x0, cu->y0); - fc->vvcdsp.itx.add_residual(dst, tb->coeffs, tb->tb_width, tb->tb_height, stride); - } else { - fc->vvcdsp.itx.add_residual_joint(dst, tb->coeffs, tb->tb_width, tb->tb_height, stride, c_sign, shift); - } -} - static int add_reconstructed_area(VVCLocalContext *lc, const int ch_type, const int x0, const int y0, const int w, const int h) { const VVCSPS *sps = lc->fc->ps.sps; @@ -303,21 +285,15 @@ static void scale(int *out, const int *in, const int w, const int h, const int s // part of 8.7.3 Scaling process for transform coefficients static void derive_qp(const VVCLocalContext *lc, const TransformUnit *tu, TransformBlock *tb) { - const VVCSPS *sps = lc->fc->ps.sps; - const H266RawSliceHeader *rsh = lc->sc->sh.r; - const CodingUnit *cu = lc->cu; - int qp, qp_act_offset; + const VVCSPS *sps = lc->fc->ps.sps; + const H266RawSliceHeader *rsh = lc->sc->sh.r; + const CodingUnit *cu = lc->cu; + const bool is_jcbcr = tb->c_idx && tu->joint_cbcr_residual_flag && tu->coded_flag[CB] && tu->coded_flag[CR]; + const int idx = is_jcbcr ? JCBCR : tb->c_idx; + const int qp = cu->qp[idx] + (idx ? 0 : sps->qp_bd_offset); + const int act_offset[] = { -5, 1, 3, 1 }; + const int qp_act_offset = cu->act_enabled_flag ? act_offset[idx] : 0; - if (tb->c_idx == 0) { - //fix me - qp = cu->qp[LUMA] + sps->qp_bd_offset; - qp_act_offset = cu->act_enabled_flag ? -5 : 0; - } else { - const int is_jcbcr = tu->joint_cbcr_residual_flag && tu->coded_flag[CB] && tu->coded_flag[CR]; - const int idx = is_jcbcr ? JCBCR : tb->c_idx; - qp = cu->qp[idx]; - qp_act_offset = cu->act_enabled_flag ? 
1 : 0; - } if (tb->ts) { const int qp_prime_ts_min = 4 + 6 * sps->r->sps_min_qp_prime_ts; @@ -336,29 +312,30 @@ static void derive_qp(const VVCLocalContext *lc, const TransformUnit *tu, Transf tb->bd_offset = (1 << tb->bd_shift) >> 1; } +static const uint8_t rem6[63 + 8 * 6 + 1] = { + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, +}; + +static const uint8_t div6[63 + 8 * 6 + 1] = { + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, +}; + +const static int level_scale[2][6] = { + { 40, 45, 51, 57, 64, 72 }, + { 57, 64, 72, 80, 90, 102 } +}; + //8.7.3 Scaling process for transform coefficients static av_always_inline int derive_scale(const TransformBlock *tb, const int sh_dep_quant_used_flag) { - static const uint8_t rem6[63 + 8 * 6 + 1] = { - 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, - 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, - 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, - 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, - 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, - }; - - static const uint8_t div6[63 + 8 * 6 + 1] = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, - 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, - }; - - const static int level_scale[2][6] = { - { 40, 45, 51, 57, 64, 72 }, - { 57, 64, 72, 80, 90, 102 } - }; const int addin = sh_dep_quant_used_flag && !tb->ts; const int qp = tb->qp + addin; @@ -515,29 +492,67 @@ static void transform_bdpcm(TransformBlock *tb, const VVCLocalContext *lc, const tb->max_scan_x = tb->tb_width - 1; } -static void itransform(VVCLocalContext *lc, TransformUnit *tu, const int tu_idx, const int target_ch_type) +static void lmcs_scale_chroma(VVCLocalContext *lc, TransformUnit *tu, TransformBlock *tb, const int target_ch_type) { - const VVCFrameContext *fc = lc->fc; - const VVCSPS *sps = fc->ps.sps; - const VVCSH *sh = &lc->sc->sh; - const CodingUnit *cu = lc->cu; - const int ps = fc->ps.sps->pixel_shift; - DECLARE_ALIGNED(32, int, temp)[MAX_TB_SIZE * MAX_TB_SIZE]; + const VVCFrameContext *fc = lc->fc; + const VVCSH *sh = &lc->sc->sh; + const CodingUnit *cu = lc->cu; + const int c_idx = tb->c_idx; + const int ch_type = c_idx > 0; + const int w = tb->tb_width; + const int h = tb->tb_height; + const int chroma_scale = ch_type && sh->r->sh_lmcs_used_flag && fc->ps.ph.r->ph_chroma_residual_scale_flag && (w * h > 4); + const int has_jcbcr = tu->joint_cbcr_residual_flag && c_idx; + + for (int j = 0; j < 1 + has_jcbcr; j++) { + const bool is_jcbcr = j > 0; + const int jcbcr_idx = CB + tu->coded_flag[CB]; + 
TransformBlock *jcbcr = &tu->tbs[jcbcr_idx - tu->tbs[0].c_idx]; + int *coeffs = is_jcbcr ? jcbcr->coeffs : tb->coeffs; + + if (!j && has_jcbcr) { + const int c_sign = 1 - 2 * fc->ps.ph.r->ph_joint_cbcr_sign_flag; + const int shift = tu->coded_flag[CB] ^ tu->coded_flag[CR]; + fc->vvcdsp.itx.pred_residual_joint(jcbcr->coeffs, tb->coeffs, w, h, c_sign, shift); + } + if (chroma_scale) + fc->vvcdsp.intra.lmcs_scale_chroma(lc, coeffs, w, h, cu->x0, cu->y0); + } +} + +static void add_residual(const VVCLocalContext *lc, TransformUnit *tu, const int target_ch_type) +{ + const VVCFrameContext *fc = lc->fc; + const CodingUnit *cu = lc->cu; for (int i = 0; i < tu->nb_tbs; i++) { - TransformBlock *tb = &tu->tbs[i]; - const int c_idx = tb->c_idx; - const int ch_type = c_idx > 0; - - if (ch_type == target_ch_type && tb->has_coeffs) { - const int w = tb->tb_width; - const int h = tb->tb_height; - const int chroma_scale = ch_type && sh->r->sh_lmcs_used_flag && fc->ps.ph.r->ph_chroma_residual_scale_flag && (w * h > 4); - const ptrdiff_t stride = fc->frame->linesize[c_idx]; - const int hs = sps->hshift[c_idx]; - const int vs = sps->vshift[c_idx]; - uint8_t *dst = &fc->frame->data[c_idx][(tb->y0 >> vs) * stride + ((tb->x0 >> hs) << ps)]; + TransformBlock *tb = tu->tbs + i; + const int c_idx = tb->c_idx; + const int ch_type = c_idx > 0; + const ptrdiff_t stride = fc->frame->linesize[c_idx]; + const bool has_residual = tb->has_coeffs || cu->act_enabled_flag || + (c_idx && tu->joint_cbcr_residual_flag); + uint8_t *dst = POS(c_idx, tb->x0, tb->y0); + + if (ch_type == target_ch_type && has_residual) + fc->vvcdsp.itx.add_residual(dst, tb->coeffs, tb->tb_width, tb->tb_height, stride); + } +} +static void itransform(VVCLocalContext *lc, TransformUnit *tu, const int target_ch_type) +{ + const VVCFrameContext *fc = lc->fc; + const CodingUnit *cu = lc->cu; + TransformBlock *tbs = tu->tbs; + const bool is_act_luma = cu->act_enabled_flag && target_ch_type == LUMA; + + for (int i = 0; i < tu->nb_tbs; i++) { + TransformBlock *tb = tbs + i; + const int c_idx = tb->c_idx; + const int ch_type = c_idx > 0; + const bool do_itx = is_act_luma || !cu->act_enabled_flag && ch_type == target_ch_type; + + if (tb->has_coeffs && do_itx) { if (cu->bdpcm_flag[tb->c_idx]) transform_bdpcm(tb, lc, cu); dequant(lc, tu, tb); @@ -547,22 +562,22 @@ static void itransform(VVCLocalContext *lc, TransformUnit *tu, const int tu_idx, if (cu->apply_lfnst_flag[c_idx]) ilfnst_transform(lc, tb); derive_transform_type(fc, lc, tb, &trh, &trv); - if (w > 1 && h > 1) + if (tb->tb_width > 1 && tb->tb_height > 1) itx_2d(fc, tb, trh, trv); else itx_1d(fc, tb, trh, trv); } - - if (chroma_scale) - fc->vvcdsp.intra.lmcs_scale_chroma(lc, temp, tb->coeffs, w, h, cu->x0, cu->y0); - // TODO: Address performance issue here by combining transform, lmcs_scale_chroma, and add_residual into one function. - // Complete this task before implementing ASM code. - fc->vvcdsp.itx.add_residual(dst, chroma_scale ? 
temp : tb->coeffs, w, h, stride); - - if (tu->joint_cbcr_residual_flag && tb->c_idx) - add_residual_for_joint_coding_chroma(lc, tu, tb, chroma_scale); + lmcs_scale_chroma(lc, tu, tb, target_ch_type); } } + + if (is_act_luma) { + fc->vvcdsp.itx.adaptive_color_transform( + tbs[LUMA].coeffs, tbs[CB].coeffs, tbs[CR].coeffs, + tbs[LUMA].tb_width, tbs[LUMA].tb_height); + } + + add_residual(lc, tu, target_ch_type); } static int reconstruct(VVCLocalContext *lc) @@ -576,17 +591,13 @@ static int reconstruct(VVCLocalContext *lc) TransformUnit *tu = cu->tus.head; for (int i = 0; tu; i++) { predict_intra(lc, tu, i, ch_type); - itransform(lc, tu, i, ch_type); + itransform(lc, tu, ch_type); tu = tu->next; } } return 0; } -#define POS(c_idx, x, y) \ - &fc->frame->data[c_idx][((y) >> fc->ps.sps->vshift[c_idx]) * fc->frame->linesize[c_idx] + \ - (((x) >> fc->ps.sps->hshift[c_idx]) << fc->ps.sps->pixel_shift)] - #define IBC_POS(c_idx, x, y) \ (fc->tab.ibc_vir_buf[c_idx] + \ (x << ps) + (y + ((cu->y0 & ~(sps->ctb_size_y - 1)) >> vs)) * ibc_stride) @@ -639,11 +650,11 @@ static void ibc_fill_vir_buf(const VVCLocalContext *lc, const CodingUnit *cu) { const VVCFrameContext *fc = lc->fc; const VVCSPS *sps = fc->ps.sps; - const int has_chroma = sps->r->sps_chroma_format_idc && cu->tree_type != DUAL_TREE_LUMA; - const int start = cu->tree_type == DUAL_TREE_CHROMA; - const int end = has_chroma ? CR : LUMA; + int start, end; - for (int c_idx = start; c_idx <= end; c_idx++) { + ff_vvc_channel_range(&start, &end, cu->tree_type, sps->r->sps_chroma_format_idc); + + for (int c_idx = start; c_idx < end; c_idx++) { const int hs = sps->hshift[c_idx]; const int vs = sps->vshift[c_idx]; const int ps = sps->pixel_shift; @@ -658,6 +669,38 @@ static void ibc_fill_vir_buf(const VVCLocalContext *lc, const CodingUnit *cu) } } +int ff_vvc_palette_derive_scale(VVCLocalContext *lc, const TransformUnit *tu, TransformBlock *tb) +{ + const VVCSPS *sps = lc->fc->ps.sps; + const int qp_prime_ts_min = 4 + 6 * sps->r->sps_min_qp_prime_ts; + int qp; + + derive_qp(lc, tu, tb); + qp = FFMAX(qp_prime_ts_min, tb->qp); + return level_scale[0][rem6[qp]] << div6[qp]; +} + +// 8.4.5.3 Decoding process for palette mode +static void vvc_predict_palette(VVCLocalContext *lc) +{ + const VVCFrameContext *fc = lc->fc; + const CodingUnit *cu = lc->cu; + TransformUnit *tu = cu->tus.head; + const VVCSPS *sps = fc->ps.sps; + const int ps = sps->pixel_shift; + + for (int i = 0; i < tu->nb_tbs; i++) { + TransformBlock *tb = &tu->tbs[i]; + const int c_idx = tb->c_idx; + const int w = tb->tb_width; + const int h = tb->tb_height; + const ptrdiff_t stride = fc->frame->linesize[c_idx]; + uint8_t *dst = POS(c_idx, cu->x0, cu->y0); + + av_image_copy_plane(dst, stride, (uint8_t*)tb->coeffs, w << ps, w << ps, h); + } +} + int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const int ry) { const VVCFrameContext *fc = lc->fc; @@ -678,6 +721,8 @@ int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const in ff_vvc_predict_ciip(lc); else if (cu->pred_mode == MODE_IBC) vvc_predict_ibc(lc); + else if (cu->pred_mode == MODE_PLT) + vvc_predict_palette(lc); if (cu->coded_flag) { ret = reconstruct(lc); } else { diff --git a/libavcodec/vvc/intra.h b/libavcodec/vvc/intra.h index 8a02699135486..1201c70836fbd 100644 --- a/libavcodec/vvc/intra.h +++ b/libavcodec/vvc/intra.h @@ -45,5 +45,6 @@ int ff_vvc_intra_pred_angle_derive(int pred_mode); int ff_vvc_intra_inv_angle_derive(int pred_mode); int ff_vvc_wide_angle_mode_mapping(const CodingUnit *cu, int 
tb_width, int tb_height, int c_idx, int pred_mode_intra); +int ff_vvc_palette_derive_scale(VVCLocalContext *lc, const TransformUnit *tu, TransformBlock *tb); #endif // AVCODEC_VVC_INTRA_H diff --git a/libavcodec/vvc/intra_template.c b/libavcodec/vvc/intra_template.c index 440ac5b6cccc6..3ec6c72213e7f 100644 --- a/libavcodec/vvc/intra_template.c +++ b/libavcodec/vvc/intra_template.c @@ -428,7 +428,7 @@ static int FUNC(lmcs_derive_chroma_scale)(VVCLocalContext *lc, const int x0, con } // 8.7.5.3 Picture reconstruction with luma dependent chroma residual scaling process for chroma samples -static void FUNC(lmcs_scale_chroma)(VVCLocalContext *lc, int *dst, const int *coeff, +static void FUNC(lmcs_scale_chroma)(VVCLocalContext *lc, int *coeff, const int width, const int height, const int x0_cu, const int y0_cu) { const int chroma_scale = FUNC(lmcs_derive_chroma_scale)(lc, x0_cu, y0_cu); @@ -438,11 +438,10 @@ static void FUNC(lmcs_scale_chroma)(VVCLocalContext *lc, int *dst, const int *co const int c = av_clip_intp2(*coeff, BIT_DEPTH); if (c > 0) - *dst = (c * chroma_scale + (1 << 10)) >> 11; + *coeff = (c * chroma_scale + (1 << 10)) >> 11; else - *dst = -((-c * chroma_scale + (1 << 10)) >> 11); + *coeff = -((-c * chroma_scale + (1 << 10)) >> 11); coeff++; - dst++; } } } diff --git a/libavcodec/vvc/mvs.c b/libavcodec/vvc/mvs.c index 566df158a8aac..2cf67def7bed0 100644 --- a/libavcodec/vvc/mvs.c +++ b/libavcodec/vvc/mvs.c @@ -144,7 +144,9 @@ static int derive_temporal_colocated_mvs(const VVCLocalContext *lc, MvField temp const SliceContext *sc = lc->sc; RefPicList* refPicList = sc->rpl; - if (temp_col.pred_flag == PF_INTRA) + if (temp_col.pred_flag == PF_INTRA || + temp_col.pred_flag == PF_IBC || + temp_col.pred_flag == PF_PLT) return 0; if (sb_flag){ @@ -266,7 +268,7 @@ void ff_vvc_set_mvf(const VVCLocalContext *lc, const int x0, const int y0, const } } -void ff_vvc_set_intra_mvf(const VVCLocalContext *lc, const int dmvr) +void ff_vvc_set_intra_mvf(const VVCLocalContext *lc, const bool dmvr, const PredFlag pf, const bool ciip_flag) { const VVCFrameContext *fc = lc->fc; const CodingUnit *cu = lc->cu; @@ -277,7 +279,10 @@ void ff_vvc_set_intra_mvf(const VVCLocalContext *lc, const int dmvr) for (int dx = 0; dx < cu->cb_width; dx += min_pu_size) { const int x = cu->x0 + dx; const int y = cu->y0 + dy; - TAB_MVF(x, y).pred_flag = PF_INTRA; + MvField *mv = &TAB_MVF(x, y); + + mv->pred_flag = pf; + mv->ciip_flag = ciip_flag; } } } @@ -599,7 +604,19 @@ static void init_neighbour_context(NeighbourContext *ctx, const VVCLocalContext static av_always_inline PredMode pred_flag_to_mode(PredFlag pred) { - return pred == PF_IBC ? MODE_IBC : (pred == PF_INTRA ? 
MODE_INTRA : MODE_INTER); + static const PredMode lut[] = { + MODE_INTRA, // PF_INTRA + MODE_INTER, // PF_L0 + MODE_INTER, // PF_L1 + MODE_INTER, // PF_BI + 0, // invalid + MODE_IBC, // PF_IBC + 0, // invalid + 0, // invalid + MODE_PLT, // PF_PLT + }; + + return lut[pred]; } static int check_available(Neighbour *n, const VVCLocalContext *lc, const int check_mer) diff --git a/libavcodec/vvc/mvs.h b/libavcodec/vvc/mvs.h index b2242b2a4d9ea..7150c0b8cf26c 100644 --- a/libavcodec/vvc/mvs.h +++ b/libavcodec/vvc/mvs.h @@ -43,6 +43,6 @@ void ff_vvc_update_hmvp(VVCLocalContext *lc, const MotionInfo *mi); int ff_vvc_no_backward_pred_flag(const VVCLocalContext *lc); MvField* ff_vvc_get_mvf(const VVCFrameContext *fc, const int x0, const int y0); void ff_vvc_set_mvf(const VVCLocalContext *lc, const int x0, const int y0, const int w, const int h, const MvField *mvf); -void ff_vvc_set_intra_mvf(const VVCLocalContext *lc, int dmvr); +void ff_vvc_set_intra_mvf(const VVCLocalContext *lc, bool dmvr, PredFlag pf, bool ciip_flag); #endif //AVCODEC_VVC_MVS_H diff --git a/libavcodec/vvc/ps.c b/libavcodec/vvc/ps.c index e8c312d8ac05d..d9f46b219af90 100644 --- a/libavcodec/vvc/ps.c +++ b/libavcodec/vvc/ps.c @@ -408,6 +408,8 @@ static int pps_add_ctus(VVCPPS *pps, int *off, const int rx, const int ry, int start = *off; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { + if (*off >= pps->ctb_count) + return AVERROR_INVALIDDATA; pps->ctb_addr_in_slice[*off] = ctu_rs(rx + x, ry + y, pps); (*off)++; } @@ -415,16 +417,21 @@ static int pps_add_ctus(VVCPPS *pps, int *off, const int rx, const int ry, return *off - start; } -static void pps_single_slice_picture(VVCPPS *pps, int *off) +static int pps_single_slice_picture(VVCPPS *pps, int *off) { pps->num_ctus_in_slice[0] = 0; for (int j = 0; j < pps->r->num_tile_rows; j++) { for (int i = 0; i < pps->r->num_tile_columns; i++) { - pps->num_ctus_in_slice[0] += pps_add_ctus(pps, off, + const int ret = pps_add_ctus(pps, off, pps->col_bd[i], pps->row_bd[j], pps->r->col_width_val[i], pps->r->row_height_val[j]); + if (ret < 0) + return ret; + pps->num_ctus_in_slice[0] += ret; } } + + return 0; } static void subpic_tiles(int *tile_x, int *tile_y, int *tile_x_end, int *tile_y_end, @@ -451,50 +458,36 @@ static void subpic_tiles(int *tile_x, int *tile_y, int *tile_x_end, int *tile_y_ (*tile_y_end)++; } -static bool mark_tile_as_used(bool *tile_in_subpic, const int tx, const int ty, const int tile_columns) +static int pps_subpic_less_than_one_tile_slice(VVCPPS *pps, const VVCSPS *sps, const int i, const int tx, const int ty, int *off) { - const size_t tile_idx = ty * tile_columns + tx; - if (tile_in_subpic[tile_idx]) { - /* the tile is covered by other subpictures */ - return false; - } - tile_in_subpic[tile_idx] = true; - return true; -} - -static int pps_subpic_less_than_one_tile_slice(VVCPPS *pps, const VVCSPS *sps, const int i, const int tx, const int ty, int *off, bool *tile_in_subpic) -{ - const int subpic_bottom = sps->r->sps_subpic_ctu_top_left_y[i] + sps->r->sps_subpic_height_minus1[i]; - const int tile_bottom = pps->row_bd[ty] + pps->r->row_height_val[ty] - 1; - const bool is_final_subpic_in_tile = subpic_bottom == tile_bottom; - - if (is_final_subpic_in_tile && !mark_tile_as_used(tile_in_subpic, tx, ty, pps->r->num_tile_columns)) - return AVERROR_INVALIDDATA; - - pps->num_ctus_in_slice[i] = pps_add_ctus(pps, off, + const int ret = pps_add_ctus(pps, off, sps->r->sps_subpic_ctu_top_left_x[i], sps->r->sps_subpic_ctu_top_left_y[i], sps->r->sps_subpic_width_minus1[i] + 
1, sps->r->sps_subpic_height_minus1[i] + 1); + if (ret < 0) + return ret; + pps->num_ctus_in_slice[i] = ret; return 0; } static int pps_subpic_one_or_more_tiles_slice(VVCPPS *pps, const int tile_x, const int tile_y, const int x_end, const int y_end, - const int i, int *off, bool *tile_in_subpic) + const int i, int *off) { for (int ty = tile_y; ty < y_end; ty++) { for (int tx = tile_x; tx < x_end; tx++) { - if (!mark_tile_as_used(tile_in_subpic, tx, ty, pps->r->num_tile_columns)) - return AVERROR_INVALIDDATA; - - pps->num_ctus_in_slice[i] += pps_add_ctus(pps, off, + const int ret = pps_add_ctus(pps, off, pps->col_bd[tx], pps->row_bd[ty], pps->r->col_width_val[tx], pps->r->row_height_val[ty]); + if (ret < 0) + return ret; + + pps->num_ctus_in_slice[i] += ret; } } return 0; } -static int pps_subpic_slice(VVCPPS *pps, const VVCSPS *sps, const int i, int *off, bool *tile_in_subpic) +static int pps_subpic_slice(VVCPPS *pps, const VVCSPS *sps, const int i, int *off) { int tx, ty, x_end, y_end; @@ -503,28 +496,25 @@ static int pps_subpic_slice(VVCPPS *pps, const VVCSPS *sps, const int i, int *of subpic_tiles(&tx, &ty, &x_end, &y_end, sps, pps, i); if (ty + 1 == y_end && sps->r->sps_subpic_height_minus1[i] + 1 < pps->r->row_height_val[ty]) - return pps_subpic_less_than_one_tile_slice(pps, sps, i, tx, ty, off, tile_in_subpic); + return pps_subpic_less_than_one_tile_slice(pps, sps, i, tx, ty, off); else - return pps_subpic_one_or_more_tiles_slice(pps, tx, ty, x_end, y_end, i, off, tile_in_subpic); + return pps_subpic_one_or_more_tiles_slice(pps, tx, ty, x_end, y_end, i, off); } static int pps_single_slice_per_subpic(VVCPPS *pps, const VVCSPS *sps, int *off) { + int ret; + if (!sps->r->sps_subpic_info_present_flag) { - pps_single_slice_picture(pps, off); + ret = pps_single_slice_picture(pps, off); + if (ret < 0) + return ret; } else { - bool tile_in_subpic[VVC_MAX_TILES_PER_AU] = {0}; for (int i = 0; i < pps->r->pps_num_slices_in_pic_minus1 + 1; i++) { - const int ret = pps_subpic_slice(pps, sps, i, off, tile_in_subpic); + const int ret = pps_subpic_slice(pps, sps, i, off); if (ret < 0) return ret; } - - // We only use tile_in_subpic to check that the subpictures don't overlap - // here; we don't use tile_in_subpic to check that the subpictures cover - // every tile. It is possible to avoid doing this work here because the - // covering property of subpictures is already guaranteed by the mechanisms - // which check every CTU belongs to a slice. 
} return 0; } @@ -538,9 +528,13 @@ static int pps_one_tile_slices(VVCPPS *pps, const int tile_idx, int i, int *off) ctu_xy(&rx, &ry, tile_x, tile_y, pps); ctu_y_end = ry + r->row_height_val[tile_y]; while (ry < ctu_y_end) { + int ret; pps->slice_start_offset[i] = *off; - pps->num_ctus_in_slice[i] = pps_add_ctus(pps, off, rx, ry, + ret = pps_add_ctus(pps, off, rx, ry, r->col_width_val[tile_x], r->slice_height_in_ctus[i]); + if (ret < 0) + return ret; + pps->num_ctus_in_slice[i] = ret; ry += r->slice_height_in_ctus[i++]; } i--; @@ -557,13 +551,17 @@ static int pps_multi_tiles_slice(VVCPPS *pps, const int tile_idx, const int i, i pps->num_ctus_in_slice[i] = 0; for (int ty = tile_y; ty <= tile_y + r->pps_slice_height_in_tiles_minus1[i]; ty++) { for (int tx = tile_x; tx <= tile_x + r->pps_slice_width_in_tiles_minus1[i]; tx++) { + int ret; const int idx = ty * r->num_tile_columns + tx; if (tile_in_slice[idx]) return AVERROR_INVALIDDATA; tile_in_slice[idx] = true; ctu_xy(&rx, &ry, tx, ty, pps); - pps->num_ctus_in_slice[i] += pps_add_ctus(pps, off, rx, ry, + ret = pps_add_ctus(pps, off, rx, ry, r->col_width_val[tx], r->row_height_val[ty]); + if (ret < 0) + return ret; + pps->num_ctus_in_slice[i] += ret; } } @@ -574,7 +572,7 @@ static int pps_rect_slice(VVCPPS *pps, const VVCSPS *sps) { const H266RawPPS *r = pps->r; bool tile_in_slice[VVC_MAX_TILES_PER_AU] = {false}; - int tile_idx = 0, off = 0; + int tile_idx = 0, off = 0, ret; if (r->pps_single_slice_per_subpic_flag) { return pps_single_slice_per_subpic(pps, sps, &off); @@ -586,9 +584,12 @@ static int pps_rect_slice(VVCPPS *pps, const VVCSPS *sps) if (tile_in_slice[tile_idx]) return AVERROR_INVALIDDATA; tile_in_slice[tile_idx] = true; - i = pps_one_tile_slices(pps, tile_idx, i, &off); + ret = pps_one_tile_slices(pps, tile_idx, i, &off); + if (ret < 0) + return ret; + i = ret; } else { - const int ret = pps_multi_tiles_slice(pps, tile_idx, i, &off, tile_in_slice); + ret = pps_multi_tiles_slice(pps, tile_idx, i, &off, tile_in_slice); if (ret < 0) return ret; } @@ -603,21 +604,28 @@ static int pps_rect_slice(VVCPPS *pps, const VVCSPS *sps) return 0; } -static void pps_no_rect_slice(VVCPPS* pps) +static int pps_no_rect_slice(VVCPPS* pps) { const H266RawPPS* r = pps->r; int rx, ry, off = 0; for (int tile_y = 0; tile_y < r->num_tile_rows; tile_y++) { for (int tile_x = 0; tile_x < r->num_tile_columns; tile_x++) { + int ret; ctu_xy(&rx, &ry, tile_x, tile_y, pps); - pps_add_ctus(pps, &off, rx, ry, r->col_width_val[tile_x], r->row_height_val[tile_y]); + ret = pps_add_ctus(pps, &off, rx, ry, r->col_width_val[tile_x], r->row_height_val[tile_y]); + if (ret < 0) + return ret; } } + + return 0; } static int pps_slice_map(VVCPPS *pps, const VVCSPS *sps) { + int ret; + pps->ctb_addr_in_slice = av_calloc(pps->ctb_count, sizeof(*pps->ctb_addr_in_slice)); if (!pps->ctb_addr_in_slice) return AVERROR(ENOMEM); @@ -625,7 +633,9 @@ static int pps_slice_map(VVCPPS *pps, const VVCSPS *sps) if (pps->r->pps_rect_slice_flag) return pps_rect_slice(pps, sps); - pps_no_rect_slice(pps); + ret = pps_no_rect_slice(pps); + if (ret < 0) + return ret; return 0; } @@ -839,7 +849,7 @@ static int lmcs_derive_lut(VVCLMCS *lmcs, const H266RawAPS *rlmcs, const H266Raw uint16_t input_pivot[LMCS_MAX_BIN_SIZE]; uint16_t scale_coeff[LMCS_MAX_BIN_SIZE]; uint16_t inv_scale_coeff[LMCS_MAX_BIN_SIZE]; - int i, delta_crs; + int i, delta_crs, sum_cw = 0; if (bit_depth > LMCS_MAX_BIT_DEPTH) return AVERROR_PATCHWELCOME; @@ -850,8 +860,12 @@ static int lmcs_derive_lut(VVCLMCS *lmcs, const H266RawAPS 
*rlmcs, const H266Raw lmcs->max_bin_idx = LMCS_MAX_BIN_SIZE - 1 - rlmcs->lmcs_delta_max_bin_idx; memset(cw, 0, sizeof(cw)); - for (int i = lmcs->min_bin_idx; i <= lmcs->max_bin_idx; i++) + for (int i = lmcs->min_bin_idx; i <= lmcs->max_bin_idx; i++) { cw[i] = org_cw + (1 - 2 * rlmcs->lmcs_delta_sign_cw_flag[i]) * rlmcs->lmcs_delta_abs_cw[i]; + sum_cw += cw[i]; + } + if (sum_cw > (1 << bit_depth) - 1) + return AVERROR_INVALIDDATA; delta_crs = (1 - 2 * rlmcs->lmcs_delta_sign_crs_flag) * rlmcs->lmcs_delta_abs_crs; @@ -859,13 +873,20 @@ static int lmcs_derive_lut(VVCLMCS *lmcs, const H266RawAPS *rlmcs, const H266Raw for (i = 0; i < LMCS_MAX_BIN_SIZE; i++) { input_pivot[i] = i * org_cw; lmcs->pivot[i + 1] = lmcs->pivot[i] + cw[i]; + if (i >= lmcs->min_bin_idx && i <= lmcs->max_bin_idx && + lmcs->pivot[i] % (1 << (bit_depth - 5)) != 0 && + lmcs->pivot[i] >> (bit_depth - 5) == lmcs->pivot[i + 1] >> (bit_depth - 5)) + return AVERROR_INVALIDDATA; scale_coeff[i] = (cw[i] * (1 << 11) + off) >> shift; if (cw[i] == 0) { inv_scale_coeff[i] = 0; lmcs->chroma_scale_coeff[i] = (1 << 11); } else { + const int cw_plus_d = cw[i] + delta_crs; + if (cw_plus_d < (org_cw >> 3) || cw_plus_d > ((org_cw << 3) - 1)) + return AVERROR_INVALIDDATA; inv_scale_coeff[i] = org_cw * (1 << 11) / cw[i]; - lmcs->chroma_scale_coeff[i] = org_cw * (1 << 11) / (cw[i] + delta_crs); + lmcs->chroma_scale_coeff[i] = org_cw * (1 << 11) / cw_plus_d; } } diff --git a/libavcodec/vvc/refs.c b/libavcodec/vvc/refs.c index 1cfca4820477d..79967b77d3fa5 100644 --- a/libavcodec/vvc/refs.c +++ b/libavcodec/vvc/refs.c @@ -52,6 +52,12 @@ void ff_vvc_unref_frame(VVCFrameContext *fc, VVCFrame *frame, int flags) frame->flags = 0; if (!frame->flags) { av_frame_unref(frame->frame); + + if (frame->needs_fg) { + av_frame_unref(frame->frame_grain); + frame->needs_fg = 0; + } + av_refstruct_unref(&frame->sps); av_refstruct_unref(&frame->pps); av_refstruct_unref(&frame->progress); @@ -154,6 +160,14 @@ static VVCFrame *alloc_frame(VVCContext *s, VVCFrameContext *fc) frame->ref_width = pps->r->pps_pic_width_in_luma_samples - win->left_offset - win->right_offset; frame->ref_height = pps->r->pps_pic_height_in_luma_samples - win->bottom_offset - win->top_offset; + if (fc->sei.frame_field_info.present) { + if (fc->sei.frame_field_info.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) + frame->frame->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; + if (fc->sei.frame_field_info.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD || + fc->sei.frame_field_info.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD) + frame->frame->flags |= AV_FRAME_FLAG_INTERLACED; + } + frame->progress = alloc_progress(); if (!frame->progress) goto fail; @@ -285,7 +299,13 @@ int ff_vvc_output_frame(VVCContext *s, VVCFrameContext *fc, AVFrame *out, const if (frame->flags & VVC_FRAME_FLAG_CORRUPT) frame->frame->flags |= AV_FRAME_FLAG_CORRUPT; - ret = av_frame_ref(out, frame->frame); + ret = av_frame_ref(out, frame->needs_fg ? 
frame->frame_grain : frame->frame); + if (ret < 0) + return ret; + + if (!(s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) + av_frame_remove_side_data(out, AV_FRAME_DATA_FILM_GRAIN_PARAMS); + if (frame->flags & VVC_FRAME_FLAG_BUMPING) ff_vvc_unref_frame(fc, frame, VVC_FRAME_FLAG_OUTPUT | VVC_FRAME_FLAG_BUMPING); else diff --git a/libavcodec/vvc/sei.c b/libavcodec/vvc/sei.c new file mode 100644 index 0000000000000..d8ab2bf245a18 --- /dev/null +++ b/libavcodec/vvc/sei.c @@ -0,0 +1,239 @@ +/* + * VVC Supplementary Enhancement Information messages + * + * copyright (c) 2024 Wu Jianhua + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "sei.h" +#include "dec.h" +#include "libavutil/refstruct.h" + +static int decode_film_grain_characteristics(H2645SEIFilmGrainCharacteristics *h, const SEIRawFilmGrainCharacteristics *s, const VVCFrameContext *fc) +{ + const VVCSPS *sps = fc->ps.sps; + + h->present = !s->fg_characteristics_cancel_flag; + if (h->present) { + h->model_id = s->fg_model_id; + h->separate_colour_description_present_flag = s->fg_separate_colour_description_present_flag; + if (h->separate_colour_description_present_flag) { + h->bit_depth_luma = s->fg_bit_depth_luma_minus8 + 8; + h->bit_depth_chroma = s->fg_bit_depth_chroma_minus8 + 8; + h->full_range = s->fg_full_range_flag; + h->color_primaries = s->fg_colour_primaries; + h->transfer_characteristics = s->fg_transfer_characteristics; + h->matrix_coeffs = s->fg_matrix_coeffs; + } else { + if (!sps) { + av_log(fc->log_ctx, AV_LOG_ERROR, + "No active SPS for film_grain_characteristics.\n"); + return AVERROR_INVALIDDATA; + } + h->bit_depth_luma = sps->bit_depth; + h->bit_depth_chroma = sps->bit_depth; + h->full_range = sps->r->vui.vui_full_range_flag; + h->color_primaries = sps->r->vui.vui_colour_primaries; + h->transfer_characteristics = sps->r->vui.vui_transfer_characteristics; + h->matrix_coeffs = sps->r->vui.vui_matrix_coeffs ; + } + + h->blending_mode_id = s->fg_blending_mode_id; + h->log2_scale_factor = s->fg_log2_scale_factor; + + for (int c = 0; c < 3; c++) { + h->comp_model_present_flag[c] = s->fg_comp_model_present_flag[c]; + if (h->comp_model_present_flag[c]) { + h->num_intensity_intervals[c] = s->fg_num_intensity_intervals_minus1[c] + 1; + h->num_model_values[c] = s->fg_num_model_values_minus1[c] + 1; + + if (h->num_model_values[c] > 6) + return AVERROR_INVALIDDATA; + + for (int i = 0; i < h->num_intensity_intervals[c]; i++) { + h->intensity_interval_lower_bound[c][i] = s->fg_intensity_interval_lower_bound[c][i]; + h->intensity_interval_upper_bound[c][i] = s->fg_intensity_interval_upper_bound[c][i]; + for (int j = 0; j < h->num_model_values[c]; j++) + h->comp_model_value[c][i][j] = s->fg_comp_model_value[c][i][j]; + } + } + } + + h->persistence_flag = 
s->fg_characteristics_persistence_flag; + } + + return 0; +} + +static int decode_decoded_picture_hash(H274SEIPictureHash *h, const SEIRawDecodedPictureHash *s) +{ + h->present = 1; + h->hash_type = s->dph_sei_hash_type; + if (h->hash_type == 0) + memcpy(h->md5, s->dph_sei_picture_md5, sizeof(h->md5)); + else if (h->hash_type == 1) + memcpy(h->crc, s->dph_sei_picture_crc, sizeof(h->crc)); + else if (h->hash_type == 2) + memcpy(h->checksum, s->dph_sei_picture_checksum, sizeof(h->checksum)); + + return 0; +} + +static int decode_display_orientation(H2645SEIDisplayOrientation *h, const SEIRawDisplayOrientation *s) +{ + int degrees[] = { 0, 0x8000, 0x4000, 0xC000 }; + + h->present = !s->display_orientation_cancel_flag; + if (h->present) { + if (s->display_orientation_transform_type > 7) + return AVERROR_INVALIDDATA; + + h->vflip = 0; + if (s->display_orientation_transform_type == 1 || + s->display_orientation_transform_type == 3 || + s->display_orientation_transform_type == 4 || + s->display_orientation_transform_type == 6) { + h->hflip = 1; + } else { + h->hflip = 0; + } + h->anticlockwise_rotation = degrees[s->display_orientation_transform_type >> 1]; + } + + return 0; +} + +static int decode_content_light_level_info(H2645SEIContentLight *h, const SEIRawContentLightLevelInfo *s) +{ + h->present = 1; + h->max_content_light_level = s->max_content_light_level; + h->max_pic_average_light_level = s->max_pic_average_light_level; + + return 0; +} + +static int decode_frame_field_info(H274SEIFrameFieldInfo *h, const SEIRawFrameFieldInformation *s) +{ + if (s->ffi_source_scan_type > 3) + return AVERROR_INVALIDDATA; + + h->present = 1; + if (s->ffi_field_pic_flag) { + if (s->ffi_bottom_field_flag) + h->picture_struct = AV_PICTURE_STRUCTURE_BOTTOM_FIELD; + else + h->picture_struct = AV_PICTURE_STRUCTURE_TOP_FIELD; + } else { + h->display_elemental_periods = s->ffi_display_elemental_periods_minus1 + 1; + } + + h->source_scan_type = s->ffi_source_scan_type; + h->duplicate_flag = s->ffi_duplicate_flag; + + return 0; +} + +static int decode_ambient_viewing_environment(H2645SEIAmbientViewingEnvironment *h, const SEIRawAmbientViewingEnvironment *s) +{ + h->present = 1; + h->ambient_illuminance = s->ambient_illuminance; + h->ambient_light_x = s->ambient_light_x; + h->ambient_light_y = s->ambient_light_y; + + return 0; +} + +static int decode_mastering_display_colour_volume(H2645SEIMasteringDisplay *h, const SEIRawMasteringDisplayColourVolume *s) +{ + h->present = 1; + + for (int c = 0; c < 3; c++) { + h->display_primaries[c][0] = s->display_primaries_x[c]; + h->display_primaries[c][1] = s->display_primaries_y[c]; + } + + h->white_point[0] = s->white_point_x; + h->white_point[1] = s->white_point_y; + + h->max_luminance = s->max_display_mastering_luminance; + h->min_luminance = s->min_display_mastering_luminance; + + return 0; +} + +int ff_vvc_sei_decode(VVCSEI *s, const H266RawSEI *sei, const struct VVCFrameContext *fc) +{ + H2645SEI *c = &s->common; + + if (!sei) + return AVERROR_INVALIDDATA; + + for (int i = 0; i < sei->message_list.nb_messages; i++) { + SEIRawMessage *message = &sei->message_list.messages[i]; + void *payload = message->payload; + + switch (message->payload_type) { + case SEI_TYPE_FILM_GRAIN_CHARACTERISTICS: + av_refstruct_unref(&c->film_grain_characteristics); + c->film_grain_characteristics = av_refstruct_allocz(sizeof(*c->film_grain_characteristics)); + if (!c->film_grain_characteristics) + return AVERROR(ENOMEM); + return decode_film_grain_characteristics(c->film_grain_characteristics, 
payload, fc); + + case SEI_TYPE_DECODED_PICTURE_HASH: + return decode_decoded_picture_hash(&s->picture_hash, payload); + + case SEI_TYPE_DISPLAY_ORIENTATION: + return decode_display_orientation(&s->common.display_orientation, payload); + + case SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO: + return decode_content_light_level_info(&s->common.content_light, payload); + + case SEI_TYPE_FRAME_FIELD_INFO: + return decode_frame_field_info(&s->frame_field_info, payload); + + case SEI_TYPE_AMBIENT_VIEWING_ENVIRONMENT: + return decode_ambient_viewing_environment(&s->common.ambient_viewing_environment, payload); + + case SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME: + return decode_mastering_display_colour_volume(&s->common.mastering_display, payload); + + default: + av_log(fc->log_ctx, AV_LOG_DEBUG, "Skipped %s SEI %d\n", + sei->nal_unit_header.nal_unit_type == VVC_PREFIX_SEI_NUT ? + "PREFIX" : "SUFFIX", message->payload_type); + return FF_H2645_SEI_MESSAGE_UNHANDLED; + } + } + + return 0; +} + +int ff_vvc_sei_replace(VVCSEI *dst, const VVCSEI *src) +{ + dst->picture_hash.present = 0; // drop hash + dst->frame_field_info.present = 0; // drop field info + return ff_h2645_sei_ctx_replace(&dst->common, &src->common); +} + +void ff_vvc_sei_reset(VVCSEI *s) +{ + ff_h2645_sei_reset(&s->common); + s->picture_hash.present = 0; + s->frame_field_info.present = 0; +} diff --git a/libavcodec/vvc/sei.h b/libavcodec/vvc/sei.h new file mode 100644 index 0000000000000..578b48a0e4382 --- /dev/null +++ b/libavcodec/vvc/sei.h @@ -0,0 +1,48 @@ +/* + * VVC Supplementary Enhancement Information messages + * + * copyright (c) 2024 Wu Jianhua + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VVC_SEI_H +#define AVCODEC_VVC_SEI_H + +#include + +#include "libavcodec/get_bits.h" +#include "libavcodec/cbs.h" +#include "libavcodec/cbs_h266.h" +#include "libavcodec/h2645_sei.h" +#include "libavcodec/sei.h" +#include "libavcodec/vvc.h" +#include "libavcodec/h274.h" + +typedef struct VVCSEI { + H2645SEI common; + H274SEIPictureHash picture_hash; + H274SEIFrameFieldInfo frame_field_info; +} VVCSEI; + +struct VVCFrameContext; + +int ff_vvc_sei_decode(VVCSEI *s, const H266RawSEI *sei, const struct VVCFrameContext *fc); +int ff_vvc_sei_replace(VVCSEI *dst, const VVCSEI *src); +void ff_vvc_sei_reset(VVCSEI *s); + +#endif /* AVCODEC_VVC_SEI_H */ diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c index 6194416e14a95..2138341b0fe83 100644 --- a/libavcodec/vvc/thread.c +++ b/libavcodec/vvc/thread.c @@ -283,6 +283,13 @@ static void add_progress_listener(VVCFrame *ref, ProgressListener *l, ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l); } +static void ep_init_wpp(EntryPoint *next, const EntryPoint *ep, const VVCSPS *sps) +{ + memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state)); + memcpy(next->pp, ep->pp, sizeof(next->pp)); + ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag); +} + static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t) { VVCFrameThread *ft = fc->ft; @@ -292,10 +299,8 @@ static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceC if (sps->r->sps_entropy_coding_sync_enabled_flag) { if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) { EntryPoint *next = ep + 1; - if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) { - memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state)); - ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag); - } + if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) + ep_init_wpp(next, ep, sps); } if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1)) frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE); diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index 889306aebd6fd..51487b72b5a0d 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -79,7 +79,7 @@ static av_cold int encode_init(AVCodecContext *avctx) AV_WL32(extradata, flags1); AV_WL16(extradata + 4, flags2); } else { - av_assert0(0); + av_unreachable("This function is only used with WMAV1/2 encoders"); } avctx->extradata = extradata; s->use_exp_vlc = flags2 & 0x0001; @@ -206,7 +206,7 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], // FIXME remove duplication relative to decoder if (s->use_variable_block_len) { - av_assert0(0); // FIXME not implemented + av_unreachable("use_variable_block_len unimplemented, set to 0 during init"); } else { /* fixed block len */ s->next_block_len_bits = s->frame_len_bits; @@ -306,7 +306,8 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], if (s->use_exp_vlc) { encode_exp_vlc(s, ch, fixed_exp); } else { - av_assert0(0); // FIXME not implemented + av_unreachable("use_exp_vlc always set to 1 during init"); + // FIXME not implemented // encode_exp_lsp(s, ch); } } @@ 
-365,7 +366,7 @@ static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], init_put_bits(&s->pb, buf, buf_size); if (s->use_bit_reservoir) - av_assert0(0); // FIXME not implemented + av_unreachable("use_bit_reseroir unimplemented, set to 0 during init"); else if (encode_block(s, src_coefs, total_gain) < 0) return INT_MAX; diff --git a/libavcodec/wmv2enc.c b/libavcodec/wmv2enc.c index f9fd918dbf6f6..592d1060d3973 100644 --- a/libavcodec/wmv2enc.c +++ b/libavcodec/wmv2enc.c @@ -28,6 +28,7 @@ #include "msmpeg4enc.h" #include "msmpeg4data.h" #include "msmpeg4_vc1_data.h" +#include "put_bits.h" #include "wmv2.h" #define WMV2_EXTRADATA_SIZE 4 @@ -78,6 +79,8 @@ static int wmv2_encode_picture_header(MPVMainEncContext *const m) MSMPEG4EncContext *const ms = &w->msmpeg4; MPVEncContext *const s = &m->s; + put_bits_assume_flushed(&s->pb); + put_bits(&s->pb, 1, s->c.pict_type - 1); if (s->c.pict_type == AV_PICTURE_TYPE_I) put_bits(&s->pb, 7, 0); diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 821c410a0f654..89ee8dc726ec9 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -184,7 +184,9 @@ X86ASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \ x86/vp9intrapred_16bpp.o \ x86/vp9itxfm.o \ + x86/vp9itxfm_avx512.o \ x86/vp9itxfm_16bpp.o \ + x86/vp9itxfm_16bpp_avx512.o \ x86/vp9lpf.o \ x86/vp9lpf_16bpp.o \ x86/vp9mc.o \ diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 4a0513d06d5dd..6b2ad4494b918 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -22,6 +22,9 @@ * MMX optimization by Nick Kurshev */ +#include +#include + #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/x86/cpu.h" @@ -74,19 +77,263 @@ void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, /* MMX no rounding */ #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx #define SET_RND MOVQ_WONE -#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) -#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) #define STATIC static #include "rnd_template.c" -#include "hpeldsp_rnd_template.c" #undef DEF #undef SET_RND -#undef PAVGBP -#undef PAVGB #undef STATIC +// this routine is 'slightly' suboptimal but mostly unused +static void avg_no_rnd_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); // =2 for rnd and =1 for no_rnd version + __asm__ volatile( + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm4 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" + "add %3, %1 \n\t" + ".p2align 3 \n\t" + "1: \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddusw %%mm2, %%mm0 \n\t" + "paddusw %%mm3, %%mm1 \n\t" + "paddusw %%mm6, %%mm4 \n\t" + "paddusw %%mm6, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "psrlw $2, %%mm4 \n\t" + "psrlw $2, %%mm5 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" + "packuswb %%mm5, %%mm4 \n\t" + "pcmpeqd %%mm2, %%mm2 \n\t" + "paddb %%mm2, %%mm2 \n\t" + 
PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2) + "movq %%mm5, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" + + "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm2, %%mm4 \n\t" + "paddusw %%mm3, %%mm5 \n\t" + "paddusw %%mm6, %%mm0 \n\t" + "paddusw %%mm6, %%mm1 \n\t" + "paddusw %%mm4, %%mm0 \n\t" + "paddusw %%mm5, %%mm1 \n\t" + "psrlw $2, %%mm0 \n\t" + "psrlw $2, %%mm1 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "pcmpeqd %%mm2, %%mm2 \n\t" + "paddb %%mm2, %%mm2 \n\t" + PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2) + "movq %%mm1, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" + + "subl $2, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels) + :"D"(block), "r"((x86_reg)line_size) + :FF_REG_a, "memory"); +} + +static void put_no_rnd_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + MOVQ_BFE(mm6); + __asm__ volatile( + "lea (%3, %3), %%"FF_REG_a" \n\t" + ".p2align 3 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((x86_reg)line_size) + :FF_REG_a, "memory"); +} + +static void put_no_rnd_pixels16_x2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + MOVQ_BFE(mm6); + __asm__ volatile( + "lea (%3, %3), %%"FF_REG_a" \n\t" + ".p2align 3 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 9(%1), %%mm1 \n\t" + "movq 8(%1, %3), %%mm2 \n\t" + "movq 9(%1, %3), %%mm3 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, 8(%2) \n\t" + "movq %%mm5, 8(%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 9(%1), %%mm1 \n\t" + "movq 8(%1, %3), %%mm2 \n\t" + "movq 9(%1, %3), %%mm3 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, 8(%2) \n\t" + "movq %%mm5, 8(%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((x86_reg)line_size) + :FF_REG_a, "memory"); +} + +static void put_no_rnd_pixels8_y2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + MOVQ_BFE(mm6); + __asm__ volatile( + "lea (%3, %3), %%"FF_REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + ".p2align 3 \n\t" + 
"1: \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"FF_REG_a"),%%mm2\n\t" + PAVGBP_MMX_NO_RND(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"FF_REG_a"),%%mm0\n\t" + PAVGBP_MMX_NO_RND(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((x86_reg)line_size) + :FF_REG_a, "memory"); +} + +static void avg_no_rnd_pixels16_x2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + MOVQ_BFE(mm6); + __asm__ volatile( + ".p2align 3 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%2), %%mm3 \n\t" + PAVGB_MMX_NO_RND(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, (%2) \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 9(%1), %%mm1 \n\t" + "movq 8(%2), %%mm3 \n\t" + PAVGB_MMX_NO_RND(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, 8(%2) \n\t" + "add %3, %1 \n\t" + "add %3, %2 \n\t" + "subl $1, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((x86_reg)line_size) + :"memory"); +} + +static void avg_no_rnd_pixels8_y2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + MOVQ_BFE(mm6); + __asm__ volatile( + "lea (%3, %3), %%"FF_REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + ".p2align 3 \n\t" + "1: \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" + PAVGBP_MMX_NO_RND(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) + "movq (%2), %%mm3 \n\t" + PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6) + "movq (%2, %3), %%mm3 \n\t" + PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" + PAVGBP_MMX_NO_RND(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) + "movq (%2), %%mm3 \n\t" + PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6) + "movq (%2, %3), %%mm3 \n\t" + PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) + "movq %%mm2, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" + + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((x86_reg)line_size) + :FF_REG_a, "memory"); +} + #if HAVE_MMX CALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8) CALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8) @@ -101,7 +348,6 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8) #define SET_RND MOVQ_WTWO #define DEF(x, y) ff_ ## x ## _ ## y ## _mmx #define STATIC -#define NO_AVG #include "rnd_template.c" @@ -122,7 +368,6 @@ CALL_2X_PIXELS(put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8) CALL_2X_PIXELS(put_no_rnd_pixels16_x2 ## CPUEXT, ff_put_no_rnd_pixels8_x2 ## CPUEXT, 8) \ CALL_2X_PIXELS(put_pixels16_y2 ## CPUEXT, ff_put_pixels8_y2 ## CPUEXT, 8) \ CALL_2X_PIXELS(put_no_rnd_pixels16_y2 ## CPUEXT, ff_put_no_rnd_pixels8_y2 ## CPUEXT, 8) \ - CALL_2X_PIXELS(avg_pixels16 ## CPUEXT, ff_avg_pixels8 ## CPUEXT, 8) \ CALL_2X_PIXELS(avg_pixels16_x2 ## CPUEXT, ff_avg_pixels8_x2 ## CPUEXT, 8) \ CALL_2X_PIXELS(avg_pixels16_y2 ## CPUEXT, ff_avg_pixels8_y2 ## CPUEXT, 8) \ CALL_2X_PIXELS(avg_pixels16_xy2 ## CPUEXT, ff_avg_pixels8_xy2 ## CPUEXT, 8) \ @@ -170,7 +415,7 @@ static 
void hpeldsp_init_mmxext(HpelDSPContext *c, int flags) c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext; c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext; - c->avg_pixels_tab[0][0] = avg_pixels16_mmxext; + c->avg_pixels_tab[0][0] = ff_avg_pixels16_mmxext; c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext; c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext; c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext; diff --git a/libavcodec/x86/hpeldsp_rnd_template.c b/libavcodec/x86/hpeldsp_rnd_template.c deleted file mode 100644 index 2bff2d27660cf..0000000000000 --- a/libavcodec/x86/hpeldsp_rnd_template.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * SIMD-optimized halfpel functions are compiled twice for rnd/no_rnd - * Copyright (c) 2000, 2001 Fabrice Bellard - * Copyright (c) 2003-2004 Michael Niedermayer - * - * MMX optimization by Nick Kurshev - * mostly rewritten by Michael Niedermayer - * and improved by Zdenek Kabelac - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include - -// put_pixels -av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - MOVQ_BFE(mm6); - __asm__ volatile( - "lea (%3, %3), %%"FF_REG_a" \n\t" - ".p2align 3 \n\t" - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq 1(%1), %%mm1 \n\t" - "movq (%1, %3), %%mm2 \n\t" - "movq 1(%1, %3), %%mm3 \n\t" - PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq %%mm4, (%2) \n\t" - "movq %%mm5, (%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - "movq (%1), %%mm0 \n\t" - "movq 1(%1), %%mm1 \n\t" - "movq (%1, %3), %%mm2 \n\t" - "movq 1(%1, %3), %%mm3 \n\t" - PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq %%mm4, (%2) \n\t" - "movq %%mm5, (%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - "subl $4, %0 \n\t" - "jnz 1b \n\t" - :"+g"(h), "+S"(pixels), "+D"(block) - :"r"((x86_reg)line_size) - :FF_REG_a, "memory"); -} - -av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - MOVQ_BFE(mm6); - __asm__ volatile( - "lea (%3, %3), %%"FF_REG_a" \n\t" - ".p2align 3 \n\t" - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq 1(%1), %%mm1 \n\t" - "movq (%1, %3), %%mm2 \n\t" - "movq 1(%1, %3), %%mm3 \n\t" - PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq %%mm4, (%2) \n\t" - "movq %%mm5, (%2, %3) \n\t" - "movq 8(%1), %%mm0 \n\t" - "movq 9(%1), %%mm1 \n\t" - "movq 8(%1, %3), %%mm2 \n\t" - "movq 9(%1, %3), %%mm3 \n\t" - PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq %%mm4, 8(%2) \n\t" - "movq %%mm5, 8(%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - "movq (%1), %%mm0 \n\t" - "movq 1(%1), %%mm1 \n\t" - "movq (%1, %3), %%mm2 \n\t" - "movq 1(%1, %3), %%mm3 \n\t" - PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq 
%%mm4, (%2) \n\t" - "movq %%mm5, (%2, %3) \n\t" - "movq 8(%1), %%mm0 \n\t" - "movq 9(%1), %%mm1 \n\t" - "movq 8(%1, %3), %%mm2 \n\t" - "movq 9(%1, %3), %%mm3 \n\t" - PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq %%mm4, 8(%2) \n\t" - "movq %%mm5, 8(%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - "subl $4, %0 \n\t" - "jnz 1b \n\t" - :"+g"(h), "+S"(pixels), "+D"(block) - :"r"((x86_reg)line_size) - :FF_REG_a, "memory"); -} - -av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - MOVQ_BFE(mm6); - __asm__ volatile( - "lea (%3, %3), %%"FF_REG_a" \n\t" - "movq (%1), %%mm0 \n\t" - ".p2align 3 \n\t" - "1: \n\t" - "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"FF_REG_a"),%%mm2\n\t" - PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) - "movq %%mm4, (%2) \n\t" - "movq %%mm5, (%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"FF_REG_a"),%%mm0\n\t" - PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) - "movq %%mm4, (%2) \n\t" - "movq %%mm5, (%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - "subl $4, %0 \n\t" - "jnz 1b \n\t" - :"+g"(h), "+S"(pixels), "+D"(block) - :"r"((x86_reg)line_size) - :FF_REG_a, "memory"); -} - -av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - MOVQ_BFE(mm6); - __asm__ volatile( - ".p2align 3 \n\t" - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq 1(%1), %%mm1 \n\t" - "movq (%2), %%mm3 \n\t" - PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) - PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6) - "movq %%mm0, (%2) \n\t" - "movq 8(%1), %%mm0 \n\t" - "movq 9(%1), %%mm1 \n\t" - "movq 8(%2), %%mm3 \n\t" - PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) - PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6) - "movq %%mm0, 8(%2) \n\t" - "add %3, %1 \n\t" - "add %3, %2 \n\t" - "subl $1, %0 \n\t" - "jnz 1b \n\t" - :"+g"(h), "+S"(pixels), "+D"(block) - :"r"((x86_reg)line_size) - :"memory"); -} - -av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - MOVQ_BFE(mm6); - __asm__ volatile( - "lea (%3, %3), %%"FF_REG_a" \n\t" - "movq (%1), %%mm0 \n\t" - ".p2align 3 \n\t" - "1: \n\t" - "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" - PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) - "movq (%2), %%mm3 \n\t" - PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6) - "movq (%2, %3), %%mm3 \n\t" - PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) - "movq %%mm0, (%2) \n\t" - "movq %%mm1, (%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - - "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" - PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) - "movq (%2), %%mm3 \n\t" - PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6) - "movq (%2, %3), %%mm3 \n\t" - PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) - "movq %%mm2, (%2) \n\t" - "movq %%mm1, (%2, %3) \n\t" - "add %%"FF_REG_a", %1 \n\t" - "add %%"FF_REG_a", %2 \n\t" - - "subl $4, %0 \n\t" - "jnz 1b \n\t" - :"+g"(h), "+S"(pixels), "+D"(block) - :"r"((x86_reg)line_size) - :FF_REG_a, "memory"); -} diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c index 85e9159f91077..dbb21871218ac 100644 --- a/libavcodec/x86/mpegvideoenc_template.c +++ b/libavcodec/x86/mpegvideoenc_template.c @@ -109,7 +109,7 @@ static int RENAME(dct_quantize)(MPVEncContext *const s, qmat = s->q_inter_matrix16[qscale][0]; } - if ((s->c.out_format == FMT_H263 || s->c.out_format == FMT_H261) 
&& !s->c.mpeg_quant) { + if ((s->c.out_format == FMT_H263 || s->c.out_format == FMT_H261) && !s->mpeg_quant) { __asm__ volatile( "movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1 SPREADW("%%xmm3") diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c index 51f2a0033a45b..f105775c2b1dc 100644 --- a/libavcodec/x86/pixblockdsp_init.c +++ b/libavcodec/x86/pixblockdsp_init.c @@ -28,7 +28,6 @@ void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c index b825eeba6e032..4590aeddf014b 100644 --- a/libavcodec/x86/rnd_template.c +++ b/libavcodec/x86/rnd_template.c @@ -96,82 +96,3 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel :"D"(block), "r"((x86_reg)line_size) :FF_REG_a, "memory"); } - -#ifndef NO_AVG -// avg_pixels -// this routine is 'slightly' suboptimal but mostly unused -av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h) -{ - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "movq (%1), %%mm0 \n\t" - "movq 1(%1), %%mm4 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm4, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm5 \n\t" - "paddusw %%mm0, %%mm4 \n\t" - "paddusw %%mm1, %%mm5 \n\t" - "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" - "add %3, %1 \n\t" - ".p2align 3 \n\t" - "1: \n\t" - "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" - "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm3 \n\t" - "paddusw %%mm2, %%mm0 \n\t" - "paddusw %%mm3, %%mm1 \n\t" - "paddusw %%mm6, %%mm4 \n\t" - "paddusw %%mm6, %%mm5 \n\t" - "paddusw %%mm0, %%mm4 \n\t" - "paddusw %%mm1, %%mm5 \n\t" - "psrlw $2, %%mm4 \n\t" - "psrlw $2, %%mm5 \n\t" - "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" - "packuswb %%mm5, %%mm4 \n\t" - "pcmpeqd %%mm2, %%mm2 \n\t" - "paddb %%mm2, %%mm2 \n\t" - PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2) - "movq %%mm5, (%2, %%"FF_REG_a") \n\t" - "add %3, %%"FF_REG_a" \n\t" - - "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 - "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpckhbw %%mm7, %%mm3 \n\t" - "punpckhbw %%mm7, %%mm5 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm3, %%mm5 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm6, %%mm1 \n\t" - "paddusw %%mm4, %%mm0 \n\t" - "paddusw %%mm5, %%mm1 \n\t" - "psrlw $2, %%mm0 \n\t" - "psrlw $2, %%mm1 \n\t" - "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "pcmpeqd %%mm2, %%mm2 \n\t" - "paddb %%mm2, %%mm2 \n\t" - PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2) - "movq %%mm1, (%2, %%"FF_REG_a") \n\t" - "add %3, %%"FF_REG_a" \n\t" - - "subl $2, %0 \n\t" - "jnz 1b \n\t" - :"+g"(h), "+S"(pixels) - :"D"(block), "r"((x86_reg)line_size) - :FF_REG_a, "memory"); -} -#endif diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 8d11dbc348022..4373fa3f04e64 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -114,7 +114,9 @@ itxfm_func(idct, 
idct, 32, ssse3); itxfm_func(idct, idct, 32, avx); itxfm_func(iwht, iwht, 4, mmx); itxfm_funcs(16, avx2); +itxfm_funcs(16, avx512icl); itxfm_func(idct, idct, 32, avx2); +itxfm_func(idct, idct, 32, avx512icl); #undef itxfm_func #undef itxfm_funcs @@ -406,6 +408,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) init_ipred(32, avx2, tm, TM_VP8); } +#if ARCH_X86_64 + if (EXTERNAL_AVX512ICL(cpu_flags)) { + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx512icl; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx512icl; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx512icl; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx512icl; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx512icl; + } +#endif + #undef init_fpel #undef init_subpel1 #undef init_subpel2 diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c index f93ea2468ea59..db775f7c1a403 100644 --- a/libavcodec/x86/vp9dsp_init_16bpp_template.c +++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c @@ -127,6 +127,8 @@ decl_itxfm_func(iwht, iwht, 4, BPC, mmxext); #if BPC == 10 decl_itxfm_func(idct, idct, 4, BPC, mmxext); decl_itxfm_funcs(4, BPC, ssse3); +decl_itxfm_funcs(16, BPC, avx512icl); +decl_itxfm_func(idct, idct, 32, BPC, avx512icl); #else decl_itxfm_func(idct, idct, 4, BPC, sse2); #endif @@ -233,6 +235,12 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact) #endif } +#if ARCH_X86_64 && BPC == 10 + if (EXTERNAL_AVX512ICL(cpu_flags)) { + init_itx_funcs(TX_16X16, 16, BPC, avx512icl); + init_itx_func_one(TX_32X32, idct, idct, 32, BPC, avx512icl); + } +#endif #endif /* HAVE_X86ASM */ ff_vp9dsp_init_16bpp_x86(dsp); diff --git a/libavcodec/x86/vp9itxfm_16bpp_avx512.asm b/libavcodec/x86/vp9itxfm_16bpp_avx512.asm new file mode 100644 index 0000000000000..11d1e453a70a9 --- /dev/null +++ b/libavcodec/x86/vp9itxfm_16bpp_avx512.asm @@ -0,0 +1,1165 @@ +;****************************************************************************** +;* VP9 IDCT SIMD optimizations +;* +;* Copyright (C) 2025 Two Orioles, LLC +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +%if ARCH_X86_64 && HAVE_AVX512ICL_EXTERNAL + +SECTION_RODATA 64 + +; Thw following set of constants are ordered to form the +; qword shuffle mask { 0, 2, 4, 6, 1, 3, 5, 7 } +%define deintq_perm pd_5520 +pd_5520: dd 5520 +pd_9760: dd 9760 +pd_10394: dd 10394 +pd_15426: dd 15426 +pd_804: dd 804 +pd_2404: dd 2404 +pd_6270: dd 6270 +pd_9102: dd 9102 +pd_11585: dd 11585 +pd_12665: dd 12665 +pd_7723: dd 7723 +pd_14811: dd 14811 +pd_7005: dd 7005 +pd_14053: dd 14053 +pd_8423: dd 8423 +pd_13623: dd 13623 + +pixel_clip: times 2 dw 0x7c00 +pixel_clip6: dd 2031648 ; 32 + (pixel_clip << 6) +pd_532480: dd 532480 ; 8192 + (32 << 14) +pd_8192: dd 8192 + +pd_1606: dd 1606 +pd_3196: dd 3196 +pd_3981: dd 3981 +pd_4756: dd 4756 +pd_11003: dd 11003 +pd_12140: dd 12140 +pd_13160: dd 13160 +pd_14449: dd 14449 +pd_15137: dd 15137 +pd_15679: dd 15679 +pd_15893: dd 15893 +pd_16069: dd 16069 +pd_16207: dd 16207 +pd_16305: dd 16305 +pd_16364: dd 16364 + +SECTION .text + +%define o_base (deintq_perm+128) +%define o(x) (r5 - o_base + (x)) +%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX) + +; dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12 +; dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12 +; skip round/shift if rnd is not a number +%macro ITX_MULSUB_2D 8-9 0 ; dst/src[1-2], tmp[1-3], rnd, coef[1-2], inv_dst2 +%if %8 < 32 + pmulld m%4, m%1, m%8 + pmulld m%3, m%2, m%8 +%else + vpbroadcastd m%3, [o(pd_%8)] + pmulld m%4, m%1, m%3 + pmulld m%3, m%2 +%endif +%if %7 < 32 + pmulld m%1, m%7 + pmulld m%2, m%7 +%else + vpbroadcastd m%5, [o(pd_%7)] + pmulld m%1, m%5 + pmulld m%2, m%5 +%endif +%if %9 + psubd m%4, m%6, m%4 + psubd m%2, m%4, m%2 +%else +%ifnum %6 + paddd m%4, m%6 +%endif + paddd m%2, m%4 +%endif +%ifnum %6 + paddd m%1, m%6 +%endif + psubd m%1, m%3 +%ifnum %6 + psrad m%2, 14 + psrad m%1, 14 +%endif +%endmacro + +%macro WRAP_YMM 1+ + INIT_YMM cpuname + %1 + INIT_ZMM cpuname +%endmacro + +%macro TRANSPOSE_4D 5 ; in[1-4], tmp + punpckhdq m%5, m%3, m%4 ; c2 d2 c3 d3 + punpckldq m%3, m%4 ; c0 d0 c1 d1 + punpckhdq m%4, m%1, m%2 ; a2 b2 a3 b3 + punpckldq m%1, m%2 ; a0 b0 a1 b1 + punpckhqdq m%2, m%1, m%3 ; a1 b1 c1 d1 + punpcklqdq m%1, m%3 ; a0 b0 c0 d0 + punpcklqdq m%3, m%4, m%5 ; a2 b2 c2 d2 + punpckhqdq m%4, m%5 ; a3 b3 c3 d3 +%endmacro + +%macro TRANSPOSE_4DQ 5 ; in[1-4], tmp + vshufi32x4 m%5, m%3, m%4, q3232 ; c2 c3 d2 d3 + vinserti32x8 m%3, ym%4, 1 ; c0 c1 d0 d1 + vshufi32x4 m%4, m%1, m%2, q3232 ; a2 a3 b2 b3 + vinserti32x8 m%1, ym%2, 1 ; a0 a1 b0 b1 + vshufi32x4 m%2, m%1, m%3, q3131 ; a1 b1 c1 d1 + vshufi32x4 m%1, m%3, q2020 ; a0 b0 c0 d0 + vshufi32x4 m%3, m%4, m%5, q2020 ; a2 b2 c2 d2 + vshufi32x4 m%4, m%5, q3131 ; a3 b3 c3 d3 +%endmacro + +%macro INV_TXFM_FN 3-4 0 ; type1, type2, size, eob_offset +cglobal vp9_i%1_i%2_%3_add_10, 4, 5, 0, dst, stride, c, eob, tx2 + %define %%p1 m(vp9_i%1_%3_internal_10) + lea r5, [o_base] + ; Jump to the 1st txfm function if we're not taking the fast path, which + ; in turn performs an indirect jump to the 2nd txfm function. 
+ lea tx2q, [m(vp9_i%2_%3_internal_10).pass2] +%ifidn %1_%2, dct_dct + dec eobd + jnz %%p1 +%else +%if %4 + add eobd, %4 +%endif + ; jump to the 1st txfm function unless it's located directly after this + times ((%%end - %%p1) >> 31) & 1 jmp %%p1 +ALIGN function_align +%%end: +%endif +%endmacro + +%macro INV_TXFM_16X16_FN 2-3 0 ; type1, type2, eob_offset + INV_TXFM_FN %1, %2, 16x16, %3 +%ifidn %1_%2, dct_dct + imul r6d, [cq], 11585 + vpbroadcastd ym3, [o(pixel_clip)] + mov [cq], r3d + add r6d, 8192 + sar r6d, 14 + imul r6d, 11585 + or r3d, 8 + add r6d, 532480 + sar r6d, 20 + vpbroadcastw ym2, r6d + paddsw ym2, ym3 +.dconly_loop: + paddsw ym0, ym2, [dstq+strideq*0] + paddsw ym1, ym2, [dstq+strideq*1] + psubusw ym0, ym3 + psubusw ym1, ym3 + mova [dstq+strideq*0], ym0 + mova [dstq+strideq*1], ym1 + lea dstq, [dstq+strideq*2] + dec r3d + jg .dconly_loop + RET +%endif +%endmacro + +%macro IDCT16_PART1 0 +%if mmsize == 64 +.main_part1_fast: +%endif + pmulld m15, m1, [o(pd_16305)] {bcstd} ; t15a + pmulld m1, [o(pd_1606)] {bcstd} ; t8a + pmulld m9, m7, [o(pd_10394)] {bcstd} ; t9a + pmulld m7, [o(pd_12665)] {bcstd} ; t14a + pmulld m11, m5, [o(pd_14449)] {bcstd} ; t13a + pmulld m5, [o(pd_7723)] {bcstd} ; t10a + pmulld m13, m3, [o(pd_4756)] {bcstd} ; t11a + pmulld m3, [o(pd_15679)] {bcstd} ; t12a + pmulld m10, m6, [o(pd_9102)] {bcstd} ; t5a + pmulld m6, [o(pd_13623)] {bcstd} ; t6a + pmulld m14, m2, [o(pd_16069)] {bcstd} ; t7a + pmulld m2, [o(pd_3196)] {bcstd} ; t4a + pmulld m12, m4, [o(pd_15137)] {bcstd} ; t3 + pmulld m4, [o(pd_6270)] {bcstd} ; t2 + pmulld m0, m21 + REPX {psubd x, m20, x}, m9, m13, m10 + paddd m0, m20 + mova m18, m0 +%if mmsize == 64 ; for the ymm variant we only ever use the fast path + jmp %%main_part1b +.main_part1: + ITX_MULSUB_2D 1, 15, 16, 17, 18, _, 1606, 16305 ; t8a, t15a + ITX_MULSUB_2D 9, 7, 16, 17, 18, _, 12665, 10394 ; t9a, t14a + ITX_MULSUB_2D 5, 11, 16, 17, 18, _, 7723, 14449 ; t10a, t13a + ITX_MULSUB_2D 13, 3, 16, 17, 18, _, 15679, 4756 ; t11a, t12a + ITX_MULSUB_2D 10, 6, 16, 17, 18, _, 13623, 9102 ; t5a, t6a + ITX_MULSUB_2D 2, 14, 16, 17, 18, _, 3196, 16069 ; t4a, t7a + ITX_MULSUB_2D 4, 12, 16, 17, 18, _, 6270, 15137 ; t2, t3 + pmulld m0, m21 + pmulld m8, m21 + REPX {paddd x, m20}, m0, m9, m13, m10 + psubd m18, m0, m8 ; t1 + paddd m0, m8 ; t0 +%%main_part1b: +%endif + vpbroadcastd m19, [o(pd_15137)] + vpbroadcastd m16, [o(pd_6270)] + REPX {paddd x, m20}, m15, m7, m1, m11, m3, m5 + REPX {psrad x, 14 }, m15, m7, m1, m9, m11, m3, m5, m13 + paddd m17, m15, m7 ; t15 + psubd m15, m7 ; t14 + psubd m7, m3, m11 ; t13 + paddd m3, m11 ; t12 + psubd m11, m13, m5 ; t10 + paddd m5, m13 ; t11 + psubd m13, m1, m9 ; t9 + paddd m1, m9 ; t8 + ITX_MULSUB_2D 15, 13, 8, 9, _, 20, 16, 19 ; t9a, t14a + ITX_MULSUB_2D 7, 11, 8, 9, _, 20, 16, 19, 2 ; t13a, t10a + paddd m16, m1, m5 ; t8a + psubd m1, m5 ; t11a + paddd m8, m15, m11 ; t9 + psubd m15, m11 ; t10 + psubd m11, m17, m3 ; t12a + paddd m17, m3 ; t15a + psubd m9, m13, m7 ; t13 + paddd m13, m7 ; t14 + REPX {pmulld x, m21}, m11, m9, m1, m15 + REPX {paddd x, m20}, m2, m6, m14 + REPX {psrad x, 14 }, m10, m2, m6, m14 + psubd m3, m2, m10 ; t5a + paddd m10, m2 ; t4 + paddd m11, m20 + psubd m5, m11, m1 ; t11 + paddd m11, m1 ; t12 + psubd m1, m14, m6 ; t6a + paddd m14, m6 ; t7 + pmulld m1, m21 + pmulld m3, m21 + paddd m4, m20 + paddd m12, m20 + REPX {psrad x, 14 }, m4, m12, m0, m18 + paddd m9, m20 + paddd m2, m9, m15 ; t13a + psubd m9, m15 ; t10a + paddd m1, m20 + psubd m6, m1, m3 ; t5 + paddd m1, m3 ; t6 + REPX {psrad x, 14}, m6, m1, m11, m5, m2, 
m9 +%endmacro + +%macro IDCT16_PART2 0 + psubd m3, m0, m12 ; t3 + paddd m0, m12 ; t0 + psubd m12, m18, m4 ; t2 + paddd m18, m4 ; t1 + psubd m4, m3, m10 ; t4 + paddd m3, m10 ; t3 + psubd m10, m12, m6 ; t5 + paddd m12, m6 ; t2 + psubd m6, m18, m1 ; t6 + paddd m1, m18 ; t1 + psubd m7, m0, m14 ; t7 + paddd m0, m14 ; t0 + psubd m15, m0, m17 ; out15 + paddd m0, m17 ; out0 + psubd m14, m1, m13 ; out14 + paddd m1, m13 ; out1 + psubd m13, m12, m2 ; out13 + paddd m2, m12 ; out2 + psubd m12, m3, m11 ; out12 + paddd m3, m11 ; out3 + psubd m11, m4, m5 ; out11 + paddd m4, m5 ; out4 + paddd m5, m10, m9 ; out5 + psubd m10, m9 ; out10 + psubd m9, m6, m8 ; out9 + paddd m6, m8 ; out6 + psubd m8, m7, m16 ; out8 + paddd m7, m16 ; out7 +%endmacro + +INIT_ZMM avx512icl +INV_TXFM_16X16_FN dct, dct +INV_TXFM_16X16_FN dct, adst, 39-23-1 + +cglobal vp9_idct_16x16_internal_10, 0, 7, 22, dst, stride, c, eob, tx2 + mova m0, [cq+64* 0] + mova m1, [cq+64* 1] + mova m2, [cq+64* 2] + mova m3, [cq+64* 3] + mova m4, [cq+64* 4] + mova m5, [cq+64* 5] + mova m6, [cq+64* 6] + mova m7, [cq+64* 7] + vpbroadcastd m20, [o(pd_8192)] + vpbroadcastd m21, [o(pd_11585)] + sub eobd, 38 + jl .pass1_fast + mova m8, [cq+64* 8] + mova m9, [cq+64* 9] + mova m10, [cq+64*10] + mova m11, [cq+64*11] + mova m12, [cq+64*12] + mova m13, [cq+64*13] + mova m14, [cq+64*14] + mova m15, [cq+64*15] + call .main_part1 + call .main_part2 +.pass1_end: + TRANSPOSE_4DQ 0, 4, 8, 12, 16 + TRANSPOSE_4DQ 1, 5, 9, 13, 16 + TRANSPOSE_4DQ 2, 6, 10, 14, 16 + TRANSPOSE_4DQ 3, 7, 11, 15, 16 + TRANSPOSE_4D 8, 9, 10, 11, 16 + TRANSPOSE_4D 12, 13, 14, 15, 16 + mov r6d, 64*12 + jmp .pass1_transpose_end +.pass1_fast: + WRAP_YMM IDCT16_PART1 + WRAP_YMM IDCT16_PART2 +.pass1_fast_end: + vinserti32x8 m0, ym4, 1 + vinserti32x8 m8, ym12, 1 + vinserti32x8 m1, ym5, 1 + vinserti32x8 m9, ym13, 1 + vinserti32x8 m2, ym6, 1 + vinserti32x8 m10, ym14, 1 + vinserti32x8 m3, ym7, 1 + vinserti32x8 m11, ym15, 1 + vshufi32x4 m4, m0, m8, q3131 + vshufi32x4 m0, m8, q2020 + vshufi32x4 m5, m1, m9, q3131 + vshufi32x4 m1, m9, q2020 + vshufi32x4 m6, m2, m10, q3131 + vshufi32x4 m2, m10, q2020 + vshufi32x4 m7, m3, m11, q3131 + vshufi32x4 m3, m11, q2020 + mov r6d, 64*4 +.pass1_transpose_end: + pxor m16, m16 +.zero_loop: + mova [cq+r6+64*0], m16 + mova [cq+r6+64*1], m16 + mova [cq+r6+64*2], m16 + mova [cq+r6+64*3], m16 + sub r6d, 64*4 + jge .zero_loop + TRANSPOSE_4D 0, 1, 2, 3, 16 + TRANSPOSE_4D 4, 5, 6, 7, 16 + jmp tx2q +.pass2: + test eobd, eobd + jl .pass2_fast + call .main_part1 + jmp .pass2_end +.pass2_fast: + call .main_part1_fast +.pass2_end: + vpbroadcastd m3, [o(pixel_clip6)] + paddd m0, m3 + paddd m18, m3 + call .main_part2 + REPX {psrad x, 6}, m0, m1, m2, m3 + packssdw m0, m1 + lea r6, [strideq*3] + packssdw m1, m2, m3 + mova m2, [o(deintq_perm)] + vpbroadcastd m3, [o(pixel_clip)] + REPX {psrad x, 6}, m4, m5, m6, m7 + call .write_16x4 + packssdw m0, m4, m5 + packssdw m1, m6, m7 + REPX {psrad x, 6}, m8, m9, m10, m11 + call .write_16x4 + packssdw m0, m8, m9 + packssdw m1, m10, m11 +.pass2_end2: + REPX {psrad x, 6}, m12, m13, m14, m15 + call .write_16x4 + packssdw m0, m12, m13 + packssdw m1, m14, m15 + call .write_16x4 + RET +ALIGN function_align +.write_16x4: + mova ym16, [dstq+strideq*0] + vinserti32x8 m16, [dstq+strideq*1], 1 + mova ym17, [dstq+strideq*2] + vinserti32x8 m17, [dstq+r6 ], 1 + vpermq m0, m2, m0 + vpermq m1, m2, m1 + paddsw m16, m0 + paddsw m17, m1 + psubusw m16, m3 + psubusw m17, m3 + mova [dstq+strideq*0], ym16 + vextracti32x8 [dstq+strideq*1], m16, 1 + mova [dstq+strideq*2], ym17 
+ vextracti32x8 [dstq+r6 ], m17, 1 + lea dstq, [dstq+strideq*4] + ret +ALIGN function_align + IDCT16_PART1 + ret +ALIGN function_align +.main_part2: + IDCT16_PART2 + ret + +%macro IADST16_PART1 0 +%if mmsize == 64 +.main_part1_fast: +%endif + pmulld m15, m0, [o(pd_16364)] {bcstd} ; t1 + pmulld m0, [o(pd_804)] {bcstd} ; t0 + pmulld m13, m2, [o(pd_15893)] {bcstd} ; t3 + pmulld m2, [o(pd_3981)] {bcstd} ; t2 + pmulld m11, m4, [o(pd_14811)] {bcstd} ; t5 + pmulld m4, [o(pd_7005)] {bcstd} ; t4 + pmulld m9, m6, [o(pd_13160)] {bcstd} ; t7 + pmulld m6, [o(pd_9760)] {bcstd} ; t6 + pmulld m8, m7, [o(pd_11003)] {bcstd} ; t8 + pmulld m7, [o(pd_12140)] {bcstd} ; t9 + pmulld m10, m5, [o(pd_8423)] {bcstd} ; t10 + pmulld m5, [o(pd_14053)] {bcstd} ; t11 + pmulld m12, m3, [o(pd_5520)] {bcstd} ; t12 + pmulld m3, [o(pd_15426)] {bcstd} ; t13 + pmulld m14, m1, [o(pd_2404)] {bcstd} ; t14 + pmulld m1, [o(pd_16207)] {bcstd} ; t15 + REPX {psubd x, m20, x}, m15, m13, m11, m9 +%if mmsize == 64 ; for the ymm variant we only ever use the fast path + jmp %%main_part1b +ALIGN function_align +.main_part1: + ITX_MULSUB_2D 15, 0, 16, 17, 18, _, 804, 16364 ; t1, t0 + ITX_MULSUB_2D 13, 2, 16, 17, 18, _, 3981, 15893 ; t3, t2 + ITX_MULSUB_2D 11, 4, 16, 17, 18, _, 7005, 14811 ; t5, t4 + ITX_MULSUB_2D 9, 6, 16, 17, 18, _, 9760, 13160 ; t7, t6 + ITX_MULSUB_2D 7, 8, 16, 17, 18, _, 12140, 11003 ; t9, t8 + ITX_MULSUB_2D 5, 10, 16, 17, 18, _, 14053, 8423 ; t11, t10 + ITX_MULSUB_2D 3, 12, 16, 17, 18, _, 15426, 5520 ; t13, t12 + ITX_MULSUB_2D 1, 14, 16, 17, 18, _, 16207, 2404 ; t15, t14 + REPX {paddd x, m20}, m15, m13, m11, m9 +%%main_part1b: +%endif + REPX {paddd x, m20}, m0, m2, m4, m6 + psubd m16, m2, m10 ; t10a + paddd m2, m10 ; t2a + psubd m10, m9, m1 ; t15a + paddd m9, m1 ; t7a + psubd m1, m13, m5 ; t11a + paddd m13, m5 ; t3a + psubd m5, m6, m14 ; t14a + paddd m6, m14 ; t6a + REPX {psrad x, 14}, m16, m10, m1, m5 + psubd m14, m0, m8 ; t8a + paddd m0, m8 ; t0a + psubd m8, m15, m7 ; t9a + paddd m15, m7 ; t1a + psubd m7, m4, m12 ; t12a + paddd m4, m12 ; t4a + paddd m12, m11, m3 ; t5a + psubd m11, m3 ; t13a + REPX {psrad x, 14}, m14, m8, m7, m11 + vpbroadcastd m19, [o(pd_9102)] + vpbroadcastd m18, [o(pd_13623)] + ITX_MULSUB_2D 16, 1, 3, 17, _, _, 18, 19 ; t11, t10 + ITX_MULSUB_2D 10, 5, 3, 17, _, _, 19, 18 ; t14, t15 + vpbroadcastd m19, [o(pd_16069)] + vpbroadcastd m18, [o(pd_3196)] + ITX_MULSUB_2D 14, 8, 3, 17, _, _, 18, 19 ; t9, t8 + ITX_MULSUB_2D 11, 7, 3, 17, _, _, 19, 18 ; t12, t13 + vpbroadcastd m19, [o(pd_6270)] + vpbroadcastd m18, [o(pd_15137)] + REPX {psrad x, 14}, m15, m12, m0, m4 + psubd m3, m15, m12 ; t5 + paddd m15, m12 ; t1 + psubd m12, m0, m4 ; t4 + paddd m0, m4 ; t0 + REPX {psrad x, 14}, m2, m6, m13, m9 + psubd m4, m2, m6 ; t6 + paddd m2, m6 ; t2 + psubd m6, m13, m9 ; t7 + paddd m9, m13 ; t3 + REPX {paddd x, m20}, m8, m14, m1, m16 + psubd m13, m8, m11 ; t12a + paddd m8, m11 ; t8a + psubd m11, m14, m7 ; t13a + paddd m14, m7 ; t9a + psubd m7, m1, m10 ; t14a + paddd m1, m10 ; t10a + psubd m10, m16, m5 ; t15a + paddd m16, m5 ; t11a + REPX {psrad x, 14}, m13, m11, m7, m10 + ITX_MULSUB_2D 12, 3, 5, 17, _, _, 19, 18 ; t5a, t4a + ITX_MULSUB_2D 6, 4, 5, 17, _, _, 18, 19 ; t6a, t7a + ITX_MULSUB_2D 13, 11, 5, 17, _, _, 19, 18 ; t13, t12 + ITX_MULSUB_2D 10, 7, 5, 17, _, _, 18, 19 ; t14, t15 + REPX {psrad x, 14}, m8, m1, m14, m16 + psubd m5, m8, m1 ; t10 + paddd m1, m8 ; -out1 + psubd m8, m15, m9 ; t3a + paddd m15, m9 ; -out15 + psubd m9, m14, m16 ; t11 + paddd m14, m16 ; out14 + psubd m16, m0, m2 ; t2a + paddd m0, m2 ; out0 + REPX 
{paddd x, m20}, m11, m13, m12, m3 + paddd m2, m11, m10 ; out2 + psubd m11, m10 ; t14a + psubd m10, m13, m7 ; t15a + paddd m13, m7 ; -out13 + psubd m7, m12, m4 ; t7 + paddd m12, m4 ; out12 + psubd m4, m3, m6 ; t6 + paddd m3, m6 ; -out3 + REPX {psrad x, 14}, m10, m7, m11, m4 + REPX {pmulld x, m21}, m9, m10, m7, m8, m5, m11, m4, m16 + REPX {psrad x, 14}, m2, m13, m12, m3 +%endmacro + +%macro IADST16_PART2 0 + paddd m9, m20 + psubd m10, m20, m10 + paddd m7, m20 + psubd m8, m20, m8 + paddd m6, m9, m5 ; out6 + psubd m9, m5 ; out9 + psubd m5, m10, m11 ; out5 + paddd m10, m11 ; out10 + psubd m11, m7, m4 ; out11 + paddd m4, m7 ; out4 + psubd m7, m8, m16 ; out7 + paddd m8, m16 ; out8 +%endmacro + +%macro IADST16_PASS1_END 0 + pxor m16, m16 + psubd m1, m16, m1 + psubd m3, m16, m3 + psubd m13, m16, m13 + psubd m15, m16, m15 + REPX {psrad x, 14}, m4, m5, m6, m7, m8, m9, m10, m11 +%endmacro + +INV_TXFM_16X16_FN adst, dct, 39-18 +INV_TXFM_16X16_FN adst, adst + +cglobal vp9_iadst_16x16_internal_10, 0, 7, 22, dst, stride, c, eob, tx2 + mova m0, [cq+64* 0] + mova m1, [cq+64* 1] + mova m2, [cq+64* 2] + mova m3, [cq+64* 3] + mova m4, [cq+64* 4] + mova m5, [cq+64* 5] + mova m6, [cq+64* 6] + mova m7, [cq+64* 7] + vpbroadcastd m20, [o(pd_8192)] + vpbroadcastd m21, [o(pd_11585)] + sub eobd, 39 + jl .pass1_fast + mova m8, [cq+64* 8] + mova m9, [cq+64* 9] + mova m10, [cq+64*10] + mova m11, [cq+64*11] + mova m12, [cq+64*12] + mova m13, [cq+64*13] + mova m14, [cq+64*14] + mova m15, [cq+64*15] + call .main_part1 + call .main_part2 + IADST16_PASS1_END + jmp m(vp9_idct_16x16_internal_10).pass1_end +.pass1_fast: + WRAP_YMM IADST16_PART1 + WRAP_YMM IADST16_PART2 + WRAP_YMM IADST16_PASS1_END + jmp m(vp9_idct_16x16_internal_10).pass1_fast_end +.pass2: + test eobd, eobd + jl .pass2_fast + call .main_part1 + jmp .pass2_end +.pass2_fast: + call .main_part1_fast +.pass2_end: + vpbroadcastd m20, [o(pd_532480)] + call .main_part2 + vpbroadcastd m16, [o(pixel_clip6)] + REPX {paddd x, m16}, m0, m2, m12, m14 + REPX {psubd x, m16, x}, m1, m3, m13, m15 + REPX {psrad x, 6}, m0, m1, m2, m3 + packssdw m0, m1 + lea r6, [strideq*3] + packssdw m1, m2, m3 + mova m2, [o(deintq_perm)] + vpbroadcastd m3, [o(pixel_clip)] + REPX {psrad x, 20}, m4, m5, m6, m7 + call m(vp9_idct_16x16_internal_10).write_16x4 + packssdw m0, m4, m5 + packssdw m1, m6, m7 + paddsw m0, m3 + paddsw m1, m3 + REPX {psrad x, 20}, m8, m9, m10, m11 + call m(vp9_idct_16x16_internal_10).write_16x4 + packssdw m0, m8, m9 + packssdw m1, m10, m11 + paddsw m0, m3 + paddsw m1, m3 + jmp m(vp9_idct_16x16_internal_10).pass2_end2 +ALIGN function_align + IADST16_PART1 + ret +ALIGN function_align +.main_part2: + IADST16_PART2 + ret + +cglobal vp9_idct_idct_32x32_add_10, 4, 7, 23, 64*64, dst, stride, c, eob +%undef cmp + lea r5, [o_base] + dec eobd + jnz .pass1 + imul r6d, [cq], 11585 + vpbroadcastd m3, [o(pixel_clip)] + mov [cq], r3d + add r6d, 8192 + sar r6d, 14 + imul r6d, 11585 + or r3d, 16 + add r6d, 532480 + sar r6d, 20 + vpbroadcastw m2, r6d + paddsw m2, m3 +.dconly_loop: + paddsw m0, m2, [dstq+strideq*0] + paddsw m1, m2, [dstq+strideq*1] + psubusw m0, m3 + psubusw m1, m3 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 + lea dstq, [dstq+strideq*2] + dec r3d + jg .dconly_loop + RET +.pass1: + vpbroadcastd m20, [o(pd_8192)] + vpbroadcastd m21, [o(pd_11585)] + cmp eobd, 135 + jl .pass1_fast + add cq, 64 + lea r4, [rsp+64*8] + cmp eobd, 579 + jl .pass1_right_fast + mov r6d, 128*28 + call .pass1_main + jmp .pass1_right_end +.pass1_right_fast: ; bottomright quadrant is zero + mova 
m0, [cq+128* 1] + mova m1, [cq+128* 3] + mova m2, [cq+128* 5] + mova m3, [cq+128* 7] + mova m4, [cq+128* 9] + mova m5, [cq+128*11] + mova m6, [cq+128*13] + mova m7, [cq+128*15] + call .main_fast + mova m0, [cq+128* 0] + mova m1, [cq+128* 2] + mova m2, [cq+128* 4] + mova m3, [cq+128* 6] + mova m4, [cq+128* 8] + mova m5, [cq+128*10] + mova m6, [cq+128*12] + mova m7, [cq+128*14] + call m(vp9_idct_16x16_internal_10).main_part1_fast + mov r6d, 128*12 + call .pass1_main_end +.pass1_right_end: + mova [r4+64* 8], m0 + mova [r4+64* 9], m1 + mova [r4+64*10], m2 + mova [r4+64*11], m3 + mova [r4+64*12], m4 + mova [r4+64*13], m5 + mova [r4+64*14], m6 + mova [r4+64*15], m7 + mova [r4+64*16], m16 + mova [r4+64*17], m17 + mova [r4+64*18], m18 + mova [r4+64*19], m19 + mova [r4+64*20], m8 + mova [r4+64*21], m9 + mova [r4+64*22], m10 + mova [r4+64*23], m11 + sub cq, 64 + sub r4, 64*8 + mov r6d, 128*28 + call .pass1_main + mova m12, [r4+64*20] + mova m13, [r4+64*21] + mova m14, [r4+64*22] + mova m15, [r4+64*23] + mova [r4+64*20], m8 + mova [r4+64*21], m9 + mova [r4+64*22], m10 + mova [r4+64*23], m11 + mova m8, [r4+64*16] + mova m9, [r4+64*17] + mova m10, [r4+64*18] + mova m11, [r4+64*19] + mova [r4+64*16], m16 + mova [r4+64*17], m17 + mova [r4+64*18], m18 + mova [r4+64*19], m19 + call .main + mova m0, [r4+64*16] + mova m1, [r4+64*17] + mova m2, [r4+64*18] + mova m3, [r4+64*19] + mova m4, [r4+64*20] + mova m5, [r4+64*21] + mova m6, [r4+64*22] + mova m7, [r4+64*23] + mova m8, [r4+64*24] + mova m9, [r4+64*25] + mova m10, [r4+64*26] + mova m11, [r4+64*27] + mova m12, [r4+64*28] + mova m13, [r4+64*29] + mova m14, [r4+64*30] + mova m15, [r4+64*31] + call m(vp9_idct_16x16_internal_10).main_part1 + call .pass2_main_left + mova m8, [r4+64* 8] + mova m9, [r4+64* 9] + mova m10, [r4+64*10] + mova m11, [r4+64*11] + mova m12, [r4+64*12] + mova m13, [r4+64*13] + mova m14, [r4+64*14] + mova m15, [r4+64*15] + TRANSPOSE_4DQ 8, 10, 12, 14, 16 + TRANSPOSE_4DQ 9, 11, 13, 15, 16 + call .main + call .pass2_main_right + mova m8, [r4+64*24] + mova m9, [r4+64*25] + mova m10, [r4+64*26] + mova m11, [r4+64*27] + mova m12, [r4+64*28] + mova m13, [r4+64*29] + mova m14, [r4+64*30] + mova m15, [r4+64*31] + TRANSPOSE_4DQ 8, 10, 12, 14, 16 + TRANSPOSE_4DQ 9, 11, 13, 15, 16 + call m(vp9_idct_16x16_internal_10).main_part1 + jmp .pass2_end +.pass1_fast: + mova m0, [cq+128* 1] + mova m1, [cq+128* 3] + mova m2, [cq+128* 5] + mova m3, [cq+128* 7] + mova m4, [cq+128* 9] + mova m5, [cq+128*11] + mova m6, [cq+128*13] + mova m7, [cq+128*15] + mov r4, rsp + call .main_fast + mova m0, [cq+128* 0] + mova m1, [cq+128* 2] + mova m2, [cq+128* 4] + mova m3, [cq+128* 6] + mova m4, [cq+128* 8] + mova m5, [cq+128*10] + mova m6, [cq+128*12] + mova m7, [cq+128*14] + call m(vp9_idct_16x16_internal_10).main_part1_fast + call m(vp9_idct_16x16_internal_10).main_part2 + mov r6d, 128*12 + call .pass1_main_end2 + mova [r4+64*16], m16 + mova [r4+64*17], m17 + mova [r4+64*18], m18 + mova [r4+64*19], m19 + mova [r4+64*20], m8 + mova [r4+64*21], m9 + mova [r4+64*22], m10 + mova [r4+64*23], m11 + call .main_fast + mova m0, [r4+64*16] + mova m1, [r4+64*17] + mova m2, [r4+64*18] + mova m3, [r4+64*19] + mova m4, [r4+64*20] + mova m5, [r4+64*21] + mova m6, [r4+64*22] + mova m7, [r4+64*23] + call m(vp9_idct_16x16_internal_10).main_part1_fast + call .pass2_main_left + call .main_fast + call .pass2_main_right + call m(vp9_idct_16x16_internal_10).main_part1_fast +.pass2_end: + paddd m0, m22 + paddd m18, m22 + call m(vp9_idct_16x16_internal_10).main_part2 + mova m20, [o(deintq_perm)] 
+ rorx r2, strideq, 59 ; strideq*32 + vpbroadcastd m21, [o(pixel_clip)] + add r2, dstq +%assign i 0 +%rep 16 + mova m16, [r4+64*(15-i)] + mova m17, [r4+64*(i-16)] + mova m18, [r4-64*(17+i)] + paddd m19, m %+ i, m16 + psubd m0, m %+ i, m16 + call .write_32x2 + %assign i i+1 +%endrep + RET +ALIGN function_align +.write_32x2: + paddd m16, m17, m18 + psubd m17, m18 + REPX {psrad x, 6}, m19, m16, m0, m17 + packssdw m16, m19 + packssdw m17, m0 + sub r2, strideq + vpermq m16, m20, m16 + vpermq m17, m20, m17 + paddsw m16, [dstq] + paddsw m17, [r2 ] + psubusw m16, m21 + psubusw m17, m21 + mova [dstq], m16 + mova [r2 ], m17 + add dstq, strideq + ret +ALIGN function_align +.pass1_main: + mova m0, [cq+128* 1] + mova m1, [cq+128* 3] + mova m2, [cq+128* 5] + mova m3, [cq+128* 7] + mova m4, [cq+128* 9] + mova m5, [cq+128*11] + mova m6, [cq+128*13] + mova m7, [cq+128*15] + mova m8, [cq+128*17] + mova m9, [cq+128*19] + mova m10, [cq+128*21] + mova m11, [cq+128*23] + mova m12, [cq+128*25] + mova m13, [cq+128*27] + mova m14, [cq+128*29] + mova m15, [cq+128*31] + call .main + mova m0, [cq+128* 0] + mova m1, [cq+128* 2] + mova m2, [cq+128* 4] + mova m3, [cq+128* 6] + mova m4, [cq+128* 8] + mova m5, [cq+128*10] + mova m6, [cq+128*12] + mova m7, [cq+128*14] + mova m8, [cq+128*16] + mova m9, [cq+128*18] + mova m10, [cq+128*20] + mova m11, [cq+128*22] + mova m12, [cq+128*24] + mova m13, [cq+128*26] + mova m14, [cq+128*28] + mova m15, [cq+128*30] + call m(vp9_idct_16x16_internal_10).main_part1 +.pass1_main_end: + call m(vp9_idct_16x16_internal_10).main_part2 +.pass1_main_end2: + pxor m16, m16 +.pass1_zero_loop: + mova [cq+r6+128*0], m16 + mova [cq+r6+128*1], m16 + mova [cq+r6+128*2], m16 + mova [cq+r6+128*3], m16 + sub r6d, 128*4 + jge .pass1_zero_loop + mova m16, [r4+64*15] + mova m19, [r4+64*14] + mova m22, [r4+64*13] + mova m17, [r4+64*12] + psubd m18, m0, m16 + paddd m16, m0 + paddd m0, m19, m1 + psubd m19, m1, m19 + paddd m1, m17, m3 + psubd m3, m17 + paddd m17, m2, m22 + psubd m2, m22 + TRANSPOSE_4D 3, 2, 19, 18, 22 ; 28 29 30 31 + TRANSPOSE_4D 16, 0, 17, 1, 22 ; 0 1 2 3 + mova [r4+64*54], m3 + mova [r4+64*55], m19 + mova [r4+64*38], m2 + mova [r4+64*39], m18 + mova m2, [r4+64*11] + mova m19, [r4+64*10] + mova m3, [r4+64* 9] + mova m22, [r4+64* 8] + paddd m18, m4, m2 + psubd m4, m2 + paddd m2, m5, m19 + psubd m5, m19 + paddd m19, m6, m3 + psubd m6, m3 + paddd m3, m7, m22 + psubd m7, m22 + TRANSPOSE_4D 7, 6, 5, 4, 22 ; 24 25 26 27 + TRANSPOSE_4D 18, 2, 19, 3, 22 ; 4 5 6 7 + mova [r4+64*52], m7 + mova [r4+64*53], m5 + mova [r4+64*36], m6 + mova [r4+64*37], m4 + mova m7, [r4+64* 7] + mova m4, [r4+64* 6] + mova m5, [r4+64* 5] + mova m22, [r4+64* 4] + psubd m6, m8, m7 + paddd m8, m7 + psubd m7, m9, m4 + paddd m4, m9 + paddd m9, m10, m5 + psubd m10, m5 + paddd m5, m11, m22 + psubd m11, m22 + TRANSPOSE_4D 11, 10, 7, 6, 22 ; 20 21 22 23 + TRANSPOSE_4D 8, 4, 9, 5, 22 ; 8 9 10 11 + mova [r4+64*50], m11 + mova [r4+64*51], m7 + mova [r4+64*34], m10 + mova [r4+64*35], m6 + mova m6, [r4+64* 3] + mova m11, [r4+64* 2] + mova m7, [r4+64* 1] + mova m22, [r4+64* 0] + paddd m10, m12, m6 + psubd m12, m6 + paddd m6, m13, m11 + psubd m13, m11 + paddd m11, m14, m7 + psubd m14, m7 + paddd m7, m15, m22 + psubd m15, m22 + TRANSPOSE_4D 15, 14, 13, 12, 22 ; 16 17 18 19 + TRANSPOSE_4D 10, 6, 11, 7, 22 ; 12 13 14 15 + mova [r4+64*48], m15 + mova [r4+64*49], m13 + mova [r4+64*32], m14 + mova [r4+64*33], m12 + TRANSPOSE_4DQ 0, 2, 4, 6, 22 + TRANSPOSE_4DQ 1, 3, 5, 7, 22 + TRANSPOSE_4DQ 16, 18, 8, 10, 22 + TRANSPOSE_4DQ 17, 19, 9, 11, 22 + ret 
+ALIGN function_align +.pass2_main_left: + vpbroadcastd m22, [o(pixel_clip6)] + paddd m0, m22 + paddd m18, m22 + call m(vp9_idct_16x16_internal_10).main_part2 + mova [r4+64*16], m0 + mova [r4+64*17], m1 + mova [r4+64*18], m2 + mova [r4+64*19], m3 + mova [r4+64*20], m4 + mova [r4+64*21], m5 + mova [r4+64*22], m6 + mova [r4+64*23], m7 + mova [r4+64*24], m8 + mova [r4+64*25], m9 + mova [r4+64*26], m10 + mova [r4+64*27], m11 + mova [r4+64*28], m12 + mova [r4+64*29], m13 + mova [r4+64*30], m14 + mova [r4+64*31], m15 + add r4, 64*32 + mova m0, [r4+64* 0] + mova m1, [r4+64* 1] + mova m2, [r4+64* 2] + mova m3, [r4+64* 3] + mova m4, [r4+64* 4] + mova m5, [r4+64* 5] + mova m6, [r4+64* 6] + mova m7, [r4+64* 7] + jmp .pass2_main_transpose +ALIGN function_align +.pass2_main_right: + mova m0, [r4+64*16] + mova m1, [r4+64*17] + mova m2, [r4+64*18] + mova m3, [r4+64*19] + mova m4, [r4+64*20] + mova m5, [r4+64*21] + mova m6, [r4+64*22] + mova m7, [r4+64*23] +.pass2_main_transpose: + TRANSPOSE_4DQ 0, 2, 4, 6, 8 + TRANSPOSE_4DQ 1, 3, 5, 7, 8 + ret +ALIGN function_align +.main_fast: + pmulld m15, m0, [o(pd_16364)] {1to16} ; t31a + pmulld m0, [o(pd_804)] {1to16} ; t16a + pmulld m8, m7, [o(pd_11003)] {1to16} ; t17a + pmulld m7, [o(pd_12140)] {1to16} ; t30a + pmulld m11, m4, [o(pd_14811)] {1to16} ; t29a + pmulld m4, [o(pd_7005)] {1to16} ; t18a + pmulld m12, m3, [o(pd_5520)] {1to16} ; t19a + pmulld m3, [o(pd_15426)] {1to16} ; t28a + pmulld m13, m2, [o(pd_15893)] {1to16} ; t27a + pmulld m2, [o(pd_3981)] {1to16} ; t20a + pmulld m10, m5, [o(pd_8423)] {1to16} ; t21a + pmulld m5, [o(pd_14053)] {1to16} ; t26a + pmulld m9, m6, [o(pd_13160)] {1to16} ; t25a + pmulld m6, [o(pd_9760)] {1to16} ; t22a + pmulld m14, m1, [o(pd_2404)] {1to16} ; t23a + pmulld m1, [o(pd_16207)] {1to16} ; t24a + REPX {psubd x, m20, x}, m8, m12, m10, m14 + jmp .main2 +ALIGN function_align +.main: + ITX_MULSUB_2D 0, 15, 16, 17, 18, _, 804, 16364 ; t16a, t31a + ITX_MULSUB_2D 8, 7, 16, 17, 18, _, 12140, 11003 ; t17a, t30a + ITX_MULSUB_2D 4, 11, 16, 17, 18, _, 7005, 14811 ; t18a, t29a + ITX_MULSUB_2D 12, 3, 16, 17, 18, _, 15426, 5520 ; t19a, t28a + ITX_MULSUB_2D 2, 13, 16, 17, 18, _, 3981, 15893 ; t20a, t27a + ITX_MULSUB_2D 10, 5, 16, 17, 18, _, 14053, 8423 ; t21a, t26a + ITX_MULSUB_2D 6, 9, 16, 17, 18, _, 9760, 13160 ; t22a, t25a + ITX_MULSUB_2D 14, 1, 16, 17, 18, _, 16207, 2404 ; t23a, t24a + REPX {paddd x, m20}, m8, m12, m10, m14 +.main2: + REPX {paddd x, m20}, m0, m15, m7, m4, m3, m11 + REPX {psrad x, 14 }, m8, m0, m15, m7, m12, m4, m3, m11 + psubd m16, m0, m8 ; t17 + paddd m0, m8 ; t16 + psubd m8, m15, m7 ; t30 + paddd m15, m7 ; t31 + paddd m7, m12, m4 ; t19 + psubd m12, m4 ; t18 + paddd m4, m3, m11 ; t28 + psubd m3, m11 ; t29 + REPX {paddd x, m20}, m2, m13, m5, m6, m1, m9 + REPX {psrad x, 14 }, m10, m2, m13, m5, m14, m6, m1, m9 + psubd m11, m2, m10 ; t21 + paddd m2, m10 ; t20 + psubd m10, m13, m5 ; t26 + paddd m13, m5 ; t27 + psubd m5, m14, m6 ; t22 + paddd m6, m14 ; t23 + psubd m14, m1, m9 ; t25 + paddd m9, m1 ; t24 + vpbroadcastd m19, [o(pd_16069)] + vpbroadcastd m18, [o(pd_3196)] + ITX_MULSUB_2D 8, 16, 1, 17, _, 20, 18, 19 ; t17a, t30a + ITX_MULSUB_2D 3, 12, 1, 17, _, 20, 18, 19, 1 ; t29a, t18a + vpbroadcastd m19, [o(pd_9102)] + vpbroadcastd m18, [o(pd_13623)] + ITX_MULSUB_2D 10, 11, 1, 17, _, 20, 18, 19 ; t21a, t26a + ITX_MULSUB_2D 14, 5, 1, 17, _, 20, 18, 19, 1 ; t25a, t22a + paddd m1, m6, m2 ; t23a + psubd m6, m2 ; t20a + psubd m2, m9, m13 ; t27a + paddd m9, m13 ; t24a + psubd m13, m15, m4 ; t28a + paddd m15, m4 ; t31a + psubd m4, m8, m12 ; 
t18 + paddd m8, m12 ; t17 + psubd m12, m0, m7 ; t19a + paddd m0, m7 ; t16a + psubd m7, m16, m3 ; t29 + paddd m3, m16 ; t30 + paddd m16, m5, m10 ; t22 + psubd m5, m10 ; t21 + psubd m10, m14, m11 ; t26 + paddd m14, m11 ; t25 + vpbroadcastd m19, [o(pd_15137)] + vpbroadcastd m18, [o(pd_6270)] + ITX_MULSUB_2D 13, 12, 11, 17, _, 20, 18, 19 ; t19, t28 + ITX_MULSUB_2D 2, 6, 11, 17, _, 20, 18, 19, 1 ; t27, t20 + ITX_MULSUB_2D 7, 4, 11, 17, _, 20, 18, 19 ; t18a, t29a + ITX_MULSUB_2D 10, 5, 11, 17, _, 20, 18, 19, 1 ; t26a, t21a + psubd m11, m0, m1 ; t23 + paddd m0, m1 ; t16 + paddd m1, m16, m8 ; t17a + psubd m16, m8, m16 ; t22a + psubd m8, m15, m9 ; t24 + paddd m15, m9 ; t31 + psubd m9, m3, m14 ; t25a + paddd m14, m3 ; t30a + paddd m3, m6, m13 ; t19a + psubd m6, m13, m6 ; t20a + paddd m13, m10, m4 ; t29 + psubd m10, m4, m10 ; t26 + psubd m4, m12, m2 ; t27a + paddd m12, m2 ; t28a + paddd m2, m7, m5 ; t18 + psubd m7, m5 ; t21 + REPX {pmulld x, m21}, m10, m8, m4, m9, m7, m11, m6, m16 + mova [r4+64* 0], m0 + mova [r4+64* 1], m1 + mova [r4+64* 2], m2 + mova [r4+64* 3], m3 + mova [r4+64*12], m12 + mova [r4+64*13], m13 + mova [r4+64*14], m14 + mova [r4+64*15], m15 + REPX {paddd x, m20}, m10, m8, m4, m9 + psubd m5, m10, m7 ; t21a + paddd m10, m7 ; t26a + psubd m7, m8, m11 ; t23a + paddd m8, m11 ; t24a + REPX {psrad x, 14 }, m5, m10, m7, m8 + paddd m11, m4, m6 ; t27 + psubd m4, m6 ; t20 + psubd m6, m9, m16 ; t22 + paddd m9, m16 ; t25 + REPX {psrad x, 14 }, m11, m4, m6, m9 + mova [r4+64* 4], m4 + mova [r4+64* 5], m5 + mova [r4+64* 6], m6 + mova [r4+64* 7], m7 + mova [r4+64* 8], m8 + mova [r4+64* 9], m9 + mova [r4+64*10], m10 + mova [r4+64*11], m11 + ret + +%endif diff --git a/libavcodec/x86/vp9itxfm_avx512.asm b/libavcodec/x86/vp9itxfm_avx512.asm new file mode 100644 index 0000000000000..d51c50756d58f --- /dev/null +++ b/libavcodec/x86/vp9itxfm_avx512.asm @@ -0,0 +1,1629 @@ +;****************************************************************************** +;* VP9 IDCT SIMD optimizations +;* +;* Copyright (C) 2025 Two Orioles, LLC +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +%if ARCH_X86_64 && HAVE_AVX512ICL_EXTERNAL + +SECTION_RODATA 64 + +dup16_perm: db 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7 + db 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 14, 15 + db 16, 17, 16, 17, 18, 19, 18, 19, 20, 21, 20, 21, 22, 23, 22, 23 + db 24, 25, 24, 25, 26, 27, 26, 27, 28, 29, 28, 29, 30, 31, 30, 31 +itx_perm: dq 0x0000000820150440, 0x0000000231372604 + dq 0x0000000ca8041551, 0x00000006b9263715 + dq 0x00000001ec9d8c62, 0x0000000bfdbfae26 + dq 0x00000005648c9d73, 0x0000000f75aebf37 +deint_shuf: db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 +int_shuf1: db 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15 +int_shuf2: db 8, 9, 0, 1, 10, 11, 2, 3, 12, 13, 4, 5, 14, 15, 6, 7 +pw_512: times 4 dw 512 +pw_m512: times 4 dw -512 +pw_15137_6270x2x4: times 4 dw 15137*2 + times 4 dw 6270*2 +pw_11585_m11585x2x4: times 4 dw 11585*2 +pw_m11585_11585x2x4: times 4 dw -11585*2 +pw_11585_11585x2: times 4 dw 11585*2 +int_mshift: db 142, 150, 0, 0, 174, 182, 0, 0 +pd_8192: dd 8192 +pw_804x2: times 2 dw 804*2 +pw_1606x2: times 2 dw 1606*2 +pw_3196x2: times 2 dw 3196*2 +pw_3981x2: times 2 dw 3981*2 +pw_6270x2: times 2 dw 6270*2 +pw_7005x2: times 2 dw 7005*2 +pw_7723x2: times 2 dw 7723*2 +pw_9760x2: times 2 dw 9760*2 +pw_12140x2: times 2 dw 12140*2 +pw_12665x2: times 2 dw 12665*2 +pw_13160x2: times 2 dw 13160*2 +pw_13623x2: times 2 dw 13623*2 +pw_14053x2: times 2 dw 14053*2 +pw_14449x2: times 2 dw 14449*2 +pw_14811x2: times 2 dw 14811*2 +pw_15137x2: times 2 dw 15137*2 +pw_15426x2: times 2 dw 15426*2 +pw_15679x2: times 2 dw 15679*2 +pw_15893x2: times 2 dw 15893*2 +pw_16069x2: times 2 dw 16069*2 +pw_16207x2: times 2 dw 16207*2 +pw_16305x2: times 2 dw 16305*2 +pw_16364x2: times 2 dw 16364*2 +pw_m2404x2: times 2 dw -2404*2 +pw_m4756x2: times 2 dw -4756*2 +pw_m5520x2: times 2 dw -5520*2 +pw_m8423x2: times 2 dw -8423*2 +pw_m9102x2: times 2 dw -9102*2 +pw_m10394x2: times 2 dw -10394*2 +pw_m11003x2: times 2 dw -11003*2 +pw_804_16364x2: dw 804*2, 16364*2 +pw_1606_16305x2: dw 1606*2, 16305*2 +pw_3196_16069x2: dw 3196*2, 16069*2 +pw_3981_15893x2: dw 3981*2, 15893*2 +pw_7005_14811x2: dw 7005*2, 14811*2 +pw_7723_14449x2: dw 7723*2, 14449*2 +pw_9760_13160x2: dw 9760*2, 13160*2 +pw_m2404_16207x2: dw -2404*2, 16207*2 +pw_m4756_15679x2: dw -4756*2, 15679*2 +pw_m5520_15426x2: dw -5520*2, 15426*2 +pw_m8423_14053x2: dw -8423*2, 14053*2 +pw_m9102_13623x2: dw -9102*2, 13623*2 +pw_m10394_12665x2: dw -10394*2, 12665*2 +pw_m11003_12140x2: dw -11003*2, 12140*2 + +%macro COEF_PAIR 2-3 0 +%if %3 & 4 +pw_%1_m%2: dw %1, -%2 +%else +pw_%1_%2: dw %1, %2 +%if %3 & 2 +pw_m%1_%2: dw -%1, %2 +%else +pw_m%2_%1: dw -%2, %1 +%endif +%endif +%if %3 & 1 +pw_m%1_m%2: dw -%1, -%2 +%endif +%endmacro + +COEF_PAIR 804, 16364 +COEF_PAIR 1606, 16305 +COEF_PAIR 3196, 16069, 1 +COEF_PAIR 3981, 15893 +COEF_PAIR 6270, 15137, 1 +COEF_PAIR 7005, 14811 +COEF_PAIR 7723, 14449 +COEF_PAIR 9102, 13623 +COEF_PAIR 9760, 13160 +COEF_PAIR 11585, 11585, 1 +COEF_PAIR 12140, 11003 +COEF_PAIR 12665, 10394 +COEF_PAIR 13623, 9102, 1 +COEF_PAIR 14053, 8423 +COEF_PAIR 15137, 6270 +COEF_PAIR 15426, 5520 +COEF_PAIR 15679, 4756 +COEF_PAIR 16069, 3196 +COEF_PAIR 16207, 2404 + +; ADST16-only: +COEF_PAIR 2404, 
9760, 2
+COEF_PAIR 5520, 7005, 2
+COEF_PAIR 8423, 3981, 2
+COEF_PAIR 11003, 804, 2
+COEF_PAIR 12140, 16364, 5
+COEF_PAIR 14053, 15893, 5
+COEF_PAIR 15426, 14811, 5
+COEF_PAIR 16207, 13160, 5
+pw_11585_m11585: dw 11585, -11585
+pw_16069_m3196: dw 16069, -3196
+pw_9102_m13623: dw 9102, -13623
+pw_15137_m6270: dw 15137, -6270
+pw_6270_m15137: dw 6270, -15137
+
+%define pw_11585x2 pw_11585_11585x2
+%define pw_m11585x2 pw_m11585_11585x2x4
+
+SECTION .text
+
+%define o_base pw_512 + 128
+%define o(x) (r6 - (o_base) + (x))
+%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
+
+; flags: 1 = swap, 2 = interleave (l), 4 = interleave (t), 8 = no_pack,
+; 16 = special_mul1, 32 = special_mul2, 64 = dst_in_tmp1
+%macro ITX_MUL2X_PACK 6-7 0 ; dst/src, tmp[1-2], rnd, coef[1-2], flags
+    mova m%2, m%4
+%if %7 & 16
+    vpdpwssd m%2, m%1, [o(pw_%5)] {bcstd}
+    mova m%3, m%4
+%if %7 & 32
+    vpdpwssd m%3, m%1, [o(pw_%6)] {bcstd}
+%else
+    vpdpwssd m%3, m%1, m%6
+%endif
+%elif %7 & 32
+    vpdpwssd m%2, m%1, m%5
+    mova m%3, m%4
+    vpdpwssd m%3, m%1, [o(pw_%6)] {bcstd}
+%elif %6 < 32
+    vpdpwssd m%2, m%1, m%5
+    mova m%3, m%4
+    vpdpwssd m%3, m%1, m%6
+%elif %7 & 1
+    vpdpwssd m%2, m%1, [o(pw_%5_%6)] {bcstd}
+    mova m%3, m%4
+    vpdpwssd m%3, m%1, [o(pw_m%6_%5)] {bcstd}
+%else
+    vpdpwssd m%2, m%1, [o(pw_m%6_%5)] {bcstd}
+    mova m%3, m%4
+    vpdpwssd m%3, m%1, [o(pw_%5_%6)] {bcstd}
+%endif
+%if %7 & 2
+    psrld m%2, 14
+    pslld m%3, 2
+    vpshrdd m%1, m%3, m%2, 16
+%elif %7 & 4
+    ; compared to using shifts (as above) this has better throughput,
+    ; but worse latency and requires setting up the opmask/index
+    ; registers, so only use this method for the larger transforms
+%if %7 & 64
+    pslld m%2, 2
+    vpmultishiftqb m%2{k7}, m13, m%3
+%else
+    pslld m%1, m%2, 2
+    vpmultishiftqb m%1{k7}, m13, m%3
+%endif
+%else
+    psrad m%2, 14
+    psrad m%3, 14
+%if %7 & 8 == 0
+    packssdw m%1, m%3, m%2
+%endif
+%endif
+%endmacro
+
+; dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 14
+; dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 14
+%macro ITX_MULSUB_2W 7 ; dst/src[1-2], tmp[1-2], rnd, coef[1-2]
+    punpcklwd m%3, m%2, m%1
+    punpckhwd m%2, m%1
+%if %7 < 32
+    mova m%1, m%5
+    vpdpwssd m%1, m%3, m%7
+    mova m%4, m%5
+    vpdpwssd m%4, m%2, m%7
+%else
+    mova m%1, m%5
+    vpdpwssd m%1, m%3, [o(pw_m%7_%6)] {bcstd}
+    mova m%4, m%5
+    vpdpwssd m%4, m%2, [o(pw_m%7_%6)] {bcstd}
+%endif
+    psrad m%1, 14
+    psrad m%4, 14
+    packssdw m%1, m%4
+    mova m%4, m%5
+%if %7 < 32
+    vpdpwssd m%4, m%2, m%6
+    mova m%2, m%5
+    vpdpwssd m%2, m%3, m%6
+%else
+    vpdpwssd m%4, m%2, [o(pw_%6_%7)] {bcstd}
+    mova m%2, m%5
+    vpdpwssd m%2, m%3, [o(pw_%6_%7)] {bcstd}
+%endif
+    psrad m%4, 14
+    psrad m%2, 14
+    packssdw m%2, m%4
+%endmacro
+
+; flags: 1 = swap, 2 = invert2, 4 = invert1
+%macro ADST_MULSUB_4W 10-11 0 ; dst1/src1, src2, dst2, tmp[1-2], rnd, coef[1-4], flags
+    mova m%3, m%6
+%if %11 & 1
+    vpdpwssd m%3, m%1, [o(pw_m%8_%7)] {bcstd}
+%else
+    vpdpwssd m%3, m%1, [o(pw_%7_%8)] {bcstd}
+%endif
+%if %11 & 4
+    vpbroadcastd m%4, [o(pw_m%9_%10)]
+%elif %11 & 2
+    vpbroadcastd m%4, [o(pw_%9_m%10)]
+%elif %11 & 1
+    vpbroadcastd m%4, [o(pw_%10_%9)]
+%else
+    vpbroadcastd m%4, [o(pw_%9_%10)]
+%endif
+    pmaddwd m%4, m%2
+    mova m%5, m%6
+%if %11 & 4
+    vpdpwssd m%5, m%1, [o(pw_%8_m%7)] {bcstd}
+%elif %11 & 1
+    vpdpwssd m%5, m%1, [o(pw_%7_%8)] {bcstd}
+%else
+    vpdpwssd m%5, m%1, [o(pw_m%8_%7)] {bcstd}
+%endif
+%if %11 & 2
+    vpbroadcastd m%1, [o(pw_%10_%9)]
+%elif %11 & 1
+    vpbroadcastd m%1, [o(pw_%9_m%10)]
+%else
+    vpbroadcastd m%1, [o(pw_m%10_%9)]
+%endif
+    pmaddwd m%2, m%1
+    paddd m%1, m%3, m%4
+    psubd m%3, m%4
+ paddd m%4, m%5, m%2 + psubd m%5, m%2 + pslld m%1, 2 + pslld m%3, 2 + vpmultishiftqb m%1{k7}, m13, m%4 + vpmultishiftqb m%3{k7}, m13, m%5 +%endmacro + +%macro WRAP_YMM 1+ + INIT_YMM cpuname + %1 + INIT_ZMM cpuname +%endmacro + +%macro INV_TXFM_FN 3-4 0 ; type1, type2, size, eob_offset +cglobal vp9_i%1_i%2_%3_add, 4, 5, 0, dst, stride, c, eob, tx2 + %undef cmp + %define %%p1 m(vp9_i%1_%3_internal) + lea r6, [o_base] + ; Jump to the 1st txfm function if we're not taking the fast path, which + ; in turn performs an indirect jump to the 2nd txfm function. + lea tx2q, [m(vp9_i%2_%3_internal).pass2] +%ifidn %1_%2, dct_dct + cmp eobd, 1 + jne %%p1 +%else +%if %4 + add eobd, %4 +%endif + ; jump to the 1st txfm function unless it's located directly after this + times ((%%end - %%p1) >> 31) & 1 jmp %%p1 +ALIGN function_align +%%end: +%endif +%endmacro + +%macro INV_TXFM_16X16_FN 2-3 0 ; type1, type2, eob_offset + INV_TXFM_FN %1, %2, 16x16, %3 +%ifidn %1_%2, dct_dct + movd xmm0, [o(pw_11585x2)] + pmulhrsw xmm3, xmm0, [cq] + pxor ym2, ym2 + pmulhrsw xmm3, xmm0 + pmulhrsw xmm3, [o(pw_512)] + mova [cq], xm2 + add r3d, 7 + vpbroadcastw ym3, xmm3 +.dconly_loop: + mova xm1, [dstq+strideq*0] + vinserti32x4 ym1, [dstq+strideq*1], 1 + punpcklbw ym0, ym1, ym2 + punpckhbw ym1, ym2 + paddw ym0, ym3 + paddw ym1, ym3 + packuswb ym0, ym1 + mova [dstq+strideq*0], xm0 + vextracti32x4 [dstq+strideq*1], ym0, 1 + lea dstq, [dstq+strideq*2] + dec r3d + jg .dconly_loop + RET +%endif +%endmacro + +%macro IDCT16_MAIN 0-1 0 ; idct32 +%if mmsize == 64 && %1 == 0 +.main_fast: +%endif + vpbroadcastd m2, [o(pw_1606_16305x2)] + vpbroadcastd m4, [o(pw_m10394_12665x2)] + vpbroadcastd m11, [o(pw_7723_14449x2)] + vpbroadcastd m12, [o(pw_m4756_15679x2)] + pmulhrsw m8, m2 ; t8a t15a + vpbroadcastd m2, [o(pw_3196_16069x2)] + pmulhrsw m0, m4 ; t9a t14a + vpbroadcastd m4, [o(pw_m9102_13623x2)] + pmulhrsw m5, m11 ; t10a t13a + vpbroadcastd m11, [o(pw_11585_11585x2)] + pmulhrsw m1, m12 ; t11a t12a + vbroadcasti32x4 m12, [o(pw_15137_6270x2x4)] + pmulhrsw m7, m2 ; t4a t7a + pmulhrsw m3, m4 ; t5a t6a + pmulhrsw m9, m11 ; t0 t1 + pmulhrsw m6, m12 ; t3 t2 +%if mmsize == 64 && %1 == 0 + jmp %%main2 +ALIGN function_align +.main: + punpckhwd m8, m7, m0 ; dct16 in15 in1 + punpcklwd m9, m4, m0 ; dct4 in2 in0 + punpckhwd m0, m3, m4 ; dct16 in7 in9 + punpcklwd m7, m1 ; dct8 in7 in1 + punpckhwd m1, m6 ; dct16 in3 in13 + punpcklwd m3, m5 ; dct8 in3 in5 + punpckhwd m5, m2 ; dct16 in11 in5 + punpcklwd m6, m2 ; dct4 in3 in1 + ITX_MUL2X_PACK 8, 2, 4, 10, 1606, 16305, 5 ; t8a t15a + ITX_MUL2X_PACK 0, 2, 4, 10, 12665, 10394, 5 ; t9a t14a + ITX_MUL2X_PACK 5, 2, 4, 10, 7723, 14449, 5 ; t10a t13a + ITX_MUL2X_PACK 1, 2, 4, 10, 15679, 4756, 5 ; t11a t12a + ITX_MUL2X_PACK 7, 2, 4, 10, 3196, 16069, 5 ; t4a t7a + ITX_MUL2X_PACK 3, 2, 4, 10, 13623, 9102, 5 ; t5a t6a + ITX_MUL2X_PACK 9, 2, 4, 10, 11585, 11585 ; t0 t1 + ITX_MUL2X_PACK 6, 2, 4, 10, 6270, 15137 ; t3 t2 +%%main2: +%endif + psubw m2, m8, m0 ; t9 t14 + paddw m8, m0 ; t8 t15 + psubw m4, m1, m5 ; t10 t13 + paddw m1, m5 ; t11 t12 + ITX_MUL2X_PACK 2, 0, 5, 10, 6270, 15137, (1|%1*4) ; t9a t14a + ITX_MUL2X_PACK 4, 0, 5, 10, m15137, 6270, (1|%1*4) ; t10a t13a + vbroadcasti32x4 m5, [o(deint_shuf)] + psubw m0, m8, m1 ; t11a t12a + paddw m8, m1 ; t8a t15a + psubw m1, m7, m3 ; t5a t6a + paddw m7, m3 ; t4 t7 + pshufb m8, m5 + pshufb m7, m5 + paddw m3, m2, m4 ; t9 t14 + psubw m2, m4 ; t10 t13 +%if %1 + vpbroadcastd m12, [o(pw_11585_11585)] + vpbroadcastd m11, [o(pw_m11585_11585)] + pshufb m3, m5 + ITX_MUL2X_PACK 1, 4, 5, 
10, 12, 11 ; t5 t6 + ITX_MUL2X_PACK 0, 4, 5, 10, 11, 12, 8 ; t11 t12 + ITX_MUL2X_PACK 2, 0, 11, 10, 11, 12, 8 ; t10a t13a + packssdw m5, m11 ; t12 t13a + packssdw m4, m0 ; t11 t10a +%else + pshufb m0, m5 + ITX_MUL2X_PACK 1, 4, 5, 10, 11585_11585, m11585_11585, 48 ; t5 t6 + vpbroadcastd m11, [o(pw_11585x2)] + punpckhqdq m5, m0, m2 ; t12a t13 + punpcklqdq m0, m2 ; t11a t10 + psubw m4, m5, m0 + paddw m5, m0 + pmulhrsw m4, m11 ; t11 t10a + pmulhrsw m5, m11 ; t12 t13a +%endif + punpckhqdq m2, m7, m1 ; t7 t6 + punpcklqdq m7, m1 ; t4 t5 + psubw m1, m9, m6 ; t3 t2 + paddw m9, m6 ; t0 t1 + punpckhqdq m0, m8, m3 ; t15a t14 + punpcklqdq m8, m3 ; t8a t9 + psubw m3, m9, m2 ; t7 t6 + paddw m9, m2 ; t0 t1 + psubw m2, m1, m7 ; t4 t5 + paddw m1, m7 ; t3 t2 + psubw m7, m9, m0 ; out15 out14 + paddw m0, m9 ; out0 out1 + psubw m6, m1, m5 ; out12 out13 + paddw m1, m5 ; out3 out2 + psubw m5, m2, m4 ; out11 out10 + paddw m2, m4 ; out4 out5 + psubw m4, m3, m8 ; out8 out9 + paddw m3, m8 ; out7 out6 +%endmacro + +INIT_ZMM avx512icl +INV_TXFM_16X16_FN dct, dct +INV_TXFM_16X16_FN dct, adst, 39-23 + +cglobal vp9_idct_16x16_internal, 0, 5, 16, dst, stride, c, eob, tx2 + mova m15, [o(itx_perm)] + vpbroadcastd m10, [o(pd_8192)] + vpbroadcastq m13, [o(int_mshift)] + vpcmpub k7, m13, m10, 6 + sub eobd, 39 + jl .pass1_fast + vpermq m0, m15, [cq+64*0] + vpermq m1, m15, [cq+64*1] + vpermq m2, m15, [cq+64*2] + vpermq m3, m15, [cq+64*3] + vpermq m4, m15, [cq+64*4] + vpermq m5, m15, [cq+64*5] + vpermq m6, m15, [cq+64*6] + vpermq m7, m15, [cq+64*7] + call .main + vbroadcasti32x4 m12, [o(int_shuf1)] + vbroadcasti32x4 m11, [o(int_shuf2)] + pshufb m0, m12 + pshufb m8, m1, m11 + pshufb m2, m12 + pshufb m9, m3, m11 + pshufb m4, m12 + pshufb m14, m5, m11 + pshufb m6, m12 + pshufb m11, m7, m11 + punpckhdq m1, m0, m8 + punpckldq m0, m8 + punpckhdq m3, m2, m9 + punpckldq m2, m9 + punpckhdq m5, m4, m14 + punpckldq m4, m14 + punpckhdq m7, m6, m11 + punpckldq m6, m11 +.pass1_end: + vshufi32x4 m8, m4, m6, q3232 + vinserti32x8 m4, ym6, 1 + vshufi32x4 m6, m0, m2, q3232 + vinserti32x8 m0, ym2, 1 + vshufi32x4 m9, m5, m7, q3232 + vinserti32x8 m5, ym7, 1 + vshufi32x4 m7, m1, m3, q3232 + vinserti32x8 m1, ym3, 1 + vshufi32x4 m2, m0, m4, q3131 ; 4 5 + vshufi32x4 m0, m4, q2020 ; 0 1 + vshufi32x4 m4, m6, m8, q2020 ; 8 9 + vshufi32x4 m6, m8, q3131 ; 12 13 + vshufi32x4 m3, m1, m5, q3131 ; 6 7 + vshufi32x4 m1, m5, q2020 ; 2 3 + vshufi32x4 m5, m7, m9, q2020 ; 10 11 + vshufi32x4 m7, m9, q3131 ; 14 1 + jmp tx2q +.pass1_fast: + mova ym3, [o(dup16_perm)] + vbroadcasti32x4 ym9, [cq+32*0] + vbroadcasti32x4 ym6, [cq+32*4] + vpermb ym8, ym3, [cq+32*1] + vpermb ym0, ym3, [cq+32*7] + vpermb ym5, ym3, [cq+32*5] + vpermb ym1, ym3, [cq+32*3] + vpermb ym7, ym3, [cq+32*2] + vpermb ym3, ym3, [cq+32*6] + shufpd ym9, ym9, 0x0c + shufpd ym6, ym6, 0x0c + WRAP_YMM IDCT16_MAIN + vbroadcasti32x4 m8, [o(int_shuf1)] + vbroadcasti32x4 m9, [o(int_shuf2)] + vinserti32x8 m0, ym2, 1 ; 0 1 | 4 5 + vinserti32x8 m4, ym6, 1 ; 8 9 | 12 13 + vinserti32x8 m1, ym3, 1 ; 3 2 | 7 6 + vinserti32x8 m5, ym7, 1 ; 11 10 | 15 14 + vshufi32x4 m2, m0, m4, q3131 + vshufi32x4 m0, m4, q2020 + vshufi32x4 m4, m1, m5, q2020 + vshufi32x4 m1, m5, q3131 + pshufb m2, m8 + pshufb m0, m8 + pshufb m4, m9 + pshufb m1, m9 + punpckhdq m3, m2, m1 ; 6-7 + punpckldq m2, m1 ; 4-5 + punpckhdq m1, m0, m4 ; 2-3 + punpckldq m0, m4 ; 0-1 + jmp tx2q +.pass2: + test eobd, eobd + jl .pass2_fast + call .main + jmp .pass2_end +.pass2_fast: + punpcklqdq m9, m0, m0 + punpckhwd m8, m0, m0 + punpcklwd m7, m1, m1 + punpckhwd m1, m1 + 
punpcklqdq m6, m2, m2 + punpckhwd m5, m2, m2 + punpckhwd m0, m3, m3 + punpcklwd m3, m3 + call .main_fast +.pass2_end: + psrldq m8, m15, 1 + psrlq m12, m15, 12 + psrldq m9, m15, 2 + psrlq m13, m15, 20 + mova m10, m8 + vpermi2q m8, m0, m2 ; 0 1 4 5 + vpermt2q m0, m12, m2 + mova m11, m9 + vpermi2q m9, m1, m3 ; 2 3 6 7 + vpermt2q m1, m13, m3 + vpbroadcastd m2, [o(pw_512)] + vpermi2q m10, m4, m6 ; 8 9 12 13 + vpermt2q m4, m12, m6 + vpermi2q m11, m5, m7 ; 10 11 14 15 + vpermt2q m5, m13, m7 + REPX {pmulhrsw x, m2}, m0, m1, m4, m5, m8, m9, m10, m11 +.pass2_end2: + lea r3, [strideq*3] + lea r4, [dstq+strideq*4] + lea r5, [dstq+strideq*8] + lea r6, [r4 +strideq*8] + mova xm3, [dstq+strideq*0] + mova xm6, [dstq+strideq*2] + vinserti32x4 ym3, [dstq+strideq*1], 1 + vinserti32x4 ym6, [dstq+r3 ], 1 + vinserti32x4 m3, [r4+strideq*0], 2 + vinserti32x4 m6, [r4+strideq*2], 2 + vinserti32x4 m3, [r4+strideq*1], 3 + vinserti32x4 m6, [r4+r3 ], 3 + mova xm12, [r5+strideq*0] + mova xm13, [r5+strideq*2] + vinserti32x4 ym12, [r5+strideq*1], 1 + vinserti32x4 ym13, [r5+r3 ], 1 + vinserti32x4 m12, [r6+strideq*0], 2 + vinserti32x4 m13, [r6+strideq*2], 2 + vinserti32x4 m12, [r6+strideq*1], 3 + vinserti32x4 m13, [r6+r3 ], 3 + pxor m7, m7 + REPX {mova [cq+64*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7 + punpcklbw m2, m3, m7 + punpckhbw m3, m7 + paddw m0, m2 + paddw m8, m3 + packuswb m0, m8 + punpcklbw m2, m6, m7 + punpckhbw m6, m7 + paddw m1, m2 + paddw m9, m6 + packuswb m1, m9 + punpcklbw m2, m12, m7 + punpckhbw m12, m7 + paddw m2, m4 + paddw m10, m12 + packuswb m2, m10 + punpcklbw m3, m13, m7 + punpckhbw m13, m7 + paddw m3, m5 + paddw m11, m13 + packuswb m3, m11 + mova [dstq+strideq*0], xm0 + vextracti32x4 [dstq+strideq*1], ym0, 1 + mova [dstq+strideq*2], xm1 + vextracti32x4 [dstq+r3 ], ym1, 1 + vextracti32x4 [r4+strideq*0], m0, 2 + vextracti32x4 [r4+strideq*1], m0, 3 + vextracti32x4 [r4+strideq*2], m1, 2 + vextracti32x4 [r4+r3 ], m1, 3 + mova [r5+strideq*0], xm2 + vextracti32x4 [r5+strideq*1], ym2, 1 + mova [r5+strideq*2], xm3 + vextracti32x4 [r5+r3 ], ym3, 1 + vextracti32x4 [r6+strideq*0], m2, 2 + vextracti32x4 [r6+strideq*1], m2, 3 + vextracti32x4 [r6+strideq*2], m3, 2 + vextracti32x4 [r6+r3 ], m3, 3 + RET +ALIGN function_align + IDCT16_MAIN + ret + +%macro IADST16_MAIN 0 +%if mmsize == 64 +.main_fast: +%endif + punpcklwd m4, m3, m0 ; in7 in0 + punpcklwd m11, m1, m2 ; in3 in4 + punpckhwd m9, m2, m1 ; in5 in2 + punpckhwd m7, m0, m3 ; in1 in6 + ITX_MUL2X_PACK 4, 0, 6, 10, 11003_804, 12140_m16364, 116 ; t1a t0a + ITX_MUL2X_PACK 4, 5, 6, 10, m11003_804, m12140_m16364, 52 ; t9a t8a + ITX_MUL2X_PACK 11, 2, 6, 10, 5520_7005, 15426_m14811, 116 ; t5a t4a + ITX_MUL2X_PACK 11, 5, 6, 10, m5520_7005, m15426_m14811, 52 ; t13a t12a + ITX_MUL2X_PACK 9, 1, 6, 10, 8423_3981, 14053_m15893, 116 ; t3a t2a + ITX_MUL2X_PACK 9, 5, 6, 10, m8423_3981, m14053_m15893, 52 ; t11a t10a + ITX_MUL2X_PACK 7, 3, 6, 10, 2404_9760, 16207_m13160, 116 ; t7a t6a + ITX_MUL2X_PACK 7, 5, 6, 10, m2404_9760, m16207_m13160, 52 ; t15a t14a +%if mmsize == 64 ; for the ymm variant we only ever use the fast path + jmp %%main2 +ALIGN function_align +.main: + punpckhwd m8, m7, m0 ; in14 in1 + punpcklwd m0, m7 ; in0 in15 + punpcklwd m7, m6, m1 ; in12 in3 + punpckhwd m1, m6 ; in2 in13 + punpckhwd m6, m5, m2 ; in10 in5 + punpcklwd m2, m5 ; in4 in11 + punpcklwd m5, m4, m3 ; in8 in7 + punpckhwd m3, m4 ; in6 in9 + ADST_MULSUB_4W 0, 5, 4, 9, 11, 10, 804, 16364, 12140, 11003 ; t1a t0a, t9a t8a + ADST_MULSUB_4W 2, 7, 11, 5, 9, 10, 7005, 14811, 15426, 5520 ; t5a t4a, t13a t12a + 
ADST_MULSUB_4W 1, 6, 9, 5, 7, 10, 3981, 15893, 14053, 8423 ; t3a t2a, t11a t10a + ADST_MULSUB_4W 3, 8, 7, 5, 6, 10, 9760, 13160, 16207, 2404 ; t7a t6a, t15a t14a +%%main2: +%endif + psubw m5, m1, m3 ; t7 t6 + paddw m6, m1, m3 ; t3 t2 + psubw m1, m0, m2 ; t5 t4 + paddw m2, m0 ; t1 t0 + ADST_MULSUB_4W 4, 11, 8, 3, 0, 10, 3196, 16069, 16069, 3196, 1 ; t8a t9a, t12a t13a + ADST_MULSUB_4W 9, 7, 0, 3, 11, 10, 13623, 9102, 9102, 13623, 1 ; t10a t11a, t14a t15a + ADST_MULSUB_4W 1, 5, 11, 3, 7, 10, 6270, 15137, 15137, 6270, 2 ; out12 -out3, t7 t6 + psubw m3, m2, m6 ; t3a t2a + paddw m2, m6 ; -out15 out0 + ADST_MULSUB_4W 8, 0, 5, 6, 7, 10, 15137, 6270, 6270, 15137, 6 ; -out13 out2, t15a t14 + vbroadcasti32x4 m12, [o(deint_shuf)] + paddw m0, m4, m9 ; -out1 out14 + psubw m4, m9 ; t10 t11 + pshufb m2, m12 + pshufb m1, m12 + pshufb m8, m12 + pshufb m0, m12 + punpcklqdq m6, m1, m8 ; out12 -out13 + shufps m7, m0, m2, q1032 ; out14 -out15 +%endmacro + +%macro IADST16_PASS1_END 0 + shufps m0, m2, m0, q1032 ; out0 -out1 + punpckhqdq m1, m8, m1 ; out2 -out3 + mova m2, m10 + vpdpwssd m2, m5, [o(pw_m11585_m11585)] {bcstd} ; out5 + mova m8, m10 + vpdpwssd m8, m11, [o(pw_11585_11585)] {bcstd} ; out4 + mova m9, m10 + vpdpwssd m9, m5, [o(pw_m11585_11585)] {bcstd} ; out10 + mova m5, m10 + vpdpwssd m5, m11, [o(pw_11585_m11585)] {bcstd} ; out11 + mova m11, m10 + vpdpwssd m11, m3, [o(pw_m11585_m11585)] {bcstd} ; out7 + mova m14, m10 + vpdpwssd m14, m4, [o(pw_11585_11585)] {bcstd} ; out6 + mova m12, m10 + vpdpwssd m12, m3, [o(pw_m11585_11585)] {bcstd} ; out8 + mova m3, m10 + vpdpwssd m3, m4, [o(pw_m11585_11585)] {bcstd} ; out9 +%endmacro + +INV_TXFM_16X16_FN adst, dct, 39-18 +INV_TXFM_16X16_FN adst, adst + +cglobal vp9_iadst_16x16_internal, 0, 5, 16, dst, stride, c, eob, tx2 + mova m15, [o(itx_perm)] + psrlq m7, m15, 4 + vpermq m0, m15, [cq+64*0] ; 0 1 + vpermq m1, m7, [cq+64*1] ; 3 2 + vpermq m2, m15, [cq+64*2] ; 4 5 + vpermq m3, m7, [cq+64*3] ; 7 6 + vpbroadcastd m10, [o(pd_8192)] + vpbroadcastq m13, [o(int_mshift)] + vpcmpub k7, m13, m10, 6 + sub eobd, 39 + jl .pass1_fast + vpermq m4, m15, [cq+64*4] ; 8 9 + vpermq m5, m7, [cq+64*5] ; 11 10 + vpermq m6, m15, [cq+64*6] ; 12 13 + vpermq m7, m7, [cq+64*7] ; 15 14 + call .main + IADST16_PASS1_END + REPX {psrad x, 14}, m2, m8, m9, m5, m11, m14, m12, m3 + packssdw m2, m8, m2 ; out4 out5 + packssdw m5, m9, m5 ; out10 out11 + packssdw m4, m12, m3 ; out8 out9 + packssdw m3, m14, m11 ; out6 out7 + pxor m9, m9 + punpckhwd m8, m0, m1 + punpcklwd m0, m1 + psubw m8, m9, m8 + punpckhwd m1, m0, m8 + punpcklwd m0, m8 + punpckhwd m8, m2, m3 + punpcklwd m2, m3 + punpckhwd m3, m2, m8 + punpcklwd m2, m8 + punpckhwd m8, m4, m5 + punpcklwd m4, m5 + punpckhwd m5, m4, m8 + punpcklwd m4, m8 + punpckhwd m8, m6, m7 + punpcklwd m6, m7 + psubw m8, m9, m8 + punpckhwd m7, m6, m8 + punpcklwd m6, m8 + jmp m(vp9_idct_16x16_internal).pass1_end +.pass1_fast: + WRAP_YMM IADST16_MAIN + WRAP_YMM IADST16_PASS1_END + vinserti32x8 m0, ym6, 1 + vinserti32x8 m1, ym7, 1 + vinserti32x8 m8, ym12, 1 + vinserti32x8 m2, ym3, 1 + vinserti32x8 m14, ym9, 1 + vinserti32x8 m11, ym5, 1 + pslld m14, 2 + pslld m11, 2 + punpckhwd m4, m0, m1 + punpcklwd m0, m1 + vpmultishiftqb m14{k7}, m13, m8 + vpmultishiftqb m11{k7}, m13, m2 + psrlq m1, m15, 24 + pxor m2, m2 + psubw m2, m4 + punpckhwd m3, m0, m2 + punpcklwd m0, m2 + psrlq m2, m15, 28 + punpckhwd m4, m14, m11 + punpcklwd m14, m11 + mova m5, m2 + vpermi2q m2, m0, m14 + vpermt2q m0, m1, m14 + vpermi2q m1, m3, m4 + vpermt2q m3, m5, m4 + jmp tx2q +.pass2: + pshufd m1, m1, q1032 + 
pshufd m3, m3, q1032 + test eobd, eobd + jl .pass2_fast + pshufd m5, m5, q1032 + pshufd m7, m7, q1032 + call .main + jmp .pass2_end +.pass2_fast: + call .main_fast +.pass2_end: + vbroadcasti32x4 m9, [o(pw_11585_m11585x2x4)] + vbroadcasti32x4 m10, [o(pw_m11585_11585x2x4)] + punpckhqdq m1, m8 ; -out3 out2 + shufps m0, m2, q3210 ; -out1 out0 + pshufb m2, m11, m12 + pshufb m5, m12 + pshufb m3, m12 + pshufb m4, m12 + vbroadcasti32x4 m11, [o(pw_512)] + vpbroadcastd m12, [o(pw_512)] + punpcklqdq m8, m5, m2 ; t15a t7 + punpckhqdq m5, m2 ; t14a t6 + shufps m2, m3, m4, q1032 ; t2a t10 + shufps m3, m4, q3210 ; t3a t11 + psubsw m4, m2, m3 + paddsw m3, m2 + paddsw m2, m5, m8 + psubsw m5, m8 + pmulhrsw m4, m9 ; out8 out9 + pmulhrsw m3, m10 ; out7 out6 + pmulhrsw m2, m10 ; out5 out4 + pmulhrsw m5, m9 ; out10 out11 + pmulhrsw m6, m11 + pmulhrsw m7, m11 + pshufd m11, m11, q1032 + pmulhrsw m0, m11 + pmulhrsw m1, m11 + REPX {pmulhrsw x, m12}, m2, m3, m4, m5 + psrldq m8, m15, 2 + psrlq m12, m15, 20 + psrldq m10, m15, 1 + psrlq m13, m15, 12 + mova m9, m8 + vpermi2q m8, m0, m2 ; 0 1 4 5 + vpermt2q m0, m12, m2 + vpermi2q m9, m1, m3 ; 2 3 6 7 + vpermt2q m1, m12, m3 + mova m11, m10 + vpermi2q m10, m4, m6 ; 8 9 12 13 + vpermt2q m4, m13, m6 + vpermi2q m11, m5, m7 ; 10 11 14 15 + vpermt2q m5, m13, m7 + jmp m(vp9_idct_16x16_internal).pass2_end2 +ALIGN function_align + IADST16_MAIN + ret + +%macro IDCT_32x32_END 4 ; src, mem, stride[1-2] + pmovzxbw m10, [dstq+%3] + pmovzxbw m11, [r3 +%4] +%if %2 < 8 + paddw m8, m%2, m%1 + psubw m9, m%2, m%1 +%else + mova m9, [rsp+64*(%2-8)] + paddw m8, m9, m%1 + psubw m9, m%1 +%endif + pmulhrsw m8, m12 + pmulhrsw m9, m12 + paddw m8, m10 + paddw m9, m11 + packuswb m8, m9 + vpermq m8, m13, m8 + mova [dstq+%3], ym8 + vextracti32x8 [r3 +%4], m8, 1 +%if %2 == 3 || %2 == 7 || %2 == 11 + add dstq, r5 + sub r3, r5 +%endif +%endmacro + +cglobal vp9_idct_idct_32x32_add, 4, 7, 0, dst, stride, c, eob +%undef cmp + lea r6, [o_base] + cmp eobd, 1 + jne .pass1 + movd xmm0, [o(pw_11585x2)] + pmulhrsw xmm3, xmm0, [cq] + pxor m2, m2 + pmulhrsw xmm3, xmm0 + pmulhrsw xmm3, [o(pw_512)] + movd [cq], xm2 + add r3d, 15 + vpbroadcastw m3, xmm3 +.dconly_loop: + mova ym1, [dstq+strideq*0] + vinserti32x8 m1, [dstq+strideq*1], 1 + punpcklbw m0, m1, m2 + punpckhbw m1, m2 + paddw m0, m3 + paddw m1, m3 + packuswb m0, m1 + mova [dstq+strideq*0], ym0 + vextracti32x8 [dstq+strideq*1], m0, 1 + lea dstq, [dstq+strideq*2] + dec r3d + jg .dconly_loop + RET +.pass1: + PROLOGUE 0, 7, 30, 64*16, dst, stride, c, eob + sub eobd, 135 + jl .fast + mova m0, [cq+64* 0] + mova m14, [cq+64* 2] + mova m1, [cq+64* 4] + mova m15, [cq+64* 6] + mova m2, [cq+64* 8] + mova m16, [cq+64*10] + mova m3, [cq+64*12] + mova m17, [cq+64*14] + mova m4, [cq+64*16] + mova m18, [cq+64*18] + mova m5, [cq+64*20] + mova m19, [cq+64*22] + mova m6, [cq+64*24] + mova m20, [cq+64*26] + mova m7, [cq+64*28] + mova m21, [cq+64*30] + call .idct16 + mova [rsp+64*0], m14 + mova [rsp+64*1], m15 + mova [rsp+64*2], m16 + mova [rsp+64*3], m17 + mova [rsp+64*4], m18 + mova [rsp+64*5], m19 + mova [rsp+64*6], m20 + mova [rsp+64*7], m21 + mova m22, [cq+64* 1] + mova m23, [cq+64* 3] + mova m24, [cq+64* 5] + mova m25, [cq+64* 7] + mova m26, [cq+64* 9] + mova m27, [cq+64*11] + mova m28, [cq+64*13] + mova m29, [cq+64*15] + mova m14, [cq+64*17] + mova m15, [cq+64*19] + mova m16, [cq+64*21] + mova m17, [cq+64*23] + mova m18, [cq+64*25] + mova m19, [cq+64*27] + mova m20, [cq+64*29] + mova m21, [cq+64*31] + call .main + psubw m13, m0, m29 ; 31 + paddw m0, m29 ; 0 + psubw m29, m1, 
m28 ; 30 + paddw m1, m28 ; 1 + psubw m28, m2, m27 ; 29 + paddw m2, m27 ; 2 + psubw m27, m3, m26 ; 28 + paddw m3, m26 ; 3 + psubw m26, m4, m25 ; 27 + paddw m4, m25 ; 4 + psubw m25, m5, m24 ; 26 + paddw m5, m24 ; 5 + psubw m24, m6, m23 ; 25 + paddw m6, m23 ; 6 + psubw m23, m7, m22 ; 24 + paddw m7, m22 ; 7 + punpckhwd m8, m0, m1 ; a4 b4 a5 b5 a6 b6 a7 b7 + punpcklwd m0, m1 ; a0 b0 a1 b1 a2 b2 a3 b3 + punpckhwd m1, m2, m3 ; c4 d4 c5 d5 c6 d6 c7 d7 + punpcklwd m2, m3 ; c0 d0 c1 d1 c2 d2 c3 d3 + punpckhwd m22, m4, m5 ; e4 f4 e5 f5 e6 f6 e7 f7 + punpcklwd m4, m5 ; e0 f0 e1 f1 e2 f2 e3 f3 + punpckhwd m5, m6, m7 ; g4 h4 g5 h5 g6 h6 g7 h7 + punpcklwd m6, m7 ; g0 h0 g1 h1 g2 h2 g3 h3 + punpckhwd m3, m23, m24 + punpcklwd m23, m24 + punpckhwd m24, m25, m26 + punpcklwd m25, m26 + punpckhwd m26, m27, m28 + punpcklwd m27, m28 + punpckhwd m28, m29, m13 + punpcklwd m29, m13 + punpckhdq m7, m0, m2 ; a2 b2 c2 d2 a3 b3 c3 d3 + punpckldq m0, m2 ; a0 b0 c0 d0 a1 b1 c1 d1 + punpckhdq m2, m4, m6 ; e2 f2 g2 h2 e3 f3 g3 h3 + punpckldq m4, m6 ; e0 f0 g0 h0 e1 f1 g1 h1 + punpckhdq m6, m8, m1 ; a6 b6 c6 d6 a7 b7 c7 d7 + punpckldq m8, m1 ; a4 b4 c4 d4 a5 b5 c5 d5 + punpckhdq m1, m22, m5 ; e6 f6 g6 h6 e7 f7 g7 h7 + punpckldq m22, m5 ; e4 f4 g4 h5 e5 f5 g5 h5 + punpckhdq m13, m23, m25 + punpckldq m23, m25 + punpckhdq m25, m27, m29 + punpckldq m27, m29 + punpckhdq m9, m3, m24 + punpckldq m3, m24 + punpckhdq m24, m26, m28 + punpckldq m26, m28 + punpcklqdq m5, m23, m27 ; d00 d08 d16 d24 + punpckhqdq m23, m27 ; d01 d09 d17 d25 + punpckhqdq m27, m13, m25 ; d03 d11 d19 d27 + punpcklqdq m13, m25 ; d02 d10 d18 d26 + punpckhqdq m25, m3, m26 ; d05 d13 d21 d29 + punpcklqdq m3, m26 ; d04 d12 d20 d28 + punpckhqdq m26, m9, m24 ; d07 d15 d23 d31 + punpcklqdq m9, m24 ; d06 d14 d22 d30 + mova [rsp+64*12], m23 + mova [rsp+64*13], m27 + mova [rsp+64*14], m25 + mova [rsp+64*15], m26 + punpckhqdq m24, m8, m22 ; a05 a13 a21 a29 + punpcklqdq m8, m22 ; a04 a12 a20 a28 + punpckhqdq m22, m0, m4 ; a01 a09 a17 a25 + punpcklqdq m0, m4 ; a00 a08 a16 a24 + punpckhqdq m23, m7, m2 ; a03 a11 a19 a27 + punpcklqdq m7, m2 ; a02 a10 a18 a26 + punpckhqdq m25, m6, m1 ; a07 a15 a23 a31 + punpcklqdq m6, m1 ; a06 a14 a22 a30 + mova m2, [rsp+64*0] + mova m11, [rsp+64*1] + mova m12, [rsp+64*2] + mova m29, [rsp+64*3] + mova m27, [rsp+64*4] + mova m26, [rsp+64*5] + mova m4, [rsp+64*6] + mova m28, [rsp+64*7] + psubw m1, m2, m21 ; 23 + paddw m2, m21 ; 8 + psubw m21, m11, m20 ; 22 + paddw m11, m20 ; 9 + psubw m20, m12, m19 ; 21 + paddw m12, m19 ; 10 + psubw m19, m29, m18 ; 20 + paddw m29, m18 ; 11 + psubw m18, m27, m17 ; 19 + paddw m27, m17 ; 12 + psubw m17, m26, m16 ; 18 + paddw m26, m16 ; 13 + paddw m16, m4, m15 ; 14 + psubw m4, m15 ; 17 + mova m15, m6 + psubw m6, m28, m14 ; 16 + paddw m28, m14 ; 15 + mova m14, m7 + punpcklwd m7, m6, m4 + punpckhwd m6, m4 + punpckhwd m4, m17, m18 + punpcklwd m17, m18 + punpckhwd m18, m19, m20 + punpcklwd m19, m20 + punpckhwd m20, m21, m1 + punpcklwd m21, m1 + punpckhwd m1, m2, m11 ; i4 j4 i5 j5 i6 j6 i7 j7 + punpcklwd m2, m11 ; i0 j1 i1 j1 i2 j2 i3 j3 + punpckhwd m11, m12, m29 ; k4 l4 k5 l5 k6 l6 k7 l7 + punpcklwd m12, m29 ; k0 l0 k1 l1 k2 l2 k3 l3 + punpckhwd m29, m27, m26 ; m4 n4 m5 n5 m6 n6 m7 n7 + punpcklwd m27, m26 ; m0 n0 m1 n1 m2 n2 m3 n3 + punpckhwd m26, m16, m28 ; o4 p4 o5 p5 o6 p6 o7 p7 + punpcklwd m16, m28 ; o0 p0 o1 p1 o2 p2 o3 p3 + punpckhdq m28, m2, m12 ; i2 j2 k2 l2 i3 j3 k3 l3 + punpckldq m2, m12 ; i0 j0 k0 l0 i1 j1 k1 l1 + punpckhdq m12, m27, m16 ; m2 n2 o2 p2 m3 n3 o3 p3 + punpckldq m27, m16 ; m0 n0 o0 p0 m1 n1 o1 p1 
+ punpckhdq m16, m1, m11 ; i6 j6 k6 l6 i7 j7 k7 l7 + punpckldq m1, m11 ; i4 j4 k4 l4 i5 j5 k5 l5 + punpckhdq m11, m29, m26 ; m6 n6 o6 p6 m7 n7 o7 p7 + punpckldq m29, m26 ; m4 n4 o4 p4 m5 n5 o5 p5 + punpckhdq m26, m19, m21 + punpckldq m19, m21 + punpckhdq m21, m6, m4 + punpckldq m6, m4 + punpckhdq m4, m18, m20 + punpckldq m18, m20 + punpckhdq m20, m7, m17 + punpckldq m7, m17 + punpcklqdq m17, m28, m12 ; b02 b10 b18 b26 + punpckhqdq m28, m12 ; b03 b11 b19 b27 + punpckhqdq m12, m2, m27 ; b01 b09 b17 b25 + punpcklqdq m2, m27 ; b00 b08 b16 b24 + punpckhqdq m27, m1, m29 ; b05 b13 b21 b29 + punpcklqdq m1, m29 ; b04 b12 b20 b28 + punpckhqdq m29, m16, m11 ; b07 b15 b23 b31 + punpcklqdq m16, m11 ; b06 b14 b22 b30 + mova [rsp+64* 8], m12 + mova [rsp+64* 9], m28 + mova [rsp+64*10], m27 + mova [rsp+64*11], m29 + punpckhqdq m27, m20, m26 ; c03 c11 c19 c27 + punpcklqdq m20, m26 ; c02 c10 c18 c26 + punpckhqdq m26, m7, m19 ; c01 c09 c17 c25 + punpcklqdq m7, m19 ; c00 c08 c16 c24 + punpckhqdq m28, m6, m18 ; c05 c13 c21 c29 + punpcklqdq m6, m18 ; c04 c12 c20 c28 + punpckhqdq m29, m21, m4 ; c07 c15 c23 c31 + punpcklqdq m21, m4 ; c06 c14 c22 c30 + mov r3d, 64*28 + pxor m4, m4 +.zero_loop: + mova [cq+r3+64*0], m4 + mova [cq+r3+64*1], m4 + mova [cq+r3+64*2], m4 + mova [cq+r3+64*3], m4 + sub r3d, 64*4 + jge .zero_loop + vshufi32x4 m4, m0, m2, q3232 ; a16 a24 b16 b24 + vinserti32x8 m0, ym2, 1 ; a00 a08 b00 b08 + vshufi32x4 m2, m7, m5, q3232 ; c16 c24 d16 d24 + vinserti32x8 m7, ym5, 1 ; c00 c08 d00 d08 + vshufi32x4 m5, m8, m1, q3232 ; a20 a28 b20 b28 + vinserti32x8 m1, m8, ym1, 1 ; a04 a12 b04 b12 + vshufi32x4 m8, m6, m3, q3232 ; c20 c28 d20 d28 + vinserti32x8 m6, ym3, 1 ; c04 c12 d04 d12 + vshufi32x4 m3, m1, m6, q3131 ; 12 + vshufi32x4 m1, m6, q2020 ; 4 + vshufi32x4 m6, m4, m2, q3131 ; 24 + vshufi32x4 m4, m2, q2020 ; 16 + vshufi32x4 m2, m0, m7, q3131 ; 8 + vshufi32x4 m0, m7, q2020 ; 0 + vshufi32x4 m7, m5, m8, q3131 ; 28 + vshufi32x4 m5, m8, q2020 ; 20 + vshufi32x4 m18, m14, m17, q3232 ; a18 a26 b18 b26 + vinserti32x8 m14, ym17, 1 ; a02 a10 b02 b10 + vshufi32x4 m17, m20, m13, q3232 ; c18 c26 d18 d26 + vinserti32x8 m20, ym13, 1 ; c02 c10 d02 d10 + vshufi32x4 m13, m21, m9, q3232 ; c22 c30 d22 d30 + vinserti32x8 m21, ym9, 1 ; c06 c14 d06 d14 + vshufi32x4 m19, m15, m16, q3232 ; a22 a30 b22 b30 + vinserti32x8 m15, ym16, 1 ; a06 a14 b06 b14 + vshufi32x4 m16, m14, m20, q3131 ; 10 + vshufi32x4 m14, m20, q2020 ; 2 + vshufi32x4 m20, m18, m17, q3131 ; 26 + vshufi32x4 m18, m17, q2020 ; 18 + vshufi32x4 m17, m15, m21, q3131 ; 14 + vshufi32x4 m15, m21, q2020 ; 6 + vshufi32x4 m21, m19, m13, q3131 ; 30 + vshufi32x4 m19, m13, q2020 ; 22 + call .idct16 + mova [rsp+64*0], m14 + mova [rsp+64*1], m15 + mova [rsp+64*2], m16 + mova [rsp+64*3], m17 + mova [rsp+64*4], m18 + mova [rsp+64*5], m19 + mova [rsp+64*6], m20 + mova [rsp+64*7], m21 + mova m15, [rsp+64* 8] + mova m16, [rsp+64* 9] + mova m17, [rsp+64*10] + mova m19, [rsp+64*11] + mova m20, [rsp+64*12] + mova m21, [rsp+64*13] + mova m13, [rsp+64*14] + mova m18, [rsp+64*15] + vshufi32x4 m14, m22, m15, q3232 ; a17 a25 b17 b25 + vinserti32x8 m22, ym15, 1 ; a01 a09 b01 b09 + vshufi32x4 m15, m23, m16, q3232 ; a19 a27 b19 b27 + vinserti32x8 m23, ym16, 1 ; a03 a11 b03 b11 + vshufi32x4 m16, m24, m17, q3232 ; a21 a29 b21 b29 + vinserti32x8 m24, ym17, 1 ; a05 a13 b05 b13 + vshufi32x4 m17, m25, m19, q3232 ; a23 a31 b23 b31 + vinserti32x8 m25, ym19, 1 ; a07 a15 b07 b15 + vinserti32x8 m8, m26, ym20, 1 ; c01 c09 d01 d09 + vshufi32x4 m26, m20, q3232 ; c17 c25 d17 d25 + vinserti32x8 m9, m27, ym21, 1 ; 
c03 c11 d03 d11 + vshufi32x4 m27, m21, q3232 ; c19 c27 d19 d27 + vinserti32x8 m11, m28, ym13, 1 ; c05 c13 d05 d13 + vshufi32x4 m28, m13, q3232 ; c21 c29 d21 d29 + vinserti32x8 m12, m29, ym18, 1 ; c07 c15 d07 d15 + vshufi32x4 m29, m18, q3232 ; c23 c31 d23 d31 + vshufi32x4 m18, m14, m26, q3131 ; 25 + vshufi32x4 m14, m26, q2020 ; 17 + vshufi32x4 m19, m15, m27, q3131 ; 27 + vshufi32x4 m15, m27, q2020 ; 19 + vshufi32x4 m20, m16, m28, q3131 ; 29 + vshufi32x4 m16, m28, q2020 ; 21 + vshufi32x4 m21, m17, m29, q3131 ; 31 + vshufi32x4 m17, m29, q2020 ; 23 + vshufi32x4 m26, m22, m8, q3131 ; 9 + vshufi32x4 m22, m8, q2020 ; 1 + vshufi32x4 m27, m23, m9, q3131 ; 11 + vshufi32x4 m23, m9, q2020 ; 3 + vshufi32x4 m28, m24, m11, q3131 ; 13 + vshufi32x4 m24, m11, q2020 ; 5 + vshufi32x4 m29, m25, m12, q3131 ; 15 + vshufi32x4 m25, m12, q2020 ; 7 + call .main + jmp .end +.fast: + mova m14, [o(dup16_perm)] + pmovzxbw m9, [cq+64*0] + pmovzxbw m6, [cq+64*8] + vpermb m8, m14, [cq+64* 2] + vpermb m0, m14, [cq+64*14] + vpermb m5, m14, [cq+64*10] + vpermb m1, m14, [cq+64* 6] + vpermb m7, m14, [cq+64* 4] + vpermb m3, m14, [cq+64*12] + vpbroadcastd m10, [o(pd_8192)] + vpbroadcastq m13, [o(int_mshift)] + packuswb m9, m9 + packuswb m6, m6 + vpcmpub k7, m13, m10, 6 + IDCT16_MAIN 1 + vpermb m21, m14, [cq+64* 1] + vpermb m17, m14, [cq+64*15] + vpermb m20, m14, [cq+64* 9] + vpermb m15, m14, [cq+64* 7] + vpermb m18, m14, [cq+64* 5] + vpermb m16, m14, [cq+64*11] + vpermb m19, m14, [cq+64*13] + vpermb m14, m14, [cq+64* 3] + call .main_packed_fast + punpcklwd m8, m0, m2 + punpckhwd m0, m2 + punpcklwd m2, m1, m3 + punpckhwd m1, m3 + punpcklwd m3, m4, m6 + punpckhwd m4, m6 + punpcklwd m6, m5, m7 + punpckhwd m5, m7 + punpcklwd m7, m14, m16 + punpckhwd m14, m16 + punpcklwd m16, m15, m17 + punpckhwd m15, m17 + punpcklwd m17, m19, m21 + punpckhwd m19, m21 + punpckhwd m21, m18, m20 + punpcklwd m18, m20 + punpcklwd m20, m8, m1 + punpckhwd m8, m1 + punpcklwd m1, m0, m2 + punpckhwd m0, m2 + punpcklwd m2, m3, m5 + punpckhwd m3, m5 + punpcklwd m5, m4, m6 + punpckhwd m4, m6 + punpcklwd m6, m7, m15 + punpckhwd m7, m15 + punpcklwd m15, m14, m16 + punpckhwd m14, m16 + punpckhwd m16, m18, m19 + punpcklwd m18, m19 + punpcklwd m19, m21, m17 + punpckhwd m21, m17 + punpcklwd m17, m8, m0 ; a2 a6 aa ae + punpckhwd m8, m0 ; a3 a7 ab af + punpcklwd m0, m20, m1 ; a0 a4 a8 ac + punpckhwd m20, m1 ; a1 a5 a9 ad + punpcklwd m1, m2, m5 ; b0 b4 b8 bc + punpckhwd m2, m5 ; b1 b5 b9 bd + punpcklwd m5, m3, m4 ; b2 b6 ba be + punpckhwd m3, m4 ; b3 b7 bb bf + punpcklwd m4, m6, m15 ; c0 c4 c8 cc + punpckhwd m6, m15 ; c1 c5 c9 cd + punpcklwd m15, m7, m14 ; c2 c6 ca ce + punpckhwd m7, m14 ; c3 c7 cb cf + punpcklwd m14, m18, m19 ; d0 d4 d8 dc + punpckhwd m18, m19 ; d1 d5 d9 dd + punpcklwd m9, m16, m21 ; d2 d6 da de + punpckhwd m16, m21 ; d3 d7 db df + mov r3d, 64*12 + pxor ym21, ym21 +.fast_zero_loop: + mova [cq+r3+64*0], ym21 + mova [cq+r3+64*1], ym21 + mova [cq+r3+64*2], ym21 + mova [cq+r3+64*3], ym21 + sub r3d, 64*4 + jge .fast_zero_loop + vshufi32x4 m21, m0, m1, q3232 ; a8 ac b8 bc + vinserti32x8 m0, ym1, 1 ; a0 a4 b0 b4 + vinserti32x8 m1, m17, ym5, 1 ; a2 a6 b2 b6 + vshufi32x4 m5, m17, m5, q3232 ; aa ae ba be + vinserti32x8 m17, m8, ym3, 1 ; a3 a7 b3 b7 + vshufi32x4 m19, m8, m3, q3232 ; ab af bb bf + vinserti32x8 m3, m4, ym14, 1 ; c0 c4 d0 d4 + vshufi32x4 m4, m14, q3232 ; c8 cc d8 dc + vinserti32x8 m14, m20, ym2, 1 ; a1 a5 b1 b5 + vshufi32x4 m20, m2, q3232 ; a9 ad b9 bd + vinserti32x8 m2, m6, ym18, 1 ; c1 c5 d1 d5 + vshufi32x4 m6, m18, q3232 ; c9 cd d9 dd + 
vinserti32x8 m18, m15, ym9, 1 ; c2 c6 d2 d6 + vshufi32x4 m15, m9, q3232 ; ca ce da de + vinserti32x8 m9, m7, ym16, 1 ; c3 c7 d3 d7 + vshufi32x4 m7, m16, q3232 ; cb cf db df + vshufi32x4 m22, m14, m2, q2020 ; 1 + vshufi32x4 m24, m14, m2, q3131 ; 5 + vshufi32x4 m23, m17, m9, q2020 ; 3 + vshufi32x4 m25, m17, m9, q3131 ; 7 + vshufi32x4 m16, m5, m15, q2020 ; 10 + vshufi32x4 m17, m5, m15, q3131 ; 14 + vshufi32x4 m14, m1, m18, q2020 ; 2 + vshufi32x4 m15, m1, m18, q3131 ; 6 + vshufi32x4 m1, m0, m3, q3131 ; 4 + vshufi32x4 m0, m3, q2020 ; 0 + vshufi32x4 m3, m21, m4, q3131 ; 12 + vshufi32x4 m2, m21, m4, q2020 ; 8 + vshufi32x4 m26, m20, m6, q2020 ; 9 + vshufi32x4 m28, m20, m6, q3131 ; 13 + vshufi32x4 m27, m19, m7, q2020 ; 11 + vshufi32x4 m29, m19, m7, q3131 ; 15 + call .idct16_fast + mova [rsp+64*0], m14 + mova [rsp+64*1], m15 + mova [rsp+64*2], m16 + mova [rsp+64*3], m17 + mova [rsp+64*4], m18 + mova [rsp+64*5], m19 + mova [rsp+64*6], m20 + mova [rsp+64*7], m21 + call .main_fast +.end: + lea r4, [strideq*3] + vpbroadcastd m12, [o(pw_512)] + movshdup m13, [o(itx_perm)] + lea r3, [dstq+r4*8] + lea r5, [strideq+r4] ; stride*4 + add r3, r5 ; dst+stride*28 + IDCT_32x32_END 29, 0, strideq*0, r4 + IDCT_32x32_END 28, 1, strideq*1, strideq*2 + IDCT_32x32_END 27, 2, strideq*2, strideq*1 + IDCT_32x32_END 26, 3, r4 , strideq*0 + IDCT_32x32_END 25, 4, strideq*0, r4 + IDCT_32x32_END 24, 5, strideq*1, strideq*2 + IDCT_32x32_END 23, 6, strideq*2, strideq*1 + IDCT_32x32_END 22, 7, r4 , strideq*0 + IDCT_32x32_END 21, 8, strideq*0, r4 + IDCT_32x32_END 20, 9, strideq*1, strideq*2 + IDCT_32x32_END 19, 10, strideq*2, strideq*1 + IDCT_32x32_END 18, 11, r4 , strideq*0 + IDCT_32x32_END 17, 12, strideq*0, r4 + IDCT_32x32_END 16, 13, strideq*1, strideq*2 + IDCT_32x32_END 15, 14, strideq*2, strideq*1 + IDCT_32x32_END 14, 15, r4 , strideq*0 + RET +ALIGN function_align +.idct16_fast: + vpbroadcastd m21, [o(pw_16305x2)] + vpbroadcastd m8, [o(pw_1606x2)] + vpbroadcastd m18, [o(pw_m10394x2)] + vpbroadcastd m9, [o(pw_12665x2)] + pmulhrsw m21, m14 ; t15a + vpbroadcastd m19, [o(pw_14449x2)] + pmulhrsw m14, m8 ; t8a + vpbroadcastd m8, [o(pw_7723x2)] + pmulhrsw m18, m17 ; t9a + vpbroadcastd m20, [o(pw_m4756x2)] + pmulhrsw m17, m9 ; t14a + vpbroadcastd m9, [o(pw_15679x2)] + pmulhrsw m19, m16 ; t13a + vpbroadcastd m5, [o(pw_m9102x2)] + pmulhrsw m16, m8 ; t10a + vpbroadcastd m8, [o(pw_13623x2)] + pmulhrsw m20, m15 ; t11a + vpbroadcastd m7, [o(pw_16069x2)] + pmulhrsw m15, m9 ; t12a + vpbroadcastd m9, [o(pw_3196x2)] + pmulhrsw m5, m3 ; t5a + vpbroadcastd m6, [o(pw_15137x2)] + pmulhrsw m3, m8 ; t6a + vpbroadcastd m8, [o(pw_6270x2)] + pmulhrsw m7, m1 ; t7a + vpbroadcastd m4, [o(pw_11585x2)] + pmulhrsw m1, m9 ; t4 + vpbroadcastd m10, [o(pd_8192)] + pmulhrsw m6, m2 ; t3 + pmulhrsw m2, m8 ; t2 + pmulhrsw m4, m0 ; t0 + mova m0, m4 ; t1 + jmp .idct16b +ALIGN function_align +.idct16: + vpbroadcastd m10, [o(pd_8192)] + ITX_MULSUB_2W 14, 21, 8, 9, 10, 1606, 16305 ; t8a, t15a + ITX_MULSUB_2W 18, 17, 8, 9, 10, 12665, 10394 ; t9a, t14a + ITX_MULSUB_2W 16, 19, 8, 9, 10, 7723, 14449 ; t10a, t13a + ITX_MULSUB_2W 20, 15, 8, 9, 10, 15679, 4756 ; t11a, t12 + ITX_MULSUB_2W 5, 3, 8, 9, 10, 13623, 9102 ; t5a, t6a + ITX_MULSUB_2W 1, 7, 8, 9, 10, 3196, 16069 ; t4a, t7a + ITX_MULSUB_2W 2, 6, 8, 9, 10, 6270, 15137 ; t2, t3 + ITX_MULSUB_2W 0, 4, 8, 9, 10, 11585, 11585 ; t1, t0 +.idct16b: + paddw m8, m20, m16 ; t11 + psubw m20, m16 ; t10 + paddw m16, m15, m19 ; t12 + psubw m15, m19 ; t13 + psubw m19, m14, m18 ; t9 + paddw m14, m18 ; t8 + psubw m18, m21, m17 ; t14 + 
paddw m21, m17 ; t15 + vpbroadcastd m11, [o(pw_6270_15137)] + vpbroadcastd m12, [o(pw_m15137_6270)] + ITX_MULSUB_2W 18, 19, 9, 17, 10, 11, 12 ; t9a, t14a + vpbroadcastd m11, [o(pw_m6270_m15137)] + ITX_MULSUB_2W 15, 20, 9, 17, 10, 12, 11 ; t10a, t13a + vpbroadcastd m11, [o(pw_11585_11585)] + vpbroadcastd m12, [o(pw_m11585_11585)] + paddw m9, m7, m3 ; t7 + psubw m3, m7, m3 ; t6a + paddw m7, m1, m5 ; t4 + psubw m1, m5 ; t5a + psubw m17, m14, m8 ; t11a + paddw m8, m14 ; t8a + paddw m14, m18, m15 ; t9 + psubw m18, m15 ; t10 + psubw m15, m19, m20 ; t13 + paddw m19, m20 ; t14 + paddw m20, m21, m16 ; t15a + psubw m16, m21, m16 ; t12a + ITX_MULSUB_2W 3, 1, 5, 21, 10, 11, 12 ; t5, t6 + ITX_MULSUB_2W 15, 18, 5, 21, 10, 11, 12 ; t10a, t13a + ITX_MULSUB_2W 16, 17, 5, 21, 10, 11, 12 ; t11, t12 + psubw m5, m0, m2 ; t2 + paddw m2, m0 ; t1 + paddw m0, m4, m6 ; t0 + psubw m4, m6 ; t3 + psubw m6, m2, m1 ; t6 + paddw m1, m2 ; t1 + paddw m2, m5, m3 ; t2 + psubw m5, m3 ; t5 + paddw m3, m4, m7 ; t3 + psubw m4, m7 ; t4 + psubw m7, m0, m9 ; t7 + paddw m0, m9 ; t0 + psubw m21, m0, m20 ; out15 + paddw m0, m20 ; out0 + psubw m20, m1, m19 ; out14 + paddw m1, m19 ; out1 + psubw m19, m2, m18 ; out13 + paddw m2, m18 ; out2 + psubw m18, m3, m17 ; out12 + paddw m3, m17 ; out3 + psubw m17, m4, m16 ; out11 + paddw m4, m16 ; out4 + psubw m16, m5, m15 ; out10 + paddw m5, m15 ; out5 + psubw m15, m6, m14 ; out9 + paddw m6, m14 ; out6 + psubw m14, m7, m8 ; out8 + paddw m7, m8 ; out7 + ret +ALIGN function_align +.main_fast: + vpbroadcastd m21, [o(pw_16364x2)] + vpbroadcastd m8, [o(pw_804x2)] + vpbroadcastd m14, [o(pw_m11003x2)] + vpbroadcastd m9, [o(pw_12140x2)] + pmulhrsw m21, m22 ; t31a + vpbroadcastd m17, [o(pw_14811x2)] + pmulhrsw m22, m8 ; t16a + vpbroadcastd m8, [o(pw_7005x2)] + pmulhrsw m14, m29 ; t30a + vpbroadcastd m18, [o(pw_m5520x2)] + pmulhrsw m29, m9 ; t17a + vpbroadcastd m9, [o(pw_15426x2)] + pmulhrsw m17, m26 ; t29a + vpbroadcastd m19, [o(pw_15893x2)] + pmulhrsw m26, m8 ; t18a + vpbroadcastd m8, [o(pw_3981x2)] + pmulhrsw m18, m25 ; t19a + vpbroadcastd m16, [o(pw_m8423x2)] + pmulhrsw m25, m9 ; t28a + vpbroadcastd m9, [o(pw_14053x2)] + pmulhrsw m19, m24 ; t27a + vpbroadcastd m15, [o(pw_13160x2)] + pmulhrsw m24, m8 ; t20a + vpbroadcastd m8, [o(pw_9760x2)] + pmulhrsw m16, m27 ; t21a + vpbroadcastd m20, [o(pw_m2404x2)] + pmulhrsw m27, m9 ; t26a + vpbroadcastd m9, [o(pw_16207x2)] + pmulhrsw m15, m28 ; t25a + pmulhrsw m28, m8 ; t22a + pmulhrsw m20, m23 ; t23a + pmulhrsw m23, m9 ; t24a + jmp .main2 +ALIGN function_align +.main: + ITX_MULSUB_2W 22, 21, 8, 9, 10, 804, 16364 ; t16a, t31a + ITX_MULSUB_2W 14, 29, 8, 9, 10, 12140, 11003 ; t17a, t30a + ITX_MULSUB_2W 26, 17, 8, 9, 10, 7005, 14811 ; t18a, t29a + ITX_MULSUB_2W 18, 25, 8, 9, 10, 15426, 5520 ; t19a, t28a + ITX_MULSUB_2W 24, 19, 8, 9, 10, 3981, 15893 ; t20a, t27a + ITX_MULSUB_2W 16, 27, 8, 9, 10, 14053, 8423 ; t21a, t26a + ITX_MULSUB_2W 28, 15, 8, 9, 10, 9760, 13160 ; t22a, t25a + ITX_MULSUB_2W 20, 23, 8, 9, 10, 16207, 2404 ; t23a, t24a +.main2: + psubw m8, m22, m14 ; t17 + paddw m22, m14 ; t16 + paddw m14, m18, m26 ; t19 + psubw m18, m26 ; t18 + psubw m26, m24, m16 ; t21 + paddw m24, m16 ; t20 + psubw m16, m20, m28 ; t22 + paddw m28, m20 ; t23 + psubw m20, m23, m15 ; t25 + paddw m23, m15 ; t24 + psubw m15, m21, m29 ; t30 + paddw m21, m29 ; t31 + psubw m29, m19, m27 ; t26 + paddw m19, m27 ; t27 + paddw m27, m25, m17 ; t28 + psubw m25, m17 ; t29 + ITX_MULSUB_2W 15, 8, 9, 17, 10, 3196, 16069 ; t17a, t30a + ITX_MULSUB_2W 25, 18, 9, 17, 10, m16069, 3196 ; t18a, t29a + 
ITX_MULSUB_2W 29, 26, 9, 17, 10, 13623, 9102 ; t21a, t26a + ITX_MULSUB_2W 20, 16, 9, 17, 10, m9102, 13623 ; t22a, t25a + psubw m17, m21, m27 ; t28a + paddw m21, m27 ; t31a + psubw m27, m15, m25 ; t18 + paddw m15, m25 ; t17 + psubw m25, m20, m29 ; t21 + paddw m20, m29 ; t22 + psubw m29, m8, m18 ; t29 + paddw m8, m18 ; t30 + psubw m18, m22, m14 ; t19a + paddw m22, m14 ; t16a + psubw m14, m28, m24 ; t20a + paddw m24, m28 ; t23a + paddw m28, m16, m26 ; t25 + psubw m16, m26 ; t26 + psubw m26, m23, m19 ; t27a + paddw m23, m19 ; t24a + vpbroadcastd m12, [o(pw_m15137_6270)] + vpbroadcastd m11, [o(pw_6270_15137)] + ITX_MULSUB_2W 29, 27, 9, 19, 10, 11, 12 ; t18a, t29a + ITX_MULSUB_2W 17, 18, 9, 19, 10, 11, 12 ; t19, t28 + vpbroadcastd m11, [o(pw_m6270_m15137)] + ITX_MULSUB_2W 16, 25, 9, 19, 10, 12, 11 ; t21a, t26a + ITX_MULSUB_2W 26, 14, 9, 19, 10, 12, 11 ; t20, t27 + vpbroadcastd m12, [o(pw_m11585_11585)] + vpbroadcastd m11, [o(pw_11585_11585)] + psubw m19, m27, m25 ; t26 + paddw m27, m25 ; t29 + psubw m25, m17, m26 ; t20a + paddw m17, m26 ; t19a + paddw m26, m18, m14 ; t28a + psubw m18, m14 ; t27a + paddw m14, m22, m24 ; t16 + psubw m22, m24 ; t23 + psubw m24, m29, m16 ; t21 + paddw m16, m29 ; t18 + paddw m29, m21, m23 ; t31 + psubw m21, m23 ; t24 + psubw m23, m15, m20 ; t22a + paddw m15, m20 ; t17a + psubw m20, m8, m28 ; t25a + paddw m28, m8 ; t30a + ITX_MULSUB_2W 18, 25, 8, 9, 10, 11, 12 ; t20, t27 + ITX_MULSUB_2W 19, 24, 8, 9, 10, 11, 12 ; t21a, t26a + ITX_MULSUB_2W 21, 22, 8, 9, 10, 11, 12 ; t23a, t24a + ITX_MULSUB_2W 20, 23, 8, 9, 10, 11, 12 ; t22, t25 + ret +ALIGN function_align +.main_packed_fast: + vpbroadcastd m8, [o(pw_804_16364x2)] + vpbroadcastd m9, [o(pw_m11003_12140x2)] + vpbroadcastd m11, [o(pw_7005_14811x2)] + vpbroadcastd m12, [o(pw_m5520_15426x2)] + pmulhrsw m21, m8 ; t16a, t31a + vpbroadcastd m8, [o(pw_3981_15893x2)] + pmulhrsw m17, m9 ; t17a, t30a + vpbroadcastd m9, [o(pw_m8423_14053x2)] + pmulhrsw m20, m11 ; t18a, t29a + vpbroadcastd m11, [o(pw_9760_13160x2)] + pmulhrsw m15, m12 ; t19a, t28a + vpbroadcastd m12, [o(pw_m2404_16207x2)] + pmulhrsw m18, m8 ; t20a, t27a + pmulhrsw m16, m9 ; t21a, t26a + pmulhrsw m19, m11 ; t22a, t25a + pmulhrsw m14, m12 ; t23a, t24a + psubw m8, m21, m17 ; t17 t30 + paddw m21, m17 ; t16 t31 + psubw m17, m15, m20 ; t18 t29 + paddw m20, m15 ; t19 t28 + psubw m15, m18, m16 ; t21 t26 + paddw m18, m16 ; t20 t27 + psubw m16, m14, m19 ; t22 t25 + paddw m14, m19 ; t23 t24 + ITX_MUL2X_PACK 8, 9, 19, 10, 3196, 16069, 5 ; t17a t30a + ITX_MUL2X_PACK 17, 9, 19, 10, m16069, 3196, 5 ; t18a t29a + ITX_MUL2X_PACK 15, 9, 19, 10, 13623, 9102, 5 ; t21a t26a + ITX_MUL2X_PACK 16, 9, 19, 10, m9102, 13623, 5 ; t22a t25a + vpbroadcastd m11, [o(pw_m15137_6270)] + psubw m19, m21, m20 ; t19a t28a + paddw m21, m20 ; t16a t31a + psubw m20, m14, m18 ; t20a t27a + paddw m14, m18 ; t23a t24a + psubw m18, m8, m17 ; t18 t29 + paddw m8, m17 ; t17 t30 + psubw m17, m16, m15 ; t21 t26 + paddw m15, m16 ; t22 t25 + ITX_MUL2X_PACK 18, 9, 16, 10, 6270_15137, 11, 20 ; t18a t29a + ITX_MUL2X_PACK 19, 9, 16, 10, 6270_15137, 11, 20 ; t19 t28 + ITX_MUL2X_PACK 20, 9, 16, 10, 11, m6270_m15137, 36 ; t20 t27 + ITX_MUL2X_PACK 17, 9, 16, 10, 11, m6270_m15137, 36 ; t21a t26a + vbroadcasti32x4 m9, [o(deint_shuf)] + psubw m16, m21, m14 ; t23 t24 + paddw m14, m21 ; t16 t31 + psubw m21, m8, m15 ; t22a t25a + paddw m15, m8 ; t17a t30a + psubw m8, m18, m17 ; t21 t26 + paddw m18, m17 ; t18 t29 + paddw m17, m19, m20 ; t19a t28a + psubw m19, m20 ; t20a t27a + vpbroadcastd m11, [o(pw_m11585_11585)] + vpbroadcastd 
m12, [o(pw_11585_11585)]
+    REPX {pshufb x, m9}, m14, m15, m18, m17
+    mova m9, m10
+    vpdpwssd m9, m16, m11
+    mova m20, m10
+    vpdpwssd m20, m21, m11
+    psrad m9, 14
+    psrad m20, 14
+    packssdw m9, m20 ; t23a t22
+    mova m20, m10
+    vpdpwssd m20, m16, m12
+    mova m16, m10
+    vpdpwssd m16, m21, m12
+    psrad m20, 14
+    psrad m16, 14
+    packssdw m16, m20, m16 ; t24a t25
+    ITX_MUL2X_PACK 8, 21, 20, 10, 11, 12, 8 ; t21a t26a
+    ITX_MUL2X_PACK 19, 8, 11, 10, 11, 12, 8 ; t20 t27
+    packssdw m11, m20 ; t27 t26a
+    packssdw m8, m21 ; t20 t21a
+    punpcklqdq m20, m14, m15 ; t16 t17a
+    punpckhqdq m14, m15 ; t31 t30a
+    punpckhqdq m15, m17, m18 ; t28a t29
+    punpcklqdq m17, m18 ; t19a t18
+    psubw m21, m0, m14 ; out31 out30
+    paddw m0, m14 ; out0 out1
+    psubw m14, m7, m20 ; out16 out17
+    paddw m7, m20 ; out15 out14
+    psubw m20, m1, m15 ; out28 out29
+    paddw m1, m15 ; out3 out2
+    psubw m15, m6, m17 ; out19 out18
+    paddw m6, m17 ; out12 out13
+    psubw m17, m4, m9 ; out23 out22
+    paddw m4, m9 ; out8 out9
+    psubw m18, m3, m16 ; out24 out25
+    paddw m3, m16 ; out7 out6
+    psubw m16, m5, m8 ; out20 out21
+    paddw m5, m8 ; out11 out10
+    psubw m19, m2, m11 ; out27 out26
+    paddw m2, m11 ; out4 out5
+    ret
+
+%endif
diff --git a/libavcodec/xvididct.c b/libavcodec/xvididct.c
index 2eddc5978c544..317e4e82cdaf2 100644
--- a/libavcodec/xvididct.c
+++ b/libavcodec/xvididct.c
@@ -32,7 +32,6 @@
 #include "config.h"
 #include "libavutil/attributes.h"
-#include "avcodec.h"
 #include "idctdsp.h"
 #include "xvididct.h"
@@ -330,27 +329,16 @@ static void xvid_idct_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
     ff_add_pixels_clamped_c(block, dest, line_size);
 }
-av_cold void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx)
+av_cold void ff_xvid_idct_init(IDCTDSPContext *c)
 {
-    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
-
-    if (high_bit_depth || avctx->lowres ||
-        !(avctx->idct_algo == FF_IDCT_AUTO ||
-          avctx->idct_algo == FF_IDCT_XVID))
-        return;
-
-    if (avctx->idct_algo == FF_IDCT_XVID) {
-        c->idct_put  = xvid_idct_put;
-        c->idct_add  = xvid_idct_add;
-        c->idct      = ff_xvid_idct;
-        c->perm_type = FF_IDCT_PERM_NONE;
-    }
+    c->idct_put  = xvid_idct_put;
+    c->idct_add  = xvid_idct_add;
+    c->idct      = ff_xvid_idct;
+    c->perm_type = FF_IDCT_PERM_NONE;
 #if ARCH_X86
     ff_xvid_idct_init_x86(c);
 #elif ARCH_MIPS
     ff_xvid_idct_init_mips(c);
 #endif
-
-    ff_init_scantable_permutation(c->idct_permutation, c->perm_type);
 }
diff --git a/libavcodec/xvididct.h b/libavcodec/xvididct.h
index 1395cfd8e1e73..496071a034f90 100644
--- a/libavcodec/xvididct.h
+++ b/libavcodec/xvididct.h
@@ -21,12 +21,11 @@
 #include <stdint.h>
-#include "avcodec.h"
 #include "idctdsp.h"
 void ff_xvid_idct(int16_t *const in);
-void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx);
+void ff_xvid_idct_init(IDCTDSPContext *c);
 void ff_xvid_idct_init_x86(IDCTDSPContext *c);
 void ff_xvid_idct_init_mips(IDCTDSPContext *c);
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 0effe4127ffd3..97f8f1727203e 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -208,6 +208,7 @@ OBJS-$(CONFIG_BILATERAL_FILTER) += vf_bilateral.o
 OBJS-$(CONFIG_BILATERAL_CUDA_FILTER) += vf_bilateral_cuda.o vf_bilateral_cuda.ptx.o
 OBJS-$(CONFIG_BITPLANENOISE_FILTER) += vf_bitplanenoise.o
 OBJS-$(CONFIG_BLACKDETECT_FILTER) += vf_blackdetect.o
+OBJS-$(CONFIG_BLACKDETECT_VULKAN_FILTER) += vf_blackdetect_vulkan.o
 OBJS-$(CONFIG_BLACKFRAME_FILTER) += vf_blackframe.o
 OBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o framesync.o
 OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vf_blend_vulkan.o framesync.o
vulkan.o vulkan_filter.o @@ -471,6 +472,7 @@ OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o vulka OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o framesync.o OBJS-$(CONFIG_SCALE2REF_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCDET_FILTER) += vf_scdet.o +OBJS-$(CONFIG_SCDET_VULKAN_FILTER) += vf_scdet_vulkan.o OBJS-$(CONFIG_SCHARR_FILTER) += vf_convolution.o OBJS-$(CONFIG_SCROLL_FILTER) += vf_scroll.o OBJS-$(CONFIG_SEGMENT_FILTER) += f_segment.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 5ea33cdf01b91..3bc045b28f552 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -192,6 +192,7 @@ extern const FFFilter ff_vf_bilateral; extern const FFFilter ff_vf_bilateral_cuda; extern const FFFilter ff_vf_bitplanenoise; extern const FFFilter ff_vf_blackdetect; +extern const FFFilter ff_vf_blackdetect_vulkan; extern const FFFilter ff_vf_blackframe; extern const FFFilter ff_vf_blend; extern const FFFilter ff_vf_blend_vulkan; @@ -443,6 +444,7 @@ extern const FFFilter ff_vf_scale_vulkan; extern const FFFilter ff_vf_scale2ref; extern const FFFilter ff_vf_scale2ref_npp; extern const FFFilter ff_vf_scdet; +extern const FFFilter ff_vf_scdet_vulkan; extern const FFFilter ff_vf_scharr; extern const FFFilter ff_vf_scroll; extern const FFFilter ff_vf_segment; diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index c76d43a215ea7..56f635a4130fb 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -1071,7 +1071,8 @@ int ff_filter_frame(AVFilterLink *link, AVFrame *frame) strcmp(link->dst->filter->name, "format") && strcmp(link->dst->filter->name, "idet") && strcmp(link->dst->filter->name, "null") && - strcmp(link->dst->filter->name, "scale")) { + strcmp(link->dst->filter->name, "scale") && + strcmp(link->dst->filter->name, "libplacebo")) { av_assert1(frame->format == link->format); av_assert1(frame->width == link->w); av_assert1(frame->height == link->h); diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c index 5e93f93aab362..2d6036df7423e 100644 --- a/libavfilter/avfiltergraph.c +++ b/libavfilter/avfiltergraph.c @@ -1068,8 +1068,8 @@ static void swap_channel_layouts_on_filter(AVFilterContext *filter) } /* no penalty for LFE channel mismatch */ - if (av_channel_layout_channel_from_index(&in_chlayout, AV_CHAN_LOW_FREQUENCY) >= 0 && - av_channel_layout_channel_from_index(&out_chlayout, AV_CHAN_LOW_FREQUENCY) >= 0) + if (av_channel_layout_index_from_channel(&in_chlayout, AV_CHAN_LOW_FREQUENCY) >= 0 && + av_channel_layout_index_from_channel(&out_chlayout, AV_CHAN_LOW_FREQUENCY) >= 0) score += 10; av_channel_layout_from_mask(&in_chlayout, av_channel_layout_subset(&in_chlayout, ~AV_CH_LOW_FREQUENCY)); av_channel_layout_from_mask(&out_chlayout, av_channel_layout_subset(&out_chlayout, ~AV_CH_LOW_FREQUENCY)); diff --git a/libavfilter/vf_blackdetect.c b/libavfilter/vf_blackdetect.c index 21f35f705dd74..8be33a814dd83 100644 --- a/libavfilter/vf_blackdetect.c +++ b/libavfilter/vf_blackdetect.c @@ -31,6 +31,7 @@ #include "libavutil/timestamp.h" #include "avfilter.h" #include "filters.h" +#include "formats.h" #include "video.h" typedef struct BlackDetectContext { @@ -45,6 +46,7 @@ typedef struct BlackDetectContext { double picture_black_ratio_th; double pixel_black_th; unsigned int pixel_black_th_i; + int alpha; unsigned int nb_black_pixels; ///< number of black pixels counted so far AVRational time_base; @@ -63,6 +65,7 @@ static const AVOption blackdetect_options[] = { { "pic_th", "set the picture 
black ratio threshold", OFFSET(picture_black_ratio_th), AV_OPT_TYPE_DOUBLE, {.dbl=.98}, 0, 1, FLAGS }, { "pixel_black_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, { "pix_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, + { "alpha", "check alpha instead of luma", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, { NULL } }; @@ -71,11 +74,21 @@ AVFILTER_DEFINE_CLASS(blackdetect); #define YUVJ_FORMATS \ AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P +#define YUVA_FORMATS \ + AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P, \ + AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16, \ + AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P12, AV_PIX_FMT_YUVA422P16, \ + AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16 + static const enum AVPixelFormat yuvj_formats[] = { YUVJ_FORMATS, AV_PIX_FMT_NONE }; -static const enum AVPixelFormat pix_fmts[] = { +static const enum AVPixelFormat yuva_formats[] = { + YUVA_FORMATS, AV_PIX_FMT_NONE +}; + +static const enum AVPixelFormat yuv_formats[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, @@ -91,13 +104,23 @@ static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, - AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P, - AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16, - AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P12, AV_PIX_FMT_YUVA422P16, - AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16, - AV_PIX_FMT_NONE + YUVA_FORMATS, AV_PIX_FMT_NONE }; +static int query_format(const AVFilterContext *ctx, + AVFilterFormatsConfig **cfg_in, + AVFilterFormatsConfig **cfg_out) +{ + const BlackDetectContext *s = ctx->priv; + AVFilterFormats *formats; + if (s->alpha) + formats = ff_make_format_list(yuva_formats); + else + formats = ff_make_format_list(yuv_formats); + + return ff_set_common_formats2(ctx, cfg_in, cfg_out, formats); +} + static int config_input(AVFilterLink *inlink) { AVFilterContext *ctx = inlink->dst; @@ -114,9 +137,9 @@ static int config_input(AVFilterLink *inlink) return AVERROR(ENOMEM); av_log(s, AV_LOG_VERBOSE, - "black_min_duration:%s pixel_black_th:%f picture_black_ratio_th:%f\n", + "black_min_duration:%s pixel_black_th:%f picture_black_ratio_th:%f alpha:%d\n", av_ts2timestr(s->black_min_duration, &s->time_base), - s->pixel_black_th, s->picture_black_ratio_th); + s->pixel_black_th, s->picture_black_ratio_th, s->alpha); return 0; } @@ -140,7 +163,8 @@ static int black_counter(AVFilterContext *ctx, void *arg, const unsigned int threshold = s->pixel_black_th_i; unsigned int *counterp = &s->counter[jobnr]; AVFrame *in = arg; - const int linesize = in->linesize[0]; + const int plane = s->alpha ? 
3 : 0; + const int linesize = in->linesize[plane]; const int w = in->width; const int h = in->height; const int start = (h * jobnr) / nb_jobs; @@ -149,7 +173,7 @@ static int black_counter(AVFilterContext *ctx, void *arg, unsigned int counter = 0; if (s->depth == 8) { - const uint8_t *p = in->data[0] + start * linesize; + const uint8_t *p = in->data[plane] + start * linesize; for (int i = 0; i < size; i++) { for (int x = 0; x < w; x++) @@ -157,7 +181,7 @@ static int black_counter(AVFilterContext *ctx, void *arg, p += linesize; } } else { - const uint16_t *p = (const uint16_t *)(in->data[0] + start * linesize); + const uint16_t *p = (const uint16_t *)(in->data[plane] + start * linesize); for (int i = 0; i < size; i++) { for (int x = 0; x < w; x++) @@ -180,7 +204,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *picref) const int max = (1 << s->depth) - 1; const int factor = (1 << (s->depth - 8)); const int full = picref->color_range == AVCOL_RANGE_JPEG || - ff_fmt_is_in(picref->format, yuvj_formats); + ff_fmt_is_in(picref->format, yuvj_formats) || + s->alpha; s->pixel_black_th_i = full ? s->pixel_black_th * max : // luminance_minimum_value + pixel_black_th * luminance_range_size @@ -252,6 +277,6 @@ const FFFilter ff_vf_blackdetect = { .priv_size = sizeof(BlackDetectContext), FILTER_INPUTS(blackdetect_inputs), FILTER_OUTPUTS(ff_video_default_filterpad), - FILTER_PIXFMTS_ARRAY(pix_fmts), + FILTER_QUERY_FUNC2(query_format), .uninit = uninit, }; diff --git a/libavfilter/vf_blackdetect_vulkan.c b/libavfilter/vf_blackdetect_vulkan.c new file mode 100644 index 0000000000000..4e977abe3d773 --- /dev/null +++ b/libavfilter/vf_blackdetect_vulkan.c @@ -0,0 +1,431 @@ +/* + * Copyright 2025 (c) Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <float.h> +#include "libavutil/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "libavutil/timestamp.h" +#include "vulkan_filter.h" + +#include "filters.h" +#include "video.h" + +typedef struct BlackDetectVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + AVBufferPool *sum_buf_pool; + + double black_min_duration_time; + double picture_black_ratio_th; + double pixel_black_th; + int alpha; + + int64_t black_start; + int64_t black_end; +} BlackDetectVulkanContext; + +typedef struct BlackDetectPushData { + float threshold; +} BlackDetectPushData; + +typedef struct BlackDetectBuf { +#define SLICES 16 + uint32_t slice_sum[SLICES]; +} BlackDetectBuf; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + BlackDetectVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + const int plane = s->alpha ? 3 : 0; + + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format); + if (pixdesc->flags & AV_PIX_FMT_FLAG_RGB) { + av_log(ctx, AV_LOG_ERROR, "RGB inputs are not supported\n"); + return AVERROR(ENOTSUP); + } + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "blackdetect", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_KHR_shader_subgroup_ballot" }, 1, + 32, 32, 1, + 0)); + shd = &s->shd; + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, float threshold; ); + GLSLC(0, }; ); + + ff_vk_shader_add_push_const(shd, 0, sizeof(BlackDetectPushData), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, { + .name = "sum_buffer", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "uint slice_sum[];", + } + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); + + GLSLC(0, shared uint wg_sum; ); + GLSLC(0, ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, wg_sum = 0u; ); + GLSLC(1, barrier(); ); + GLSLC(0, ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, if (!IS_WITHIN(pos, imageSize(input_img[%d]))) ,plane); + GLSLC(2, return; ); + GLSLF(1, float value = imageLoad(input_img[%d], pos).x; ,plane); + GLSLC(1, uvec4 isblack = subgroupBallot(value <= threshold); ); + GLSLC(1, if (subgroupElect()) ); + GLSLC(2, atomicAdd(wg_sum, subgroupBallotBitCount(isblack)); ); + GLSLC(1, barrier(); ); + GLSLC(1, if (gl_LocalInvocationIndex == 0u) ); + 
GLSLF(2, atomicAdd(slice_sum[gl_WorkGroupID.x %% %du], wg_sum); ,SLICES); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static void evaluate(AVFilterLink *link, AVFrame *in, + const BlackDetectBuf *sum) +{ + AVFilterContext *ctx = link->dst; + BlackDetectVulkanContext *s = ctx->priv; + FilterLink *inl = ff_filter_link(link); + uint64_t nb_black_pixels = 0; + double ratio; + + for (int i = 0; i < FF_ARRAY_ELEMS(sum->slice_sum); i++) + nb_black_pixels += sum->slice_sum[i]; + + ratio = (double) nb_black_pixels / (link->w * link->h); + + av_log(ctx, AV_LOG_DEBUG, + "frame:%"PRId64" picture_black_ratio:%f pts:%s t:%s type:%c\n", + inl->frame_count_out, ratio, + av_ts2str(in->pts), av_ts2timestr(in->pts, &in->time_base), + av_get_picture_type_char(in->pict_type)); + + if (ratio >= s->picture_black_ratio_th) { + if (s->black_start == AV_NOPTS_VALUE) { + s->black_start = in->pts; + av_dict_set(&in->metadata, "lavfi.black_start", + av_ts2timestr(in->pts, &in->time_base), 0); + } + } else if (s->black_start != AV_NOPTS_VALUE) { + av_dict_set(&in->metadata, "lavfi.black_end", + av_ts2timestr(in->pts, &in->time_base), 0); + if ((in->pts - s->black_start) >= s->black_min_duration_time / av_q2d(in->time_base)) { + av_log(s, AV_LOG_INFO, + "black_start:%s black_end:%s black_duration:%s\n", + av_ts2timestr(s->black_start, &in->time_base), + av_ts2timestr(in->pts, &in->time_base), + av_ts2timestr(in->pts - s->black_start, &in->time_base)); + } + s->black_start = AV_NOPTS_VALUE; + } +} + +static int blackdetect_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + BlackDetectVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[4]; + int nb_img_bar = 0; + + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + FFVkExecContext *exec = NULL; + AVBufferRef *sum_buf = NULL; + FFVkBuffer *sum_vk; + + BlackDetectBuf *sum; + BlackDetectPushData push_data; + + if (in->color_range == AVCOL_RANGE_JPEG || s->alpha) { + push_data.threshold = s->pixel_black_th; + } else { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(vkctx->input_format); + const int depth = desc->comp[0].depth; + const int ymin = 16 << (depth - 8); + const int ymax = 235 << (depth - 8); + const int imax = (1 << depth) - 1; + push_data.threshold = (s->pixel_black_th * (ymax - ymin) + ymin) / imax; + } + + if (!s->initialized) + RET(init_filter(ctx)); + + err = ff_vk_get_pooled_buffer(vkctx, &s->sum_buf_pool, &sum_buf, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + NULL, + sizeof(BlackDetectBuf), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + sum_vk = (FFVkBuffer *)sum_buf->data; + sum = (BlackDetectBuf *) sum_vk->mapped_mem; + + exec = ff_vk_exec_get(vkctx, &s->e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT)); + + 
ff_vk_shader_update_img_array(vkctx, exec, &s->shd, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* zero sum buffer */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_NONE, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = sum_vk->buf, + .size = sum_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + vk->CmdFillBuffer(exec->buf, sum_vk->buf, 0, sum_vk->size, 0x0); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = sum_vk->buf, + .size = sum_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd, 0, 1, 0, + sum_vk, 0, sum_vk->size, + VK_FORMAT_UNDEFINED)); + + ff_vk_exec_bind_shader(vkctx, exec, &s->shd); + ff_vk_shader_update_push_const(vkctx, exec, &s->shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(push_data), &push_data); + + vk->CmdDispatch(exec->buf, + FFALIGN(in->width, s->shd.lg_size[0]) / s->shd.lg_size[0], + FFALIGN(in->height, s->shd.lg_size[1]) / s->shd.lg_size[1], + s->shd.lg_size[2]); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT, + .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = sum_vk->buf, + .size = sum_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + RET(ff_vk_exec_submit(vkctx, exec)); + ff_vk_exec_wait(vkctx, exec); + evaluate(link, in, sum); + + av_buffer_unref(&sum_buf); + return ff_filter_frame(outlink, in); + +fail: + if (exec) + ff_vk_exec_discard_deps(&s->vkctx, exec); + av_frame_free(&in); + av_buffer_unref(&sum_buf); + return err; +} + +static void blackdetect_vulkan_uninit(AVFilterContext *avctx) +{ + BlackDetectVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + av_buffer_pool_uninit(&s->sum_buf_pool); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static int 
config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + BlackDetectVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(vkctx->input_format); + + if (s->alpha && !(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) { + av_log(ctx, AV_LOG_ERROR, "Input format %s does not have an alpha channel\n", + av_get_pix_fmt_name(vkctx->input_format)); + return AVERROR(EINVAL); + } + + if (desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_XYZ) || + !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) { + av_log(ctx, AV_LOG_ERROR, "Input format %s is not planar YUV\n", + av_get_pix_fmt_name(vkctx->input_format)); + return AVERROR(EINVAL); + } + + return ff_vk_filter_config_output(outlink); +} + +#define OFFSET(x) offsetof(BlackDetectVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption blackdetect_vulkan_options[] = { + { "d", "set minimum detected black duration in seconds", OFFSET(black_min_duration_time), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, DBL_MAX, FLAGS }, + { "black_min_duration", "set minimum detected black duration in seconds", OFFSET(black_min_duration_time), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, DBL_MAX, FLAGS }, + { "picture_black_ratio_th", "set the picture black ratio threshold", OFFSET(picture_black_ratio_th), AV_OPT_TYPE_DOUBLE, {.dbl=.98}, 0, 1, FLAGS }, + { "pic_th", "set the picture black ratio threshold", OFFSET(picture_black_ratio_th), AV_OPT_TYPE_DOUBLE, {.dbl=.98}, 0, 1, FLAGS }, + { "pixel_black_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, + { "pix_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, + { "alpha", "check alpha instead of luma", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(blackdetect_vulkan); + +static const AVFilterPad blackdetect_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &blackdetect_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad blackdetect_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &config_output, + }, +}; + +const FFFilter ff_vf_blackdetect_vulkan = { + .p.name = "blackdetect_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Detect video intervals that are (almost) black."), + .p.priv_class = &blackdetect_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(BlackDetectVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &blackdetect_vulkan_uninit, + FILTER_INPUTS(blackdetect_vulkan_inputs), + FILTER_OUTPUTS(blackdetect_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index 80b66de735b5e..fb676a7fc9981 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -171,7 +171,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, RET(ff_vk_shader_register_exec(&s->vkctx, &s->e, shd)); RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); RET(ff_vk_map_buffer(&s->vkctx, params_buf, &kernel_mapped, 0)); diff --git a/libavfilter/vf_interlace_vulkan.c b/libavfilter/vf_interlace_vulkan.c 
index b5cd321fef217..7afb30c2d76f6 100644 --- a/libavfilter/vf_interlace_vulkan.c +++ b/libavfilter/vf_interlace_vulkan.c @@ -189,7 +189,9 @@ static int interlace_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) AVFrame *out = NULL, *input_top, *input_bot; AVFilterContext *ctx = link->dst; InterlaceVulkanContext *s = ctx->priv; + const AVFilterLink *inlink = ctx->inputs[0]; AVFilterLink *outlink = ctx->outputs[0]; + FilterLink *l = ff_filter_link(outlink); if (!s->initialized) RET(init_filter(ctx)); @@ -226,6 +228,9 @@ static int interlace_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (s->mode == MODE_TFF) out->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; + out->pts = av_rescale_q(out->pts, inlink->time_base, outlink->time_base); + out->duration = av_rescale_q(1, av_inv_q(l->frame_rate), outlink->time_base); + av_frame_free(&s->cur); av_frame_free(&in); @@ -260,9 +265,12 @@ static void interlace_vulkan_uninit(AVFilterContext *avctx) static int config_out_props(AVFilterLink *outlink) { + AVFilterLink *inlink = outlink->src->inputs[0]; + const FilterLink *il = ff_filter_link(inlink); FilterLink *ol = ff_filter_link(outlink); - ol->frame_rate = av_mul_q(ol->frame_rate, av_make_q(1, 2)); + ol->frame_rate = av_mul_q(il->frame_rate, av_make_q(1, 2)); + outlink->time_base = av_mul_q(inlink->time_base, av_make_q(2, 1)); return ff_vk_filter_config_output(outlink); } diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c index 86e1f43dea8c2..9ff64053cc0ea 100644 --- a/libavfilter/vf_libplacebo.c +++ b/libavfilter/vf_libplacebo.c @@ -193,8 +193,14 @@ typedef struct LibplaceboContext { int color_range; int color_primaries; int color_trc; + int rotation; AVDictionary *extra_opts; +#if PL_API_VER >= 351 + pl_cache cache; + char *shader_cache; +#endif + int have_hwdevice; /* pl_render_params */ @@ -522,6 +528,21 @@ static int libplacebo_init(AVFilterContext *avctx) return AVERROR(ENOMEM); } +#if PL_API_VER >= 351 + if (s->shader_cache && s->shader_cache[0]) { + s->cache = pl_cache_create(pl_cache_params( + .log = s->log, + .get = pl_cache_get_file, + .set = pl_cache_set_file, + .priv = s->shader_cache, + )); + if (!s->cache) { + libplacebo_uninit(avctx); + return AVERROR(ENOMEM); + } + } +#endif + if (s->out_format_string) { s->out_format = av_get_pix_fmt(s->out_format_string); if (s->out_format == AV_PIX_FMT_NONE) { @@ -676,6 +697,9 @@ static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwct } s->gpu = s->vulkan->gpu; +#if PL_API_VER >= 351 + pl_gpu_set_cache(s->gpu, s->cache); +#endif /* Parse the user shaders, if requested */ if (s->shader_bin_len) @@ -714,6 +738,9 @@ static void libplacebo_uninit(AVFilterContext *avctx) av_freep(&s->inputs); } +#if PL_API_VER >= 351 + pl_cache_destroy(&s->cache); +#endif pl_options_free(&s->opts); pl_vulkan_destroy(&s->vulkan); pl_log_destroy(&s->log); @@ -802,6 +829,13 @@ static void update_crops(AVFilterContext *ctx, LibplaceboInput *in, image->crop.y0 = av_expr_eval(s->crop_y_pexpr, s->var_values, NULL); image->crop.x1 = image->crop.x0 + s->var_values[VAR_CROP_W]; image->crop.y1 = image->crop.y0 + s->var_values[VAR_CROP_H]; + image->rotation = s->rotation; + if (s->rotation % PL_ROTATION_180 == PL_ROTATION_90) { + /* Libplacebo expects the input crop relative to the actual frame + * dimensions, so un-transpose them here */ + FFSWAP(float, image->crop.x0, image->crop.y0); + FFSWAP(float, image->crop.x1, image->crop.y1); + } if (src == ref) { /* Only update the target crop once, for the 'reference' frame */ @@ -1198,6 
+1232,14 @@ static int libplacebo_config_input(AVFilterLink *inlink) AVFilterContext *avctx = inlink->dst; LibplaceboContext *s = avctx->priv; + if (s->rotation % PL_ROTATION_180 == PL_ROTATION_90) { + /* Swap width and height for 90 degree rotations to make the size and + * scaling calculations work out correctly */ + FFSWAP(int, inlink->w, inlink->h); + if (inlink->sample_aspect_ratio.num) + inlink->sample_aspect_ratio = av_inv_q(inlink->sample_aspect_ratio); + } + if (inlink->format == AV_PIX_FMT_VULKAN) return ff_vk_filter_config_input(inlink); @@ -1328,6 +1370,9 @@ static const AVOption libplacebo_options[] = { { "fillcolor", "Background fill color", OFFSET(fillcolor), AV_OPT_TYPE_COLOR, {.str = "black@0"}, .flags = DYNAMIC }, { "corner_rounding", "Corner rounding radius", OFFSET(corner_rounding), AV_OPT_TYPE_FLOAT, {.dbl = 0.0}, 0.0, 1.0, .flags = DYNAMIC }, { "extra_opts", "Pass extra libplacebo-specific options using a :-separated list of key=value pairs", OFFSET(extra_opts), AV_OPT_TYPE_DICT, .flags = DYNAMIC }, +#if PL_API_VER >= 351 + { "shader_cache", "Set shader cache path", OFFSET(shader_cache), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = STATIC }, +#endif {"colorspace", "select colorspace", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64=-1}, -1, AVCOL_SPC_NB-1, DYNAMIC, .unit = "colorspace"}, {"auto", "keep the same colorspace", 0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, STATIC, .unit = "colorspace"}, @@ -1386,6 +1431,13 @@ static const AVOption libplacebo_options[] = { {"smpte2084", NULL, 0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_SMPTE2084}, INT_MIN, INT_MAX, STATIC, .unit = "color_trc"}, {"arib-std-b67", NULL, 0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_ARIB_STD_B67}, INT_MIN, INT_MAX, STATIC, .unit = "color_trc"}, + {"rotate", "rotate the input clockwise", OFFSET(rotation), AV_OPT_TYPE_INT, {.i64=PL_ROTATION_0}, PL_ROTATION_0, PL_ROTATION_360, DYNAMIC, .unit = "rotation"}, + {"0", NULL, 0, AV_OPT_TYPE_CONST, {.i64=PL_ROTATION_0}, .flags = STATIC, .unit = "rotation"}, + {"90", NULL, 0, AV_OPT_TYPE_CONST, {.i64=PL_ROTATION_90}, .flags = STATIC, .unit = "rotation"}, + {"180", NULL, 0, AV_OPT_TYPE_CONST, {.i64=PL_ROTATION_180}, .flags = STATIC, .unit = "rotation"}, + {"270", NULL, 0, AV_OPT_TYPE_CONST, {.i64=PL_ROTATION_270}, .flags = STATIC, .unit = "rotation"}, + {"360", NULL, 0, AV_OPT_TYPE_CONST, {.i64=PL_ROTATION_360}, .flags = STATIC, .unit = "rotation"}, + { "upscaler", "Upscaler function", OFFSET(upscaler), AV_OPT_TYPE_STRING, {.str = "spline36"}, .flags = DYNAMIC }, { "downscaler", "Downscaler function", OFFSET(downscaler), AV_OPT_TYPE_STRING, {.str = "mitchell"}, .flags = DYNAMIC }, { "frame_mixer", "Frame mixing function", OFFSET(frame_mixer), AV_OPT_TYPE_STRING, {.str = "none"}, .flags = DYNAMIC }, diff --git a/libavfilter/vf_scdet_vulkan.c b/libavfilter/vf_scdet_vulkan.c new file mode 100644 index 0000000000000..fadc0842aeb83 --- /dev/null +++ b/libavfilter/vf_scdet_vulkan.c @@ -0,0 +1,412 @@ +/* + * Copyright 2025 (c) Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "libavutil/timestamp.h" +#include "vulkan_filter.h" + +#include "filters.h" + +typedef struct SceneDetectVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + AVBufferPool *det_buf_pool; + + double threshold; + int sc_pass; + + int nb_planes; + double prev_mafd; + AVFrame *prev; + AVFrame *cur; +} SceneDetectVulkanContext; + +typedef struct SceneDetectBuf { +#define SLICES 16 + uint32_t frame_sad[SLICES]; +} SceneDetectBuf; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + SceneDetectVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format); + const int lumaonly = !(pixdesc->flags & AV_PIX_FMT_FLAG_RGB) && + (pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR); + s->nb_planes = lumaonly ? 1 : av_pix_fmt_count_planes(s->vkctx.input_format); + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "scdet", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_KHR_shader_subgroup_arithmetic" }, 1, + 32, 32, 1, + 0)); + shd = &s->shd; + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "prev_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_UINT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, { + .name = "cur_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_UINT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, { + .name = "sad_buffer", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "uint frame_sad[];", + } + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); + + GLSLC(0, shared uint wg_sum; ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLF(1, const uint slice = gl_WorkGroupID.x %% %u; ,SLICES); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(1, wg_sum = 0; ); + GLSLC(1, barrier(); ); + for (int i = 0; i < s->nb_planes; i++) { + GLSLF(1, if (IS_WITHIN(pos, imageSize(cur_img[%d]))) { ,i); + GLSLF(2, uvec4 prev = imageLoad(prev_img[%d], pos); ,i); + GLSLF(2, uvec4 cur = imageLoad(cur_img[%d], pos); ,i); + GLSLC(2, uvec4 sad = abs(ivec4(cur) - ivec4(prev)); ); + GLSLC(2, uint sum = subgroupAdd(sad.x + sad.y + sad.z); ); + 
GLSLC(2, if (subgroupElect()) ); + GLSLC(3, atomicAdd(wg_sum, sum); ); + GLSLC(1, } ); + } + GLSLC(1, barrier(); ); + GLSLC(1, if (gl_LocalInvocationIndex == 0) ); + GLSLC(2, atomicAdd(frame_sad[slice], wg_sum); ); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static double evaluate(AVFilterContext *ctx, const SceneDetectBuf *buf) +{ + SceneDetectVulkanContext *s = ctx->priv; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->vkctx.input_format); + const AVFilterLink *inlink = ctx->inputs[0]; + uint64_t count; + double mafd, diff; + + uint64_t sad = 0; + for (int i = 0; i < SLICES; i++) + sad += buf->frame_sad[i]; + + av_assert2(s->nb_planes == 1 || !(desc->log2_chroma_w || desc->log2_chroma_h)); + count = s->nb_planes * inlink->w * inlink->h; + mafd = (double) sad * 100.0 / count / (1ULL << desc->comp[0].depth); + diff = fabs(mafd - s->prev_mafd); + s->prev_mafd = mafd; + + return av_clipf(FFMIN(mafd, diff), 0.0, 100.0); +} + +static int scdet_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + SceneDetectVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + VkImageView prev_views[AV_NUM_DATA_POINTERS]; + VkImageView cur_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[8]; + int nb_img_bar = 0; + + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + FFVkExecContext *exec = NULL; + AVBufferRef *buf = NULL; + FFVkBuffer *buf_vk; + + SceneDetectBuf *sad; + double score = 0.0; + char str[64]; + + if (!s->initialized) + RET(init_filter(ctx)); + + av_frame_free(&s->prev); + s->prev = s->cur; + s->cur = av_frame_clone(in); + if (!s->prev) + goto done; + + RET(ff_vk_get_pooled_buffer(vkctx, &s->det_buf_pool, &buf, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + NULL, + sizeof(SceneDetectBuf), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); + buf_vk = (FFVkBuffer *)buf->data; + sad = (SceneDetectBuf *) buf_vk->mapped_mem; + + exec = ff_vk_exec_get(vkctx, &s->e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->prev, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, prev_views, s->prev, FF_VK_REP_UINT)); + + ff_vk_shader_update_img_array(vkctx, exec, &s->shd, s->prev, prev_views, 0, 0, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + + ff_vk_frame_barrier(vkctx, exec, s->prev, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->cur, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, cur_views, s->cur, FF_VK_REP_UINT)); + + ff_vk_shader_update_img_array(vkctx, exec, &s->shd, s->cur, cur_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + + ff_vk_frame_barrier(vkctx, exec, s->cur, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + 
VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* zero buffer */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_NONE, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buf_vk->buf, + .size = buf_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + vk->CmdFillBuffer(exec->buf, buf_vk->buf, 0, buf_vk->size, 0x0); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buf_vk->buf, + .size = buf_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd, 0, 2, 0, + buf_vk, 0, buf_vk->size, + VK_FORMAT_UNDEFINED)); + + ff_vk_exec_bind_shader(vkctx, exec, &s->shd); + + vk->CmdDispatch(exec->buf, + FFALIGN(in->width, s->shd.lg_size[0]) / s->shd.lg_size[0], + FFALIGN(in->height, s->shd.lg_size[1]) / s->shd.lg_size[1], + s->shd.lg_size[2]); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT, + .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buf_vk->buf, + .size = buf_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + RET(ff_vk_exec_submit(vkctx, exec)); + ff_vk_exec_wait(vkctx, exec); + score = evaluate(ctx, sad); + +done: + snprintf(str, sizeof(str), "%0.3f", s->prev_mafd); + av_dict_set(&in->metadata, "lavfi.scd.mafd", str, 0); + snprintf(str, sizeof(str), "%0.3f", score); + av_dict_set(&in->metadata, "lavfi.scd.score", str, 0); + + if (score >= s->threshold) { + const char *pts = av_ts2timestr(in->pts, &link->time_base); + av_dict_set(&in->metadata, "lavfi.scd.time", pts, 0); + av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time: %s\n", + score, pts); + } + + av_buffer_unref(&buf); + if (!s->sc_pass || score >= s->threshold) + return ff_filter_frame(outlink, in); + else { + av_frame_free(&in); + return 0; + } + +fail: + if (exec) + ff_vk_exec_discard_deps(&s->vkctx, exec); + av_frame_free(&in); + av_buffer_unref(&buf); + return err; +} + +static void scdet_vulkan_uninit(AVFilterContext *avctx) +{ + SceneDetectVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + av_frame_free(&s->prev); + 
av_frame_free(&s->cur); + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + av_buffer_pool_uninit(&s->det_buf_pool); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(SceneDetectVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption scdet_vulkan_options[] = { + { "threshold", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., FLAGS }, + { "t", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., FLAGS }, + { "sc_pass", "Set the flag to pass scene change frames", OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, + { "s", "Set the flag to pass scene change frames", OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(scdet_vulkan); + +static const AVFilterPad scdet_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &scdet_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad scdet_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, +}; + +const FFFilter ff_vf_scdet_vulkan = { + .p.name = "scdet_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Detect video scene change"), + .p.priv_class = &scdet_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(SceneDetectVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &scdet_vulkan_uninit, + FILTER_INPUTS(scdet_vulkan_inputs), + FILTER_OUTPUTS(scdet_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c index 498660d7d0121..f8e5727bfcfa2 100644 --- a/libavfilter/x86/vf_spp.c +++ b/libavfilter/x86/vf_spp.c @@ -21,159 +21,9 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" -#include "libavutil/crc.h" -#include "libavutil/x86/asm.h" #include "libavfilter/vf_spp.h" #if HAVE_MMX_INLINE -static void hardthresh_mmx(int16_t dst[64], const int16_t src[64], - int qp, const uint8_t *permutation) -{ - int bias = 0; //FIXME - unsigned int threshold1; - - threshold1 = qp * ((1<<4) - bias) - 1; - -#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ - "movq " #src0 ", %%mm0 \n" \ - "movq " #src1 ", %%mm1 \n" \ - "movq " #src2 ", %%mm2 \n" \ - "movq " #src3 ", %%mm3 \n" \ - "psubw %%mm4, %%mm0 \n" \ - "psubw %%mm4, %%mm1 \n" \ - "psubw %%mm4, %%mm2 \n" \ - "psubw %%mm4, %%mm3 \n" \ - "paddusw %%mm5, %%mm0 \n" \ - "paddusw %%mm5, %%mm1 \n" \ - "paddusw %%mm5, %%mm2 \n" \ - "paddusw %%mm5, %%mm3 \n" \ - "paddw %%mm6, %%mm0 \n" \ - "paddw %%mm6, %%mm1 \n" \ - "paddw %%mm6, %%mm2 \n" \ - "paddw %%mm6, %%mm3 \n" \ - "psubusw %%mm6, %%mm0 \n" \ - "psubusw %%mm6, %%mm1 \n" \ - "psubusw %%mm6, %%mm2 \n" \ - "psubusw %%mm6, %%mm3 \n" \ - "psraw $3, %%mm0 \n" \ - "psraw $3, %%mm1 \n" \ - "psraw $3, %%mm2 \n" \ - "psraw $3, %%mm3 \n" \ - \ - "movq %%mm0, %%mm7 \n" \ - "punpcklwd %%mm2, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm2, %%mm7 \n" /*C*/ \ - "movq %%mm1, %%mm2 \n" \ - "punpcklwd %%mm3, %%mm1 \n" /*B*/ \ - "punpckhwd %%mm3, %%mm2 \n" /*D*/ \ - "movq %%mm0, %%mm3 \n" \ - "punpcklwd %%mm1, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm7, %%mm3 \n" /*C*/ \ - "punpcklwd %%mm2, %%mm7 \n" /*B*/ \ - "punpckhwd %%mm2, %%mm1 \n" /*D*/ \ - \ - "movq %%mm0, " #dst0 " \n" \ - "movq 
%%mm7, " #dst1 " \n" \ - "movq %%mm3, " #dst2 " \n" \ - "movq %%mm1, " #dst3 " \n" - - __asm__ volatile( - "movd %2, %%mm4 \n" - "movd %3, %%mm5 \n" - "movd %4, %%mm6 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - "packssdw %%mm6, %%mm6 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - "packssdw %%mm6, %%mm6 \n" - REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0)) - REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0)) - REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0)) - REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0)) - : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate then needed? - ); - dst[0] = (src[0] + 4) >> 3; -} - -static void softthresh_mmx(int16_t dst[64], const int16_t src[64], - int qp, const uint8_t *permutation) -{ - int bias = 0; //FIXME - unsigned int threshold1; - - threshold1 = qp*((1<<4) - bias) - 1; - -#undef REQUANT_CORE -#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ - "movq " #src0 ", %%mm0 \n" \ - "movq " #src1 ", %%mm1 \n" \ - "pxor %%mm6, %%mm6 \n" \ - "pxor %%mm7, %%mm7 \n" \ - "pcmpgtw %%mm0, %%mm6 \n" \ - "pcmpgtw %%mm1, %%mm7 \n" \ - "pxor %%mm6, %%mm0 \n" \ - "pxor %%mm7, %%mm1 \n" \ - "psubusw %%mm4, %%mm0 \n" \ - "psubusw %%mm4, %%mm1 \n" \ - "pxor %%mm6, %%mm0 \n" \ - "pxor %%mm7, %%mm1 \n" \ - "movq " #src2 ", %%mm2 \n" \ - "movq " #src3 ", %%mm3 \n" \ - "pxor %%mm6, %%mm6 \n" \ - "pxor %%mm7, %%mm7 \n" \ - "pcmpgtw %%mm2, %%mm6 \n" \ - "pcmpgtw %%mm3, %%mm7 \n" \ - "pxor %%mm6, %%mm2 \n" \ - "pxor %%mm7, %%mm3 \n" \ - "psubusw %%mm4, %%mm2 \n" \ - "psubusw %%mm4, %%mm3 \n" \ - "pxor %%mm6, %%mm2 \n" \ - "pxor %%mm7, %%mm3 \n" \ - \ - "paddsw %%mm5, %%mm0 \n" \ - "paddsw %%mm5, %%mm1 \n" \ - "paddsw %%mm5, %%mm2 \n" \ - "paddsw %%mm5, %%mm3 \n" \ - "psraw $3, %%mm0 \n" \ - "psraw $3, %%mm1 \n" \ - "psraw $3, %%mm2 \n" \ - "psraw $3, %%mm3 \n" \ - \ - "movq %%mm0, %%mm7 \n" \ - "punpcklwd %%mm2, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm2, %%mm7 \n" /*C*/ \ - "movq %%mm1, %%mm2 \n" \ - "punpcklwd %%mm3, %%mm1 \n" /*B*/ \ - "punpckhwd %%mm3, %%mm2 \n" /*D*/ \ - "movq %%mm0, %%mm3 \n" \ - "punpcklwd %%mm1, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm7, %%mm3 \n" /*C*/ \ - "punpcklwd %%mm2, %%mm7 \n" /*B*/ \ - "punpckhwd %%mm2, %%mm1 \n" /*D*/ \ - \ - "movq %%mm0, " #dst0 " \n" \ - "movq %%mm7, " #dst1 " \n" \ - "movq %%mm3, " #dst2 " \n" \ - "movq %%mm1, " #dst3 " \n" - - __asm__ volatile( - "movd %2, %%mm4 \n" - "movd %3, %%mm5 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0)) - REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0)) - REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0)) - REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0)) - : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate then needed? 
- ); - - dst[0] = (src[0] + 4) >> 3; -} - static void store_slice_mmx(uint8_t *dst, const int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale, @@ -223,20 +73,7 @@ av_cold void ff_spp_init_x86(SPPContext *s) int cpu_flags = av_get_cpu_flags(); if (cpu_flags & AV_CPU_FLAG_MMX) { - static const uint32_t mmx_idct_perm_crc = 0xe5e8adc4; - uint32_t idct_perm_crc = - av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0, - s->dct->idct_permutation, - sizeof(s->dct->idct_permutation)); - int64_t bps; s->store_slice = store_slice_mmx; - av_opt_get_int(s->dct, "bits_per_sample", 0, &bps); - if (bps <= 8 && idct_perm_crc == mmx_idct_perm_crc) { - switch (s->mode) { - case 0: s->requantize = hardthresh_mmx; break; - case 1: s->requantize = softthresh_mmx; break; - } - } } #endif } diff --git a/libavformat/Makefile b/libavformat/Makefile index 6c9992adab689..9884b4a4cb5a7 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -62,6 +62,7 @@ OBJS-$(CONFIG_RTPDEC) += rdt.o \ rtpdec_mpeg12.o \ rtpdec_mpeg4.o \ rtpdec_mpegts.o \ + rtpdec_opus.o \ rtpdec_qcelp.o \ rtpdec_qdm2.o \ rtpdec_qt.o \ @@ -637,6 +638,7 @@ OBJS-$(CONFIG_WEBM_CHUNK_MUXER) += webm_chunk.o OBJS-$(CONFIG_WEBP_MUXER) += webpenc.o OBJS-$(CONFIG_WEBVTT_DEMUXER) += webvttdec.o subtitles.o OBJS-$(CONFIG_WEBVTT_MUXER) += webvttenc.o +OBJS-$(CONFIG_WHIP_MUXER) += whip.o avc.o http.o srtp.o tls_openssl.o OBJS-$(CONFIG_WSAUD_DEMUXER) += westwood_aud.o OBJS-$(CONFIG_WSAUD_MUXER) += westwood_audenc.o OBJS-$(CONFIG_WSD_DEMUXER) += wsddec.o rawdec.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index b5a23f9c179be..17215d733ded7 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -94,7 +94,6 @@ extern const FFInputFormat ff_av1_demuxer; extern const FFInputFormat ff_avi_demuxer; extern const FFOutputFormat ff_avi_muxer; extern const FFOutputFormat ff_avif_muxer; -extern const FFInputFormat ff_avisynth_demuxer; extern const FFOutputFormat ff_avm2_muxer; extern const FFInputFormat ff_avr_demuxer; extern const FFInputFormat ff_avs_demuxer; @@ -155,7 +154,6 @@ extern const FFInputFormat ff_dv_demuxer; extern const FFOutputFormat ff_dv_muxer; extern const FFInputFormat ff_dvbsub_demuxer; extern const FFInputFormat ff_dvbtxt_demuxer; -extern const FFInputFormat ff_dvdvideo_demuxer; extern const FFInputFormat ff_dxa_demuxer; extern const FFInputFormat ff_ea_demuxer; extern const FFInputFormat ff_ea_cdata_demuxer; @@ -517,6 +515,7 @@ extern const FFOutputFormat ff_webp_muxer; extern const FFInputFormat ff_webvtt_demuxer; extern const FFOutputFormat ff_webvtt_muxer; extern const FFInputFormat ff_wsaud_demuxer; +extern const FFOutputFormat ff_whip_muxer; extern const FFOutputFormat ff_wsaud_muxer; extern const FFInputFormat ff_wsd_demuxer; extern const FFInputFormat ff_wsvqa_demuxer; @@ -573,7 +572,9 @@ extern const FFInputFormat ff_image_xpm_pipe_demuxer; extern const FFInputFormat ff_image_xwd_pipe_demuxer; /* external libraries */ +extern const FFInputFormat ff_avisynth_demuxer; extern const FFOutputFormat ff_chromaprint_muxer; +extern const FFInputFormat ff_dvdvideo_demuxer; extern const FFInputFormat ff_libgme_demuxer; extern const FFInputFormat ff_libmodplug_demuxer; extern const FFInputFormat ff_libopenmpt_demuxer; diff --git a/libavformat/avformat.h b/libavformat/avformat.h index 498c3020a5852..2034d2aecc14f 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h @@ -1870,10 +1870,6 @@ typedef struct AVFormatContext { /** * A callback for closing the streams opened with 
AVFormatContext.io_open(). * - * Using this is preferred over io_close, because this can return an error. - * Therefore this callback is used instead of io_close by the generic - * libavformat code if io_close is NULL or the default. - * * @param s the format context * @param pb IO context to be closed and freed * @return 0 on success, a negative AVERROR code on failure diff --git a/libavformat/avio.c b/libavformat/avio.c index d109f3adff03d..b146ac9f19234 100644 --- a/libavformat/avio.c +++ b/libavformat/avio.c @@ -339,8 +339,9 @@ static const struct URLProtocol *url_find_protocol(const char *filename) } } av_freep(&protocols); - if (av_strstart(filename, "https:", NULL) || av_strstart(filename, "tls:", NULL)) - av_log(NULL, AV_LOG_WARNING, "https protocol not found, recompile FFmpeg with " + if (av_strstart(filename, "https:", NULL) || av_strstart(filename, "tls:", NULL) || + av_strstart(filename, "dtls:", NULL)) + av_log(NULL, AV_LOG_WARNING, "https or dtls protocol not found, recompile FFmpeg with " "openssl, gnutls or securetransport enabled.\n"); return NULL; diff --git a/libavformat/demux.c b/libavformat/demux.c index 2795863567361..ecd4f40da9bcc 100644 --- a/libavformat/demux.c +++ b/libavformat/demux.c @@ -383,11 +383,10 @@ void avformat_close_input(AVFormatContext **ps) if (ffifmt(s->iformat)->read_close) ffifmt(s->iformat)->read_close(s); + ff_format_io_close(s, &pb); avformat_free_context(s); *ps = NULL; - - avio_close(pb); } static void force_codec_ids(AVFormatContext *s, AVStream *st) diff --git a/libavformat/dhav.c b/libavformat/dhav.c index b2ead99609cb5..d9db775802d36 100644 --- a/libavformat/dhav.c +++ b/libavformat/dhav.c @@ -22,6 +22,7 @@ #include <time.h> +#include "libavutil/intreadwrite.h" #include "libavutil/mem.h" #include "libavutil/parseutils.h" #include "avio_internal.h" @@ -232,37 +233,60 @@ static void get_timeinfo(unsigned date, struct tm *timeinfo) timeinfo->tm_sec = sec; } +#define MAX_DURATION_BUFFER_SIZE (1024*1024) + static int64_t get_duration(AVFormatContext *s) { - DHAVContext *dhav = s->priv_data; int64_t start_pos = avio_tell(s->pb); + int64_t end_pos = -1; int64_t start = 0, end = 0; struct tm timeinfo; - int max_interations = 100000; + uint8_t *end_buffer; + int64_t end_buffer_size; + int64_t end_buffer_pos; + int64_t offset; + unsigned date; if (!s->pb->seekable) return 0; - avio_seek(s->pb, avio_size(s->pb) - 8, SEEK_SET); - while (avio_tell(s->pb) > 12 && max_interations--) { - if (avio_rl32(s->pb) == MKTAG('d','h','a','v')) { - int64_t seek_back = avio_rl32(s->pb); + if (start_pos + 16 > avio_size(s->pb)) return 0; - avio_seek(s->pb, -seek_back, SEEK_CUR); - read_chunk(s); - get_timeinfo(dhav->date, &timeinfo); - end = av_timegm(&timeinfo) * 1000LL; + avio_skip(s->pb, 16); + date = avio_rl32(s->pb); + get_timeinfo(date, &timeinfo); + start = av_timegm(&timeinfo) * 1000LL; + + end_buffer_size = FFMIN(MAX_DURATION_BUFFER_SIZE, avio_size(s->pb)); + end_buffer = av_malloc(end_buffer_size); + if (!end_buffer) { + avio_seek(s->pb, start_pos, SEEK_SET); + return 0; + } + end_buffer_pos = avio_size(s->pb) - end_buffer_size; + avio_seek(s->pb, end_buffer_pos, SEEK_SET); + avio_read(s->pb, end_buffer, end_buffer_size); + + offset = end_buffer_size - 8; + while (offset > 0) { + if (AV_RL32(end_buffer + offset) == MKTAG('d','h','a','v')) { + int64_t seek_back = AV_RL32(end_buffer + offset + 4); + end_pos = end_buffer_pos + offset - seek_back + 8; break; } else { - avio_seek(s->pb, -12, SEEK_CUR); + offset -= 9; } } - avio_seek(s->pb, start_pos, SEEK_SET); + if 
(end_pos < 0 || end_pos + 16 > end_buffer_pos + end_buffer_size) { + avio_seek(s->pb, start_pos, SEEK_SET); + return 0; + } - read_chunk(s); - get_timeinfo(dhav->date, &timeinfo); - start = av_timegm(&timeinfo) * 1000LL; + date = AV_RL32(end_buffer + (end_pos - end_buffer_pos) + 16); + get_timeinfo(date, &timeinfo); + end = av_timegm(&timeinfo) * 1000LL; avio_seek(s->pb, start_pos, SEEK_SET); diff --git a/libavformat/http.c b/libavformat/http.c index f7b2a8a02933d..ff63c259699a8 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -562,6 +562,12 @@ int ff_http_averror(int status_code, int default_averror) return default_averror; } +const char* ff_http_get_new_location(URLContext *h) +{ + HTTPContext *s = h->priv_data; + return s->new_location; +} + static int http_write_reply(URLContext* h, int status_code) { int ret, body = 0, reply_code, message_len; diff --git a/libavformat/http.h b/libavformat/http.h index 5f650ef143f77..d1b691826bf3b 100644 --- a/libavformat/http.h +++ b/libavformat/http.h @@ -62,4 +62,6 @@ int ff_http_do_new_request2(URLContext *h, const char *uri, AVDictionary **optio int ff_http_averror(int status_code, int default_averror); +const char* ff_http_get_new_location(URLContext *h); + #endif /* AVFORMAT_HTTP_H */ diff --git a/libavformat/imfdec.c b/libavformat/imfdec.c index a86b4763ff888..b4df37daa3574 100644 --- a/libavformat/imfdec.c +++ b/libavformat/imfdec.c @@ -380,6 +380,7 @@ static int open_track_resource_context(AVFormatContext *s, track_resource->ctx->io_open = s->io_open; track_resource->ctx->io_close2 = s->io_close2; + track_resource->ctx->opaque = s->opaque; track_resource->ctx->flags |= s->flags & ~AVFMT_FLAG_CUSTOM_IO; if ((ret = ff_copy_whiteblacklists(track_resource->ctx, s)) < 0) diff --git a/libavformat/matroska.c b/libavformat/matroska.c index bbad9a7f549a0..60584e268731d 100644 --- a/libavformat/matroska.c +++ b/libavformat/matroska.c @@ -82,6 +82,7 @@ const CodecTags ff_mkv_codec_tags[]={ {"V_AVS3" , AV_CODEC_ID_AVS3}, {"V_DIRAC" , AV_CODEC_ID_DIRAC}, {"V_FFV1" , AV_CODEC_ID_FFV1}, + {"V_JPEG2000" , AV_CODEC_ID_JPEG2000}, {"V_MJPEG" , AV_CODEC_ID_MJPEG}, {"V_MPEG1" , AV_CODEC_ID_MPEG1VIDEO}, {"V_MPEG2" , AV_CODEC_ID_MPEG2VIDEO}, diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index 29e35e6dd4db0..da5166319e90d 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -2877,6 +2877,11 @@ static int mkv_parse_video_codec(MatroskaTrack *track, AVCodecParameters *par, { if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC") && track->codec_priv.size >= 40) { + uint32_t size = AV_RL32A(track->codec_priv.data); + // VFW extradata is padded to an even length, yet + // the size field contains the real length. 
+ if (size & 1 && size == track->codec_priv.size - 1) + --track->codec_priv.size; track->ms_compat = 1; par->bits_per_coded_sample = AV_RL16(track->codec_priv.data + 14); par->codec_tag = AV_RL32(track->codec_priv.data + 16); @@ -3824,9 +3829,6 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska, text_len = len; } - if (text_len <= 0) - return AVERROR_INVALIDDATA; - err = av_new_packet(pkt, text_len); if (err < 0) { return err; diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 6d0d791f180ee..408890fa89914 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -1960,8 +1960,8 @@ static int mkv_write_track(AVFormatContext *s, MatroskaMuxContext *mkv, // look for a codec ID string specific to mkv to use, // if none are found, use AVI codes - if (par->codec_id == AV_CODEC_ID_FFV1) { - /* FFV1 is actually supported natively in Matroska, + if (par->codec_id == AV_CODEC_ID_JPEG2000) { + /* JPEG2000 is actually supported natively in Matroska, * yet we use the VfW way to mux it for compatibility * with old demuxers. (FIXME: Are they really important?) */ } else if (par->codec_id != AV_CODEC_ID_RAWVIDEO || par->codec_tag) { diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 4bc8bd1b2ab76..402611e81ed1e 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -3966,7 +3966,7 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov, int flags = 0; if (track->entry) { - if (start_dts != track->cluster[0].dts || start_ct != track->cluster[0].cts) { + if (start_dts != track->cluster[0].dts || (start_ct != track->cluster[0].cts && track->cluster[0].dts >= 0)) { av_log(mov->fc, AV_LOG_DEBUG, "EDTS using dts:%"PRId64" cts:%d instead of dts:%"PRId64" cts:%"PRId64" tid:%d\n", @@ -6504,14 +6504,14 @@ static int mov_flush_fragment(AVFormatContext *s, int force) av_rescale(mov->tracks[first_track].cluster[0].dts, AV_TIME_BASE, mov->tracks[first_track].timescale), (has_video ? starts_with_key : mov->tracks[first_track].cluster[0].flags & MOV_SYNC_SAMPLE) ? 
AVIO_DATA_MARKER_SYNC_POINT : AVIO_DATA_MARKER_BOUNDARY_POINT); - for (i = 0; i < mov->nb_tracks; i++) { + for (i = first_track; i < mov->nb_tracks; i++) { MOVTrack *track = &mov->tracks[i]; int buf_size, write_moof = 1, moof_tracks = -1; uint8_t *buf; + if (!track->entry) + continue; if (mov->flags & FF_MOV_FLAG_SEPARATE_MOOF) { - if (!track->entry) - continue; mdat_size = avio_tell(track->mdat_buf); moof_tracks = i; } else { @@ -6928,7 +6928,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) trk->flags |= MOV_TRACK_CTTS; trk->cluster[trk->entry].cts = pkt->pts - pkt->dts; trk->cluster[trk->entry].flags = 0; - if (trk->start_cts == AV_NOPTS_VALUE) + if (trk->start_cts == AV_NOPTS_VALUE || (pkt->dts <= 0 && trk->start_cts > pkt->pts - pkt->dts)) trk->start_cts = pkt->pts - pkt->dts; if (trk->end_pts == AV_NOPTS_VALUE) trk->end_pts = trk->cluster[trk->entry].dts + @@ -7731,6 +7731,12 @@ static int mov_init(AVFormatContext *s) FF_MOV_FLAG_FRAG_EVERY_FRAME)) mov->flags |= FF_MOV_FLAG_FRAGMENT; + if (mov->flags & FF_MOV_FLAG_HYBRID_FRAGMENTED && + mov->flags & FF_MOV_FLAG_FASTSTART) { + av_log(s, AV_LOG_ERROR, "Setting both hybrid_fragmented and faststart is not supported.\n"); + return AVERROR(EINVAL); + } + /* Set other implicit flags immediately */ if (mov->flags & FF_MOV_FLAG_HYBRID_FRAGMENTED) mov->flags |= FF_MOV_FLAG_FRAGMENT; diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index 54594b3a11bae..deb69a0548bdd 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -940,6 +940,8 @@ static int mpegts_set_stream_info(AVStream *st, PESContext *pes, mpegts_find_stream_type(st, pes->stream_type, ISO_types); if (pes->stream_type == STREAM_TYPE_AUDIO_MPEG2 || pes->stream_type == STREAM_TYPE_AUDIO_AAC) sti->request_probe = 50; + if (pes->stream_type == STREAM_TYPE_PRIVATE_DATA) + sti->request_probe = AVPROBE_SCORE_STREAM_RETRY; if ((prog_reg_desc == AV_RL32("HDMV") || prog_reg_desc == AV_RL32("HDPR")) && st->codecpar->codec_id == AV_CODEC_ID_NONE) { @@ -2508,7 +2510,7 @@ static void pmt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len if (!st) goto out; - if (pes && !pes->stream_type) + if (pes && pes->stream_type != stream_type) mpegts_set_stream_info(st, pes, stream_type, prog_reg_desc); add_pid_to_program(prg, pid); diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c index 9baf8040a9017..da3ef815db237 100644 --- a/libavformat/oggdec.c +++ b/libavformat/oggdec.c @@ -77,6 +77,7 @@ static void free_stream(AVFormatContext *s, int i) av_freep(&stream->private); av_freep(&stream->new_metadata); + av_freep(&stream->new_extradata); } //FIXME We could avoid some structure duplication @@ -239,10 +240,6 @@ static int ogg_replace_stream(AVFormatContext *s, uint32_t serial, char *magic, os->start_trimming = 0; os->end_trimming = 0; - /* Chained files have extradata as a new packet */ - if (codec == &ff_opus_codec) - os->header = -1; - return i; } @@ -892,6 +889,16 @@ static int ogg_read_packet(AVFormatContext *s, AVPacket *pkt) os->new_metadata_size = 0; } + if (os->new_extradata) { + ret = av_packet_add_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, + os->new_extradata, os->new_extradata_size); + if (ret < 0) + return ret; + + os->new_extradata = NULL; + os->new_extradata_size = 0; + } + return psize; } diff --git a/libavformat/oggdec.h b/libavformat/oggdec.h index bc670d0f1e58e..c15fbe738eb4a 100644 --- a/libavformat/oggdec.h +++ b/libavformat/oggdec.h @@ -42,8 +42,8 @@ struct ogg_codec { * Attempt to process a packet as a data packet * @return < 0 (AVERROR) 
code or -1 on error * == 0 if the packet was a regular data packet. - * == 0 or 1 if the packet was a header from a chained bitstream. - * (1 will cause the packet to be skiped in calling code (ogg_packet()) + * == 1 if the packet was a header from a chained bitstream. + * This will cause the packet to be skiped in calling code (ogg_packet() */ int (*packet)(AVFormatContext *, int); /** @@ -94,6 +94,8 @@ struct ogg_stream { int end_trimming; ///< set the number of packets to drop from the end uint8_t *new_metadata; size_t new_metadata_size; + uint8_t *new_extradata; + size_t new_extradata_size; void *private; }; diff --git a/libavformat/oggparseflac.c b/libavformat/oggparseflac.c index f25ed9cc15544..d66b85b09e833 100644 --- a/libavformat/oggparseflac.c +++ b/libavformat/oggparseflac.c @@ -27,6 +27,8 @@ #include "oggdec.h" #define OGG_FLAC_METADATA_TYPE_STREAMINFO 0x7F +#define OGG_FLAC_MAGIC "\177FLAC" +#define OGG_FLAC_MAGIC_SIZE sizeof(OGG_FLAC_MAGIC)-1 static int flac_header (AVFormatContext * s, int idx) @@ -78,6 +80,27 @@ flac_header (AVFormatContext * s, int idx) return 1; } +static int +flac_packet (AVFormatContext * s, int idx) +{ + struct ogg *ogg = s->priv_data; + struct ogg_stream *os = ogg->streams + idx; + + if (os->psize > OGG_FLAC_MAGIC_SIZE && + !memcmp( + os->buf + os->pstart, + OGG_FLAC_MAGIC, + OGG_FLAC_MAGIC_SIZE)) + return 1; + + if (os->psize > 0 && + ((os->buf[os->pstart] & 0x7F) == FLAC_METADATA_TYPE_VORBIS_COMMENT)) { + return 1; + } + + return 0; +} + static int old_flac_header (AVFormatContext * s, int idx) { @@ -127,10 +150,11 @@ old_flac_header (AVFormatContext * s, int idx) } const struct ogg_codec ff_flac_codec = { - .magic = "\177FLAC", - .magicsize = 5, + .magic = OGG_FLAC_MAGIC, + .magicsize = OGG_FLAC_MAGIC_SIZE, .header = flac_header, .nb_header = 2, + .packet = flac_packet, }; const struct ogg_codec ff_old_flac_codec = { diff --git a/libavformat/oggparseopus.c b/libavformat/oggparseopus.c index 218e9df581406..65b93b405324c 100644 --- a/libavformat/oggparseopus.c +++ b/libavformat/oggparseopus.c @@ -36,6 +36,51 @@ struct oggopus_private { #define OPUS_SEEK_PREROLL_MS 80 #define OPUS_HEAD_SIZE 19 +static int parse_opus_header(AVFormatContext *avf, AVStream *st, struct ogg_stream *os, + struct oggopus_private *priv, uint8_t *packet, + size_t psize) +{ + int channels; + int ret; + + if (psize < OPUS_HEAD_SIZE || (AV_RL8(packet + 8) & 0xF0) != 0) + return AVERROR_INVALIDDATA; + + st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + st->codecpar->codec_id = AV_CODEC_ID_OPUS; + + channels = AV_RL8(packet + 9); + if (st->codecpar->ch_layout.nb_channels && + channels != st->codecpar->ch_layout.nb_channels) { + av_log(avf, AV_LOG_ERROR, "Channel change is not supported\n"); + return AVERROR_PATCHWELCOME; + } + + st->codecpar->ch_layout.nb_channels = channels; + + priv->pre_skip = AV_RL16(packet + 10); + st->codecpar->initial_padding = priv->pre_skip; + os->start_trimming = priv->pre_skip; + /*orig_sample_rate = AV_RL32(packet + 12);*/ + /*gain = AV_RL16(packet + 16);*/ + /*channel_map = AV_RL8 (packet + 18);*/ + + ret = ff_alloc_extradata(st->codecpar, os->psize); + if (ret < 0) + return ret; + + memcpy(st->codecpar->extradata, packet, os->psize); + + st->codecpar->sample_rate = 48000; + st->codecpar->seek_preroll = av_rescale(OPUS_SEEK_PREROLL_MS, + st->codecpar->sample_rate, 1000); + avpriv_set_pts_info(st, 64, 1, 48000); + + priv->need_comments = 1; + + return 1; +} + static int opus_header(AVFormatContext *avf, int idx) { struct ogg *ogg = avf->priv_data; @@ 
-43,7 +88,6 @@ static int opus_header(AVFormatContext *avf, int idx) AVStream *st = avf->streams[idx]; struct oggopus_private *priv = os->private; uint8_t *packet = os->buf + os->pstart; - int ret; if (!priv) { priv = os->private = av_mallocz(sizeof(*priv)); @@ -51,32 +95,8 @@ static int opus_header(AVFormatContext *avf, int idx) return AVERROR(ENOMEM); } - if (os->flags & OGG_FLAG_BOS) { - if (os->psize < OPUS_HEAD_SIZE || (AV_RL8(packet + 8) & 0xF0) != 0) - return AVERROR_INVALIDDATA; - st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; - st->codecpar->codec_id = AV_CODEC_ID_OPUS; - st->codecpar->ch_layout.nb_channels = AV_RL8(packet + 9); - - priv->pre_skip = AV_RL16(packet + 10); - st->codecpar->initial_padding = priv->pre_skip; - os->start_trimming = priv->pre_skip; - /*orig_sample_rate = AV_RL32(packet + 12);*/ - /*gain = AV_RL16(packet + 16);*/ - /*channel_map = AV_RL8 (packet + 18);*/ - - if ((ret = ff_alloc_extradata(st->codecpar, os->psize)) < 0) - return ret; - - memcpy(st->codecpar->extradata, packet, os->psize); - - st->codecpar->sample_rate = 48000; - st->codecpar->seek_preroll = av_rescale(OPUS_SEEK_PREROLL_MS, - st->codecpar->sample_rate, 1000); - avpriv_set_pts_info(st, 64, 1, 48000); - priv->need_comments = 1; - return 1; - } + if (os->flags & OGG_FLAG_BOS) + return parse_opus_header(avf, st, os, priv, packet, os->psize); if (priv->need_comments) { if (os->psize < 8 || memcmp(packet, "OpusTags", 8)) @@ -125,6 +145,19 @@ static int opus_packet(AVFormatContext *avf, int idx) return AVERROR_INVALIDDATA; } + if (os->psize > 8 && !memcmp(packet, "OpusHead", 8)) { + ret = parse_opus_header(avf, st, os, priv, packet, os->psize); + if (ret < 0) + return ret; + + return 1; + } + + if (os->psize > 8 && !memcmp(packet, "OpusTags", 8)) { + priv->need_comments = 0; + return 1; + } + if ((!os->lastpts || os->lastpts == AV_NOPTS_VALUE) && !(os->flags & OGG_FLAG_EOS)) { int seg, d; int duration; diff --git a/libavformat/oggparsevorbis.c b/libavformat/oggparsevorbis.c index 9f50ab9ffc5ac..62cc2da6de70a 100644 --- a/libavformat/oggparsevorbis.c +++ b/libavformat/oggparsevorbis.c @@ -293,6 +293,62 @@ static int vorbis_update_metadata(AVFormatContext *s, int idx) return ret; } +static int vorbis_parse_header(AVFormatContext *s, AVStream *st, + const uint8_t *p, unsigned int psize) +{ + unsigned blocksize, bs0, bs1; + int srate; + int channels; + + if (psize != 30) + return AVERROR_INVALIDDATA; + + p += 7; /* skip "\001vorbis" tag */ + + if (bytestream_get_le32(&p) != 0) /* vorbis_version */ + return AVERROR_INVALIDDATA; + + channels = bytestream_get_byte(&p); + if (st->codecpar->ch_layout.nb_channels && + channels != st->codecpar->ch_layout.nb_channels) { + av_log(s, AV_LOG_ERROR, "Channel change is not supported\n"); + return AVERROR_PATCHWELCOME; + } + st->codecpar->ch_layout.nb_channels = channels; + srate = bytestream_get_le32(&p); + p += 4; // skip maximum bitrate + st->codecpar->bit_rate = bytestream_get_le32(&p); // nominal bitrate + p += 4; // skip minimum bitrate + + blocksize = bytestream_get_byte(&p); + bs0 = blocksize & 15; + bs1 = blocksize >> 4; + + if (bs0 > bs1) + return AVERROR_INVALIDDATA; + if (bs0 < 6 || bs1 > 13) + return AVERROR_INVALIDDATA; + + if (bytestream_get_byte(&p) != 1) /* framing_flag */ + return AVERROR_INVALIDDATA; + + st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + st->codecpar->codec_id = AV_CODEC_ID_VORBIS; + + if (srate > 0) { + if (st->codecpar->sample_rate && + srate != st->codecpar->sample_rate) { + av_log(s, AV_LOG_ERROR, "Sample rate change is not 
supported\n"); + return AVERROR_PATCHWELCOME; + } + + st->codecpar->sample_rate = srate; + avpriv_set_pts_info(st, 64, 1, srate); + } + + return 1; +} + static int vorbis_header(AVFormatContext *s, int idx) { struct ogg *ogg = s->priv_data; @@ -329,50 +385,10 @@ static int vorbis_header(AVFormatContext *s, int idx) priv->packet[pkt_type >> 1] = av_memdup(os->buf + os->pstart, os->psize); if (!priv->packet[pkt_type >> 1]) return AVERROR(ENOMEM); - if (os->buf[os->pstart] == 1) { - const uint8_t *p = os->buf + os->pstart + 7; /* skip "\001vorbis" tag */ - unsigned blocksize, bs0, bs1; - int srate; - int channels; - - if (os->psize != 30) - return AVERROR_INVALIDDATA; - - if (bytestream_get_le32(&p) != 0) /* vorbis_version */ - return AVERROR_INVALIDDATA; - - channels = bytestream_get_byte(&p); - if (st->codecpar->ch_layout.nb_channels && - channels != st->codecpar->ch_layout.nb_channels) { - av_log(s, AV_LOG_ERROR, "Channel change is not supported\n"); - return AVERROR_PATCHWELCOME; - } - st->codecpar->ch_layout.nb_channels = channels; - srate = bytestream_get_le32(&p); - p += 4; // skip maximum bitrate - st->codecpar->bit_rate = bytestream_get_le32(&p); // nominal bitrate - p += 4; // skip minimum bitrate - - blocksize = bytestream_get_byte(&p); - bs0 = blocksize & 15; - bs1 = blocksize >> 4; - - if (bs0 > bs1) - return AVERROR_INVALIDDATA; - if (bs0 < 6 || bs1 > 13) - return AVERROR_INVALIDDATA; - - if (bytestream_get_byte(&p) != 1) /* framing_flag */ - return AVERROR_INVALIDDATA; - - st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; - st->codecpar->codec_id = AV_CODEC_ID_VORBIS; - - if (srate > 0) { - st->codecpar->sample_rate = srate; - avpriv_set_pts_info(st, 64, 1, srate); - } - } else if (os->buf[os->pstart] == 3) { + if (pkt_type == 1) + return vorbis_parse_header(s, st, os->buf + os->pstart, os->psize); + + if (pkt_type == 3) { if (vorbis_update_metadata(s, idx) >= 0 && priv->len[1] > 10) { unsigned new_len; diff --git a/libavformat/protocols.c b/libavformat/protocols.c index 93a6d67261e8f..d394454d414f3 100644 --- a/libavformat/protocols.c +++ b/libavformat/protocols.c @@ -62,6 +62,7 @@ extern const URLProtocol ff_subfile_protocol; extern const URLProtocol ff_tee_protocol; extern const URLProtocol ff_tcp_protocol; extern const URLProtocol ff_tls_protocol; +extern const URLProtocol ff_dtls_protocol; extern const URLProtocol ff_udp_protocol; extern const URLProtocol ff_udplite_protocol; extern const URLProtocol ff_unix_protocol; diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c index a7d5a79a83c14..ebd5402bce89f 100644 --- a/libavformat/rtpdec.c +++ b/libavformat/rtpdec.c @@ -61,12 +61,6 @@ static const RTPDynamicProtocolHandler speex_dynamic_handler = { .codec_id = AV_CODEC_ID_SPEEX, }; -static const RTPDynamicProtocolHandler opus_dynamic_handler = { - .enc_name = "opus", - .codec_type = AVMEDIA_TYPE_AUDIO, - .codec_id = AV_CODEC_ID_OPUS, -}; - static const RTPDynamicProtocolHandler t140_dynamic_handler = { /* RFC 4103 */ .enc_name = "t140", .codec_type = AVMEDIA_TYPE_SUBTITLE, @@ -125,7 +119,7 @@ static const RTPDynamicProtocolHandler *const rtp_dynamic_protocol_handler_list[ &ff_vp9_dynamic_handler, &gsm_dynamic_handler, &l24_dynamic_handler, - &opus_dynamic_handler, + &ff_opus_dynamic_handler, &realmedia_mp3_dynamic_handler, &speex_dynamic_handler, &t140_dynamic_handler, @@ -531,43 +525,6 @@ int ff_rtp_send_rtcp_feedback(RTPDemuxContext *s, URLContext *fd, return 0; } -static int opus_write_extradata(AVCodecParameters *codecpar) -{ - uint8_t *bs; - int ret; - - /* This 
function writes an extradata with a channel mapping family of 0. - * This mapping family only supports mono and stereo layouts. And RFC7587 - * specifies that the number of channels in the SDP must be 2. - */ - if (codecpar->ch_layout.nb_channels > 2) { - return AVERROR_INVALIDDATA; - } - - ret = ff_alloc_extradata(codecpar, 19); - if (ret < 0) - return ret; - - bs = (uint8_t *)codecpar->extradata; - - /* Opus magic */ - bytestream_put_buffer(&bs, "OpusHead", 8); - /* Version */ - bytestream_put_byte (&bs, 0x1); - /* Channel count */ - bytestream_put_byte (&bs, codecpar->ch_layout.nb_channels); - /* Pre skip */ - bytestream_put_le16 (&bs, 0); - /* Input sample rate */ - bytestream_put_le32 (&bs, 48000); - /* Output gain */ - bytestream_put_le16 (&bs, 0x0); - /* Mapping family */ - bytestream_put_byte (&bs, 0x0); - - return 0; -} - /** * open a new RTP parse context for stream 'st'. 'st' can be NULL for * MPEG-2 TS streams. @@ -576,7 +533,6 @@ RTPDemuxContext *ff_rtp_parse_open(AVFormatContext *s1, AVStream *st, int payload_type, int queue_size) { RTPDemuxContext *s; - int ret; s = av_mallocz(sizeof(RTPDemuxContext)); if (!s) @@ -600,16 +556,13 @@ RTPDemuxContext *ff_rtp_parse_open(AVFormatContext *s1, AVStream *st, if (st->codecpar->sample_rate == 8000) st->codecpar->sample_rate = 16000; break; - case AV_CODEC_ID_OPUS: - ret = opus_write_extradata(st->codecpar); - if (ret < 0) { - av_log(s1, AV_LOG_ERROR, - "Error creating opus extradata: %s\n", - av_err2str(ret)); - av_free(s); - return NULL; - } + case AV_CODEC_ID_PCM_MULAW: { + AVCodecParameters *par = st->codecpar; + par->bits_per_coded_sample = av_get_bits_per_sample(par->codec_id); + par->block_align = par->ch_layout.nb_channels * par->bits_per_coded_sample / 8; + par->bit_rate = par->block_align * 8LL * par->sample_rate; break; + } default: break; } diff --git a/libavformat/rtpdec_formats.h b/libavformat/rtpdec_formats.h index 72a8f16a90999..1ff2a72d2ae00 100644 --- a/libavformat/rtpdec_formats.h +++ b/libavformat/rtpdec_formats.h @@ -77,6 +77,7 @@ extern const RTPDynamicProtocolHandler ff_mpeg4_generic_dynamic_handler; extern const RTPDynamicProtocolHandler ff_mpegts_dynamic_handler; extern const RTPDynamicProtocolHandler ff_ms_rtp_asf_pfa_handler; extern const RTPDynamicProtocolHandler ff_ms_rtp_asf_pfv_handler; +extern const RTPDynamicProtocolHandler ff_opus_dynamic_handler; extern const RTPDynamicProtocolHandler ff_qcelp_dynamic_handler; extern const RTPDynamicProtocolHandler ff_qdm2_dynamic_handler; extern const RTPDynamicProtocolHandler ff_qt_rtp_aud_handler; diff --git a/libavformat/rtpdec_opus.c b/libavformat/rtpdec_opus.c new file mode 100644 index 0000000000000..4ed9d8842bd74 --- /dev/null +++ b/libavformat/rtpdec_opus.c @@ -0,0 +1,151 @@ +/* + * RTP Depacketization of Opus, RFC 7587 + * Copyright (c) 2025 Jonathan Baudanza + * Copyright (c) 2022 Erik Linge + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/bytestream.h" +#include "libavutil/mem.h" +#include "libavutil/avstring.h" +#include "rtpdec_formats.h" +#include "internal.h" + +static int opus_duration(const uint8_t *src, int size) +{ + unsigned nb_frames = 1; + unsigned toc = src[0]; + unsigned toc_config = toc >> 3; + unsigned toc_count = toc & 3; + unsigned frame_size = toc_config < 12 ? FFMAX(480, 960 * (toc_config & 3)) : + toc_config < 16 ? 480 << (toc_config & 1) : + 120 << (toc_config & 3); + if (toc_count == 3) { + if (size<2) + return AVERROR_INVALIDDATA; + nb_frames = src[1] & 0x3F; + } else if (toc_count) { + nb_frames = 2; + } + + return frame_size * nb_frames; +} + +static int opus_write_extradata(AVCodecParameters *codecpar) +{ + uint8_t *bs; + int ret; + + /* This function writes an extradata with a channel mapping family of 0. + * This mapping family only supports mono and stereo layouts. And RFC7587 + * specifies that the number of channels in the SDP must be 2. + */ + if (codecpar->ch_layout.nb_channels > 2) { + return AVERROR_INVALIDDATA; + } + + ret = ff_alloc_extradata(codecpar, 19); + if (ret < 0) + return ret; + + bs = (uint8_t *)codecpar->extradata; + + /* Opus magic */ + bytestream_put_buffer(&bs, "OpusHead", 8); + /* Version */ + bytestream_put_byte (&bs, 0x1); + /* Channel count */ + bytestream_put_byte (&bs, codecpar->ch_layout.nb_channels); + /* Pre skip */ + bytestream_put_le16 (&bs, 0); + /* Input sample rate */ + bytestream_put_le32 (&bs, 48000); + /* Output gain */ + bytestream_put_le16 (&bs, 0x0); + /* Mapping family */ + bytestream_put_byte (&bs, 0x0); + + return 0; +} + +static int opus_init(AVFormatContext *s, int st_index, PayloadContext *priv_data) +{ + return opus_write_extradata(s->streams[st_index]->codecpar); +} + +static int opus_parse_packet(AVFormatContext *ctx, PayloadContext *data, + AVStream *st, AVPacket *pkt, uint32_t *timestamp, + const uint8_t *buf, int len, uint16_t seq, + int flags) +{ + int rv; + int duration; + + if ((rv = av_new_packet(pkt, len)) < 0) + return rv; + + memcpy(pkt->data, buf, len); + pkt->stream_index = st->index; + + duration = opus_duration(buf, len); + if (duration != AVERROR_INVALIDDATA) { + pkt->duration = duration; + } + + return 0; +} + +static int parse_fmtp(AVFormatContext *s, + AVStream *stream, PayloadContext *data, + const char *attr, const char *value) +{ + if (!strcmp(attr, "sprop-maxcapturerate")) { + int rate = atoi(value); + if (rate < 8000 || rate > 48000) { + av_log(s, AV_LOG_ERROR, + "fmtp field 'sprop-maxcapturerate' must be between 8000 to 48000 (provided value: %s)", + value); + return AVERROR_INVALIDDATA; + } + stream->codecpar->sample_rate = rate; + } + return 0; +} + +static int opus_parse_sdp_line(AVFormatContext *s, int st_index, + PayloadContext *data, const char *line) +{ + const char *p; + + if (st_index < 0) + return 0; + + if (av_strstart(line, "fmtp:", &p)) { + return ff_parse_fmtp(s, s->streams[st_index], data, p, parse_fmtp); + } + return 0; +} + +const RTPDynamicProtocolHandler ff_opus_dynamic_handler = { + .enc_name = "opus", + .codec_type = AVMEDIA_TYPE_AUDIO, + .codec_id = AV_CODEC_ID_OPUS, + .parse_packet = opus_parse_packet, + .init = opus_init, + .parse_sdp_a_line = opus_parse_sdp_line, +}; diff --git a/libavformat/srtp.h b/libavformat/srtp.h index 3189f8f54bd0e..35224cc9ba901 
100644 --- a/libavformat/srtp.h +++ b/libavformat/srtp.h @@ -27,7 +27,7 @@ struct AVAES; struct AVHMAC; -struct SRTPContext { +typedef struct SRTPContext { struct AVAES *aes; struct AVHMAC *hmac; int rtp_hmac_size, rtcp_hmac_size; @@ -40,7 +40,7 @@ struct SRTPContext { uint32_t roc; uint32_t rtcp_index; -}; +} SRTPContext; int ff_srtp_set_crypto(struct SRTPContext *s, const char *suite, const char *params); diff --git a/libavformat/tls.c b/libavformat/tls.c index f96ff6215d63f..e06b7022bf4a6 100644 --- a/libavformat/tls.c +++ b/libavformat/tls.c @@ -1,6 +1,7 @@ /* - * TLS/SSL Protocol + * TLS/DTLS/SSL Protocol * Copyright (c) 2011 Martin Storsjo + * Copyright (c) 2025 Jack Lau * * This file is part of FFmpeg. * @@ -20,6 +21,7 @@ */ #include "avformat.h" +#include "internal.h" #include "network.h" #include "os_support.h" #include "url.h" @@ -93,7 +95,7 @@ int ff_tls_open_underlying(TLSShared *c, URLContext *parent, const char *uri, AV c->listen = 1; } - ff_url_join(buf, sizeof(buf), "tcp", NULL, c->underlying_host, port, "%s", p); + ff_url_join(buf, sizeof(buf), c->is_dtls ? "udp" : "tcp", NULL, c->underlying_host, port, "%s", p); hints.ai_flags = AI_NUMERICHOST; if (!getaddrinfo(c->underlying_host, NULL, &hints, &ai)) { @@ -124,7 +126,65 @@ int ff_tls_open_underlying(TLSShared *c, URLContext *parent, const char *uri, AV } freeenv_utf8(env_http_proxy); - return ffurl_open_whitelist(&c->tcp, buf, AVIO_FLAG_READ_WRITE, - &parent->interrupt_callback, options, - parent->protocol_whitelist, parent->protocol_blacklist, parent); + if (c->is_dtls) { + av_dict_set_int(options, "connect", 1, 0); + av_dict_set_int(options, "fifo_size", 0, 0); + /* Set the max packet size to the buffer size. */ + av_dict_set_int(options, "pkt_size", c->mtu, 0); + } + ret = ffurl_open_whitelist(c->is_dtls ? &c->udp : &c->tcp, buf, AVIO_FLAG_READ_WRITE, + &parent->interrupt_callback, options, + parent->protocol_whitelist, parent->protocol_blacklist, parent); + if (c->is_dtls) { + if (ret < 0) { + av_log(c, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", c->underlying_host, port); + return ret; + } + /* Make the socket non-blocking, set to READ and WRITE mode after connected */ + ff_socket_nonblock(ffurl_get_file_handle(c->udp), 1); + c->udp->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; + } + return ret; } + +/** + * Read all data from the given URL url and store it in the given buffer bp. + */ +int ff_url_read_all(const char *url, AVBPrint *bp) +{ + int ret = 0; + AVDictionary *opts = NULL; + URLContext *uc = NULL; + char buf[MAX_URL_SIZE]; + + ret = ffurl_open_whitelist(&uc, url, AVIO_FLAG_READ, NULL, &opts, NULL, NULL, NULL); + if (ret < 0) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to open url %s\n", url); + goto end; + } + + while (1) { + ret = ffurl_read(uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* Reset the error because we read all response as answer util EOF. 
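AVERROR_EOF is the normal termination of this loop; past that check, a return value of zero or any other negative value is treated as a read failure, and the total amount of data collected is bounded by the av_bprint_is_complete() check that follows each av_bprintf().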
*/ + ret = 0; + break; + } + if (ret <= 0) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to read from url=%s, key is %s\n", url, bp->str); + goto end; + } + + av_bprintf(bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(bp)) { + av_log(NULL, AV_LOG_ERROR, "TLS: Exceed max size %.*s, %s\n", ret, buf, bp->str); + ret = AVERROR(EIO); + goto end; + } + } + +end: + ffurl_closep(&uc); + av_dict_free(&opts); + return ret; +} \ No newline at end of file diff --git a/libavformat/tls.h b/libavformat/tls.h index 6c6aa01a9a928..cb626f1977a30 100644 --- a/libavformat/tls.h +++ b/libavformat/tls.h @@ -1,6 +1,7 @@ /* - * TLS/SSL Protocol + * TLS/DTLS/SSL Protocol * Copyright (c) 2011 Martin Storsjo + * Copyright (c) 2025 Jack Lau * * This file is part of FFmpeg. * @@ -22,10 +23,27 @@ #ifndef AVFORMAT_TLS_H #define AVFORMAT_TLS_H +#include "libavutil/bprint.h" #include "libavutil/opt.h" #include "url.h" +/** + * Maximum size limit of a certificate and private key size. + */ +#define MAX_CERTIFICATE_SIZE 8192 + +enum DTLSState { + DTLS_STATE_NONE, + + /* Whether DTLS handshake is finished. */ + DTLS_STATE_FINISHED, + /* Whether DTLS session is closed. */ + DTLS_STATE_CLOSED, + /* Whether DTLS handshake is failed. */ + DTLS_STATE_FAILED, +}; + typedef struct TLSShared { char *ca_file; int verify; @@ -40,6 +58,25 @@ typedef struct TLSShared { int numerichost; URLContext *tcp; + + int is_dtls; + + enum DTLSState state; + + int use_external_udp; + URLContext *udp; + + /* The fingerprint of certificate, used in SDP offer. */ + char *fingerprint; + + /* The certificate and private key content used for DTLS handshake */ + char* cert_buf; + char* key_buf; + /** + * The size of RTP packet, should generally be set to MTU. + * Note that pion requires a smaller value, for example, 1200. + */ + int mtu; } TLSShared; #define TLS_OPTFL (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_ENCODING_PARAM) @@ -51,10 +88,27 @@ typedef struct TLSShared { {"key_file", "Private key file", offsetof(pstruct, options_field . key_file), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL }, \ {"listen", "Listen for incoming connections", offsetof(pstruct, options_field . listen), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = TLS_OPTFL }, \ {"verifyhost", "Verify against a specific hostname", offsetof(pstruct, options_field . host), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL }, \ - {"http_proxy", "Set proxy to tunnel through", offsetof(pstruct, options_field . http_proxy), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL } + {"http_proxy", "Set proxy to tunnel through", offsetof(pstruct, options_field . http_proxy), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL }, \ + {"use_external_udp", "Use external UDP from muxer or demuxer", offsetof(pstruct, options_field . use_external_udp), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 1, .flags = TLS_OPTFL }, \ + {"mtu", "Maximum Transmission Unit", offsetof(pstruct, options_field . mtu), AV_OPT_TYPE_INT, { .i64 = 0}, INT64_MIN, INT64_MAX, .flags = TLS_OPTFL}, \ + {"fingerprint", "The optional fingerprint for DTLS", offsetof(pstruct, options_field . fingerprint), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL}, \ + {"cert_buf", "The optional certificate buffer for DTLS", offsetof(pstruct, options_field . cert_buf), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL}, \ + {"key_buf", "The optional private key buffer for DTLS", offsetof(pstruct, options_field . 
key_buf), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL} int ff_tls_open_underlying(TLSShared *c, URLContext *parent, const char *uri, AVDictionary **options); +int ff_url_read_all(const char *url, AVBPrint *bp); + +int ff_dtls_set_udp(URLContext *h, URLContext *udp); + +int ff_dtls_export_materials(URLContext *h, char *dtls_srtp_materials, size_t materials_sz); + +int ff_dtls_state(URLContext *h); + +int ff_ssl_read_key_cert(char *key_url, char *cert_url, char *key_buf, size_t key_sz, char *cert_buf, size_t cert_sz, char **fingerprint); + +int ff_ssl_gen_key_cert(char *key_buf, size_t key_sz, char *cert_buf, size_t cert_sz, char **fingerprint); + void ff_gnutls_init(void); void ff_gnutls_deinit(void); diff --git a/libavformat/tls_openssl.c b/libavformat/tls_openssl.c index 8b0cf9efb23e9..b589d5d90a4b7 100644 --- a/libavformat/tls_openssl.c +++ b/libavformat/tls_openssl.c @@ -1,6 +1,7 @@ /* - * TLS/SSL Protocol + * TLS/DTLS/SSL Protocol * Copyright (c) 2011 Martin Storsjo + * Copyright (c) 2025 Jack Lau * * This file is part of FFmpeg. * @@ -19,8 +20,10 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/mem.h" #include "network.h" #include "os_support.h" +#include "libavutil/random_seed.h" #include "url.h" #include "tls.h" #include "libavutil/opt.h" @@ -29,6 +32,436 @@ #include #include +/** + * Returns a heap‐allocated null‐terminated string containing + * the PEM‐encoded public key. Caller must free. + */ +static char *pkey_to_pem_string(EVP_PKEY *pkey) { + BIO *mem = NULL; + BUF_MEM *bptr = NULL; + char *pem_str = NULL; + + // Create a memory BIO + if (!(mem = BIO_new(BIO_s_mem()))) + goto err; + + // Write public key in PEM form + if (!PEM_write_bio_PrivateKey(mem, pkey, NULL, NULL, 0, NULL, NULL)) + goto err; + + // Extract pointer/length + BIO_get_mem_ptr(mem, &bptr); + if (!bptr || !bptr->length) + goto err; + + // Allocate string (+1 for NUL) + pem_str = av_malloc(bptr->length + 1); + if (!pem_str) + goto err; + + // Copy data & NUL‐terminate + memcpy(pem_str, bptr->data, bptr->length); + pem_str[bptr->length] = '\0'; + +cleanup: + BIO_free(mem); + return pem_str; + +err: + // error path: free and return NULL + free(pem_str); + pem_str = NULL; + goto cleanup; +} + +/** + * Serialize an X509 certificate to a av_malloc’d PEM string. + * Caller must free the returned pointer. + */ +static char *cert_to_pem_string(X509 *cert) +{ + BIO *mem = BIO_new(BIO_s_mem()); + BUF_MEM *bptr = NULL; + char *out = NULL; + + if (!mem) goto err; + + /* Write the PEM certificate */ + if (!PEM_write_bio_X509(mem, cert)) + goto err; + + BIO_get_mem_ptr(mem, &bptr); + if (!bptr || !bptr->length) goto err; + + out = av_malloc(bptr->length + 1); + if (!out) goto err; + + memcpy(out, bptr->data, bptr->length); + out[bptr->length] = '\0'; + +cleanup: + BIO_free(mem); + return out; + +err: + free(out); + out = NULL; + goto cleanup; +} + + +/** + * Generate a SHA-256 fingerprint of an X.509 certificate. + * + * @param ctx AVFormatContext for logging (can be NULL) + * @param cert X509 certificate to fingerprint + * @return Newly allocated fingerprint string in "AA:BB:CC:…" format, + * or NULL on error (logs via av_log if ctx is not NULL). + * Caller must free() the returned string. + */ +static char *generate_fingerprint(X509 *cert) +{ + unsigned char md[EVP_MAX_MD_SIZE]; + int n = 0; + AVBPrint fingerprint; + char *result = NULL; + int i; + + /* To prevent a crash during cleanup, always initialize it. 
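The digest is printed as upper-case hex byte pairs separated by colons, so a SHA-256 digest (32 bytes) becomes a 95-character string: 64 hex digits plus 31 ':' separators. This is the fingerprint later carried in the SDP offer.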
*/ + av_bprint_init(&fingerprint, 0, AV_BPRINT_SIZE_UNLIMITED); + + if (X509_digest(cert, EVP_sha256(), md, &n) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to generate fingerprint, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto end; + } + + for (i = 0; i < n; i++) { + av_bprintf(&fingerprint, "%02X", md[i]); + if (i + 1 < n) + av_bprintf(&fingerprint, ":"); + } + + if (!fingerprint.str || !strlen(fingerprint.str)) { + av_log(NULL, AV_LOG_ERROR, "TLS: Fingerprint is empty\n"); + goto end; + } + + result = av_strdup(fingerprint.str); + if (!result) { + av_log(NULL, AV_LOG_ERROR, "TLS: Out of memory generating fingerprint\n"); + } + +end: + av_bprint_finalize(&fingerprint, NULL); + return result; +} + +int ff_ssl_read_key_cert(char *key_url, char *cert_url, char *key_buf, size_t key_sz, char *cert_buf, size_t cert_sz, char **fingerprint) +{ + int ret = 0; + BIO *key_b = NULL, *cert_b = NULL; + AVBPrint key_bp, cert_bp; + EVP_PKEY *pkey; + X509 *cert; + char *key_tem = NULL, *cert_tem = NULL; + + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&key_bp, 1, MAX_CERTIFICATE_SIZE); + av_bprint_init(&cert_bp, 1, MAX_CERTIFICATE_SIZE); + + /* Read key file. */ + ret = ff_url_read_all(key_url, &key_bp); + if (ret < 0) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to open key file %s\n", key_url); + goto end; + } + + if (!(key_b = BIO_new(BIO_s_mem()))) { + ret = AVERROR(ENOMEM); + goto end; + } + + BIO_write(key_b, key_bp.str, key_bp.len); + pkey = PEM_read_bio_PrivateKey(key_b, NULL, NULL, NULL); + if (!pkey) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to read private key from %s\n", key_url); + ret = AVERROR(EIO); + goto end; + } + + /* Read certificate. */ + ret = ff_url_read_all(cert_url, &cert_bp); + if (ret < 0) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to open cert file %s\n", cert_url); + goto end; + } + + if (!(cert_b = BIO_new(BIO_s_mem()))) { + ret = AVERROR(ENOMEM); + goto end; + } + + BIO_write(cert_b, cert_bp.str, cert_bp.len); + cert = PEM_read_bio_X509(cert_b, NULL, NULL, NULL); + if (!cert) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to read certificate from %s\n", cert_url); + ret = AVERROR(EIO); + goto end; + } + + key_tem = pkey_to_pem_string(pkey); + cert_tem = cert_to_pem_string(cert); + + snprintf(key_buf, key_sz, "%s", key_tem); + snprintf(cert_buf, cert_sz, "%s", cert_tem); + + /* Generate fingerprint. */ + *fingerprint = generate_fingerprint(cert); + if (!*fingerprint) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to generate fingerprint from %s\n", cert_url); + ret = AVERROR(EIO); + goto end; + } + +end: + BIO_free(key_b); + av_bprint_finalize(&key_bp, NULL); + BIO_free(cert_b); + av_bprint_finalize(&cert_bp, NULL); + if (key_tem) av_free(key_tem); + if (cert_tem) av_free(cert_tem); + return ret; +} + +static int openssl_gen_private_key(EVP_PKEY **pkey, EC_KEY **eckey) +{ + int ret = 0; + + /** + * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, + * not NID_secp256k1 or secp256k1 in string. 
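 * On OpenSSL 3.0 and newer the key is generated directly with EVP_EC_gen() for this curve; older releases construct an EC_KEY from the EC_GROUP and attach it to an EVP_PKEY, as done below.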
+ * + * TODO: Should choose the curves in ClientHello.supported_groups, for example: + * Supported Group: x25519 (0x001d) + * Supported Group: secp256r1 (0x0017) + * Supported Group: secp384r1 (0x0018) + */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP *ecgroup = NULL; + int curve = NID_X9_62_prime256v1; +#else + const char *curve = SN_X9_62_prime256v1; +#endif + +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + *pkey = EVP_PKEY_new(); + *eckey = EC_KEY_new(); + ecgroup = EC_GROUP_new_by_curve_name(curve); + if (!ecgroup) { + av_log(NULL, AV_LOG_ERROR, "TLS: Create EC group by curve=%d failed, %s", curve, ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x + /* For openssl 1.0, we must set the group parameters, so that cert is ok. */ + EC_GROUP_set_asn1_flag(ecgroup, OPENSSL_EC_NAMED_CURVE); +#endif + + if (EC_KEY_set_group(*eckey, ecgroup) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Generate private key, EC_KEY_set_group failed, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + if (EC_KEY_generate_key(*eckey) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Generate private key, EC_KEY_generate_key failed, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + if (EVP_PKEY_set1_EC_KEY(*pkey, *eckey) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Generate private key, EVP_PKEY_set1_EC_KEY failed, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } +#else + *pkey = EVP_EC_gen(curve); + if (!*pkey) { + av_log(NULL, AV_LOG_ERROR, "TLS: Generate private key, EVP_EC_gen curve=%s failed, %s\n", curve, ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } +#endif + goto end; + +einval_end: + ret = AVERROR(EINVAL); +end: +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP_free(ecgroup); +#endif + return ret; +} + +static int openssl_gen_certificate(EVP_PKEY *pkey, X509 **cert, char **fingerprint) +{ + int ret = 0, serial, expire_day; + const char *aor = "lavf"; + X509_NAME* subject = NULL; + + *cert= X509_new(); + if (!*cert) { + goto enomem_end; + } + + // TODO: Support non-self-signed certificate, for example, load from a file. 
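    /* What follows builds a throwaway self-signed certificate: a random serial number, issuer and
     * subject both set to CN=lavf, a 365-day validity window and the supplied EC key as public key,
     * signed with SHA-1. Peers are expected to match it against the fingerprint exchanged in the
     * SDP rather than against a CA chain. */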
+ subject = X509_NAME_new(); + if (!subject) { + goto enomem_end; + } + + serial = (int)av_get_random_seed(); + if (ASN1_INTEGER_set(X509_get_serialNumber(*cert), serial) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set serial, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set CN, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + if (X509_set_issuer_name(*cert, subject) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set issuer, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + if (X509_set_subject_name(*cert, subject) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set subject name, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + expire_day = 365; + if (!X509_gmtime_adj(X509_get_notBefore(*cert), 0)) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set notBefore, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + if (!X509_gmtime_adj(X509_get_notAfter(*cert), 60*60*24*expire_day)) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set notAfter, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + if (X509_set_version(*cert, 2) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set version, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + if (X509_set_pubkey(*cert, pkey) != 1) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set public key, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + if (!X509_sign(*cert, pkey, EVP_sha1())) { + av_log(NULL, AV_LOG_ERROR, "TLS: Failed to sign certificate, %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto einval_end; + } + + *fingerprint = generate_fingerprint(*cert); + if (!*fingerprint) { + goto enomem_end; + } + + goto end; +enomem_end: + ret = AVERROR(ENOMEM); + goto end; +einval_end: + ret = AVERROR(EINVAL); +end: + X509_NAME_free(subject); + //av_bprint_finalize(&fingerprint, NULL); + return ret; +} + +int ff_ssl_gen_key_cert(char *key_buf, size_t key_sz, char *cert_buf, size_t cert_sz, char **fingerprint) +{ + int ret = 0; + EVP_PKEY *pkey = NULL; + EC_KEY *ec_key = NULL; + X509 *cert = NULL; + char *key_tem = NULL, *cert_tem = NULL; + + ret = openssl_gen_private_key(&pkey, &ec_key); + if (ret < 0) goto error; + + ret = openssl_gen_certificate(pkey, &cert, fingerprint); + if (ret < 0) goto error; + + key_tem = pkey_to_pem_string(pkey); + cert_tem = cert_to_pem_string(cert); + + snprintf(key_buf, key_sz, "%s", key_tem); + snprintf(cert_buf, cert_sz, "%s", cert_tem); + + if (key_tem) av_free(key_tem); + if (cert_tem) av_free(cert_tem); +error: + return ret; +} + + +/** + * Deserialize a PEM‐encoded private or public key from a NUL-terminated C string. + * + * @param pem_str The PEM text, e.g. + * "-----BEGIN PRIVATE KEY-----\n…\n-----END PRIVATE KEY-----\n" + * @param is_priv If non-zero, parse as a PRIVATE key; otherwise, parse as a PUBLIC key. + * @return EVP_PKEY* on success (must EVP_PKEY_free()), or NULL on error. 
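 * Typical use: openssl_init_ca_key_cert() below calls pkey_from_pem_string(p->tls_shared.key_buf, 1) to turn the key_buf option back into an EVP_PKEY.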
+ */ +static EVP_PKEY *pkey_from_pem_string(const char *pem_str, int is_priv) +{ + BIO *mem = BIO_new_mem_buf(pem_str, -1); + if (!mem) { + av_log(NULL, AV_LOG_ERROR, "BIO_new_mem_buf failed\n"); + return NULL; + } + + EVP_PKEY *pkey = NULL; + if (is_priv) { + pkey = PEM_read_bio_PrivateKey(mem, NULL, NULL, NULL); + } else { + pkey = PEM_read_bio_PUBKEY(mem, NULL, NULL, NULL); + } + + if (!pkey) + av_log(NULL, AV_LOG_ERROR, "Failed to parse %s key from string\n", + is_priv ? "private" : "public"); + + BIO_free(mem); + return pkey; +} + +/** + * Deserialize a PEM‐encoded certificate from a NUL-terminated C string. + * + * @param pem_str The PEM text, e.g. + * "-----BEGIN CERTIFICATE-----\n…\n-----END CERTIFICATE-----\n" + * @return X509* on success (must X509_free()), or NULL on error. + */ +static X509 *cert_from_pem_string(const char *pem_str) +{ + BIO *mem = BIO_new_mem_buf(pem_str, -1); + if (!mem) { + av_log(NULL, AV_LOG_ERROR, "BIO_new_mem_buf failed\n"); + return NULL; + } + + X509 *cert = PEM_read_bio_X509(mem, NULL, NULL, NULL); + if (!cert) { + av_log(NULL, AV_LOG_ERROR, "Failed to parse certificate from string\n"); + return NULL; + } + + BIO_free(mem); + return cert; +} + + typedef struct TLSContext { const AVClass *class; TLSShared tls_shared; @@ -38,8 +471,56 @@ typedef struct TLSContext { BIO_METHOD* url_bio_method; #endif int io_err; + char error_message[256]; } TLSContext; +/** + * Retrieves the error message for the latest OpenSSL error. + * + * This function retrieves the error code from the thread's error queue, converts it + * to a human-readable string, and stores it in the TLSContext's error_message field. + * The error queue is then cleared using ERR_clear_error(). + */ +static const char* openssl_get_error(TLSContext *ctx) +{ + int r2 = ERR_get_error(); + if (r2) { + ERR_error_string_n(r2, ctx->error_message, sizeof(ctx->error_message)); + } else + ctx->error_message[0] = '\0'; + + ERR_clear_error(); + return ctx->error_message; +} + +int ff_dtls_set_udp(URLContext *h, URLContext *udp) +{ + TLSContext *c = h->priv_data; + c->tls_shared.udp = udp; + return 0; +} + +int ff_dtls_export_materials(URLContext *h, char *dtls_srtp_materials, size_t materials_sz) +{ + int ret = 0; + const char* dst = "EXTRACTOR-dtls_srtp"; + TLSContext *c = h->priv_data; + + ret = SSL_export_keying_material(c->ssl, dtls_srtp_materials, materials_sz, + dst, strlen(dst), NULL, 0, 0); + if (!ret) { + av_log(c, AV_LOG_ERROR, "TLS: Failed to export SRTP material, %s\n", openssl_get_error(c)); + return -1; + } + return 0; +} + +int ff_dtls_state(URLContext *h) +{ + TLSContext *c = h->priv_data; + return c->tls_shared.state; +} + /* OpenSSL 1.0.2 or below, then you would use SSL_library_init. If you are * using OpenSSL 1.1.0 or above, then the library will initialize * itself automatically. @@ -121,7 +602,7 @@ void ff_openssl_deinit(void) } #endif -static int print_tls_error(URLContext *h, int ret) +static int print_ssl_error(URLContext *h, int ret) { TLSContext *c = h->priv_data; int printed = 0, e, averr = AVERROR(EIO); @@ -193,7 +674,7 @@ static int url_bio_destroy(BIO *b) static int url_bio_bread(BIO *b, char *buf, int len) { TLSContext *c = GET_BIO_DATA(b); - int ret = ffurl_read(c->tls_shared.tcp, buf, len); + int ret = ffurl_read(c->tls_shared.is_dtls ? 
c->tls_shared.udp : c->tls_shared.tcp, buf, len); if (ret >= 0) return ret; BIO_clear_retry_flags(b); @@ -209,7 +690,7 @@ static int url_bio_bread(BIO *b, char *buf, int len) static int url_bio_bwrite(BIO *b, const char *buf, int len) { TLSContext *c = GET_BIO_DATA(b); - int ret = ffurl_write(c->tls_shared.tcp, buf, len); + int ret = ffurl_write(c->tls_shared.is_dtls ? c->tls_shared.udp : c->tls_shared.tcp, buf, len); if (ret >= 0) return ret; BIO_clear_retry_flags(b); @@ -250,11 +731,300 @@ static BIO_METHOD url_bio_method = { }; #endif +static av_cold void init_bio_method(URLContext *h) +{ + TLSContext *p = h->priv_data; + BIO *bio; +#if OPENSSL_VERSION_NUMBER >= 0x1010000fL + p->url_bio_method = BIO_meth_new(BIO_TYPE_SOURCE_SINK, "urlprotocol bio"); + BIO_meth_set_write(p->url_bio_method, url_bio_bwrite); + BIO_meth_set_read(p->url_bio_method, url_bio_bread); + BIO_meth_set_puts(p->url_bio_method, url_bio_bputs); + BIO_meth_set_ctrl(p->url_bio_method, url_bio_ctrl); + BIO_meth_set_create(p->url_bio_method, url_bio_create); + BIO_meth_set_destroy(p->url_bio_method, url_bio_destroy); + bio = BIO_new(p->url_bio_method); + BIO_set_data(bio, p); +#else + bio = BIO_new(&url_bio_method); + bio->ptr = p; +#endif + SSL_set_bio(p->ssl, bio, bio); +} + +static void openssl_info_callback(const SSL *ssl, int where, int ret) { + const char *method = "undefined"; + TLSContext *ctx = (TLSContext*)SSL_get_ex_data(ssl, 0); + + if (where & SSL_ST_CONNECT) { + method = "SSL_connect"; + } else if (where & SSL_ST_ACCEPT) + method = "SSL_accept"; + + if (where & SSL_CB_LOOP) { + av_log(ctx, AV_LOG_DEBUG, "Info method=%s state=%s(%s), where=%d, ret=%d\n", + method, SSL_state_string(ssl), SSL_state_string_long(ssl), where, ret); + } else if (where & SSL_CB_ALERT) { + method = (where & SSL_CB_READ) ? "read":"write"; + av_log(ctx, AV_LOG_DEBUG, "Alert method=%s state=%s(%s), where=%d, ret=%d\n", + method, SSL_state_string(ssl), SSL_state_string_long(ssl), where, ret); + } +} + +/** + * Always return 1 to accept any certificate. This is because we allow the peer to + * use a temporary self-signed certificate for DTLS. + */ +static int openssl_dtls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) +{ + return 1; +} + +static int dtls_handshake(URLContext *h) +{ + int ret = 0, r0, r1; + TLSContext *p = h->priv_data; + + r0 = SSL_do_handshake(p->ssl); + r1 = SSL_get_error(p->ssl, r0); + if (r0 <= 0) { + if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) { + av_log(p, AV_LOG_ERROR, "TLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, openssl_get_error(p)); + ret = AVERROR(EIO); + goto end; + } + } else { + av_log(p, AV_LOG_TRACE, "TLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1); + } + + /* Check whether the DTLS is completed. 
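SSL_is_init_finished() reports 1 only once the handshake has fully completed; until then we return 0 and rely on the caller to drive the handshake again when more datagrams arrive, and only after completion is the shared state moved to DTLS_STATE_FINISHED.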
*/ + if (SSL_is_init_finished(p->ssl) != 1) + goto end; + + p->tls_shared.state = DTLS_STATE_FINISHED; +end: + return ret; +} + +static av_cold int openssl_init_ca_key_cert(URLContext *h) +{ + int ret; + TLSContext *p = h->priv_data; + TLSShared *c = &p->tls_shared; + EVP_PKEY *pkey = NULL; + X509 *cert = NULL; + /* setup ca, private key, certificate */ + if (c->ca_file) { + if (!SSL_CTX_load_verify_locations(p->ctx, c->ca_file, NULL)) + av_log(h, AV_LOG_ERROR, "SSL_CTX_load_verify_locations %s\n", openssl_get_error(p)); + } + + if (c->cert_file) { + ret = SSL_CTX_use_certificate_chain_file(p->ctx, c->cert_file); + if (ret <= 0) { + av_log(h, AV_LOG_ERROR, "Unable to load cert file %s: %s\n", + c->cert_file, openssl_get_error(p)); + ret = AVERROR(EIO); + goto fail; + } + } else if (p->tls_shared.cert_buf) { + cert = cert_from_pem_string(p->tls_shared.cert_buf); + if (SSL_CTX_use_certificate(p->ctx, cert) != 1) { + av_log(p, AV_LOG_ERROR, "SSL: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(p)); + ret = AVERROR(EINVAL); + return ret; + } + } else if (p->tls_shared.is_dtls){ + av_log(p, AV_LOG_ERROR, "TLS: Init cert failed, %s\n", openssl_get_error(p)); + ret = AVERROR(EINVAL); + goto fail; + } + + if (c->key_file) { + ret = SSL_CTX_use_PrivateKey_file(p->ctx, c->key_file, SSL_FILETYPE_PEM); + if (ret <= 0) { + av_log(h, AV_LOG_ERROR, "Unable to load key file %s: %s\n", + c->key_file, openssl_get_error(p)); + ret = AVERROR(EIO); + goto fail; + } + } else if (p->tls_shared.key_buf) { + pkey = pkey_from_pem_string(p->tls_shared.key_buf, 1); + if (SSL_CTX_use_PrivateKey(p->ctx, pkey) != 1) { + av_log(p, AV_LOG_ERROR, "TLS: Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(p)); + ret = AVERROR(EINVAL); + return ret; + } + } else if (p->tls_shared.is_dtls){ + av_log(p, AV_LOG_ERROR, "TLS: Init pkey failed, %s\n", openssl_get_error(p)); + ret = AVERROR(EINVAL); + goto fail; + } + ret = 0; +fail: + return ret; +} + +/** + * Once the DTLS role has been negotiated - active for the DTLS client or passive for the + * DTLS server - we proceed to set up the DTLS state and initiate the handshake. + */ +static int dtls_start(URLContext *h, const char *url, int flags, AVDictionary **options) +{ + TLSContext *p = h->priv_data; + TLSShared *c = &p->tls_shared; + int ret = 0; + c->is_dtls = 1; + const char* ciphers = "ALL"; + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. + */ + const char* profiles = "SRTP_AES128_CM_SHA1_80"; + /* Refer to the test cases regarding these curves in the WebRTC code. */ +#if OPENSSL_VERSION_NUMBER >= 0x10100000L /* OpenSSL 1.1.0 */ + const char* curves = "X25519:P-256:P-384:P-521"; +#elif OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ + const char* curves = "P-256:P-384:P-521"; +#endif + +#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ + p->ctx = SSL_CTX_new(DTLSv1_method()); +#else + p->ctx = SSL_CTX_new(DTLS_method()); +#endif + if (!p->ctx) { + ret = AVERROR(ENOMEM); + goto fail; + } + +#if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ + /* For ECDSA, we could set the curves list. 
*/ + if (SSL_CTX_set1_curves_list(p->ctx, curves) != 1) { + av_log(p, AV_LOG_ERROR, "TLS: Init SSL_CTX_set1_curves_list failed, curves=%s, %s\n", + curves, openssl_get_error(p)); + ret = AVERROR(EINVAL); + return ret; + } +#endif + +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x +#if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 + if (ctx->dtls_eckey) + SSL_CTX_set_tmp_ecdh(p->ctx, p->dtls_eckey); +#else + SSL_CTX_set_ecdh_auto(p->ctx, 1); +#endif +#endif + + /** + * We activate "ALL" cipher suites to align with the peer's capabilities, + * ensuring maximum compatibility. + */ + if (SSL_CTX_set_cipher_list(p->ctx, ciphers) != 1) { + av_log(p, AV_LOG_ERROR, "TLS: Init SSL_CTX_set_cipher_list failed, ciphers=%s, %s\n", + ciphers, openssl_get_error(p)); + ret = AVERROR(EINVAL); + return ret; + } + ret = openssl_init_ca_key_cert(h); + if (ret < 0) goto fail; + + /* Server will send Certificate Request. */ + SSL_CTX_set_verify(p->ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_dtls_verify_callback); + /* The depth count is "level 0:peer certificate", "level 1: CA certificate", + * "level 2: higher level CA certificate", and so on. */ + SSL_CTX_set_verify_depth(p->ctx, 4); + /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. */ + SSL_CTX_set_read_ahead(p->ctx, 1); + /* Setup the SRTP context */ + if (SSL_CTX_set_tlsext_use_srtp(p->ctx, profiles)) { + av_log(p, AV_LOG_ERROR, "TLS: Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", + profiles, openssl_get_error(p)); + ret = AVERROR(EINVAL); + return ret; + } + + /* The ssl should not be created unless the ctx has been initialized. */ + p->ssl = SSL_new(p->ctx); + if (!p->ssl) { + ret = AVERROR(ENOMEM); + goto fail; + } + + /* Setup the callback for logging. */ + SSL_set_ex_data(p->ssl, 0, p); + SSL_set_info_callback(p->ssl, openssl_info_callback); + /** + * We have set the MTU to fragment the DTLS packet. It is important to note that the + * packet is split to ensure that each handshake packet is smaller than the MTU. + */ + SSL_set_options(p->ssl, SSL_OP_NO_QUERY_MTU); + SSL_set_mtu(p->ssl, p->tls_shared.mtu); +#if OPENSSL_VERSION_NUMBER >= 0x100010b0L /* OpenSSL 1.0.1k */ + DTLS_set_link_mtu(p->ssl, p->tls_shared.mtu); +#endif + init_bio_method(h); + + if (p->tls_shared.use_external_udp != 1) { + if ((ret = ff_tls_open_underlying(&p->tls_shared, h, url, options)) < 0) { + av_log(p, AV_LOG_ERROR, "Failed to connect %s\n", url); + return ret; + } + } + + /* Setup DTLS as passive, which is server role. */ + c->listen ? SSL_set_accept_state(p->ssl) : SSL_set_connect_state(p->ssl); + + /** + * During initialization, we only need to call SSL_do_handshake once because SSL_read consumes + * the handshake message if the handshake is incomplete. + * To simplify maintenance, we initiate the handshake for both the DTLS server and client after + * sending out the ICE response in the start_active_handshake function. It's worth noting that + * although the DTLS server may receive the ClientHello immediately after sending out the ICE + * response, this shouldn't be an issue as the handshake function is called before any DTLS + * packets are received. + * + * The SSL_do_handshake can't be called if DTLS hasn't prepare for udp. + */ + if (p->tls_shared.use_external_udp != 1) { + ret = dtls_handshake(h); + // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2. 
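        // A negative return from dtls_handshake() means the handshake failed outright rather than
        // merely needing more packets (those cases return 0), so it is mapped to an I/O error here.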
+ if (ret < 0) { + av_log(p, AV_LOG_ERROR, "TLS: Failed to drive SSL context, ret=%d\n", ret); + return AVERROR(EIO); + } + } + + av_log(p, AV_LOG_VERBOSE, "TLS: Setup ok, MTU=%d, fingerprint %s\n", + p->tls_shared.mtu, p->tls_shared.fingerprint); + + ret = 0; +fail: + return ret; +} + +/** + * Cleanup the DTLS context. + */ +static av_cold int dtls_close(URLContext *h) +{ + TLSContext *ctx = h->priv_data; + SSL_free(ctx->ssl); + SSL_CTX_free(ctx->ctx); + av_freep(&ctx->tls_shared.fingerprint); + av_freep(&ctx->tls_shared.cert_buf); + av_freep(&ctx->tls_shared.key_buf); +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_KEY_free(ctx->dtls_eckey); +#endif + return 0; +} + static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **options) { TLSContext *p = h->priv_data; TLSShared *c = &p->tls_shared; - BIO *bio; int ret; #if OPENSSL_VERSION_NUMBER < 0x10100000L @@ -271,52 +1041,26 @@ static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **op // support for the old protocols immediately after creating the context. p->ctx = SSL_CTX_new(c->listen ? SSLv23_server_method() : SSLv23_client_method()); if (!p->ctx) { - av_log(h, AV_LOG_ERROR, "%s\n", ERR_error_string(ERR_get_error(), NULL)); + av_log(h, AV_LOG_ERROR, "%s\n", openssl_get_error(p)); ret = AVERROR(EIO); goto fail; } SSL_CTX_set_options(p->ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3); - if (c->ca_file) { - if (!SSL_CTX_load_verify_locations(p->ctx, c->ca_file, NULL)) - av_log(h, AV_LOG_ERROR, "SSL_CTX_load_verify_locations %s\n", ERR_error_string(ERR_get_error(), NULL)); - } - if (c->cert_file && !SSL_CTX_use_certificate_chain_file(p->ctx, c->cert_file)) { - av_log(h, AV_LOG_ERROR, "Unable to load cert file %s: %s\n", - c->cert_file, ERR_error_string(ERR_get_error(), NULL)); - ret = AVERROR(EIO); - goto fail; - } - if (c->key_file && !SSL_CTX_use_PrivateKey_file(p->ctx, c->key_file, SSL_FILETYPE_PEM)) { - av_log(h, AV_LOG_ERROR, "Unable to load key file %s: %s\n", - c->key_file, ERR_error_string(ERR_get_error(), NULL)); - ret = AVERROR(EIO); - goto fail; - } + ret = openssl_init_ca_key_cert(h); + if (ret < 0) goto fail; // Note, this doesn't check that the peer certificate actually matches // the requested hostname. if (c->verify) SSL_CTX_set_verify(p->ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); p->ssl = SSL_new(p->ctx); if (!p->ssl) { - av_log(h, AV_LOG_ERROR, "%s\n", ERR_error_string(ERR_get_error(), NULL)); + av_log(h, AV_LOG_ERROR, "%s\n", openssl_get_error(p)); ret = AVERROR(EIO); goto fail; } -#if OPENSSL_VERSION_NUMBER >= 0x1010000fL - p->url_bio_method = BIO_meth_new(BIO_TYPE_SOURCE_SINK, "urlprotocol bio"); - BIO_meth_set_write(p->url_bio_method, url_bio_bwrite); - BIO_meth_set_read(p->url_bio_method, url_bio_bread); - BIO_meth_set_puts(p->url_bio_method, url_bio_bputs); - BIO_meth_set_ctrl(p->url_bio_method, url_bio_ctrl); - BIO_meth_set_create(p->url_bio_method, url_bio_create); - BIO_meth_set_destroy(p->url_bio_method, url_bio_destroy); - bio = BIO_new(p->url_bio_method); - BIO_set_data(bio, p); -#else - bio = BIO_new(&url_bio_method); - bio->ptr = p; -#endif - SSL_set_bio(p->ssl, bio, bio); + SSL_set_ex_data(p->ssl, 0, p); + SSL_CTX_set_info_callback(p->ctx, openssl_info_callback); + init_bio_method(h); if (!c->listen && !c->numerichost) SSL_set_tlsext_host_name(p->ssl, c->host); ret = c->listen ? 
SSL_accept(p->ssl) : SSL_connect(p->ssl); @@ -325,7 +1069,7 @@ static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **op ret = AVERROR(EIO); goto fail; } else if (ret < 0) { - ret = print_tls_error(h, ret); + ret = print_ssl_error(h, ret); goto fail; } @@ -338,31 +1082,35 @@ static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **op static int tls_read(URLContext *h, uint8_t *buf, int size) { TLSContext *c = h->priv_data; + URLContext *uc = c->tls_shared.is_dtls ? c->tls_shared.udp + : c->tls_shared.tcp; int ret; // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp - c->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK; - c->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK; + uc->flags &= ~AVIO_FLAG_NONBLOCK; + uc->flags |= h->flags & AVIO_FLAG_NONBLOCK; ret = SSL_read(c->ssl, buf, size); if (ret > 0) return ret; if (ret == 0) return AVERROR_EOF; - return print_tls_error(h, ret); + return print_ssl_error(h, ret); } static int tls_write(URLContext *h, const uint8_t *buf, int size) { TLSContext *c = h->priv_data; + URLContext *uc = c->tls_shared.is_dtls ? c->tls_shared.udp + : c->tls_shared.tcp; int ret; // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp - c->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK; - c->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK; + uc->flags &= ~AVIO_FLAG_NONBLOCK; + uc->flags |= h->flags & AVIO_FLAG_NONBLOCK; ret = SSL_write(c->ssl, buf, size); if (ret > 0) return ret; if (ret == 0) return AVERROR_EOF; - return print_tls_error(h, ret); + return print_ssl_error(h, ret); } static int tls_get_file_handle(URLContext *h) @@ -401,3 +1149,22 @@ const URLProtocol ff_tls_protocol = { .flags = URL_PROTOCOL_FLAG_NETWORK, .priv_data_class = &tls_class, }; + +static const AVClass dtls_class = { + .class_name = "dtls", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const URLProtocol ff_dtls_protocol = { + .name = "dtls", + .url_open2 = dtls_start, + .url_handshake = dtls_handshake, + .url_close = dtls_close, + .url_read = tls_read, + .url_write = tls_write, + .priv_data_size = sizeof(TLSContext), + .flags = URL_PROTOCOL_FLAG_NETWORK, + .priv_data_class = &dtls_class, +}; diff --git a/libavformat/whip.c b/libavformat/whip.c new file mode 100644 index 0000000000000..0671e23635d65 --- /dev/null +++ b/libavformat/whip.c @@ -0,0 +1,1917 @@ +/* + * WebRTC-HTTP ingestion protocol (WHIP) muxer + * Copyright (c) 2023 The FFmpeg Project + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/avcodec.h" +#include "libavcodec/codec_desc.h" +#include "libavcodec/h264.h" +#include "libavcodec/startcode.h" +#include "libavutil/base64.h" +#include "libavutil/bprint.h" +#include "libavutil/crc.h" +#include "libavutil/hmac.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/lfg.h" +#include "libavutil/opt.h" +#include "libavutil/mem.h" +#include "libavutil/random_seed.h" +#include "libavutil/time.h" +#include "avc.h" +#include "nal.h" +#include "avio_internal.h" +#include "http.h" +#include "internal.h" +#include "mux.h" +#include "network.h" +#include "srtp.h" +#include "tls.h" + +/** + * Maximum size limit of a Session Description Protocol (SDP), + * be it an offer or answer. + */ +#define MAX_SDP_SIZE 8192 + +/** + * The size of the Secure Real-time Transport Protocol (SRTP) master key material + * that is exported by Secure Sockets Layer (SSL) after a successful Datagram + * Transport Layer Security (DTLS) handshake. This material consists of a key + * of 16 bytes and a salt of 14 bytes. + */ +#define DTLS_SRTP_KEY_LEN 16 +#define DTLS_SRTP_SALT_LEN 14 + +/** + * The maximum size of the Secure Real-time Transport Protocol (SRTP) HMAC checksum + * and padding that is appended to the end of the packet. To calculate the maximum + * size of the User Datagram Protocol (UDP) packet that can be sent out, subtract + * this size from the `pkt_size`. + */ +#define DTLS_SRTP_CHECKSUM_LEN 16 + +/** + * When sending ICE or DTLS messages, responses are received via UDP. However, the peer + * may not be ready and return EAGAIN, in which case we should wait for a short duration + * and retry reading. + * For instance, if we try to read from UDP and get EAGAIN, we sleep for 5ms and retry. + * This macro is used to limit the total duration in milliseconds (e.g., 50ms), so we + * will try at most 5 times. + * Keep in mind that this macro should have a minimum duration of 5 ms. + */ +#define ICE_DTLS_READ_INTERVAL 50 + +/* The magic cookie for Session Traversal Utilities for NAT (STUN) messages. */ +#define STUN_MAGIC_COOKIE 0x2112A442 + +/** + * The DTLS content type. + * See https://tools.ietf.org/html/rfc2246#section-6.2.1 + * change_cipher_spec(20), alert(21), handshake(22), application_data(23) + */ +#define DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC 20 + +/** + * The DTLS record layer header has a total size of 13 bytes, consisting of + * ContentType (1 byte), ProtocolVersion (2 bytes), Epoch (2 bytes), + * SequenceNumber (6 bytes), and Length (2 bytes). + * See https://datatracker.ietf.org/doc/html/rfc9147#section-4 + */ +#define DTLS_RECORD_LAYER_HEADER_LEN 13 + +/** + * The DTLS version number, which is 0xfeff for DTLS 1.0, or 0xfefd for DTLS 1.2. + * See https://datatracker.ietf.org/doc/html/rfc9147#name-the-dtls-record-layer + */ +#define DTLS_VERSION_10 0xfeff +#define DTLS_VERSION_12 0xfefd + +/** + * Maximum size of the buffer for sending and receiving UDP packets. + * Please note that this size does not limit the size of the UDP packet that can be sent. + * To set the limit for packet size, modify the `pkt_size` parameter. + * For instance, it is possible to set the UDP buffer to 4096 to send or receive packets, + * but please keep in mind that the `pkt_size` option limits the packet size to 1400. 
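 * The usable RTP payload is smaller still, because SRTP protection appends up to
 * DTLS_SRTP_CHECKSUM_LEN bytes; with a pkt_size of 1400 that leaves 1400 - 16 = 1384 bytes per packet.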
+ */
+#define MAX_UDP_BUFFER_SIZE 4096
+
+/* Referring to Chrome's definition of RTP payload types. */
+#define WHIP_RTP_PAYLOAD_TYPE_H264 106
+#define WHIP_RTP_PAYLOAD_TYPE_OPUS 111
+
+/**
+ * The STUN message header, which is 20 bytes long, comprises the
+ * STUNMessageType (2B), MessageLength (2B), MagicCookie (4B),
+ * and TransactionID (12B).
+ * See https://datatracker.ietf.org/doc/html/rfc5389#section-6
+ */
+#define ICE_STUN_HEADER_SIZE 20
+
+/**
+ * The RTP header is 12 bytes long, comprising the Version(1B), PT(1B),
+ * SequenceNumber(2B), Timestamp(4B), and SSRC(4B).
+ * See https://www.rfc-editor.org/rfc/rfc3550#section-5.1
+ */
+#define WHIP_RTP_HEADER_SIZE 12
+
+/**
+ * For RTCP, PT is [128, 223] (or without marker [0, 95]). Literally, RTCP starts
+ * from 64 not 0, so PT is [192, 223] (or without marker [64, 95]), see "RTCP Control
+ * Packet Types (PT)" at
+ * https://www.iana.org/assignments/rtp-parameters/rtp-parameters.xhtml#rtp-parameters-4
+ *
+ * For RTP, the PT is [96, 127], or [224, 255] with marker. See "RTP Payload Types (PT)
+ * for standard audio and video encodings" at
+ * https://www.iana.org/assignments/rtp-parameters/rtp-parameters.xhtml#rtp-parameters-1
+ */
+#define WHIP_RTCP_PT_START 192
+#define WHIP_RTCP_PT_END   223
+
+/**
+ * In the case of ICE-LITE, these fields are not used; instead, they are defined
+ * as constant values.
+ */
+#define WHIP_SDP_SESSION_ID "4489045141692799359"
+#define WHIP_SDP_CREATOR_IP "127.0.0.1"
+
+/* Calculate the elapsed time from starttime to endtime in milliseconds. */
+#define ELAPSED(starttime, endtime) ((int)(endtime - starttime) / 1000)
+
+/* STUN Attribute, comprehension-required range (0x0000-0x7FFF) */
+enum STUNAttr {
+    STUN_ATTR_USERNAME          = 0x0006, /// shared secret response/bind request
+    STUN_ATTR_USE_CANDIDATE     = 0x0025, /// bind request
+    STUN_ATTR_MESSAGE_INTEGRITY = 0x0008, /// bind request/response
+    STUN_ATTR_FINGERPRINT       = 0x8028, /// rfc5389
+};
+
+enum WHIPState {
+    WHIP_STATE_NONE,
+
+    /* The initial state. */
+    WHIP_STATE_INIT,
+    /* The muxer has sent the offer to the peer. */
+    WHIP_STATE_OFFER,
+    /* The muxer has received the answer from the peer. */
+    WHIP_STATE_ANSWER,
+    /**
+     * After parsing the answer received from the peer, the muxer negotiates the abilities
+     * in the offer that it generated.
+     */
+    WHIP_STATE_NEGOTIATED,
+    /* The muxer has connected to the peer via UDP. */
+    WHIP_STATE_UDP_CONNECTED,
+    /* The muxer has sent the ICE request to the peer. */
+    WHIP_STATE_ICE_CONNECTING,
+    /* The muxer has received the ICE response from the peer. */
+    WHIP_STATE_ICE_CONNECTED,
+    /* The muxer starts attempting the DTLS handshake. */
+    WHIP_STATE_DTLS_CONNECTING,
+    /* The muxer has finished the DTLS handshake with the peer. */
+    WHIP_STATE_DTLS_FINISHED,
+    /* The muxer has finished the SRTP setup. */
+    WHIP_STATE_SRTP_FINISHED,
+    /* The muxer is ready to send/receive media frames. */
+    WHIP_STATE_READY,
+    /* The muxer has failed. */
+    WHIP_STATE_FAILED,
+};
+
+typedef struct WHIPContext {
+    AVClass *av_class;
+
+    /* The state of the RTC connection. */
+    enum WHIPState state;
+    /* The callback return value for DTLS. */
+    int dtls_ret;
+    int dtls_closed;
+
+    /* Parameters for the input audio and video codecs. */
+    AVCodecParameters *audio_par;
+    AVCodecParameters *video_par;
+
+    /**
+     * The h264_mp4toannexb Bitstream Filter (BSF) bypasses the AnnexB packet;
+     * therefore, it is essential to insert the SPS and PPS before each IDR frame
+     * in such cases.
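+     *
+     * This flag is set in whip_check_bitstream() below when the incoming packets
+     * are already in Annex B form (i.e. start with a 00 00 01 start code), and it
+     * is acted upon for every IDR packet in whip_write_packet().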
+ */ + int h264_annexb_insert_sps_pps; + + /* The random number generator. */ + AVLFG rnd; + + /* The ICE username and pwd fragment generated by the muxer. */ + char ice_ufrag_local[9]; + char ice_pwd_local[33]; + /* The SSRC of the audio and video stream, generated by the muxer. */ + uint32_t audio_ssrc; + uint32_t video_ssrc; + /* The PT(Payload Type) of stream, generated by the muxer. */ + uint8_t audio_payload_type; + uint8_t video_payload_type; + /** + * This is the SDP offer generated by the muxer based on the codec parameters, + * DTLS, and ICE information. + */ + char *sdp_offer; + + /* The ICE username and pwd from remote server. */ + char *ice_ufrag_remote; + char *ice_pwd_remote; + /** + * This represents the ICE candidate protocol, priority, host and port. + * Currently, we only support one candidate and choose the first UDP candidate. + * However, we plan to support multiple candidates in the future. + */ + char *ice_protocol; + char *ice_host; + int ice_port; + + /* The SDP answer received from the WebRTC server. */ + char *sdp_answer; + /* The resource URL returned in the Location header of WHIP HTTP response. */ + char *whip_resource_url; + + /* These variables represent timestamps used for calculating and tracking the cost. */ + int64_t whip_starttime; + int64_t whip_init_time; + int64_t whip_offer_time; + int64_t whip_answer_time; + int64_t whip_udp_time; + int64_t whip_ice_time; + int64_t whip_dtls_time; + int64_t whip_srtp_time; + + /* The certificate and private key content used for DTLS hanshake */ + char cert_buf[MAX_CERTIFICATE_SIZE]; + char key_buf[MAX_CERTIFICATE_SIZE]; + /* The fingerprint of certificate, used in SDP offer. */ + char *dtls_fingerprint; + /** + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ + uint8_t dtls_srtp_materials[(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN) * 2]; + + char ssl_error_message[256]; + + /* TODO: Use AVIOContext instead of URLContext */ + URLContext *dtls_uc; + + /* The SRTP send context, to encrypt outgoing packets. */ + SRTPContext srtp_audio_send; + SRTPContext srtp_video_send; + SRTPContext srtp_rtcp_send; + /* The SRTP receive context, to decrypt incoming packets. */ + SRTPContext srtp_recv; + + /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ + URLContext *udp; + /* The buffer for UDP transmission. */ + char buf[MAX_UDP_BUFFER_SIZE]; + + /* The timeout in milliseconds for ICE and DTLS handshake. */ + int handshake_timeout; + /** + * The size of RTP packet, should generally be set to MTU. + * Note that pion requires a smaller value, for example, 1200. + */ + int pkt_size; + /** + * The optional Bearer token for WHIP Authorization. + * See https://www.ietf.org/archive/id/draft-ietf-wish-whip-08.html#name-authentication-and-authoriz + */ + char* authorization; + /* The certificate and private key used for DTLS handshake. */ + char* cert_file; + char* key_file; +} WHIPContext; + +/** + * Whether the packet is a DTLS packet. 
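+ *
+ * For illustration, the first bytes of a DTLS 1.2 handshake record look like
+ * 0x16 0xfe 0xfd ..., i.e. ContentType 22 (handshake) followed by the
+ * ProtocolVersion DTLS_VERSION_12 (0xfefd) that is checked below.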
+ */ +static int is_dtls_packet(uint8_t *b, int size) { + uint16_t version = AV_RB16(&b[1]); + return size > DTLS_RECORD_LAYER_HEADER_LEN && + b[0] >= DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC && + (version == DTLS_VERSION_10 || version == DTLS_VERSION_12); +} + + +/** + * Get or Generate a self-signed certificate and private key for DTLS, + * fingerprint for SDP + */ +static av_cold int certificate_key_init(AVFormatContext *s) +{ + int ret = 0; + WHIPContext *whip = s->priv_data; + + if (whip->cert_file && whip->key_file) { + /* Read the private key and certificate from the file. */ + if ((ret = ff_ssl_read_key_cert(whip->key_file, whip->cert_file, + whip->key_buf, sizeof(whip->key_buf), + whip->cert_buf, sizeof(whip->cert_buf), + &whip->dtls_fingerprint)) < 0) { + av_log(s, AV_LOG_ERROR, "DTLS: Failed to read DTLS certificate from cert=%s, key=%s\n", + whip->cert_file, whip->key_file); + return ret; + } + } else { + /* Generate a private key to ctx->dtls_pkey and self-signed certificate. */ + if ((ret = ff_ssl_gen_key_cert(whip->key_buf, sizeof(whip->key_buf), + whip->cert_buf, sizeof(whip->cert_buf), + &whip->dtls_fingerprint)) < 0) { + av_log(s, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key and certificate\n"); + return ret; + } + } + + return ret; +} + +/** + * When DTLS state change. + */ +static int dtls_context_on_state(AVFormatContext *s, const char* type, const char* desc) +{ + int ret = 0; + WHIPContext *whip = s->priv_data; + int state = ff_dtls_state(whip->dtls_uc); + + if (state == DTLS_STATE_CLOSED) { + whip->dtls_closed = 1; + av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", + type ? type : "", desc ? desc : "", ELAPSED(whip->whip_starttime, av_gettime())); + goto error; + } + + if (state == DTLS_STATE_FAILED) { + whip->state = WHIP_STATE_FAILED; + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", + type ? type : "", desc ? desc : ""); + whip->dtls_ret = AVERROR(EIO); + goto error; + } + + if (state == DTLS_STATE_FINISHED && whip->state < WHIP_STATE_DTLS_FINISHED) { + whip->state = WHIP_STATE_DTLS_FINISHED; + whip->whip_dtls_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS handshake is done, elapsed=%dms\n", + ELAPSED(whip->whip_starttime, av_gettime())); + return ret; + } +error: + return -1; +} + +static av_cold int dtls_initialize(AVFormatContext *s) +{ + WHIPContext *whip = s->priv_data; + /* reuse the udp created by whip */ + ff_dtls_set_udp(whip->dtls_uc, whip->udp); + return 0; +} + +/** + * Initialize and check the options for the WebRTC muxer. + */ +static av_cold int initialize(AVFormatContext *s) +{ + int ret, ideal_pkt_size = 532; + WHIPContext *whip = s->priv_data; + uint32_t seed; + + whip->whip_starttime = av_gettime(); + + ret = certificate_key_init(s); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to init certificate and key\n"); + return ret; + } + + /* Initialize the random number generator. 
*/ + seed = av_get_random_seed(); + av_lfg_init(&whip->rnd, seed); + + if (whip->pkt_size < ideal_pkt_size) + av_log(whip, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", + whip->pkt_size, ideal_pkt_size); + + if (whip->state < WHIP_STATE_INIT) + whip->state = WHIP_STATE_INIT; + whip->whip_init_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, seed=%d, elapsed=%dms\n", + whip->state, whip->handshake_timeout, whip->pkt_size, seed, ELAPSED(whip->whip_starttime, av_gettime())); + + return 0; +} + +/** + * When duplicating a stream, the demuxer has already set the extradata, profile, and + * level of the par. Keep in mind that this function will not be invoked since the + * profile and level are set. + * + * When utilizing an encoder, such as libx264, to encode a stream, the extradata in + * par->extradata contains the SPS, which includes profile and level information. + * However, the profile and level of par remain unspecified. Therefore, it is necessary + * to extract the profile and level data from the extradata and assign it to the par's + * profile and level. Keep in mind that AVFMT_GLOBALHEADER must be enabled; otherwise, + * the extradata will remain empty. + */ +static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) +{ + int ret = 0; + const uint8_t *r = par->extradata, *r1, *end = par->extradata + par->extradata_size; + H264SPS seq, *const sps = &seq; + uint32_t state; + WHIPContext *whip = s->priv_data; + + if (par->codec_id != AV_CODEC_ID_H264) + return ret; + + if (par->profile != AV_PROFILE_UNKNOWN && par->level != AV_LEVEL_UNKNOWN) + return ret; + + if (!par->extradata || par->extradata_size <= 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unable to parse profile from empty extradata=%p, size=%d\n", + par->extradata, par->extradata_size); + return AVERROR(EINVAL); + } + + while (1) { + r = avpriv_find_start_code(r, end, &state); + if (r >= end) + break; + + r1 = ff_nal_find_startcode(r, end); + if ((state & 0x1f) == H264_NAL_SPS) { + ret = ff_avc_decode_sps(sps, r, r1 - r); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to decode SPS, state=%x, size=%d\n", + state, (int)(r1 - r)); + return ret; + } + + av_log(whip, AV_LOG_VERBOSE, "WHIP: Parse profile=%d, level=%d from SPS\n", + sps->profile_idc, sps->level_idc); + par->profile = sps->profile_idc; + par->level = sps->level_idc; + } + + r = r1; + } + + return ret; +} + +/** + * Parses video SPS/PPS from the extradata of codecpar and checks the codec. + * Currently only supports video(h264) and audio(opus). Note that only baseline + * and constrained baseline profiles of h264 are supported. + * + * If the profile is less than 0, the function considers the profile as baseline. + * It may need to parse the profile from SPS/PPS. This situation occurs when ingesting + * desktop and transcoding. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx in case of an error. + * + * TODO: FIXME: There is an issue with the timestamp of OPUS audio, especially when + * the input is an MP4 file. The timestamp deviates from the expected value of 960, + * causing Chrome to play the audio stream with noise. This problem can be replicated + * by transcoding a specific file into MP4 format and publishing it using the WHIP + * muxer. However, when directly transcoding and publishing through the WHIP muxer, + * the issue is not present, and the audio timestamp remains consistent. 
The root + * cause is still unknown, and this comment has been added to address this issue + * in the future. Further research is needed to resolve the problem. + */ +static int parse_codec(AVFormatContext *s) +{ + int i, ret = 0; + WHIPContext *whip = s->priv_data; + + for (i = 0; i < s->nb_streams; i++) { + AVCodecParameters *par = s->streams[i]->codecpar; + const AVCodecDescriptor *desc = avcodec_descriptor_get(par->codec_id); + switch (par->codec_type) { + case AVMEDIA_TYPE_VIDEO: + if (whip->video_par) { + av_log(whip, AV_LOG_ERROR, "WHIP: Only one video stream is supported by RTC\n"); + return AVERROR(EINVAL); + } + whip->video_par = par; + + if (par->codec_id != AV_CODEC_ID_H264) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported video codec %s by RTC, choose h264\n", + desc ? desc->name : "unknown"); + return AVERROR_PATCHWELCOME; + } + + if (par->video_delay > 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported B frames by RTC\n"); + return AVERROR_PATCHWELCOME; + } + + if ((ret = parse_profile_level(s, par)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to parse SPS/PPS from extradata\n"); + return AVERROR(EINVAL); + } + + if (par->profile == AV_PROFILE_UNKNOWN) { + av_log(whip, AV_LOG_WARNING, "WHIP: No profile found in extradata, consider baseline\n"); + return AVERROR(EINVAL); + } + if (par->level == AV_LEVEL_UNKNOWN) { + av_log(whip, AV_LOG_WARNING, "WHIP: No level found in extradata, consider 3.1\n"); + return AVERROR(EINVAL); + } + break; + case AVMEDIA_TYPE_AUDIO: + if (whip->audio_par) { + av_log(whip, AV_LOG_ERROR, "WHIP: Only one audio stream is supported by RTC\n"); + return AVERROR(EINVAL); + } + whip->audio_par = par; + + if (par->codec_id != AV_CODEC_ID_OPUS) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio codec %s by RTC, choose opus\n", + desc ? desc->name : "unknown"); + return AVERROR_PATCHWELCOME; + } + + if (par->ch_layout.nb_channels != 2) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio channels %d by RTC, choose stereo\n", + par->ch_layout.nb_channels); + return AVERROR_PATCHWELCOME; + } + + if (par->sample_rate != 48000) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); + return AVERROR_PATCHWELCOME; + } + break; + default: + av_log(whip, AV_LOG_ERROR, "WHIP: Codec type '%s' for stream %d is not supported by RTC\n", + av_get_media_type_string(par->codec_type), i); + return AVERROR_PATCHWELCOME; + } + } + + return ret; +} + +/** + * Generate SDP offer according to the codec parameters, DTLS and ICE information. + * + * Note that we don't use av_sdp_create to generate SDP offer because it doesn't + * support DTLS and ICE information. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int generate_sdp_offer(AVFormatContext *s) +{ + int ret = 0, profile, level, profile_iop; + const char *acodec_name = NULL, *vcodec_name = NULL; + AVBPrint bp; + WHIPContext *whip = s->priv_data; + + /* To prevent a crash during cleanup, always initialize it. 
*/ + av_bprint_init(&bp, 1, MAX_SDP_SIZE); + + if (whip->sdp_offer) { + av_log(whip, AV_LOG_ERROR, "WHIP: SDP offer is already set\n"); + ret = AVERROR(EINVAL); + goto end; + } + + snprintf(whip->ice_ufrag_local, sizeof(whip->ice_ufrag_local), "%08x", + av_lfg_get(&whip->rnd)); + snprintf(whip->ice_pwd_local, sizeof(whip->ice_pwd_local), "%08x%08x%08x%08x", + av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), + av_lfg_get(&whip->rnd)); + + whip->audio_ssrc = av_lfg_get(&whip->rnd); + whip->video_ssrc = av_lfg_get(&whip->rnd); + + whip->audio_payload_type = WHIP_RTP_PAYLOAD_TYPE_OPUS; + whip->video_payload_type = WHIP_RTP_PAYLOAD_TYPE_H264; + + av_bprintf(&bp, "" + "v=0\r\n" + "o=FFmpeg %s 2 IN IP4 %s\r\n" + "s=FFmpegPublishSession\r\n" + "t=0 0\r\n" + "a=group:BUNDLE 0 1\r\n" + "a=extmap-allow-mixed\r\n" + "a=msid-semantic: WMS\r\n", + WHIP_SDP_SESSION_ID, + WHIP_SDP_CREATOR_IP); + + if (whip->audio_par) { + if (whip->audio_par->codec_id == AV_CODEC_ID_OPUS) + acodec_name = "opus"; + + av_bprintf(&bp, "" + "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 %s\r\n" + "a=setup:passive\r\n" + "a=mid:0\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg audio\r\n" + "a=rtcp-mux\r\n" + "a=rtpmap:%u %s/%d/%d\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg audio\r\n", + whip->audio_payload_type, + whip->ice_ufrag_local, + whip->ice_pwd_local, + whip->dtls_fingerprint, + whip->audio_payload_type, + acodec_name, + whip->audio_par->sample_rate, + whip->audio_par->ch_layout.nb_channels, + whip->audio_ssrc, + whip->audio_ssrc); + } + + if (whip->video_par) { + profile_iop = profile = whip->video_par->profile; + level = whip->video_par->level; + if (whip->video_par->codec_id == AV_CODEC_ID_H264) { + vcodec_name = "H264"; + profile_iop &= AV_PROFILE_H264_CONSTRAINED; + profile &= (~AV_PROFILE_H264_CONSTRAINED); + } + + av_bprintf(&bp, "" + "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 %s\r\n" + "a=setup:passive\r\n" + "a=mid:1\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg video\r\n" + "a=rtcp-mux\r\n" + "a=rtcp-rsize\r\n" + "a=rtpmap:%u %s/90000\r\n" + "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg video\r\n", + whip->video_payload_type, + whip->ice_ufrag_local, + whip->ice_pwd_local, + whip->dtls_fingerprint, + whip->video_payload_type, + vcodec_name, + whip->video_payload_type, + profile, + profile_iop, + level, + whip->video_ssrc, + whip->video_ssrc); + } + + if (!av_bprint_is_complete(&bp)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Offer exceed max %d, %s\n", MAX_SDP_SIZE, bp.str); + ret = AVERROR(EIO); + goto end; + } + + whip->sdp_offer = av_strdup(bp.str); + if (!whip->sdp_offer) { + ret = AVERROR(ENOMEM); + goto end; + } + + if (whip->state < WHIP_STATE_OFFER) + whip->state = WHIP_STATE_OFFER; + whip->whip_offer_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", whip->state, whip->sdp_offer); + +end: + av_bprint_finalize(&bp, NULL); + return ret; +} + +/** + * Exchange SDP offer with WebRTC peer to get the answer. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int exchange_sdp(AVFormatContext *s) +{ + int ret; + char buf[MAX_URL_SIZE]; + AVBPrint bp; + WHIPContext *whip = s->priv_data; + /* The URL context is an HTTP transport layer for the WHIP protocol. 
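+     * In rough outline: the SDP offer is POSTed with Content-Type
+     * "application/sdp", the response body is read back as the SDP answer,
+     * and the Location header (if any) is remembered as the WHIP resource
+     * that the final DELETE request will be sent to.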
*/ + URLContext *whip_uc = NULL; + AVDictionary *opts = NULL; + char *hex_data = NULL; + + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&bp, 1, MAX_SDP_SIZE); + + if (!whip->sdp_offer || !strlen(whip->sdp_offer)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No offer to exchange\n"); + ret = AVERROR(EINVAL); + goto end; + } + + ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\nContent-Type: application/sdp\r\n"); + if (whip->authorization) + ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization); + if (ret <= 0 || ret >= sizeof(buf)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf); + ret = AVERROR(EINVAL); + goto end; + } + + av_dict_set(&opts, "headers", buf, 0); + av_dict_set_int(&opts, "chunked_post", 0, 0); + + hex_data = av_mallocz(2 * strlen(whip->sdp_offer) + 1); + if (!hex_data) { + ret = AVERROR(ENOMEM); + goto end; + } + ff_data_to_hex(hex_data, whip->sdp_offer, strlen(whip->sdp_offer), 0); + av_dict_set(&opts, "post_data", hex_data, 0); + + ret = ffurl_open_whitelist(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to request url=%s, offer: %s\n", s->url, whip->sdp_offer); + goto end; + } + + if (ff_http_get_new_location(whip_uc)) { + whip->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); + if (!whip->whip_resource_url) { + ret = AVERROR(ENOMEM); + goto end; + } + } + + while (1) { + ret = ffurl_read(whip_uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* Reset the error because we read all response as answer util EOF. */ + ret = 0; + break; + } + if (ret <= 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from url=%s, offer is %s, answer is %s\n", + s->url, whip->sdp_offer, whip->sdp_answer); + goto end; + } + + av_bprintf(&bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(&bp)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Answer exceed max size %d, %.*s, %s\n", MAX_SDP_SIZE, ret, buf, bp.str); + ret = AVERROR(EIO); + goto end; + } + } + + if (!av_strstart(bp.str, "v=", NULL)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid answer: %s\n", bp.str); + ret = AVERROR(EINVAL); + goto end; + } + + whip->sdp_answer = av_strdup(bp.str); + if (!whip->sdp_answer) { + ret = AVERROR(ENOMEM); + goto end; + } + + if (whip->state < WHIP_STATE_ANSWER) + whip->state = WHIP_STATE_ANSWER; + av_log(whip, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", whip->state, whip->sdp_answer); + +end: + ffurl_closep(&whip_uc); + av_bprint_finalize(&bp, NULL); + av_dict_free(&opts); + av_freep(&hex_data); + return ret; +} + +/** + * Parses the ICE ufrag, pwd, and candidates from the SDP answer. + * + * This function is used to extract the ICE ufrag, pwd, and candidates from the SDP answer. + * It returns an error if any of these fields is NULL. The function only uses the first + * candidate if there are multiple candidates. However, support for multiple candidates + * will be added in the future. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx if an error occurs. 
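+ *
+ * For illustration only, the answer lines of interest typically look like
+ * (hypothetical values; only the first UDP host candidate is used):
+ *   a=ice-ufrag:6Sdq
+ *   a=ice-pwd:y2leKTHBam0Rm9yJyzfsGvGw
+ *   a=candidate:1 1 udp 2130706431 192.0.2.10 8000 typ host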
+ */ +static int parse_answer(AVFormatContext *s) +{ + int ret = 0; + AVIOContext *pb; + char line[MAX_URL_SIZE]; + const char *ptr; + int i; + WHIPContext *whip = s->priv_data; + + if (!whip->sdp_answer || !strlen(whip->sdp_answer)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No answer to parse\n"); + ret = AVERROR(EINVAL); + goto end; + } + + pb = avio_alloc_context(whip->sdp_answer, strlen(whip->sdp_answer), 0, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + for (i = 0; !avio_feof(pb); i++) { + ff_get_chomp_line(pb, line, sizeof(line)); + if (av_strstart(line, "a=ice-ufrag:", &ptr) && !whip->ice_ufrag_remote) { + whip->ice_ufrag_remote = av_strdup(ptr); + if (!whip->ice_ufrag_remote) { + ret = AVERROR(ENOMEM); + goto end; + } + } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !whip->ice_pwd_remote) { + whip->ice_pwd_remote = av_strdup(ptr); + if (!whip->ice_pwd_remote) { + ret = AVERROR(ENOMEM); + goto end; + } + } else if (av_strstart(line, "a=candidate:", &ptr) && !whip->ice_protocol) { + ptr = av_stristr(ptr, "udp"); + if (ptr && av_stristr(ptr, "host")) { + char protocol[17], host[129]; + int priority, port; + ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); + if (ret != 4) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed %d to parse line %d %s from %s\n", + ret, i, line, whip->sdp_answer); + ret = AVERROR(EIO); + goto end; + } + + if (av_strcasecmp(protocol, "udp")) { + av_log(whip, AV_LOG_ERROR, "WHIP: Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", + protocol, i, line, whip->sdp_answer); + ret = AVERROR(EIO); + goto end; + } + + whip->ice_protocol = av_strdup(protocol); + whip->ice_host = av_strdup(host); + whip->ice_port = port; + if (!whip->ice_protocol || !whip->ice_host) { + ret = AVERROR(ENOMEM); + goto end; + } + } + } + } + + if (!whip->ice_pwd_remote || !strlen(whip->ice_pwd_remote)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice pwd parsed from %s\n", whip->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } + + if (!whip->ice_ufrag_remote || !strlen(whip->ice_ufrag_remote)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice ufrag parsed from %s\n", whip->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } + + if (!whip->ice_protocol || !whip->ice_host || !whip->ice_port) { + av_log(whip, AV_LOG_ERROR, "WHIP: No ice candidate parsed from %s\n", whip->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } + + if (whip->state < WHIP_STATE_NEGOTIATED) + whip->state = WHIP_STATE_NEGOTIATED; + whip->whip_answer_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", + whip->state, strlen(whip->sdp_offer), strlen(whip->sdp_answer), whip->ice_ufrag_remote, strlen(whip->ice_pwd_remote), + whip->ice_protocol, whip->ice_host, whip->ice_port, ELAPSED(whip->whip_starttime, av_gettime())); + +end: + avio_context_free(&pb); + return ret; +} + +/** + * Creates and marshals an ICE binding request packet. + * + * This function creates and marshals an ICE binding request packet. The function only + * generates the username attribute and does not include goog-network-info, ice-controlling, + * use-candidate, and priority. However, some of these attributes may be added in the future. 
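+ *
+ * The request written below is laid out roughly as: a 20-byte STUN header,
+ * USERNAME ("remote-ufrag:local-ufrag"), an empty USE-CANDIDATE attribute,
+ * MESSAGE-INTEGRITY (HMAC-SHA1 keyed with the remote ICE password) and a
+ * trailing FINGERPRINT (CRC-32 of the message XORed with 0x5354554e).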
+ * + * @param s Pointer to the AVFormatContext + * @param buf Pointer to memory buffer to store the request packet + * @param buf_size Size of the memory buffer + * @param request_size Pointer to an integer that receives the size of the request packet + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. + */ +static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, int *request_size) +{ + int ret, size, crc32; + char username[128]; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + WHIPContext *whip = s->priv_data; + + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + ret = AVERROR(ENOMEM); + goto end; + } + + /* Write 20 bytes header */ + avio_wb16(pb, 0x0001); /* STUN binding request */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + + /* The username is the concatenation of the two ICE ufrag */ + ret = snprintf(username, sizeof(username), "%s:%s", whip->ice_ufrag_remote, whip->ice_ufrag_local); + if (ret <= 0 || ret >= sizeof(username)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to build username %s:%s, max=%lu, ret=%d\n", + whip->ice_ufrag_remote, whip->ice_ufrag_local, sizeof(username), ret); + ret = AVERROR(EIO); + goto end; + } + + /* Write the username attribute */ + avio_wb16(pb, STUN_ATTR_USERNAME); /* attribute type username */ + avio_wb16(pb, ret); /* size of username */ + avio_write(pb, username, ret); /* bytes of username */ + ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ + + /* Write the use-candidate attribute */ + avio_wb16(pb, STUN_ATTR_USE_CANDIDATE); /* attribute type use-candidate */ + avio_wb16(pb, 0); /* size of use-candidate */ + + /* Build and update message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + av_hmac_init(hmac, whip->ice_pwd_remote, strlen(whip->ice_pwd_remote)); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); + + /* Write the fingerprint attribute */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + + *request_size = size; + +end: + avio_context_free(&pb); + av_hmac_free(hmac); + return ret; +} + +/** + * Create an ICE binding response. + * + * This function generates an ICE binding response and writes it to the provided + * buffer. The response is signed using the local password for message integrity. + * + * @param s Pointer to the AVFormatContext structure. + * @param tid Pointer to the transaction ID of the binding request. The tid_size should be 12. 
+ * @param tid_size The size of the transaction ID, should be 12. + * @param buf Pointer to the buffer where the response will be written. + * @param buf_size The size of the buffer provided for the response. + * @param response_size Pointer to an integer that will store the size of the generated response. + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. + */ +static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) +{ + int ret = 0, size, crc32; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + WHIPContext *whip = s->priv_data; + + if (tid_size != 12) { + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid transaction ID size. Expected 12, got %d\n", tid_size); + return AVERROR(EINVAL); + } + + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + ret = AVERROR(ENOMEM); + goto end; + } + + /* Write 20 bytes header */ + avio_wb16(pb, 0x0101); /* STUN binding response */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ + avio_write(pb, tid, tid_size); /* transaction ID */ + + /* Build and update message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + av_hmac_init(hmac, whip->ice_pwd_local, strlen(whip->ice_pwd_local)); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); + + /* Write the fingerprint attribute */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + + *response_size = size; + +end: + avio_context_free(&pb); + av_hmac_free(hmac); + return ret; +} + +/** + * A Binding request has class=0b00 (request) and method=0b000000000001 (Binding) + * and is encoded into the first 16 bits as 0x0001. + * See https://datatracker.ietf.org/doc/html/rfc5389#section-6 + */ +static int ice_is_binding_request(uint8_t *b, int size) +{ + return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0001; +} + +/** + * A Binding response has class=0b10 (success response) and method=0b000000000001, + * and is encoded into the first 16 bits as 0x0101. + */ +static int ice_is_binding_response(uint8_t *b, int size) +{ + return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0101; +} + +/** + * In RTP packets, the first byte is represented as 0b10xxxxxx, where the initial + * two bits (0b10) indicate the RTP version, + * see https://www.rfc-editor.org/rfc/rfc3550#section-5.1 + * The RTCP packet header is similar to RTP, + * see https://www.rfc-editor.org/rfc/rfc3550#section-6.4.1 + */ +static int media_is_rtp_rtcp(uint8_t *b, int size) +{ + return size >= WHIP_RTP_HEADER_SIZE && (b[0] & 0xC0) == 0x80; +} + +/* Whether the packet is RTCP. 
*/ +static int media_is_rtcp(uint8_t *b, int size) +{ + return size >= WHIP_RTP_HEADER_SIZE && b[1] >= WHIP_RTCP_PT_START && b[1] <= WHIP_RTCP_PT_END; +} + +/** + * This function handles incoming binding request messages by responding to them. + * If the message is not a binding request, it will be ignored. + */ +static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) +{ + int ret = 0, size; + char tid[12]; + WHIPContext *whip = s->priv_data; + + /* Ignore if not a binding request. */ + if (!ice_is_binding_request(buf, buf_size)) + return ret; + + if (buf_size < ICE_STUN_HEADER_SIZE) { + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid STUN message, expected at least %d, got %d\n", + ICE_STUN_HEADER_SIZE, buf_size); + return AVERROR(EINVAL); + } + + /* Parse transaction id from binding request in buf. */ + memcpy(tid, buf + 8, 12); + + /* Build the STUN binding response. */ + ret = ice_create_response(s, tid, sizeof(tid), whip->buf, sizeof(whip->buf), &size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding response, size=%d\n", size); + return ret; + } + + ret = ffurl_write(whip->udp, whip->buf, size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding response, size=%d\n", size); + return ret; + } + + return 0; +} + +/** + * To establish a connection with the UDP server, we utilize ICE-LITE in a Client-Server + * mode. In this setup, FFmpeg acts as the UDP client, while the peer functions as the + * UDP server. + */ +static int udp_connect(AVFormatContext *s) +{ + int ret = 0; + char url[256]; + AVDictionary *opts = NULL; + WHIPContext *whip = s->priv_data; + + /* Build UDP URL and create the UDP context as transport. */ + ff_url_join(url, sizeof(url), "udp", NULL, whip->ice_host, whip->ice_port, NULL); + + av_dict_set_int(&opts, "connect", 1, 0); + av_dict_set_int(&opts, "fifo_size", 0, 0); + /* Set the max packet size to the buffer size. */ + av_dict_set_int(&opts, "pkt_size", whip->pkt_size, 0); + + ret = ffurl_open_whitelist(&whip->udp, url, AVIO_FLAG_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", whip->ice_host, whip->ice_port); + goto end; + } + + /* Make the socket non-blocking, set to READ and WRITE mode after connected */ + ff_socket_nonblock(ffurl_get_file_handle(whip->udp), 1); + whip->udp->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; + + if (whip->state < WHIP_STATE_UDP_CONNECTED) + whip->state = WHIP_STATE_UDP_CONNECTED; + whip->whip_udp_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", + whip->state, ELAPSED(whip->whip_starttime, av_gettime()), whip->ice_host, whip->ice_port); + +end: + av_dict_free(&opts); + return ret; +} + +static int ice_dtls_handshake(AVFormatContext *s) +{ + int ret = 0, size, i; + int64_t starttime = av_gettime(), now; + WHIPContext *whip = s->priv_data; + AVDictionary *opts = NULL; + char str[8]; + char buf[256], *cert_buf = NULL, *key_buf = NULL; + + if (whip->state < WHIP_STATE_UDP_CONNECTED || !whip->udp) { + av_log(whip, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, udp=%p\n", whip->state, whip->udp); + return AVERROR(EINVAL); + } + + while (1) { + if (whip->state <= WHIP_STATE_ICE_CONNECTING) { + /* Build the STUN binding request. 
*/ + ret = ice_create_request(s, whip->buf, sizeof(whip->buf), &size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding request, size=%d\n", size); + goto end; + } + + ret = ffurl_write(whip->udp, whip->buf, size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding request, size=%d\n", size); + goto end; + } + + if (whip->state < WHIP_STATE_ICE_CONNECTING) + whip->state = WHIP_STATE_ICE_CONNECTING; + } + +next_packet: + if (whip->state >= WHIP_STATE_DTLS_FINISHED) + /* DTLS handshake is done, exit the loop. */ + break; + + now = av_gettime(); + if (now - starttime >= whip->handshake_timeout * 1000) { + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", + whip->handshake_timeout, ELAPSED(starttime, now), ELAPSED(whip->whip_starttime, now), whip->state); + ret = AVERROR(ETIMEDOUT); + goto end; + } + + /* Read the STUN or DTLS messages from peer. */ + for (i = 0; i < ICE_DTLS_READ_INTERVAL / 5 && whip->state < WHIP_STATE_DTLS_CONNECTING; i++) { + ret = ffurl_read(whip->udp, whip->buf, sizeof(whip->buf)); + if (ret > 0) + break; + if (ret == AVERROR(EAGAIN)) { + av_usleep(5 * 1000); + continue; + } + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read message\n"); + goto end; + } + + /* Got nothing, continue to process handshake. */ + if (ret <= 0 && whip->state < WHIP_STATE_DTLS_CONNECTING) + continue; + + /* Handle the ICE binding response. */ + if (ice_is_binding_response(whip->buf, ret)) { + if (whip->state < WHIP_STATE_ICE_CONNECTED) { + whip->state = WHIP_STATE_ICE_CONNECTED; + whip->whip_ice_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", + whip->state, whip->ice_host, whip->ice_port, whip->whip_resource_url ? whip->whip_resource_url : "", + whip->ice_ufrag_remote, whip->ice_ufrag_local, ret, ELAPSED(whip->whip_starttime, av_gettime())); + + ff_url_join(buf, sizeof(buf), "dtls", NULL, whip->ice_host, whip->ice_port, NULL); + snprintf(str, sizeof(str), "%d", whip->pkt_size); + av_dict_set(&opts, "mtu", str, 0); + if (whip->cert_file) { + av_dict_set(&opts, "cert_file", whip->cert_file, 0); + } else + av_dict_set(&opts, "cert_buf", whip->cert_buf, 0); + + if (whip->key_file) { + av_dict_set(&opts, "key_file", whip->key_file, 0); + } else + av_dict_set(&opts, "key_buf", whip->key_buf, 0); + + av_dict_set(&opts, "fingerprint", whip->dtls_fingerprint, 0); + av_dict_set(&opts, "use_external_udp", "1", 0); + av_dict_set(&opts, "listen", "1", 0); + /* If got the first binding response, start DTLS handshake. */ + ret = ffurl_open_whitelist(&whip->dtls_uc, buf, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) + goto end; + dtls_initialize(s); + } + goto next_packet; + } + + /* When a binding request is received, it is necessary to respond immediately. */ + if (ice_is_binding_request(whip->buf, ret)) { + if ((ret = ice_handle_binding_request(s, whip->buf, ret)) < 0) + goto end; + goto next_packet; + } + + /* If got any DTLS messages, handle it. 
*/ + if (is_dtls_packet(whip->buf, ret) && whip->state >= WHIP_STATE_ICE_CONNECTED || whip->state == WHIP_STATE_DTLS_CONNECTING) { + whip->state = WHIP_STATE_DTLS_CONNECTING; + if ((ret = ffurl_handshake(whip->dtls_uc)) < 0) + goto end; + dtls_context_on_state(s, NULL, NULL); + goto next_packet; + } + } + +end: + if (cert_buf) + av_free(cert_buf); + if (key_buf) + av_free(key_buf); + return ret; +} + +/** + * Establish the SRTP context using the keying material exported from DTLS. + * + * Create separate SRTP contexts for sending video and audio, as their sequences differ + * and should not share a single context. Generate a single SRTP context for receiving + * RTCP only. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int setup_srtp(AVFormatContext *s) +{ + int ret; + char recv_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; + char send_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; + char buf[AV_BASE64_SIZE(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN)]; + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. + */ + const char* suite = "SRTP_AES128_CM_HMAC_SHA1_80"; + WHIPContext *whip = s->priv_data; + ret = ff_dtls_export_materials(whip->dtls_uc, whip->dtls_srtp_materials, sizeof(whip->dtls_srtp_materials)); + if (ret < 0) + goto end; + /** + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ + char *client_key = whip->dtls_srtp_materials; + char *server_key = whip->dtls_srtp_materials + DTLS_SRTP_KEY_LEN; + char *client_salt = server_key + DTLS_SRTP_KEY_LEN; + char *server_salt = client_salt + DTLS_SRTP_SALT_LEN; + + /* As DTLS server, the recv key is client master key plus salt. */ + memcpy(recv_key, client_key, DTLS_SRTP_KEY_LEN); + memcpy(recv_key + DTLS_SRTP_KEY_LEN, client_salt, DTLS_SRTP_SALT_LEN); + + /* As DTLS server, the send key is server master key plus salt. 
*/ + memcpy(send_key, server_key, DTLS_SRTP_KEY_LEN); + memcpy(send_key + DTLS_SRTP_KEY_LEN, server_salt, DTLS_SRTP_SALT_LEN); + + /* Setup SRTP context for outgoing packets */ + if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode send key\n"); + ret = AVERROR(EIO); + goto end; + } + + ret = ff_srtp_set_crypto(&whip->srtp_audio_send, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for audio send\n"); + goto end; + } + + ret = ff_srtp_set_crypto(&whip->srtp_video_send, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for video send\n"); + goto end; + } + + ret = ff_srtp_set_crypto(&whip->srtp_rtcp_send, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); + goto end; + } + + /* Setup SRTP context for incoming packets */ + if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode recv key\n"); + ret = AVERROR(EIO); + goto end; + } + + ret = ff_srtp_set_crypto(&whip->srtp_recv, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for recv\n"); + goto end; + } + + if (whip->state < WHIP_STATE_SRTP_FINISHED) + whip->state = WHIP_STATE_SRTP_FINISHED; + whip->whip_srtp_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: SRTP setup done, state=%d, suite=%s, key=%luB, elapsed=%dms\n", + whip->state, suite, sizeof(send_key), ELAPSED(whip->whip_starttime, av_gettime())); + +end: + return ret; +} + +/** + * Callback triggered by the RTP muxer when it creates and sends out an RTP packet. + * + * This function modifies the video STAP packet, removing the markers, and updating the + * NRI of the first NALU. Additionally, it uses the corresponding SRTP context to encrypt + * the RTP packet, where the video packet is handled by the video SRTP context. + */ +static int on_rtp_write_packet(void *opaque, const uint8_t *buf, int buf_size) +{ + int ret, cipher_size, is_rtcp, is_video; + uint8_t payload_type; + AVFormatContext *s = opaque; + WHIPContext *whip = s->priv_data; + SRTPContext *srtp; + + /* Ignore if not RTP or RTCP packet. */ + if (!media_is_rtp_rtcp(buf, buf_size)) + return 0; + + /* Only support audio, video and rtcp. */ + is_rtcp = media_is_rtcp(buf, buf_size); + payload_type = buf[1] & 0x7f; + is_video = payload_type == whip->video_payload_type; + if (!is_rtcp && payload_type != whip->video_payload_type && payload_type != whip->audio_payload_type) + return 0; + + /* Get the corresponding SRTP context. */ + srtp = is_rtcp ? &whip->srtp_rtcp_send : (is_video? &whip->srtp_video_send : &whip->srtp_audio_send); + + /* Encrypt by SRTP and send out. */ + cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, whip->buf, sizeof(whip->buf)); + if (cipher_size <= 0 || cipher_size < buf_size) { + av_log(whip, AV_LOG_WARNING, "WHIP: Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); + return 0; + } + + ret = ffurl_write(whip->udp, whip->buf, cipher_size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet=%dB, ret=%d\n", cipher_size, ret); + return ret; + } + + return ret; +} + +/** + * Creates dedicated RTP muxers for each stream in the AVFormatContext to build RTP + * packets from the encoded frames. + * + * The corresponding SRTP context is utilized to encrypt each stream's RTP packets. For + * example, a video SRTP context is used for the video stream. 
Additionally, the + * "on_rtp_write_packet" callback function is set as the write function for each RTP + * muxer to send out encrypted RTP packets. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int create_rtp_muxer(AVFormatContext *s) +{ + int ret, i, is_video, buffer_size, max_packet_size; + AVFormatContext *rtp_ctx = NULL; + AVDictionary *opts = NULL; + uint8_t *buffer = NULL; + char buf[64]; + WHIPContext *whip = s->priv_data; + + const AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL); + if (!rtp_format) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to guess rtp muxer\n"); + ret = AVERROR(ENOSYS); + goto end; + } + + /* The UDP buffer size, may greater than MTU. */ + buffer_size = MAX_UDP_BUFFER_SIZE; + /* The RTP payload max size. Reserved some bytes for SRTP checksum and padding. */ + max_packet_size = whip->pkt_size - DTLS_SRTP_CHECKSUM_LEN; + + for (i = 0; i < s->nb_streams; i++) { + rtp_ctx = avformat_alloc_context(); + if (!rtp_ctx) { + ret = AVERROR(ENOMEM); + goto end; + } + + rtp_ctx->oformat = rtp_format; + if (!avformat_new_stream(rtp_ctx, NULL)) { + ret = AVERROR(ENOMEM); + goto end; + } + /* Pass the interrupt callback on */ + rtp_ctx->interrupt_callback = s->interrupt_callback; + /* Copy the max delay setting; the rtp muxer reads this. */ + rtp_ctx->max_delay = s->max_delay; + /* Copy other stream parameters. */ + rtp_ctx->streams[0]->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio; + rtp_ctx->flags |= s->flags & AVFMT_FLAG_BITEXACT; + rtp_ctx->strict_std_compliance = s->strict_std_compliance; + + /* Set the synchronized start time. */ + rtp_ctx->start_time_realtime = s->start_time_realtime; + + avcodec_parameters_copy(rtp_ctx->streams[0]->codecpar, s->streams[i]->codecpar); + rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; + + /** + * For H.264, consistently utilize the annexb format through the Bitstream Filter (BSF); + * therefore, we deactivate the extradata detection for the RTP muxer. + */ + if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) { + av_freep(&rtp_ctx->streams[i]->codecpar->extradata); + rtp_ctx->streams[i]->codecpar->extradata_size = 0; + } + + buffer = av_malloc(buffer_size); + if (!buffer) { + ret = AVERROR(ENOMEM); + goto end; + } + + rtp_ctx->pb = avio_alloc_context(buffer, buffer_size, 1, s, NULL, on_rtp_write_packet, NULL); + if (!rtp_ctx->pb) { + ret = AVERROR(ENOMEM); + goto end; + } + rtp_ctx->pb->max_packet_size = max_packet_size; + rtp_ctx->pb->av_class = &ff_avio_class; + + is_video = s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; + snprintf(buf, sizeof(buf), "%d", is_video? whip->video_payload_type : whip->audio_payload_type); + av_dict_set(&opts, "payload_type", buf, 0); + snprintf(buf, sizeof(buf), "%d", is_video? 
            whip->video_ssrc : whip->audio_ssrc);
+        av_dict_set(&opts, "ssrc", buf, 0);
+
+        ret = avformat_write_header(rtp_ctx, &opts);
+        if (ret < 0) {
+            av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write rtp header\n");
+            goto end;
+        }
+
+        ff_format_set_url(rtp_ctx, av_strdup(s->url));
+        s->streams[i]->time_base = rtp_ctx->streams[0]->time_base;
+        s->streams[i]->priv_data = rtp_ctx;
+        rtp_ctx = NULL;
+    }
+
+    if (whip->state < WHIP_STATE_READY)
+        whip->state = WHIP_STATE_READY;
+    av_log(whip, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, "
+                              "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n",
+        whip->state, buffer_size, max_packet_size, ELAPSED(whip->whip_starttime, av_gettime()),
+        ELAPSED(whip->whip_starttime, whip->whip_init_time),
+        ELAPSED(whip->whip_init_time, whip->whip_offer_time),
+        ELAPSED(whip->whip_offer_time, whip->whip_answer_time),
+        ELAPSED(whip->whip_answer_time, whip->whip_udp_time),
+        ELAPSED(whip->whip_udp_time, whip->whip_ice_time),
+        ELAPSED(whip->whip_ice_time, whip->whip_dtls_time),
+        ELAPSED(whip->whip_dtls_time, whip->whip_srtp_time));
+
+end:
+    if (rtp_ctx)
+        avio_context_free(&rtp_ctx->pb);
+    avformat_free_context(rtp_ctx);
+    av_dict_free(&opts);
+    return ret;
+}
+
+/**
+ * Since RTC runs over connectionless UDP, the server relies on a timeout to
+ * detect that a session is dead, and publishers cannot republish the stream
+ * until that session has timed out.
+ * This function notifies the server that the stream has ended, so that the
+ * server can expire and close the session immediately and publishers can
+ * republish the stream quickly.
+ */
+static int dispose_session(AVFormatContext *s)
+{
+    int ret;
+    char buf[MAX_URL_SIZE];
+    URLContext *whip_uc = NULL;
+    AVDictionary *opts = NULL;
+    WHIPContext *whip = s->priv_data;
+
+    if (!whip->whip_resource_url)
+        return 0;
+
+    ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n");
+    if (whip->authorization)
+        ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization);
+    if (ret <= 0 || ret >= sizeof(buf)) {
+        av_log(whip, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf);
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    av_dict_set(&opts, "headers", buf, 0);
+    av_dict_set_int(&opts, "chunked_post", 0, 0);
+    av_dict_set(&opts, "method", "DELETE", 0);
+    ret = ffurl_open_whitelist(&whip_uc, whip->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback,
+        &opts, s->protocol_whitelist, s->protocol_blacklist, NULL);
+    if (ret < 0) {
+        av_log(whip, AV_LOG_ERROR, "WHIP: Failed to DELETE url=%s\n", whip->whip_resource_url);
+        goto end;
+    }
+
+    while (1) {
+        ret = ffurl_read(whip_uc, buf, sizeof(buf));
+        if (ret == AVERROR_EOF) {
+            ret = 0;
+            break;
+        }
+        if (ret < 0) {
+            av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from DELETE url=%s\n", whip->whip_resource_url);
+            goto end;
+        }
+    }
+
+    av_log(whip, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", whip->whip_resource_url);
+
+end:
+    ffurl_closep(&whip_uc);
+    av_dict_free(&opts);
+    return ret;
+}
+
+/**
+ * Since the h264_mp4toannexb filter only processes the MP4 ISOM format and bypasses
+ * the annexb format, it is necessary to manually insert encoder metadata before each
+ * IDR when dealing with annexb format packets. For instance, in the case of H.264,
+ * we must insert SPS and PPS before the IDR frame.
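+ *
+ * Illustrative before/after for one Annex B access unit carrying an IDR slice:
+ *   in:  00 00 01 IDR-slice ...
+ *   out: 00 00 01 SPS | 00 00 01 PPS | 00 00 01 IDR-slice ...
+ * Packets that already contain SPS and PPS, or that carry no IDR, are left as-is.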
+ */ +static int h264_annexb_insert_sps_pps(AVFormatContext *s, AVPacket *pkt) +{ + int ret = 0; + AVPacket *in = NULL; + AVCodecParameters *par = s->streams[pkt->stream_index]->codecpar; + uint32_t nal_size = 0, out_size = par ? par->extradata_size : 0; + uint8_t unit_type, sps_seen = 0, pps_seen = 0, idr_seen = 0, *out; + const uint8_t *buf, *buf_end, *r1; + + if (!pkt || !pkt->data || pkt->size <= 0) + return ret; + if (!par || !par->extradata || par->extradata_size <= 0) + return ret; + + /* Discover NALU type from packet. */ + buf_end = pkt->data + pkt->size; + for (buf = ff_nal_find_startcode(pkt->data, buf_end); buf < buf_end; buf += nal_size) { + while (!*(buf++)); + r1 = ff_nal_find_startcode(buf, buf_end); + if ((nal_size = r1 - buf) > 0) { + unit_type = *buf & 0x1f; + if (unit_type == H264_NAL_SPS) { + sps_seen = 1; + } else if (unit_type == H264_NAL_PPS) { + pps_seen = 1; + } else if (unit_type == H264_NAL_IDR_SLICE) { + idr_seen = 1; + } + + out_size += 3 + nal_size; + } + } + + if (!idr_seen || (sps_seen && pps_seen)) + return ret; + + /* See av_bsf_send_packet */ + in = av_packet_alloc(); + if (!in) + return AVERROR(ENOMEM); + + ret = av_packet_make_refcounted(pkt); + if (ret < 0) + goto fail; + + av_packet_move_ref(in, pkt); + + /* Create a new packet with sps/pps inserted. */ + ret = av_new_packet(pkt, out_size); + if (ret < 0) + goto fail; + + ret = av_packet_copy_props(pkt, in); + if (ret < 0) + goto fail; + + memcpy(pkt->data, par->extradata, par->extradata_size); + out = pkt->data + par->extradata_size; + buf_end = in->data + in->size; + for (buf = ff_nal_find_startcode(in->data, buf_end); buf < buf_end; buf += nal_size) { + while (!*(buf++)); + r1 = ff_nal_find_startcode(buf, buf_end); + if ((nal_size = r1 - buf) > 0) { + AV_WB24(out, 0x00001); + memcpy(out + 3, buf, nal_size); + out += 3 + nal_size; + } + } + +fail: + if (ret < 0) + av_packet_unref(pkt); + av_packet_free(&in); + + return ret; +} + +static av_cold int whip_init(AVFormatContext *s) +{ + int ret; + WHIPContext *whip = s->priv_data; + + if ((ret = initialize(s)) < 0) + goto end; + + if ((ret = parse_codec(s)) < 0) + goto end; + + if ((ret = generate_sdp_offer(s)) < 0) + goto end; + + if ((ret = exchange_sdp(s)) < 0) + goto end; + + if ((ret = parse_answer(s)) < 0) + goto end; + + if ((ret = udp_connect(s)) < 0) + goto end; + + if ((ret = ice_dtls_handshake(s)) < 0) + goto end; + + if ((ret = setup_srtp(s)) < 0) + goto end; + + if ((ret = create_rtp_muxer(s)) < 0) + goto end; + +end: + if (ret < 0 && whip->state < WHIP_STATE_FAILED) + whip->state = WHIP_STATE_FAILED; + if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) + ret = whip->dtls_ret; + return ret; +} + +static int whip_write_packet(AVFormatContext *s, AVPacket *pkt) +{ + int ret; + WHIPContext *whip = s->priv_data; + AVStream *st = s->streams[pkt->stream_index]; + AVFormatContext *rtp_ctx = st->priv_data; + + /* TODO: Send binding request every 1s as WebRTC heartbeat. */ + + /** + * Receive packets from the server such as ICE binding requests, DTLS messages, + * and RTCP like PLI requests, then respond to them. 
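+     *
+     * For example, a keyframe request would show up here as an RTCP PLI
+     * (payload-specific feedback, PT 206); in the code below only DTLS
+     * messages are forwarded, other packets are simply drained.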
+ */ + ret = ffurl_read(whip->udp, whip->buf, sizeof(whip->buf)); + if (ret > 0) { + if (is_dtls_packet(whip->buf, ret)) { + if ((ret = ffurl_write(whip->dtls_uc, whip->buf, ret)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to handle DTLS message\n"); + goto end; + } + } + } else if (ret != AVERROR(EAGAIN)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read from UDP socket\n"); + goto end; + } + + if (whip->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) { + if ((ret = h264_annexb_insert_sps_pps(s, pkt)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to insert SPS/PPS before IDR\n"); + goto end; + } + } + + ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); + if (ret < 0) { + if (ret == AVERROR(EINVAL)) { + av_log(whip, AV_LOG_WARNING, "WHIP: Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); + ret = 0; + } else + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet, size=%d\n", pkt->size); + goto end; + } + +end: + if (ret < 0 && whip->state < WHIP_STATE_FAILED) + whip->state = WHIP_STATE_FAILED; + if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) + ret = whip->dtls_ret; + if (ret >= 0 && whip->dtls_closed) + ret = AVERROR(EIO); + return ret; +} + +static av_cold void whip_deinit(AVFormatContext *s) +{ + int i, ret; + WHIPContext *whip = s->priv_data; + + ret = dispose_session(s); + if (ret < 0) + av_log(whip, AV_LOG_WARNING, "WHIP: Failed to dispose resource, ret=%d\n", ret); + + for (i = 0; i < s->nb_streams; i++) { + AVFormatContext* rtp_ctx = s->streams[i]->priv_data; + if (!rtp_ctx) + continue; + + av_write_trailer(rtp_ctx); + /** + * Keep in mind that it is necessary to free the buffer of pb since we allocate + * it and pass it to pb using avio_alloc_context, while avio_context_free does + * not perform this action. 
+ */ + av_freep(&rtp_ctx->pb->buffer); + avio_context_free(&rtp_ctx->pb); + avformat_free_context(rtp_ctx); + s->streams[i]->priv_data = NULL; + } + + av_freep(&whip->sdp_offer); + av_freep(&whip->sdp_answer); + av_freep(&whip->whip_resource_url); + av_freep(&whip->ice_ufrag_remote); + av_freep(&whip->ice_pwd_remote); + av_freep(&whip->ice_protocol); + av_freep(&whip->ice_host); + av_freep(&whip->authorization); + av_freep(&whip->cert_file); + av_freep(&whip->key_file); + ffurl_closep(&whip->udp); + ff_srtp_free(&whip->srtp_audio_send); + ff_srtp_free(&whip->srtp_video_send); + ff_srtp_free(&whip->srtp_rtcp_send); + ff_srtp_free(&whip->srtp_recv); + ffurl_close(whip->dtls_uc); +} + +static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) +{ + int ret = 1, extradata_isom = 0; + uint8_t *b = pkt->data; + WHIPContext *whip = s->priv_data; + + if (st->codecpar->codec_id == AV_CODEC_ID_H264) { + extradata_isom = st->codecpar->extradata_size > 0 && st->codecpar->extradata[0] == 1; + if (pkt->size >= 5 && AV_RB32(b) != 0x0000001 && (AV_RB24(b) != 0x000001 || extradata_isom)) { + ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", + b[0], b[1], b[2], b[3], b[4], extradata_isom); + } else + whip->h264_annexb_insert_sps_pps = 1; + } + + return ret; +} + +#define OFFSET(x) offsetof(WHIPContext, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "handshake_timeout", "Timeout in milliseconds for ICE and DTLS handshake.", OFFSET(handshake_timeout), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, DEC }, + { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1200 }, -1, INT_MAX, DEC }, + { "authorization", "The optional Bearer token for WHIP Authorization", OFFSET(authorization), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, + { "cert_file", "The optional certificate file path for DTLS", OFFSET(cert_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, + { "key_file", "The optional private key file path for DTLS", OFFSET(key_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass whip_muxer_class = { + .class_name = "WHIP muxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const FFOutputFormat ff_whip_muxer = { + .p.name = "whip", + .p.long_name = NULL_IF_CONFIG_SMALL("WHIP(WebRTC-HTTP ingestion protocol) muxer"), + .p.audio_codec = AV_CODEC_ID_OPUS, + .p.video_codec = AV_CODEC_ID_H264, + .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOFILE, + .p.priv_class = &whip_muxer_class, + .priv_data_size = sizeof(WHIPContext), + .init = whip_init, + .write_packet = whip_write_packet, + .deinit = whip_deinit, + .check_bitstream = whip_check_bitstream, +}; diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S index 50ce7d4dfd96b..2e4e451ec2517 100644 --- a/libavutil/aarch64/asm.S +++ b/libavutil/aarch64/asm.S @@ -196,7 +196,7 @@ DISABLE_SVE2 .popsection #endif -.macro function name, export=0, align=2 +.macro function name, export=0, align=4 .macro endfunc ELF .size \name, . - \name FUNC .endfunc @@ -217,7 +217,7 @@ FUNC .func \name .endif .endm -.macro const name, align=2, relocate=0 +.macro const name, align=4, relocate=0 .macro endconst ELF .size \name, . 
- \name .purgem endconst diff --git a/libavutil/avassert.h b/libavutil/avassert.h index 1895fb75513bf..8dbdb01566709 100644 --- a/libavutil/avassert.h +++ b/libavutil/avassert.h @@ -31,6 +31,7 @@ #ifdef HAVE_AV_CONFIG_H # include "config.h" #endif +#include "attributes.h" #include "log.h" #include "macros.h" @@ -75,4 +76,45 @@ */ void av_assert0_fpu(void); +/** + * Asserts that are used as compiler optimization hints depending + * upon ASSERT_LEVEL and NDEBUG. + * + * Undefined behaviour occurs if execution reaches a point marked + * with av_unreachable() or if a condition used with av_assume() + * is false. + * + * The condition used with av_assume() should not have side-effects + * and should be visible to the compiler. + */ +#if defined(ASSERT_LEVEL) ? ASSERT_LEVEL > 0 : !defined(HAVE_AV_CONFIG_H) && !defined(NDEBUG) +#define av_unreachable(msg) \ +do { \ + av_log(NULL, AV_LOG_PANIC, \ + "Reached supposedly unreachable code at %s:%d: %s\n", \ + __FILE__, __LINE__, msg); \ + abort(); \ +} while (0) +#define av_assume(cond) av_assert0(cond) +#else +#if AV_GCC_VERSION_AT_LEAST(4, 5) || AV_HAS_BUILTIN(__builtin_unreachable) +#define av_unreachable(msg) __builtin_unreachable() +#elif defined(_MSC_VER) +#define av_unreachable(msg) __assume(0) +#define av_assume(cond) __assume(cond) +#elif __STDC_VERSION__ >= 202311L +#include <stddef.h> +#define av_unreachable(msg) unreachable() +#else +#define av_unreachable(msg) ((void)0) +#endif + +#ifndef av_assume +#define av_assume(cond) do { \ + if (!(cond)) \ + av_unreachable(); \ +} while (0) +#endif +#endif + #endif /* AVUTIL_AVASSERT_H */ diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 978d7e29d3b37..ce485a85a2176 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -79,6 +79,7 @@ typedef struct VulkanDeviceFeatures { VkPhysicalDeviceVulkan12Features vulkan_1_2; VkPhysicalDeviceVulkan13Features vulkan_1_3; VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore; + VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR subgroup_rotate; #ifdef VK_KHR_shader_expect_assume VkPhysicalDeviceShaderExpectAssumeFeaturesKHR expect_assume; @@ -205,6 +206,8 @@ static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *f FF_VK_STRUCT_EXT(s, &feats->device, &feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES); + FF_VK_STRUCT_EXT(s, &feats->device, &feats->subgroup_rotate, FF_VK_EXT_SUBGROUP_ROTATE, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_ROTATE_FEATURES_KHR); #ifdef VK_KHR_shader_expect_assume FF_VK_STRUCT_EXT(s, &feats->device, &feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME, @@ -283,6 +286,7 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF COPY_VAL(vulkan_1_3.dynamicRendering); COPY_VAL(timeline_semaphore.timelineSemaphore); + COPY_VAL(subgroup_rotate.shaderSubgroupRotate); COPY_VAL(video_maintenance_1.videoMaintenance1); #ifdef VK_KHR_video_maintenance2 @@ -406,6 +410,23 @@ static const struct FFVkFormatEntry { { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, { VK_FORMAT_G16B16G16R16_422_UNORM, AV_PIX_FMT_Y216, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + /* Planar YUVA 420 at 8, 10 and 16 bits */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA420P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, +
{ VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Planar YUVA 422 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA422P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Planar YUVA 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA444P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + /* Single plane 444 at 8, 10, 12 and 16 bits */ { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_UYVA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, @@ -588,6 +609,7 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW }, { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, + { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE }, #ifdef VK_KHR_shader_expect_assume { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME }, #endif @@ -2638,11 +2660,12 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) try_export_flags(hwfc, &eiinfo.handleTypes, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); -#endif - if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY) + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY && + hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) try_export_flags(hwfc, &eiinfo.handleTypes, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); +#endif for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; @@ -2779,8 +2802,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) /* Image usage flags */ if (!hwctx->usage) { - hwctx->usage = supported_usage & (VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + hwctx->usage = supported_usage & 
(VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h index c027fa51c38c2..d58881d09c9ae 100644 --- a/libavutil/mem_internal.h +++ b/libavutil/mem_internal.h @@ -131,4 +131,6 @@ #define LOCAL_ALIGNED_32(t, v, ...) E1(LOCAL_ALIGNED_D(32, t, v, __VA_ARGS__,,)) +#define LOCAL_ALIGNED_64(t, v, ...) E1(LOCAL_ALIGNED_D(64, t, v, __VA_ARGS__,,)) + #endif /* AVUTIL_MEM_INTERNAL_H */ diff --git a/libavutil/refstruct.c b/libavutil/refstruct.c index ce804f781aee7..7a5eb2482665b 100644 --- a/libavutil/refstruct.c +++ b/libavutil/refstruct.c @@ -45,7 +45,7 @@ #define REFSTRUCT_COOKIE AV_NE((uint64_t)MKBETAG('R', 'e', 'f', 'S') << 32 | MKBETAG('t', 'r', 'u', 'c'), \ MKTAG('R', 'e', 'f', 'S') | (uint64_t)MKTAG('t', 'r', 'u', 'c') << 32) -#if __STDC_VERSION__ >= 201112L && !defined(_MSC_VER) +#ifndef _MSC_VER #define REFCOUNT_OFFSET FFALIGN(sizeof(RefCount), FFMAX(ALIGN_64, _Alignof(max_align_t))) #else #define REFCOUNT_OFFSET FFALIGN(sizeof(RefCount), ALIGN_64) diff --git a/libavutil/version.h b/libavutil/version.h index 4717cd562b1b0..2979f802332a7 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 60 -#define LIBAVUTIL_VERSION_MINOR 2 +#define LIBAVUTIL_VERSION_MINOR 3 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 2cc8ec110e790..a989e080abd37 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -989,6 +989,16 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, int use_ded_mem; FFVulkanFunctions *vk = &s->vkfn; + /* Buffer usage flags corresponding to buffer descriptor types */ + const VkBufferUsageFlags desc_usage = + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + + if ((s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && (usage & desc_usage)) + usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + VkBufferCreateInfo buf_spawn = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = pNext, @@ -1611,7 +1621,10 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, case AV_PIX_FMT_GBRAP: case AV_PIX_FMT_YUV420P: case AV_PIX_FMT_YUV422P: - case AV_PIX_FMT_YUV444P: { + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUVA420P: + case AV_PIX_FMT_YUVA422P: + case AV_PIX_FMT_YUVA444P: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "r8ui", [FF_VK_REP_FLOAT] = "r8", @@ -1640,7 +1653,15 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, case AV_PIX_FMT_YUV422P16: case AV_PIX_FMT_YUV444P10: case AV_PIX_FMT_YUV444P12: - case AV_PIX_FMT_YUV444P16: { + case AV_PIX_FMT_YUV444P16: + case AV_PIX_FMT_YUVA420P10: + case AV_PIX_FMT_YUVA420P16: + case AV_PIX_FMT_YUVA422P10: + case AV_PIX_FMT_YUVA422P12: + case AV_PIX_FMT_YUVA422P16: + case AV_PIX_FMT_YUVA444P10: + case AV_PIX_FMT_YUVA444P12: + case AV_PIX_FMT_YUVA444P16: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "r16ui", [FF_VK_REP_FLOAT] = "r16f", diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index cd61d71577948..8b413013e6564 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -48,6 +48,7 @@ typedef uint64_t FFVulkanExtensions; #define FF_VK_EXT_PUSH_DESCRIPTOR (1ULL << 14) /* VK_KHR_push_descriptor */ #define 
FF_VK_EXT_RELAXED_EXTENDED_INSTR (1ULL << 15) /* VK_KHR_shader_relaxed_extended_instruction */ #define FF_VK_EXT_EXPECT_ASSUME (1ULL << 16) /* VK_KHR_shader_expect_assume */ +#define FF_VK_EXT_SUBGROUP_ROTATE (1ULL << 17) /* VK_KHR_shader_subgroup_rotate */ /* Video extensions */ #define FF_VK_EXT_VIDEO_QUEUE (1ULL << 36) /* VK_KHR_video_queue */ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index eaf6e2e6bb86d..a7976fe5606d2 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -58,6 +58,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW }, { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, + { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE }, { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, #ifdef VK_KHR_video_maintenance2 { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 }, diff --git a/libswscale/format.c b/libswscale/format.c index b859af7b043b6..e4c1348b9034a 100644 --- a/libswscale/format.c +++ b/libswscale/format.c @@ -483,7 +483,7 @@ static int infer_trc_ref(SwsColor *csp, const SwsColor *ref) return 1; } -int ff_infer_colors(SwsColor *src, SwsColor *dst) +bool ff_infer_colors(SwsColor *src, SwsColor *dst) { int incomplete = 0; diff --git a/libswscale/format.h b/libswscale/format.h index 11b4345f7c021..3b6d745159c80 100644 --- a/libswscale/format.h +++ b/libswscale/format.h @@ -21,6 +21,8 @@ #ifndef SWSCALE_FORMAT_H #define SWSCALE_FORMAT_H +#include + #include "libavutil/csp.h" #include "libavutil/pixdesc.h" @@ -129,7 +131,7 @@ static inline int ff_fmt_align(enum AVPixelFormat fmt) int ff_test_fmt(const SwsFormat *fmt, int output); -/* Returns 1 if the formats are incomplete, 0 otherwise */ -int ff_infer_colors(SwsColor *src, SwsColor *dst); +/* Returns true if the formats are incomplete, false otherwise */ +bool ff_infer_colors(SwsColor *src, SwsColor *dst); #endif /* SWSCALE_FORMAT_H */ diff --git a/libswscale/graph.c b/libswscale/graph.c index cd56f51f88c91..dc7784aa499ad 100644 --- a/libswscale/graph.c +++ b/libswscale/graph.c @@ -44,10 +44,9 @@ static int pass_alloc_output(SwsPass *pass) pass->num_slices * pass->slice_h, pass->format, 64); } -/* slice_align should be a power of two, or 0 to disable slice threading */ -static SwsPass *pass_add(SwsGraph *graph, void *priv, enum AVPixelFormat fmt, - int w, int h, SwsPass *input, int slice_align, - sws_filter_run_t run) +SwsPass *ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, + int width, int height, SwsPass *input, + int align, void *priv, sws_filter_run_t run) { int ret; SwsPass *pass = av_mallocz(sizeof(*pass)); @@ -58,8 +57,8 @@ static SwsPass *pass_add(SwsGraph *graph, void *priv, enum AVPixelFormat fmt, pass->run = run; pass->priv = priv; pass->format = fmt; - pass->width = w; - pass->height = h; + pass->width = width; + pass->height = height; pass->input = input; pass->output.fmt = AV_PIX_FMT_NONE; @@ -69,12 +68,12 @@ static SwsPass *pass_add(SwsGraph *graph, void *priv, enum AVPixelFormat fmt, return NULL; } - if (!slice_align) { + if (!align) { pass->slice_h = pass->height; pass->num_slices = 1; } else { pass->slice_h = (pass->height + graph->num_threads - 1) / graph->num_threads; - pass->slice_h = FFALIGN(pass->slice_h, slice_align); + pass->slice_h = FFALIGN(pass->slice_h, align); pass->num_slices = 
(pass->height + pass->slice_h - 1) / pass->slice_h; } @@ -84,41 +83,27 @@ static SwsPass *pass_add(SwsGraph *graph, void *priv, enum AVPixelFormat fmt, return pass; } -/* Wrapper around pass_add that chains a pass "in-place" */ -static int pass_append(SwsGraph *graph, void *priv, enum AVPixelFormat fmt, - int w, int h, SwsPass **pass, int slice_align, - sws_filter_run_t run) +/* Wrapper around ff_sws_graph_add_pass() that chains a pass "in-place" */ +static int pass_append(SwsGraph *graph, enum AVPixelFormat fmt, int w, int h, + SwsPass **pass, int align, void *priv, sws_filter_run_t run) { - SwsPass *new = pass_add(graph, priv, fmt, w, h, *pass, slice_align, run); + SwsPass *new = ff_sws_graph_add_pass(graph, fmt, w, h, *pass, align, priv, run); if (!new) return AVERROR(ENOMEM); *pass = new; return 0; } -static int vshift(enum AVPixelFormat fmt, int plane) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); - return (plane == 1 || plane == 2) ? desc->log2_chroma_h : 0; -} - -/* Shift an image vertically by y lines */ -static SwsImg shift_img(const SwsImg *img_base, int y) -{ - SwsImg img = *img_base; - for (int i = 0; i < 4 && img.data[i]; i++) - img.data[i] += (y >> vshift(img.fmt, i)) * img.linesize[i]; - return img; -} - static void run_copy(const SwsImg *out_base, const SwsImg *in_base, int y, int h, const SwsPass *pass) { - SwsImg in = shift_img(in_base, y); - SwsImg out = shift_img(out_base, y); + SwsImg in = ff_sws_img_shift(in_base, y); + SwsImg out = ff_sws_img_shift(out_base, y); + + for (int i = 0; i < FF_ARRAY_ELEMS(out.data) && out.data[i]; i++) { + const int lines = h >> ff_fmt_vshift(in.fmt, i); + av_assert1(in.data[i]); - for (int i = 0; i < FF_ARRAY_ELEMS(in.data) && in.data[i]; i++) { - const int lines = h >> vshift(in.fmt, i); if (in.linesize[i] == out.linesize[i]) { memcpy(out.data[i], in.data[i], lines * out.linesize[i]); } else { @@ -219,7 +204,7 @@ static void run_legacy_unscaled(const SwsImg *out, const SwsImg *in_base, { SwsContext *sws = slice_ctx(pass, y); SwsInternal *c = sws_internal(sws); - const SwsImg in = shift_img(in_base, y); + const SwsImg in = ff_sws_img_shift(in_base, y); c->convert_unscaled(c, (const uint8_t *const *) in.data, in.linesize, y, h, out->data, out->linesize); @@ -230,7 +215,7 @@ static void run_legacy_swscale(const SwsImg *out_base, const SwsImg *in, { SwsContext *sws = slice_ctx(pass, y); SwsInternal *c = sws_internal(sws); - const SwsImg out = shift_img(out_base, y); + const SwsImg out = ff_sws_img_shift(out_base, y); ff_swscale(c, (const uint8_t *const *) in->data, in->linesize, 0, sws->src_h, out.data, out.linesize, y, h); @@ -325,19 +310,19 @@ static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, align = 0; /* disable slice threading */ if (c->src0Alpha && !c->dst0Alpha && isALPHA(sws->dst_format)) { - ret = pass_append(graph, c, AV_PIX_FMT_RGBA, src_w, src_h, &input, 1, run_rgb0); + ret = pass_append(graph, AV_PIX_FMT_RGBA, src_w, src_h, &input, 1, c, run_rgb0); if (ret < 0) return ret; } if (c->srcXYZ && !(c->dstXYZ && unscaled)) { - ret = pass_append(graph, c, AV_PIX_FMT_RGB48, src_w, src_h, &input, 1, run_xyz2rgb); + ret = pass_append(graph, AV_PIX_FMT_RGB48, src_w, src_h, &input, 1, c, run_xyz2rgb); if (ret < 0) return ret; } - pass = pass_add(graph, sws, sws->dst_format, dst_w, dst_h, input, align, - c->convert_unscaled ? run_legacy_unscaled : run_legacy_swscale); + pass = ff_sws_graph_add_pass(graph, sws->dst_format, dst_w, dst_h, input, align, sws, + c->convert_unscaled ? 
run_legacy_unscaled : run_legacy_swscale); if (!pass) return AVERROR(ENOMEM); pass->setup = setup_legacy_swscale; @@ -387,7 +372,7 @@ static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, } if (c->dstXYZ && !(c->srcXYZ && unscaled)) { - ret = pass_append(graph, c, AV_PIX_FMT_RGB48, dst_w, dst_h, &pass, 1, run_rgb2xyz); + ret = pass_append(graph, AV_PIX_FMT_RGB48, dst_w, dst_h, &pass, 1, c, run_rgb2xyz); if (ret < 0) return ret; } @@ -490,8 +475,8 @@ static void run_lut3d(const SwsImg *out_base, const SwsImg *in_base, int y, int h, const SwsPass *pass) { SwsLut3D *lut = pass->priv; - const SwsImg in = shift_img(in_base, y); - const SwsImg out = shift_img(out_base, y); + const SwsImg in = ff_sws_img_shift(in_base, y); + const SwsImg out = ff_sws_img_shift(out_base, y); ff_sws_lut3d_apply(lut, in.data[0], in.linesize[0], out.data[0], out.linesize[0], pass->width, h); @@ -548,8 +533,8 @@ static int adapt_colors(SwsGraph *graph, SwsFormat src, SwsFormat dst, return ret; } - pass = pass_add(graph, lut, fmt_out, src.width, src.height, - input, 1, run_lut3d); + pass = ff_sws_graph_add_pass(graph, fmt_out, src.width, src.height, + input, 1, lut, run_lut3d); if (!pass) { ff_sws_lut3d_free(&lut); return AVERROR(ENOMEM); @@ -589,7 +574,8 @@ static int init_passes(SwsGraph *graph) graph->noop = 1; /* Add threaded memcpy pass */ - pass = pass_add(graph, NULL, dst.format, dst.width, dst.height, pass, 1, run_copy); + pass = ff_sws_graph_add_pass(graph, dst.format, dst.width, dst.height, + pass, 1, NULL, run_copy); if (!pass) return AVERROR(ENOMEM); } diff --git a/libswscale/graph.h b/libswscale/graph.h index b42d54be04a78..0630b31ce639e 100644 --- a/libswscale/graph.h +++ b/libswscale/graph.h @@ -21,6 +21,8 @@ #ifndef SWSCALE_GRAPH_H #define SWSCALE_GRAPH_H +#include <stdbool.h> + #include "libavutil/slicethread.h" #include "swscale.h" #include "format.h" @@ -34,6 +36,20 @@ typedef struct SwsImg { int linesize[4]; } SwsImg; +static av_always_inline av_const int ff_fmt_vshift(enum AVPixelFormat fmt, int plane) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); + return (plane == 1 || plane == 2) ? desc->log2_chroma_h : 0; +} + +static av_const inline SwsImg ff_sws_img_shift(const SwsImg *base, const int y) +{ + SwsImg img = *base; + for (int i = 0; i < 4 && img.data[i]; i++) + img.data[i] += (y >> ff_fmt_vshift(img.fmt, i)) * img.linesize[i]; + return img; +} + typedef struct SwsPass SwsPass; typedef struct SwsGraph SwsGraph; @@ -95,8 +111,8 @@ typedef struct SwsGraph { SwsContext *ctx; AVSliceThread *slicethread; int num_threads; /* resolved at init() time */ - int incomplete; /* set during init() if formats had to be inferred */ - int noop; /* set during init() if the graph is a no-op */ + bool incomplete; /* set during init() if formats had to be inferred */ + bool noop; /* set during init() if the graph is a no-op */ /** Sorted sequence of filter passes to apply */ SwsPass **passes; @@ -128,6 +144,24 @@ typedef struct SwsGraph { int ff_sws_graph_create(SwsContext *ctx, const SwsFormat *dst, const SwsFormat *src, int field, SwsGraph **out_graph); + +/** + * Allocate and add a new pass to the filter graph. + * + * @param graph Filter graph to add the pass to. + * @param fmt Pixel format of the output image. + * @param width Width of the output image. + * @param height Height of the output image. + * @param input Previous pass to read from, or NULL for the input image. + * @param align Minimum slice alignment for this pass, or 0 for no threading.
+ * @param priv Private state for the filter run function. + * @param run Filter function to run. + * @return The newly created pass, or NULL on error. + */ +SwsPass *ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, + int width, int height, SwsPass *input, + int align, void *priv, sws_filter_run_t run); + /** * Uninitialize any state associate with this filter graph and free it. */ diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index b84120549eb0a..61073c6c0a45c 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -699,7 +699,7 @@ static void packed16togbra16(const uint8_t *src, int srcStride, dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift); dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift); dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift); - dst[3][x] = 0xFFFF; + dst[3][x] = av_bswap16(0xFFFF >> shift); } } else if (src_alpha) { for (x = 0; x < width; x++) { @@ -729,7 +729,7 @@ static void packed16togbra16(const uint8_t *src, int srcStride, dst[0][x] = av_bswap16(*src_line++ >> shift); dst[1][x] = av_bswap16(*src_line++ >> shift); dst[2][x] = av_bswap16(*src_line++ >> shift); - dst[3][x] = 0xFFFF; + dst[3][x] = av_bswap16(0xFFFF >> shift); } } else if (src_alpha) { for (x = 0; x < width; x++) { @@ -759,7 +759,7 @@ static void packed16togbra16(const uint8_t *src, int srcStride, dst[0][x] = av_bswap16(*src_line++) >> shift; dst[1][x] = av_bswap16(*src_line++) >> shift; dst[2][x] = av_bswap16(*src_line++) >> shift; - dst[3][x] = 0xFFFF; + dst[3][x] = 0xFFFF >> shift; } } else if (src_alpha) { for (x = 0; x < width; x++) { @@ -789,7 +789,7 @@ static void packed16togbra16(const uint8_t *src, int srcStride, dst[0][x] = *src_line++ >> shift; dst[1][x] = *src_line++ >> shift; dst[2][x] = *src_line++ >> shift; - dst[3][x] = 0xFFFF; + dst[3][x] = 0xFFFF >> shift; } } else if (src_alpha) { for (x = 0; x < width; x++) { @@ -818,6 +818,7 @@ static void packed30togbra10(const uint8_t *src, int srcStride, int x, h, i; int dst_alpha = dst[3] != NULL; int scale_high = bpc - 10, scale_low = 10 - scale_high; + uint16_t alpha_val = (1U << bpc) - 1; for (h = 0; h < srcSliceH; h++) { uint32_t *src_line = (uint32_t *)(src + srcStride * h); unsigned component; @@ -834,7 +835,7 @@ static void packed30togbra10(const uint8_t *src, int srcStride, dst[1][x] = av_bswap16(component << scale_high | component >> scale_low); component = p & 0x3FF; dst[2][x] = av_bswap16(component << scale_high | component >> scale_low); - dst[3][x] = 0xFFFF; + dst[3][x] = av_bswap16(alpha_val); src_line++; } } else { @@ -860,7 +861,7 @@ static void packed30togbra10(const uint8_t *src, int srcStride, dst[1][x] = component << scale_high | component >> scale_low; component = p & 0x3FF; dst[2][x] = component << scale_high | component >> scale_low; - dst[3][x] = 0xFFFF; + dst[3][x] = alpha_val; src_line++; } } else { @@ -1377,8 +1378,15 @@ static int planarRgbToplanarRgbWrapper(SwsInternal *c, dst[1], dstStride[1]); ff_copyPlane(src[2], srcStride[2], srcSliceY, srcSliceH, c->opts.src_w, dst[2], dstStride[2]); - if (dst[3]) - fillPlane(dst[3], dstStride[3], c->opts.src_w, srcSliceH, srcSliceY, 255); + if (dst[3]) { + if (is16BPS(c->opts.dst_format) || isNBPS(c->opts.dst_format)) { + const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->opts.dst_format); + fillPlane16(dst[3], dstStride[3], c->opts.src_w, srcSliceH, srcSliceY, 1, + desc_dst->comp[3].depth, isBE(c->opts.dst_format)); + } else { + fillPlane(dst[3], dstStride[3], c->opts.src_w, 
srcSliceH, srcSliceY, 255); + } + } return srcSliceH; } @@ -2221,7 +2229,7 @@ static int planarCopyWrapper(SwsInternal *c, const uint8_t *const src[], // ignore palette for GRAY8 if (plane == 1 && desc_dst->nb_components < 3) continue; - if (!src[plane] || (plane == 1 && desc_src->nb_components < 3)) { + if (!src[plane] || (plane == 1 && desc_src->nb_components < 3) || (plane == 3 && desc_src->nb_components <= 3)) { if (is16BPS(c->opts.dst_format) || isNBPS(c->opts.dst_format)) { fillPlane16(dst[plane], dstStride[plane], length, height, y, plane == 3, desc_dst->comp[plane].depth, diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c index 70810581305db..0f1f8311c9f78 100644 --- a/libswscale/tests/swscale.c +++ b/libswscale/tests/swscale.c @@ -79,11 +79,12 @@ static int speedup_count; static const char *speedup_color(double ratio) { - return ratio > 1.10 ? "\033[1;32m" : /* bold green */ - ratio > 1.02 ? "\033[32m" : /* green */ - ratio > 0.98 ? "" : /* default */ - ratio > 0.95 ? "\033[33m" : /* yellow */ - ratio > 0.90 ? "\033[31m" : /* red */ + return ratio > 10.00 ? "\033[1;94m" : /* bold blue */ + ratio > 2.00 ? "\033[1;32m" : /* bold green */ + ratio > 1.02 ? "\033[32m" : /* green */ + ratio > 0.98 ? "" : /* default */ + ratio > 0.90 ? "\033[33m" : /* yellow */ + ratio > 0.75 ? "\033[31m" : /* red */ "\033[1;31m"; /* bold red */ } diff --git a/libswscale/utils.c b/libswscale/utils.c index f659e22fdc661..94a47ea5d0fcd 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -692,13 +692,35 @@ static void fill_rgb2yuv_table(SwsInternal *c, const int table[4], int dstRange) AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0); } -static int fill_xyztables(SwsInternal *c) +#if CONFIG_SMALL +static void init_xyz_tables(uint16_t xyzgamma_tab[4096], uint16_t xyzgammainv_tab[65536], + uint16_t rgbgamma_tab[65536], uint16_t rgbgammainv_tab[4096]) +#else +static uint16_t xyzgamma_tab[4096], rgbgammainv_tab[4096]; +static uint16_t rgbgamma_tab[65536], xyzgammainv_tab[65536]; +static av_cold void init_xyz_tables(void) +#endif { - int i; - double xyzgamma = XYZ_GAMMA; - double rgbgamma = 1.0 / RGB_GAMMA; + double xyzgamma = XYZ_GAMMA; + double rgbgamma = 1.0 / RGB_GAMMA; double xyzgammainv = 1.0 / XYZ_GAMMA; double rgbgammainv = RGB_GAMMA; + + /* set input gamma vectors */ + for (int i = 0; i < 4096; i++) { + xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 65535.0); + rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 65535.0); + } + + /* set output gamma vectors */ + for (int i = 0; i < 65536; i++) { + rgbgamma_tab[i] = lrint(pow(i / 65535.0, rgbgamma) * 4095.0); + xyzgammainv_tab[i] = lrint(pow(i / 65535.0, xyzgammainv) * 4095.0); + } +} + +static int fill_xyztables(SwsInternal *c) +{ static const int16_t xyz2rgb_matrix[3][4] = { {13270, -6295, -2041}, {-3969, 7682, 170}, @@ -707,10 +729,7 @@ static int fill_xyztables(SwsInternal *c) {1689, 1464, 739}, { 871, 2929, 296}, { 79, 488, 3891} }; -#if !CONFIG_SMALL - static uint16_t xyzgamma_tab[4096], rgbgammainv_tab[4096]; - static uint16_t rgbgamma_tab[65536], xyzgammainv_tab[65536]; -#endif + if (c->xyzgamma) return 0; @@ -724,26 +743,16 @@ static int fill_xyztables(SwsInternal *c) c->rgbgammainv = c->xyzgamma + 4096; c->rgbgamma = c->rgbgammainv + 4096; c->xyzgammainv = c->rgbgamma + 65536; + init_xyz_tables(c->xyzgamma, c->xyzgammainv, c->rgbgamma, c->rgbgammainv); #else c->xyzgamma = xyzgamma_tab; c->rgbgamma = rgbgamma_tab; c->xyzgammainv = xyzgammainv_tab; c->rgbgammainv = rgbgammainv_tab; - 
if (xyzgamma_tab[4095]) - return 0; -#endif - /* set input gamma vectors */ - for (i = 0; i < 4096; i++) { - c->xyzgamma[i] = lrint(pow(i / 4095.0, xyzgamma) * 65535.0); - c->rgbgammainv[i] = lrint(pow(i / 4095.0, rgbgammainv) * 65535.0); - } - - /* set output gamma vectors */ - for (i = 0; i < 65536; i++) { - c->rgbgamma[i] = lrint(pow(i / 65535.0, rgbgamma) * 4095.0); - c->xyzgammainv[i] = lrint(pow(i / 65535.0, xyzgammainv) * 4095.0); - } + static AVOnce xyz_init_static_once = AV_ONCE_INIT; + ff_thread_once(&xyz_init_static_once, init_xyz_tables); +#endif return 0; } diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c index 26a697a3468c2..79763de1ea435 100644 --- a/tests/checkasm/pixblockdsp.c +++ b/tests/checkasm/pixblockdsp.c @@ -90,11 +90,8 @@ void checkasm_check_pixblockdsp(void) uint16_t *dst0 = (uint16_t *)dst0_; uint16_t *dst1 = (uint16_t *)dst1_; PixblockDSPContext h; - AVCodecContext avctx = { - .bits_per_raw_sample = 8, - }; - ff_pixblockdsp_init(&h, &avctx); + ff_pixblockdsp_init(&h, 8); if (check_func(h.get_pixels, "get_pixels")) check_get_pixels(uint8_t, 1); diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c index cecd0dee0fa29..bddc9a79fc59b 100644 --- a/tests/checkasm/vp9dsp.c +++ b/tests/checkasm/vp9dsp.c @@ -310,13 +310,13 @@ static int is_zero(const int16_t *c, int sz) static void check_itxfm(void) { - LOCAL_ALIGNED_32(uint8_t, src, [32 * 32 * 2]); - LOCAL_ALIGNED_32(uint8_t, dst, [32 * 32 * 2]); - LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]); - LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]); - LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]); - LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]); - LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]); + LOCAL_ALIGNED_64(uint8_t, src, [32 * 32 * 2]); + LOCAL_ALIGNED_64(uint8_t, dst, [32 * 32 * 2]); + LOCAL_ALIGNED_64(uint8_t, dst0, [32 * 32 * 2]); + LOCAL_ALIGNED_64(uint8_t, dst1, [32 * 32 * 2]); + LOCAL_ALIGNED_64(int16_t, coef, [32 * 32 * 2]); + LOCAL_ALIGNED_64(int16_t, subcoef0, [32 * 32 * 2]); + LOCAL_ALIGNED_64(int16_t, subcoef1, [32 * 32 * 2]); declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); VP9DSPContext dsp; int y, x, tx, txtp, bit_depth, sub; diff --git a/tests/fate/ac3.mak b/tests/fate/ac3.mak index 1ecb5a3f54b52..e52678a2fdc12 100644 --- a/tests/fate/ac3.mak +++ b/tests/fate/ac3.mak @@ -91,6 +91,13 @@ fate-ac3-fixed-encode: CMD = md5 -i $(SRC) -c ac3_fixed -ab 128k -f ac3 -flags + fate-ac3-fixed-encode: CMP = oneline fate-ac3-fixed-encode: REF = e9d78bca187b4bbafc4512bcea8efd3e +# This tests that the LFE does not get lost when converting the input 7.1 +# to a channel layout supported by the encoder. 
+FATE_AC3-$(call FRAMECRC, WAV, PCM_S16LE, ARESAMPLE_FILTER AC3_FIXED_ENCODER) += fate-ac3-fixed-encode-2 +fate-ac3-fixed-encode-2: tests/data/asynth-44100-8.wav +fate-ac3-fixed-encode-2: SRC = $(TARGET_PATH)/tests/data/asynth-44100-8.wav +fate-ac3-fixed-encode-2: CMD = framecrc -i $(SRC) -c:a ac3_fixed -ab 256k -frames:a 6 -af aresample + FATE_EAC3-$(call ALLYES, EAC3_DEMUXER EAC3_MUXER EAC3_CORE_BSF) += fate-eac3-core-bsf fate-eac3-core-bsf: CMD = md5pipe -i $(TARGET_SAMPLES)/eac3/the_great_wall_7.1.eac3 -c:a copy -bsf:a eac3_core -fflags +bitexact -f eac3 fate-eac3-core-bsf: CMP = oneline diff --git a/tests/fate/cbs.mak b/tests/fate/cbs.mak index 32207e2ee223c..138dab67a9d15 100644 --- a/tests/fate/cbs.mak +++ b/tests/fate/cbs.mak @@ -172,6 +172,11 @@ $(foreach N,$(FATE_CBS_DISCARD_TYPES),$(eval $(call FATE_CBS_DISCARD_TEST,hevc,$ FATE_CBS_HEVC-$(call ALLYES, HEVC_DEMUXER HEVC_MUXER HEVC_PARSER FILTER_UNITS_BSF HEVC_METADATA_BSF FILE_PROTOCOL) += $(FATE_CBS_hevc_DISCARD) +fate-cbs-hevc-metadata-set-color: CMD = md5 -i $(TARGET_SAMPLES)/hevc-conformance/AMP_A_Samsung_4.bit -c:v copy -bsf:v hevc_metadata=colour_primaries=0:transfer_characteristics=0:matrix_coefficients=3 -f hevc +fate-cbs-hevc-metadata-set-color: CMP = oneline +fate-cbs-hevc-metadata-set-color: REF = d073124fca9e30a46c173292f948967c +FATE_CBS_HEVC-$(call ALLYES, HEVC_DEMUXER, HEVC_METADATA_BSF, HEVC_MUXER) += fate-cbs-hevc-metadata-set-color + FATE_SAMPLES_AVCONV += $(FATE_CBS_HEVC-yes) fate-cbs-hevc: $(FATE_CBS_HEVC-yes) diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak index e432345ef73be..8113c04300c26 100644 --- a/tests/fate/hevc.mak +++ b/tests/fate/hevc.mak @@ -292,7 +292,10 @@ fate-hevc-mv-position: CMD = framecrc -i $(TARGET_SAMPLES)/hevc/multiview.mov -m FATE_HEVC-$(call FRAMECRC, MOV, HEVC) += fate-hevc-mv-position fate-hevc-alpha: CMD = framecrc -i $(TARGET_SAMPLES)/hevc/alpha.mp4 -FATE_HEVC-$(call FRAMECRC, HEVC, HEVC) += fate-hevc-alpha +FATE_HEVC-$(call FRAMECRC, MOV, HEVC) += fate-hevc-alpha + +fate-hevc-color-reserved: CMD = framecrc -bsf:v hevc_metadata=colour_primaries=0:transfer_characteristics=0:matrix_coefficients=3 -i $(TARGET_SAMPLES)/hevc-conformance/AMP_A_Samsung_4.bit -vf scale,format=nv12 -frames:v 1 +FATE_HEVC-$(call FRAMECRC, HEVC, HEVC, HEVC_METADATA_BSF SCALE_FILTER) += fate-hevc-color-reserved FATE_SAMPLES_AVCONV += $(FATE_HEVC-yes) FATE_SAMPLES_FFPROBE += $(FATE_HEVC_FFPROBE-yes) diff --git a/tests/fate/matroska.mak b/tests/fate/matroska.mak index 563d7564852a9..b00d19942d55e 100644 --- a/tests/fate/matroska.mak +++ b/tests/fate/matroska.mak @@ -100,6 +100,18 @@ fate-matroska-non-rotation-displaymatrix: CMD = transcode mov $(TARGET_SAMPLES)/ "-c copy" \ "-show_entries stream_side_data_list" +# This test tests container cropping. The expected output is that +# only the copied streams have cropping (and displaymatrix) side data +# and that stream #1 (for which applying cropping was not disabled) +# and the reencoded stream #2 decode to the same. 
+FATE_MATROSKA_FFMPEG_FFPROBE-$(call TRANSCODE, UTVIDEO, MATROSKA, MOV_DEMUXER HEVC_DECODER) \ + += fate-matroska-crop +fate-matroska-crop: CMD = transcode mov $(TARGET_SAMPLES)/heif-conformance/MIAF007.heic matroska \ + "-map 0:0 -map 0:0 -map 0:0 -c:0 copy -c:1 copy -c:2 utvideo" \ + "-map 0" \ + "-show_entries stream=index,codec_name,width,height:stream_side_data_list" "" \ + "-apply_cropping:0 none" + # This tests DOVI (reading from MP4 and Matroska and writing to Matroska) # as well as writing the Cues at the front (by shifting data) if # the initially reserved amount of space turns out to be insufficient. diff --git a/tests/fate/mov.mak b/tests/fate/mov.mak index f7e5e522178a7..b966249dc0738 100644 --- a/tests/fate/mov.mak +++ b/tests/fate/mov.mak @@ -84,6 +84,14 @@ fate-mov-ibi-elst-starts-b: CMD = framemd5 -flags +bitexact -i $(TARGET_SAMPLES) # Makes sure that we handle overlapping framgments fate-mov-frag-overlap: CMD = framemd5 -i $(TARGET_SAMPLES)/mov/frag_overlap.mp4 +fate-mov-mp4-frag-flush: CMD = md5 -f lavfi -i color=blue,format=rgb24,trim=duration=0.04 -f lavfi -i anullsrc,aformat=s16,atrim=duration=2 -c:v png -c:a pcm_s16le -movflags +empty_moov+hybrid_fragmented -frag_duration 1000000 -frag_interleave 1 -f mp4 +fate-mov-mp4-frag-flush: CMP = oneline +fate-mov-mp4-frag-flush: REF = a10c0e2e2dfc120f31ca5e59e0e4392a +FATE_MOV_FFMPEG-$(call ALLYES, LAVFI_INDEV COLOR_FILTER FORMAT_FILTER TRIM_FILTER \ + ANULLSRC_FILTER AFORMAT_FILTER ATRIM_FILTER \ + WRAPPED_AVFRAME_DECODER PCM_S16LE_DECODER PCM_S16BE_DECODER \ + PNG_ENCODER PCM_S16LE_ENCODER MP4_MUXER) += fate-mov-mp4-frag-flush + # Makes sure that we pick the right frames according to edit list when there is no keyframe with PTS < edit list start. # For example, when video starts on a B-frame, and edit list starts on that B-frame too. # GOP structure : B B I in presentation order. diff --git a/tests/fate/pixfmt.mak b/tests/fate/pixfmt.mak index 859aeebec0c52..5f8e343fdc810 100644 --- a/tests/fate/pixfmt.mak +++ b/tests/fate/pixfmt.mak @@ -136,9 +136,9 @@ $(FATE_PIXFMT_EXT): REF = $(SRC_PATH)/tests/ref/pixfmt/$(@:fate-pixfmt-%=%) FATE_PIXFMT_16-YUV-$(call ALLYES, SCALE_FILTER YUVTESTSRC_FILTER LAVFI_INDEV) += $(PIXFMT_16_LIST) FATE_PIXFMT_16-RGB-$(call ALLYES, SCALE_FILTER RGBTESTSRC_FILTER LAVFI_INDEV) += $(PIXFMT_16_LIST) -FATE_PIXFMT_16-YUV := $(FATE_PIXFMT_16-YUV-yes:%=fate-pixfmt-yuv444p16-%) -FATE_PIXFMT_16-YUV := $(FATE_PIXFMT_16-RGB-yes:%=fate-pixfmt-p416-%) -FATE_PIXFMT_16-RGB := $(FATE_PIXFMT_16-RGB-yes:%=fate-pixfmt-gbrp16-%) +FATE_PIXFMT_16-YUV += $(FATE_PIXFMT_16-YUV-yes:%=fate-pixfmt-yuv444p16-%) +FATE_PIXFMT_16-YUV += $(FATE_PIXFMT_16-YUV-yes:%=fate-pixfmt-p416-%) +FATE_PIXFMT_16-RGB += $(FATE_PIXFMT_16-RGB-yes:%=fate-pixfmt-gbrp16-%) $(FATE_PIXFMT_16-YUV): CMD = pixfmt_conversion_ext "yuv" "le" $(FATE_PIXFMT_16-RGB): CMD = pixfmt_conversion_ext "rgb" "le" diff --git a/tests/fate/qt.mak b/tests/fate/qt.mak index 42e5fd9107688..436da172f1a67 100644 --- a/tests/fate/qt.mak +++ b/tests/fate/qt.mak @@ -57,7 +57,7 @@ fate-svq3-1: CMD = framecrc -i $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3. 
fate-svq3-2: CMD = framecrc -flags +bitexact -ignore_editlist 1 -i $(TARGET_SAMPLES)/svq3/svq3_decoding_regression.mov -an FATE_SVQ3 += fate-svq3-watermark -fate-svq3-watermark: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/svq3/svq3_watermark.mov +fate-svq3-watermark: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/svq3/svq3_watermark.mov -fps_mode passthrough FATE_QT-$(call FRAMECRC, MOV, SVQ3, ZLIB) += $(FATE_SVQ3) fate-svq3: $(FATE_SVQ3) diff --git a/tests/ref/fate/ac3-fixed-encode-2 b/tests/ref/fate/ac3-fixed-encode-2 new file mode 100644 index 0000000000000..8e945b6637945 --- /dev/null +++ b/tests/ref/fate/ac3-fixed-encode-2 @@ -0,0 +1,13 @@ +#tb 0: 1/44100 +#media_type 0: audio +#codec_id 0: ac3 +#sample_rate 0: 44100 +#channel_layout_name 0: 5.1(side) +0, -256, -256, 1536, 1114, 0x32fd276c +0, 1280, 1280, 1536, 1116, 0x1ac63ba7 +0, 2816, 2816, 1536, 1114, 0xdde82dbc +0, 4352, 4352, 1536, 1114, 0x39313179 +0, 5888, 5888, 1536, 1116, 0x166214e2 +0, 7424, 7424, 1536, 1114, 0xfbcc27ad +0, 8960, 8960, 1536, 1114, 0xe7ed3321 +0, 10496, 10496, 1536, 1114, 0xa1823473 diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1 index 74849a803113c..e09000e1815e8 100644 --- a/tests/ref/fate/dxv3enc-dxt1 +++ b/tests/ref/fate/dxv3enc-dxt1 @@ -3,4 +3,4 @@ #codec_id 0: dxv #dimensions 0: 1920x1080 #sar 0: 1/1 -0, 0, 0, 1, 76521, 0xed387a5e +0, 0, 0, 1, 76190, 0x0e6f0326 diff --git a/tests/ref/fate/hevc-color-reserved b/tests/ref/fate/hevc-color-reserved new file mode 100644 index 0000000000000..cba6397aa855f --- /dev/null +++ b/tests/ref/fate/hevc-color-reserved @@ -0,0 +1,6 @@ +#tb 0: 1/25 +#media_type 0: video +#codec_id 0: rawvideo +#dimensions 0: 2560x1600 +#sar 0: 0/1 +0, 0, 0, 1, 6144000, 0x427b9a00 diff --git a/tests/ref/fate/matroska-crop b/tests/ref/fate/matroska-crop new file mode 100644 index 0000000000000..12a863942c082 --- /dev/null +++ b/tests/ref/fate/matroska-crop @@ -0,0 +1,70 @@ +fc4932f90dfc955b55cfbdbb210fdd16 *tests/data/fate/matroska-crop.matroska +355698 tests/data/fate/matroska-crop.matroska +#tb 0: 1/1 +#media_type 0: video +#codec_id 0: rawvideo +#dimensions 0: 720x1280 +#sar 0: 0/1 +#tb 1: 1/1 +#media_type 1: video +#codec_id 1: rawvideo +#dimensions 1: 360x640 +#sar 1: 0/1 +#tb 2: 1/1 +#media_type 2: video +#codec_id 2: rawvideo +#dimensions 2: 360x640 +#sar 2: 0/1 +0, 0, 0, 1, 1382400, 0xc8267e89 +1, 0, 0, 1, 345600, 0x84b4bdaa +2, 0, 0, 1, 345600, 0x84b4bdaa +[STREAM] +index=0 +codec_name=hevc +width=1280 +height=720 +[SIDE_DATA] +side_data_type=Frame Cropping +crop_top=180 +crop_bottom=180 +crop_left=320 +crop_right=320 +[/SIDE_DATA] +[SIDE_DATA] +side_data_type=Display Matrix +displaymatrix= +00000000: 0 65536 0 +00000001: 65536 0 0 +00000002: 0 0 1073741824 + +rotation=-90 +[/SIDE_DATA] +[/STREAM] +[STREAM] +index=1 +codec_name=hevc +width=1280 +height=720 +[SIDE_DATA] +side_data_type=Frame Cropping +crop_top=180 +crop_bottom=180 +crop_left=320 +crop_right=320 +[/SIDE_DATA] +[SIDE_DATA] +side_data_type=Display Matrix +displaymatrix= +00000000: 0 65536 0 +00000001: 65536 0 0 +00000002: 0 0 1073741824 + +rotation=-90 +[/SIDE_DATA] +[/STREAM] +[STREAM] +index=2 +codec_name=utvideo +width=360 +height=640 +[/STREAM] diff --git a/tests/ref/fate/matroska-mastering-display-metadata b/tests/ref/fate/matroska-mastering-display-metadata index 6a2ff15b1b220..6f10dc57a67fb 100644 --- a/tests/ref/fate/matroska-mastering-display-metadata +++ b/tests/ref/fate/matroska-mastering-display-metadata @@ -1,7 +1,7 @@ -c1e5e2ecf433cf05af8556debc7d4d0b 
*tests/data/fate/matroska-mastering-display-metadata.matroska -1669773 tests/data/fate/matroska-mastering-display-metadata.matroska +bdca53906b34c57192416a0f737b885e *tests/data/fate/matroska-mastering-display-metadata.matroska +1669723 tests/data/fate/matroska-mastering-display-metadata.matroska #extradata 0: 4, 0x040901a3 -#extradata 3: 202, 0xfce96279 +#extradata 3: 201, 0x9a706279 #tb 0: 1/1000 #media_type 0: video #codec_id 0: prores diff --git a/tests/ref/fate/ogg-flac-chained-meta.txt b/tests/ref/fate/ogg-flac-chained-meta.txt index ad20ba935f745..28e22aa29e613 100644 --- a/tests/ref/fate/ogg-flac-chained-meta.txt +++ b/tests/ref/fate/ogg-flac-chained-meta.txt @@ -5,8 +5,6 @@ Stream ID: 0, frame PTS: 0, metadata: N/A Stream ID: 0, packet PTS: 4608, packet DTS: 4608 Stream ID: 0, frame PTS: 4608, metadata: N/A Stream ID: 0, packet PTS: 0, packet DTS: 0 -Stream ID: 0, packet PTS: 0, packet DTS: 0 -Stream ID: 0, packet PTS: 0, packet DTS: 0 Stream ID: 0, frame PTS: 0, metadata: N/A Stream ID: 0, packet PTS: 4608, packet DTS: 4608 Stream ID: 0, frame PTS: 4608, metadata: N/A diff --git a/tests/ref/fate/ogg-opus-chained-meta.txt b/tests/ref/fate/ogg-opus-chained-meta.txt index fc84b8b703fb7..addc41c1eb73c 100644 --- a/tests/ref/fate/ogg-opus-chained-meta.txt +++ b/tests/ref/fate/ogg-opus-chained-meta.txt @@ -13,7 +13,6 @@ Stream ID: 0, frame PTS: 3528, metadata: N/A Stream ID: 0, packet PTS: 4488, packet DTS: 4488 Stream ID: 0, frame PTS: 4488, metadata: N/A Stream ID: 0, packet PTS: -312, packet DTS: -312 -Stream ID: 0, new metadata: encoder=Lavc61.19.100 libopus;Lavc61.19.100 libopus:title=First Stream;Second Stream Stream ID: 0, frame PTS: -312, metadata: N/A Stream ID: 0, packet PTS: 648, packet DTS: 648 Stream ID: 0, frame PTS: 648, metadata: N/A diff --git a/tests/ref/fate/svq3-watermark b/tests/ref/fate/svq3-watermark index f4068c612e85c..95d67e3da4dd6 100644 --- a/tests/ref/fate/svq3-watermark +++ b/tests/ref/fate/svq3-watermark @@ -12,3 +12,4 @@ 0, 7, 7, 1, 102240, 0x342bf32f 0, 8, 8, 1, 102240, 0x7b311bf1 0, 9, 9, 1, 102240, 0xf56e0cd3 +0, 9, 9, 1, 102240, 0xfb95c7d3 diff --git a/tests/ref/fate/ts-demux b/tests/ref/fate/ts-demux index 6a830d0d99fd7..d56cc279379d7 100644 --- a/tests/ref/fate/ts-demux +++ b/tests/ref/fate/ts-demux @@ -24,6 +24,6 @@ packet|codec_type=video|stream_index=0|pts=3912686363|pts_time=43474.292922|dts= packet|codec_type=audio|stream_index=1|pts=3912644825|pts_time=43473.831389|dts=3912644825|dts_time=43473.831389|duration=2880|duration_time=0.032000|size=906|pos=474888|flags=K__|data_hash=CRC32:0893d398 packet|codec_type=audio|stream_index=2|pts=3912645580|pts_time=43473.839778|dts=3912645580|dts_time=43473.839778|duration=2880|duration_time=0.032000|size=354|pos=491808|flags=K__|data_hash=CRC32:f5963fa6 
stream|index=0|codec_name=mpeg2video|profile=4|codec_type=video|codec_tag_string=[2][0][0][0]|codec_tag=0x0002|width=1280|height=720|coded_width=0|coded_height=0|has_b_frames=1|sample_aspect_ratio=1:1|display_aspect_ratio=16:9|pix_fmt=yuv420p|level=4|color_range=tv|color_space=unknown|color_transfer=unknown|color_primaries=unknown|chroma_location=left|field_order=progressive|refs=1|ts_id=32776|ts_packetsize=188|id=0x31|r_frame_rate=60000/1001|avg_frame_rate=60000/1001|time_base=1/90000|start_pts=3912669846|start_time=43474.109400|duration_ts=19519|duration=0.216878|bit_rate=15000000|max_bit_rate=N/A|bits_per_raw_sample=N/A|nb_frames=N/A|nb_read_frames=N/A|nb_read_packets=15|extradata_size=150|extradata_hash=CRC32:53134fa8|disposition:default=0|disposition:dub=0|disposition:original=0|disposition:comment=0|disposition:lyrics=0|disposition:karaoke=0|disposition:forced=0|disposition:hearing_impaired=0|disposition:visual_impaired=0|disposition:clean_effects=0|disposition:attached_pic=0|disposition:timed_thumbnails=0|disposition:non_diegetic=0|disposition:captions=0|disposition:descriptions=0|disposition:metadata=0|disposition:dependent=0|disposition:still_image=0|disposition:multilayer=0|side_datum/cpb_properties:side_data_type=CPB properties|side_datum/cpb_properties:max_bitrate=15000000|side_datum/cpb_properties:min_bitrate=0|side_datum/cpb_properties:avg_bitrate=0|side_datum/cpb_properties:buffer_size=9781248|side_datum/cpb_properties:vbv_delay=-1 -stream|index=1|codec_name=ac3|profile=unknown|codec_type=audio|codec_tag_string=[4][0][0][0]|codec_tag=0x0004|sample_fmt=fltp|sample_rate=48000|channels=6|channel_layout=5.1(side)|bits_per_sample=0|initial_padding=0|dmix_mode=0|ltrt_cmixlev=0.000000|ltrt_surmixlev=0.000000|loro_cmixlev=0.000000|loro_surmixlev=0.000000|ts_id=32776|ts_packetsize=188|id=0x34|r_frame_rate=0/0|avg_frame_rate=0/0|time_base=1/90000|start_pts=3912633305|start_time=43473.703389|duration_ts=14400|duration=0.160000|bit_rate=384000|max_bit_rate=N/A|bits_per_raw_sample=N/A|nb_frames=N/A|nb_read_frames=N/A|nb_read_packets=5|disposition:default=0|disposition:dub=0|disposition:original=0|disposition:comment=0|disposition:lyrics=0|disposition:karaoke=0|disposition:forced=0|disposition:hearing_impaired=0|disposition:visual_impaired=0|disposition:clean_effects=0|disposition:attached_pic=0|disposition:timed_thumbnails=0|disposition:non_diegetic=0|disposition:captions=0|disposition:descriptions=0|disposition:metadata=0|disposition:dependent=0|disposition:still_image=0|disposition:multilayer=0|tag:language=eng 
-stream|index=2|codec_name=ac3|profile=unknown|codec_type=audio|codec_tag_string=[4][0][0][0]|codec_tag=0x0004|sample_fmt=fltp|sample_rate=48000|channels=2|channel_layout=stereo|bits_per_sample=0|initial_padding=0|dmix_mode=0|ltrt_cmixlev=0.000000|ltrt_surmixlev=0.000000|loro_cmixlev=0.000000|loro_surmixlev=0.000000|ts_id=32776|ts_packetsize=188|id=0x35|r_frame_rate=0/0|avg_frame_rate=0/0|time_base=1/90000|start_pts=3912634060|start_time=43473.711778|duration_ts=14400|duration=0.160000|bit_rate=192000|max_bit_rate=N/A|bits_per_raw_sample=N/A|nb_frames=N/A|nb_read_frames=N/A|nb_read_packets=5|disposition:default=0|disposition:dub=0|disposition:original=0|disposition:comment=0|disposition:lyrics=0|disposition:karaoke=0|disposition:forced=0|disposition:hearing_impaired=0|disposition:visual_impaired=0|disposition:clean_effects=0|disposition:attached_pic=0|disposition:timed_thumbnails=0|disposition:non_diegetic=0|disposition:captions=0|disposition:descriptions=0|disposition:metadata=0|disposition:dependent=0|disposition:still_image=0|disposition:multilayer=0|tag:language=es +stream|index=1|codec_name=ac3|profile=unknown|codec_type=audio|codec_tag_string=[6][0][0][0]|codec_tag=0x0006|sample_fmt=fltp|sample_rate=48000|channels=6|channel_layout=5.1(side)|bits_per_sample=0|initial_padding=0|dmix_mode=0|ltrt_cmixlev=0.000000|ltrt_surmixlev=0.000000|loro_cmixlev=0.000000|loro_surmixlev=0.000000|ts_id=32776|ts_packetsize=188|id=0x34|r_frame_rate=0/0|avg_frame_rate=0/0|time_base=1/90000|start_pts=3912633305|start_time=43473.703389|duration_ts=14400|duration=0.160000|bit_rate=384000|max_bit_rate=N/A|bits_per_raw_sample=N/A|nb_frames=N/A|nb_read_frames=N/A|nb_read_packets=5|disposition:default=0|disposition:dub=0|disposition:original=0|disposition:comment=0|disposition:lyrics=0|disposition:karaoke=0|disposition:forced=0|disposition:hearing_impaired=0|disposition:visual_impaired=0|disposition:clean_effects=0|disposition:attached_pic=0|disposition:timed_thumbnails=0|disposition:non_diegetic=0|disposition:captions=0|disposition:descriptions=0|disposition:metadata=0|disposition:dependent=0|disposition:still_image=0|disposition:multilayer=0|tag:language=eng +stream|index=2|codec_name=ac3|profile=unknown|codec_type=audio|codec_tag_string=[6][0][0][0]|codec_tag=0x0006|sample_fmt=fltp|sample_rate=48000|channels=2|channel_layout=stereo|bits_per_sample=0|initial_padding=0|dmix_mode=0|ltrt_cmixlev=0.000000|ltrt_surmixlev=0.000000|loro_cmixlev=0.000000|loro_surmixlev=0.000000|ts_id=32776|ts_packetsize=188|id=0x35|r_frame_rate=0/0|avg_frame_rate=0/0|time_base=1/90000|start_pts=3912634060|start_time=43473.711778|duration_ts=14400|duration=0.160000|bit_rate=192000|max_bit_rate=N/A|bits_per_raw_sample=N/A|nb_frames=N/A|nb_read_frames=N/A|nb_read_packets=5|disposition:default=0|disposition:dub=0|disposition:original=0|disposition:comment=0|disposition:lyrics=0|disposition:karaoke=0|disposition:forced=0|disposition:hearing_impaired=0|disposition:visual_impaired=0|disposition:clean_effects=0|disposition:attached_pic=0|disposition:timed_thumbnails=0|disposition:non_diegetic=0|disposition:captions=0|disposition:descriptions=0|disposition:metadata=0|disposition:dependent=0|disposition:still_image=0|disposition:multilayer=0|tag:language=es format|filename=mp3ac325-4864-small.ts|nb_streams=3|nb_programs=1|nb_stream_groups=0|format_name=mpegts|start_time=43473.703389|duration=0.622889|size=512000|bit_rate=6575810|probe_score=50 diff --git a/tests/ref/vsynth/vsynth3-asv1 b/tests/ref/vsynth/vsynth3-asv1 index 
0abbf787ecf28..af1dc644b0745 100644 --- a/tests/ref/vsynth/vsynth3-asv1 +++ b/tests/ref/vsynth/vsynth3-asv1 @@ -1,4 +1,4 @@ -81eeea0d0e6219b2f381cf2100e9a12f *tests/data/fate/vsynth3-asv1.avi -34704 tests/data/fate/vsynth3-asv1.avi +69ae6df10440e68c53bee4e713851199 *tests/data/fate/vsynth3-asv1.avi +31524 tests/data/fate/vsynth3-asv1.avi 3c8636e22a96267451684f42d7a6f608 *tests/data/fate/vsynth3-asv1.out.rawvideo stddev: 13.16 PSNR: 25.74 MAXDIFF: 112 bytes: 86700/ 86700 diff --git a/tests/ref/vsynth/vsynth3-asv2 b/tests/ref/vsynth/vsynth3-asv2 index 90b8a47f3415f..9fa9822c0bce4 100644 --- a/tests/ref/vsynth/vsynth3-asv2 +++ b/tests/ref/vsynth/vsynth3-asv2 @@ -1,4 +1,4 @@ -8402fb1112fb8119c019154a472b5cd0 *tests/data/fate/vsynth3-asv2.avi -36208 tests/data/fate/vsynth3-asv2.avi +63000eaedeb60bede8baeb090f02881a *tests/data/fate/vsynth3-asv2.avi +33696 tests/data/fate/vsynth3-asv2.avi 5469c0735b7c9279e5e8e3439fc6acab *tests/data/fate/vsynth3-asv2.out.rawvideo stddev: 9.07 PSNR: 28.97 MAXDIFF: 51 bytes: 86700/ 86700
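A few usage sketches for the new pieces in this series follow; none of them are part of the patch itself. For the WHIP muxer registered above (ff_whip_muxer: H.264 video, Opus audio, and the handshake_timeout, pkt_size, authorization, cert_file and key_file options), an invocation looks roughly like the line below; the endpoint URL and Bearer token are placeholders, and the encoder flags are only illustrative, assuming libx264 and libopus are enabled:

    ffmpeg -re -i input.mp4 -c:v libx264 -tune zerolatency -c:a libopus \
        -f whip -authorization "my-bearer-token" "https://example.com/whip/endpoint"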
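The cleanup comment in whip_deinit() above touches a general libavformat rule: avio_alloc_context() never takes ownership of the buffer it is given, and avio_context_free() releases only the AVIOContext. A minimal sketch of that pattern (the helper name is made up):

    #include "libavformat/avio.h"
    #include "libavutil/mem.h"

    static void free_custom_pb(AVIOContext **pb)
    {
        if (!*pb)
            return;
        av_freep(&(*pb)->buffer); /* the buffer handed to avio_alloc_context() */
        avio_context_free(pb);    /* frees the context only, not the buffer */
    }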
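The av_unreachable() and av_assume() macros added to avassert.h above act as optimization hints in release builds and as hard assertions when ASSERT_LEVEL is raised. A minimal sketch of the intended use; both helper functions are hypothetical:

    #include "libavutil/avassert.h"

    /* Caller guarantees block_size is one of 4, 8, 16 or 32. */
    static int block_size_log2(int block_size)
    {
        switch (block_size) {
        case 4:  return 2;
        case 8:  return 3;
        case 16: return 4;
        case 32: return 5;
        default:
            av_unreachable("block_size is a power of two between 4 and 32");
            return 0; /* never reached; keeps the no-op fallback warning-free */
        }
    }

    /* len is known to be positive at every call site. */
    static void double_all(int *v, int len)
    {
        av_assume(len > 0); /* lets the compiler drop the len <= 0 path */
        for (int i = 0; i < len; i++)
            v[i] *= 2;
    }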
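ff_sws_graph_add_pass(), now non-static and documented in graph.h above, combines with the new ff_sws_img_shift() helper to let other libswscale code attach a custom pass to an SwsGraph. A rough sketch, assuming an 8-bit grayscale graph so only one plane has to be produced; run_invert and add_invert_pass are made-up names, while the signatures follow the graph.h/graph.c hunks above:

    #include "libavutil/error.h"
    #include "libswscale/graph.h"

    /* Invert an 8-bit grayscale image, one slice at a time. */
    static void run_invert(const SwsImg *out_base, const SwsImg *in_base,
                           int y, int h, const SwsPass *pass)
    {
        const SwsImg in  = ff_sws_img_shift(in_base, y);
        const SwsImg out = ff_sws_img_shift(out_base, y);

        for (int line = 0; line < h; line++)
            for (int x = 0; x < pass->width; x++)
                out.data[0][line * out.linesize[0] + x] =
                    255 - in.data[0][line * in.linesize[0] + x];
    }

    static int add_invert_pass(SwsGraph *graph, SwsPass *input, int w, int h)
    {
        /* align=1 enables slice threading without extra alignment demands. */
        SwsPass *pass = ff_sws_graph_add_pass(graph, AV_PIX_FMT_GRAY8, w, h,
                                              input, 1, NULL, run_invert);
        return pass ? 0 : AVERROR(ENOMEM);
    }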