@@ -295,7 +295,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.c
295295 #include "filters.h"
296296 #include "scale_eval.h"
297297 #include "video.h"
298- @@ -108 ,6 +110 ,9 @@ typedef struct CUDAScaleContext {
298+ @@ -109 ,6 +111 ,9 @@ typedef struct CUDAScaleContext {
299299 int interp_as_integer;
300300
301301 float param;
@@ -305,7 +305,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.c
305305 } CUDAScaleContext;
306306
307307 static av_cold int cudascale_init(AVFilterContext *ctx)
308- @@ -129 ,13 +134 ,23 @@ static av_cold void cudascale_uninit(AVF
308+ @@ -130 ,13 +135 ,23 @@ static av_cold void cudascale_uninit(AVFilterContext *ctx)
309309 {
310310 CUDAScaleContext *s = ctx->priv;
311311
@@ -332,7 +332,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.c
332332 CHECK_CU(cu->cuCtxPopCurrent(&dummy));
333333 }
334334
335- @@ -275 ,6 +290 ,68 @@ static av_cold int init_processing_chain
335+ @@ -276 ,6 +291 ,68 @@ static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int
336336 return 0;
337337 }
338338
@@ -401,7 +401,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.c
401401 static av_cold int cudascale_load_functions(AVFilterContext *ctx)
402402 {
403403 CUDAScaleContext *s = ctx->priv;
404- @@ -383 ,6 +460 ,11 @@ static av_cold int cudascale_config_prop
404+ @@ -389 ,6 +466 ,11 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
405405 s->hwctx = device_hwctx;
406406 s->cu_stream = s->hwctx->stream;
407407
@@ -410,10 +410,10 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.c
410410+ goto fail;
411411+ }
412412+
413- if (inlink->sample_aspect_ratio.num) {
414- outlink->sample_aspect_ratio = av_mul_q(( AVRational){outlink->h*inlink->w,
415- outlink->w* inlink->h},
416- @@ -418 ,11 +500 ,15 @@ static int call_resize_kernel(AVFilterCo
413+ if (s->reset_sar)
414+ outlink->sample_aspect_ratio = ( AVRational){1, 1};
415+ else if ( inlink->sample_aspect_ratio.num) {
416+ @@ -426 ,11 +508 ,15 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
417417 (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
418418 };
419419
@@ -424,13 +424,13 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.c
424424 &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
425425 &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
426426 &dst_width, &dst_height, &dst_pitch,
427- - &src_width, &src_height, &s->param
428- + &src_width, &src_height, &s->param,
427+ - &src_left, &src_top, & src_width, &src_height, &s->param
428+ + &src_left, &src_top, & src_width, &src_height, &s->param,
429429+ &s->dither_tex, &dither_size, &dither_quantization
430430 };
431431
432432 return CHECK_CU(cu->cuLaunchKernel(func,
433- @@ -446 ,6 +532 ,7 @@ static int scalecuda_resize(AVFilterCont
433+ @@ -457 ,6 +543 ,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
434434
435435 for (i = 0; i < s->in_planes; i++) {
436436 CUDA_TEXTURE_DESC tex_desc = {
@@ -442,7 +442,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
442442===================================================================
443443--- FFmpeg.orig/libavfilter/vf_scale_cuda.cu
444444+++ FFmpeg/libavfilter/vf_scale_cuda.cu
445- @@ -29 ,6 +29 ,19 @@ using subsample_function_t = T (*)(cudaT
445+ @@ -30 ,6 +30 ,19 @@ using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo,
446446 int src_width, int src_height,
447447 int bit_depth, float param);
448448
@@ -462,18 +462,18 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
462462 // --- CONVERSION LOGIC ---
463463
464464 static const ushort mask_10bit = 0xFFC0;
465- @@ -64 ,7 +77 ,9 @@ static inline __device__ ushort conv_16t
465+ @@ -65 ,7 +78 ,9 @@ static inline __device__ ushort conv_16to10(ushort in)
466466 subsample_function_t<in_T_uv> subsample_func_uv> \
467467 __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \
468468 int dst_width, int dst_height, int dst_pitch, \
469- - int src_width, int src_height, float param)
470- + int src_width, int src_height, float param, \
469+ - int src_left, int src_top, int src_width, int src_height, float param)
470+ + int src_left, int src_top, int src_width, int src_height, float param, \
471471+ cudaTextureObject_t dither_tex, \
472472+ float dither_size, float dither_quantization)
473473
474474 #define SUB_F(m, plane) \
475475 subsample_func_##m(src_tex[plane], xo, yo, \
476- @@ -477 ,7 +492 ,10 @@ struct Convert_p010le_yuv420p
476+ @@ -479 ,7 +494 ,10 @@ struct Convert_p010le_yuv420p
477477
478478 DEF_F(Convert, out_T)
479479 {
@@ -485,7 +485,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
485485 }
486486
487487 DEF_F(Convert_uv, out_T_uv)
488- @@ -498 ,7 +516 ,10 @@ struct Convert_p010le_nv12
488+ @@ -500 ,7 +518 ,10 @@ struct Convert_p010le_nv12
489489
490490 DEF_F(Convert, out_T)
491491 {
@@ -497,7 +497,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
497497 }
498498
499499 DEF_F(Convert_uv, out_T_uv)
500- @@ -521 ,7 +542 ,10 @@ struct Convert_p010le_yuv444p
500+ @@ -523 ,7 +544 ,10 @@ struct Convert_p010le_yuv444p
501501
502502 DEF_F(Convert, out_T)
503503 {
@@ -509,7 +509,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
509509 }
510510
511511 DEF_F(Convert_uv, out_T_uv)
512- @@ -607 ,7 +631 ,10 @@ struct Convert_p016le_yuv420p
512+ @@ -609 ,7 +633 ,10 @@ struct Convert_p016le_yuv420p
513513
514514 DEF_F(Convert, out_T)
515515 {
@@ -521,7 +521,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
521521 }
522522
523523 DEF_F(Convert_uv, out_T_uv)
524- @@ -628 ,7 +655 ,10 @@ struct Convert_p016le_nv12
524+ @@ -630 ,7 +657 ,10 @@ struct Convert_p016le_nv12
525525
526526 DEF_F(Convert, out_T)
527527 {
@@ -533,7 +533,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
533533 }
534534
535535 DEF_F(Convert_uv, out_T_uv)
536- @@ -651 ,7 +681 ,10 @@ struct Convert_p016le_yuv444p
536+ @@ -653 ,7 +683 ,10 @@ struct Convert_p016le_yuv444p
537537
538538 DEF_F(Convert, out_T)
539539 {
@@ -545,7 +545,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
545545 }
546546
547547 DEF_F(Convert_uv, out_T_uv)
548- @@ -672 ,7 +705 ,10 @@ struct Convert_p016le_p010le
548+ @@ -674 ,7 +707 ,10 @@ struct Convert_p016le_p010le
549549
550550 DEF_F(Convert, out_T)
551551 {
@@ -557,7 +557,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
557557 }
558558
559559 DEF_F(Convert_uv, out_T_uv)
560- @@ -737 ,7 +773 ,10 @@ struct Convert_yuv444p16le_yuv420p
560+ @@ -739 ,7 +775 ,10 @@ struct Convert_yuv444p16le_yuv420p
561561
562562 DEF_F(Convert, out_T)
563563 {
@@ -569,7 +569,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
569569 }
570570
571571 DEF_F(Convert_uv, out_T_uv)
572- @@ -757 ,7 +796 ,10 @@ struct Convert_yuv444p16le_nv12
572+ @@ -759 ,7 +798 ,10 @@ struct Convert_yuv444p16le_nv12
573573
574574 DEF_F(Convert, out_T)
575575 {
@@ -581,7 +581,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
581581 }
582582
583583 DEF_F(Convert_uv, out_T_uv)
584- @@ -779 ,7 +821 ,10 @@ struct Convert_yuv444p16le_yuv444p
584+ @@ -781 ,7 +823 ,10 @@ struct Convert_yuv444p16le_yuv444p
585585
586586 DEF_F(Convert, out_T)
587587 {
@@ -593,7 +593,7 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
593593 }
594594
595595 DEF_F(Convert_uv, out_T_uv)
596- @@ -799 ,7 +844 ,10 @@ struct Convert_yuv444p16le_p010le
596+ @@ -801 ,7 +846 ,10 @@ struct Convert_yuv444p16le_p010le
597597
598598 DEF_F(Convert, out_T)
599599 {
@@ -605,32 +605,32 @@ Index: FFmpeg/libavfilter/vf_scale_cuda.cu
605605 }
606606
607607 DEF_F(Convert_uv, out_T_uv)
608- @@ -1114 ,8 +1162 ,8 @@ __device__ static inline T Subsample_Bic
608+ @@ -1119 ,8 +1167 ,8 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
609609 {
610610 float hscale = (float)src_width / (float)dst_width;
611611 float vscale = (float)src_height / (float)dst_height;
612- - float xi = (xo + 0.5f) * hscale - 0.5f;
613- - float yi = (yo + 0.5f) * vscale - 0.5f;
614- + float xi = xo * hscale + 0.5f * hscale - 0.5f; // avoid (x - v + v = x)
615- + float yi = yo * hscale + 0.5f * vscale - 0.5f;
612+ - float xi = (xo + 0.5f) * hscale - 0.5f + src_left ;
613+ - float yi = (yo + 0.5f) * vscale - 0.5f + src_top ;
614+ + float xi = xo * hscale + 0.5f * hscale - 0.5f + src_left ; // avoid (x - v + v = x)
615+ + float yi = yo * vscale + 0.5f * vscale - 0.5f + src_top ;
616616 float px = floor(xi);
617617 float py = floor(yi);
618618 float fx = xi - px;
619- @@ -1147 ,7 +1195 ,9 @@ __device__ static inline T Subsample_Bic
619+ @@ -1152 ,7 +1200 ,9 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
620620 cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
621621 T *dst_0, T *dst_1, T *dst_2, T *dst_3, \
622622 int dst_width, int dst_height, int dst_pitch, \
623- - int src_width, int src_height, float param
624- + int src_width, int src_height, float param, \
623+ - int src_left, int src_top, int src_width, int src_height, float param
624+ + int src_left, int src_top, int src_width, int src_height, float param, \
625625+ cudaTextureObject_t dither_tex, \
626626+ float dither_size, float dither_quantization
627627
628628 #define SUBSAMPLE(Convert, T) \
629629 cudaTextureObject_t src_tex[4] = \
630- @@ -1159,7 +1209,9 @@ __device__ static inline T Subsample_Bic
631- Convert( \
630+ @@ -1165,7 +1215,9 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
632631 src_tex, dst, xo, yo, \
633632 dst_width, dst_height, dst_pitch, \
633+ src_left, src_top, \
634634- src_width, src_height, param);
635635+ src_width, src_height, param, \
636636+ dither_tex, \
0 commit comments