Fixes

jantonguirao · jantonguirao · commit a71cf242f6ad · 2025-04-10T09:20:55.000+02:00
Signed-off-by: Joaquin Anton Guirao <janton@nvidia.com>
diff --git a/dali/operators/CMakeLists.txt b/dali/operators/CMakeLists.txt
@@ -66,7 +66,6 @@ if (NOT OPERATOR_SRCS_PATTERN STREQUAL "" OR
   list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/operators.cc")
   # Those are needed for operators.cc to have all symbols
   list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/util/npp.cc")
-  list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/decoder/nvjpeg/nvjpeg_helper.cc")
   list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/video/dynlink_nvcuvid/dynlink_nvcuvid.cc")
 
   list(APPEND OPERATOR_SRCS_PATTERN_EXCLUDE "*test*")
diff --git a/dali/operators/image/remap/displacement_test.cc b/dali/operators/image/remap/displacement_test.cc
@@ -24,14 +24,14 @@ typedef ::testing::Types<RGB, BGR, Gray> Types;
 TYPED_TEST_SUITE(DisplacementTest, Types);
 
 TYPED_TEST(DisplacementTest, Sphere) {
-  this->RunTest("Sphere", nullptr, 0, false, 0.006);
+  this->RunTest("Sphere", nullptr, 0, false, 0.008);
 }
 
 TYPED_TEST(DisplacementTest, Water) {
   const OpArg params[] = {{"ampl_x", "2.", DALI_FLOAT},
                           {"ampl_y", "3.", DALI_FLOAT},
                           {"phase_x", "0.2", DALI_FLOAT}};
-  this->RunTest("Water", params, sizeof(params)/sizeof(params[0]), false, 0.005);
+  this->RunTest("Water", params, sizeof(params)/sizeof(params[0]), false, 0.008);
 }
 
 /*
diff --git a/internal_tools/hw_decoder_bench.py b/internal_tools/hw_decoder_bench.py
@@ -116,13 +116,6 @@ def parse_range_arg(arg_str, parse_fn=int):
     default=-1,
     type=int,
 )
-
-parser.add_argument(
-    "--experimental_decoder",
-    action="store_true",
-    help="If True, uses the experimental decoder instead of the default",
-)
-
 args = parser.parse_args()
 
 
diff --git a/qa/TL0_self_test_Ampere/test.sh b/qa/TL0_self_test_Ampere/test.sh
@@ -30,7 +30,7 @@ test_body() {
   done
 
   # test decoders on A100 as well
-  ${python_new_invoke_test} -s decoder test_image
+  ${python_new_invoke_test} -s decoder test_imgcodec
 
   # test Optical Flow
   ${python_new_invoke_test} -s operator_1 test_optical_flow
diff --git a/qa/TL1_custom_src_pattern_build/test.sh b/qa/TL1_custom_src_pattern_build/test.sh
@@ -33,7 +33,7 @@ example_2() {
   cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
         -DBUILD_TEST=ON \
         -DBUILD_BENCHMARK=OFF \
-        -DOPERATOR_SRCS_PATTERN="reader/tf*.cc;reader/loader/loader.cc;decoder/cache/*" \
+        -DOPERATOR_SRCS_PATTERN="reader/tf*.cc;reader/loader/loader.cc;decoder/cache/*;imgcodec" \
         -DOPERATOR_TEST_SRCS_PATTERN=" " \
         -DKERNEL_SRCS_PATTERN=" " \
         -DKERNEL_TEST_SRCS_PATTERN=" " \
diff --git a/qa/TL1_decoder_perf/test.sh b/qa/TL1_decoder_perf/test.sh
@@ -3,11 +3,9 @@
 pip_packages='numpy'
 target_dir=./internal_tools
 
-LOG1="dali_legacy.log"
-LOG2="dali_nvimgcodec.log"
+LOG="dali.log"
 function CLEAN_AND_EXIT {
-    rm -rf ${LOG1}
-    rm -rf ${LOG2}
+    rm -rf ${LOG}
     exit $1
 }
 
@@ -17,61 +15,28 @@ test_body() {
     # Hopper
     MIN_PERF=19000;
     # use taskset to avoid inefficient data migration between cores we don't want to use
-    taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 | tee ${LOG1}
-    taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 --experimental_decoder | tee ${LOG2}
+    taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 | tee ${LOG}
 
   else
     # GraceHopper
     MIN_PERF=29000;
     # use taskset to avoid inefficient data migration between cores we don't want to use
-    taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 | tee ${LOG1}
-    taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 --experimental_decoder | tee ${LOG2}
+    taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 | tee ${LOG}
   fi
 
   # Regex Explanation:
   # Total Throughput: : Matches the literal string "Total Throughput: ".
   # \K: Resets the start of the match, so anything before \K is not included in the output.
   # [0-9]+(\.[0-9]+)?: Matches the number, with an optional decimal part.
   # (?= frames/sec): ensures " frames/sec" follows the number, but doesn't include it.
-  PERF1=$(grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' ${LOG1})
-  PERF2=$(grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' ${LOG2})
+  PERF=$(grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' ${LOG})
+  PERF_RESULT=$(echo "$PERF $MIN_PERF" | awk '{if ($1>=$2) {print "OK"} else { print "FAIL" }}')
+  echo "PERF_RESULT=${PERF_RESULT}"
 
-  PERF_RESULT1=$(echo "$PERF1 $MIN_PERF" | awk '{if ($1>=$2) {print "OK"} else { print "FAIL" }}')
-  PERF_RESULT2=$(echo "$PERF2 $MIN_PERF" | awk '{if ($1>=$2) {print "OK"} else { print "FAIL" }}')
-  # Ensure that PERF2 is no less than 5% smaller than PERF1
-  PERF_RESULT3=$(echo "$PERF2 $PERF1" | awk '{if ($1 >= $2 * 0.95) {print "OK"} else { print "FAIL" }}')
-
-  echo "PERF_RESULT1=${PERF_RESULT1}"
-  echo "PERF_RESULT2=${PERF_RESULT2}"
-  echo "PERF_RESULT3=${PERF_RESULT3}"
-
-  # If nvImageCodec>=0.5.0 enforce the performance requirements. Otherwise, we check only the legacy decoder
-  if pip show nvidia-nvimgcodec-cu12 > /dev/null 2>&1; then
-      NVIMGCODEC_VERSION=$(pip show nvidia-nvimgcodec-cu12 | grep ^Version: | awk '{print $2}')
-  elif pip show nvidia-nvimgcodec-cu11 > /dev/null 2>&1; then
-      NVIMGCODEC_VERSION=$(pip show nvidia-nvimgcodec-cu11 | grep ^Version: | awk '{print $2}')
-  else
-      echo "Neither nvidia-nvimgcodec-cu11 nor nvidia-nvimgcodec-cu12 is installed"
-      exit 1
-  fi
-  NVIMGCODEC_VERSION_WITHOUT_EXTRA=$(echo "$NVIMGCODEC_VERSION" | awk -F '.' '{print $1 "." $2 "." $3}')
-  if [[ "$NVIMGCODEC_VERSION_WITHOUT_EXTRA" =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)$ ]]; then
-    IFS='.' read -r MAJOR MINOR PATCH <<< "$NVIMGCODEC_VERSION_WITHOUT_EXTRA"
-    if [[ $MAJOR -gt 0 || ($MAJOR -eq 0 && $MINOR -ge 5) ]]; then
-      if [[ "$PERF_RESULT1" == "OK" && "$PERF_RESULT2" == "OK" && "$PERF_RESULT3" == "OK" ]]; then
-        CLEAN_AND_EXIT 0
-      else
-        CLEAN_AND_EXIT 4
-      fi
-    else
-      if [[ "$PERF_RESULT1" == "OK" ]]; then
-        CLEAN_AND_EXIT 0
-      else
-        CLEAN_AND_EXIT 4
-      fi
-    fi
+  if [[ "$PERF_RESULT" == "OK" ]]; then
+    CLEAN_AND_EXIT 0
   else
-    CLEAN_AND_EXIT 3
+    CLEAN_AND_EXIT 4
   fi
 }
 pushd ../..

Original file line number	Diff line number	Diff line change
`@@ -24,14 +24,14 @@ typedef ::testing::Types<RGB, BGR, Gray> Types;`
`24`	`24`	`TYPED_TEST_SUITE(DisplacementTest, Types);`
`25`	`25`
`26`	`26`	`TYPED_TEST(DisplacementTest, Sphere) {`
`27`		`- this->RunTest("Sphere", nullptr, 0, false, 0.006);`
	`27`	`+ this->RunTest("Sphere", nullptr, 0, false, 0.008);`
`28`	`28`	`}`
`29`	`29`
`30`	`30`	`TYPED_TEST(DisplacementTest, Water) {`
`31`	`31`	`const OpArg params[] = {{"ampl_x", "2.", DALI_FLOAT},`
`32`	`32`	`{"ampl_y", "3.", DALI_FLOAT},`
`33`	`33`	`{"phase_x", "0.2", DALI_FLOAT}};`
`34`		`- this->RunTest("Water", params, sizeof(params)/sizeof(params[0]), false, 0.005);`
	`34`	`+ this->RunTest("Water", params, sizeof(params)/sizeof(params[0]), false, 0.008);`
`35`	`35`	`}`
`36`	`36`
`37`	`37`	`/*`