Skip to content

Commit a71cf24

Browse files
committed
Fixes
Signed-off-by: Joaquin Anton Guirao <[email protected]>
1 parent dc0c4ae commit a71cf24

File tree

6 files changed

+14
-57
lines changed

6 files changed

+14
-57
lines changed

dali/operators/CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ if (NOT OPERATOR_SRCS_PATTERN STREQUAL "" OR
6666
list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/operators.cc")
6767
# These files are needed so that operators.cc has all of its symbols
6868
list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/util/npp.cc")
69-
list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/decoder/nvjpeg/nvjpeg_helper.cc")
7069
list(APPEND EXTRA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/video/dynlink_nvcuvid/dynlink_nvcuvid.cc")
7170

7271
list(APPEND OPERATOR_SRCS_PATTERN_EXCLUDE "*test*")

dali/operators/image/remap/displacement_test.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ typedef ::testing::Types<RGB, BGR, Gray> Types;
2424
TYPED_TEST_SUITE(DisplacementTest, Types);
2525

2626
TYPED_TEST(DisplacementTest, Sphere) {
27-
this->RunTest("Sphere", nullptr, 0, false, 0.006);
27+
this->RunTest("Sphere", nullptr, 0, false, 0.008);
2828
}
2929

3030
TYPED_TEST(DisplacementTest, Water) {
3131
const OpArg params[] = {{"ampl_x", "2.", DALI_FLOAT},
3232
{"ampl_y", "3.", DALI_FLOAT},
3333
{"phase_x", "0.2", DALI_FLOAT}};
34-
this->RunTest("Water", params, sizeof(params)/sizeof(params[0]), false, 0.005);
34+
this->RunTest("Water", params, sizeof(params)/sizeof(params[0]), false, 0.008);
3535
}
3636

3737
/*

internal_tools/hw_decoder_bench.py

-7
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,6 @@ def parse_range_arg(arg_str, parse_fn=int):
116116
default=-1,
117117
type=int,
118118
)
119-
120-
parser.add_argument(
121-
"--experimental_decoder",
122-
action="store_true",
123-
help="If True, uses the experimental decoder instead of the default",
124-
)
125-
126119
args = parser.parse_args()
127120

128121

qa/TL0_self_test_Ampere/test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ test_body() {
3030
done
3131

3232
# test decoders on A100 as well
33-
${python_new_invoke_test} -s decoder test_image
33+
${python_new_invoke_test} -s decoder test_imgcodec
3434

3535
# test Optical Flow
3636
${python_new_invoke_test} -s operator_1 test_optical_flow

qa/TL1_custom_src_pattern_build/test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ example_2() {
3333
cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
3434
-DBUILD_TEST=ON \
3535
-DBUILD_BENCHMARK=OFF \
36-
-DOPERATOR_SRCS_PATTERN="reader/tf*.cc;reader/loader/loader.cc;decoder/cache/*" \
36+
-DOPERATOR_SRCS_PATTERN="reader/tf*.cc;reader/loader/loader.cc;decoder/cache/*;imgcodec" \
3737
-DOPERATOR_TEST_SRCS_PATTERN=" " \
3838
-DKERNEL_SRCS_PATTERN=" " \
3939
-DKERNEL_TEST_SRCS_PATTERN=" " \

qa/TL1_decoder_perf/test.sh

+10-45
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@
33
pip_packages='numpy'
44
target_dir=./internal_tools
55

6-
LOG1="dali_legacy.log"
7-
LOG2="dali_nvimgcodec.log"
6+
LOG="dali.log"
87
function CLEAN_AND_EXIT {
9-
rm -rf ${LOG1}
10-
rm -rf ${LOG2}
8+
rm -rf ${LOG}
119
exit $1
1210
}
1311

@@ -17,61 +15,28 @@ test_body() {
1715
# Hopper
1816
MIN_PERF=19000;
1917
# use taskset to avoid inefficient data migration between cores we don't want to use
20-
taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 | tee ${LOG1}
21-
taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 --experimental_decoder | tee ${LOG2}
18+
taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 | tee ${LOG}
2219

2320
else
2421
# GraceHopper
2522
MIN_PERF=29000;
2623
# use taskset to avoid inefficient data migration between cores we don't want to use
27-
taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 | tee ${LOG1}
28-
taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 --experimental_decoder | tee ${LOG2}
24+
taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 | tee ${LOG}
2925
fi
3026

3127
# Regex Explanation:
3228
# Total Throughput: : Matches the literal string "Total Throughput: ".
3329
# \K: Resets the start of the match, so anything before \K is not included in the output.
3430
# [0-9]+(\.[0-9]+)?: Matches the number, with an optional decimal part.
3531
# (?= frames/sec): ensures " frames/sec" follows the number, but doesn't include it.
36-
PERF1=$(grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' ${LOG1})
37-
PERF2=$(grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' ${LOG2})
32+
PERF=$(grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' ${LOG})
33+
PERF_RESULT=$(echo "$PERF $MIN_PERF" | awk '{if ($1>=$2) {print "OK"} else { print "FAIL" }}')
34+
echo "PERF_RESULT=${PERF_RESULT}"
3835

39-
PERF_RESULT1=$(echo "$PERF1 $MIN_PERF" | awk '{if ($1>=$2) {print "OK"} else { print "FAIL" }}')
40-
PERF_RESULT2=$(echo "$PERF2 $MIN_PERF" | awk '{if ($1>=$2) {print "OK"} else { print "FAIL" }}')
41-
# Ensure that PERF2 is no less than 5% smaller than PERF1
42-
PERF_RESULT3=$(echo "$PERF2 $PERF1" | awk '{if ($1 >= $2 * 0.95) {print "OK"} else { print "FAIL" }}')
43-
44-
echo "PERF_RESULT1=${PERF_RESULT1}"
45-
echo "PERF_RESULT2=${PERF_RESULT2}"
46-
echo "PERF_RESULT3=${PERF_RESULT3}"
47-
48-
# If nvImageCodec>=0.5.0 enforce the performance requirements. Otherwise, we check only the legacy decoder
49-
if pip show nvidia-nvimgcodec-cu12 > /dev/null 2>&1; then
50-
NVIMGCODEC_VERSION=$(pip show nvidia-nvimgcodec-cu12 | grep ^Version: | awk '{print $2}')
51-
elif pip show nvidia-nvimgcodec-cu11 > /dev/null 2>&1; then
52-
NVIMGCODEC_VERSION=$(pip show nvidia-nvimgcodec-cu11 | grep ^Version: | awk '{print $2}')
53-
else
54-
echo "Neither nvidia-nvimgcodec-cu11 nor nvidia-nvimgcodec-cu12 is installed"
55-
exit 1
56-
fi
57-
NVIMGCODEC_VERSION_WITHOUT_EXTRA=$(echo "$NVIMGCODEC_VERSION" | awk -F '.' '{print $1 "." $2 "." $3}')
58-
if [[ "$NVIMGCODEC_VERSION_WITHOUT_EXTRA" =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)$ ]]; then
59-
IFS='.' read -r MAJOR MINOR PATCH <<< "$NVIMGCODEC_VERSION_WITHOUT_EXTRA"
60-
if [[ $MAJOR -gt 0 || ($MAJOR -eq 0 && $MINOR -ge 5) ]]; then
61-
if [[ "$PERF_RESULT1" == "OK" && "$PERF_RESULT2" == "OK" && "$PERF_RESULT3" == "OK" ]]; then
62-
CLEAN_AND_EXIT 0
63-
else
64-
CLEAN_AND_EXIT 4
65-
fi
66-
else
67-
if [[ "$PERF_RESULT1" == "OK" ]]; then
68-
CLEAN_AND_EXIT 0
69-
else
70-
CLEAN_AND_EXIT 4
71-
fi
72-
fi
36+
if [[ "$PERF_RESULT" == "OK" ]]; then
37+
CLEAN_AND_EXIT 0
7338
else
74-
CLEAN_AND_EXIT 3
39+
CLEAN_AND_EXIT 4
7540
fi
7641
}
7742
pushd ../..

0 commit comments

Comments
 (0)