diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index 899a57b754b..47af1c17aae 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -28,7 +28,7 @@ jobs: steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 - name: make test - run: make test + run: MOREFLAGS="-DZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES=1" make test # lasts ~26mn make-test-macos: @@ -51,7 +51,7 @@ jobs: sudo apt-get -qqq update make libc6install make clean - CFLAGS="-m32 -O2" make -j test V=1 + CFLAGS="-m32 -O2" MOREFLAGS="-DZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES=1" make -j test V=1 no-intrinsics-fuzztest: runs-on: ubuntu-latest @@ -72,7 +72,7 @@ jobs: steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 - name: ub + address sanitizer on zstreamtest - run: CC=clang make uasan-test-zstream + run: CC=clang MOREFLAGS="-DZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES=1" make uasan-test-zstream # lasts ~15mn tsan-fuzztest: @@ -91,7 +91,7 @@ jobs: run: | sudo apt-get -qqq update make libc6install - CC=clang make -C tests test-zstream32 FUZZER_FLAGS="--big-tests" + CC=clang MOREFLAGS="-DZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES=1" make -C tests test-zstream32 FUZZER_FLAGS="--big-tests" # lasts ~23mn gcc-8-asan-ubsan-testzstd: @@ -121,7 +121,7 @@ jobs: run: | sudo apt-get -qqq update make libc6install - make -j uasan-test-zstd32 V=1 + MOREFLAGS="-DZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES=1" make -j uasan-test-zstd32 V=1 # Note : external libraries must be turned off when using MSAN tests, # because they are not msan-instrumented, @@ -144,7 +144,7 @@ jobs: steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 - name: clang + ASan + UBSan + Fuzz Test - run: CC=clang FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest + run: CC=clang FUZZER_FLAGS="--long-tests" MOREFLAGS="-DZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES=1" make 
clean uasan-fuzztest gcc-asan-ubsan-fuzz32: runs-on: ubuntu-latest @@ -154,7 +154,7 @@ jobs: run: | sudo apt-get -qqq update make libc6install - CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest + CFLAGS="-O3 -m32" MOREFLAGS="-DZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES=1" FUZZER_FLAGS="--long-tests" make uasan-fuzztest clang-asan-fuzz32: runs-on: ubuntu-latest diff --git a/lib/compress/clevels.h b/lib/compress/clevels.h index c18da465f32..0ff8875fdb3 100644 --- a/lib/compress/clevels.h +++ b/lib/compress/clevels.h @@ -11,7 +11,7 @@ #ifndef ZSTD_CLEVELS_H #define ZSTD_CLEVELS_H -#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_CParams */ #include "../zstd.h" /*-===== Pre-defined compression levels =====-*/ @@ -22,110 +22,110 @@ __attribute__((__unused__)) #endif -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +static const ZSTD_CParams ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { { /* "default" - for any srcSize > 256 KB */ - /* W, C, H, S, L, TL, strat */ - { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ - { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ - { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ - { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ - { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ - { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ - { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */ - { 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ - { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ - { 22, 22, 22, 5, 5, 
48, ZSTD_btopt }, /* level 16 */ - { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ - { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ - { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ - { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ - { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ - { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ + /* W, WF, C, H, S, L, TL, strat */ + { 19, 0, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 0, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 0, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 0, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 0, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 0, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 0, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 21, 0, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 0, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 22, 0, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 0, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 0, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 0, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */ + { 22, 0, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 0, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 0, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 0, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 0, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 0, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 0, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 0, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 0, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 0, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ }, { /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ - { 
18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ - { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/ - { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/ - { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ - { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ - { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ - { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ - { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ + /* W, WF, C, H, S, L, T, strat */ + { 18, 0, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 0, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 0, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 0, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 0, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 0, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 0, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 0, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 0, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 0, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 0, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 0, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 0, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 0, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 0, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 
14.*/ + { 18, 0, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 0, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 0, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 0, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 0, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 0, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 0, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 0, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ }, { /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ - { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ - { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ - { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ - { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ - { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ - { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ - { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ + /* W, WF, C, H, S, L, T, strat */ + { 17, 0, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 0, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 
0, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 0, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 0, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 0, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 0, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 0, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 0, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 0, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 0, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 0, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 0, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 0, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 0, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 0, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 0, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 0, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 0, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 0, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 0, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 0, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 0, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ }, { /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ - { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ - { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ - { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ - { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ - { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 
14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ - { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ + /* W, WF, C, H, S, L, T, strat */ + { 14, 0, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 0, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 0, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 0, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 0, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 0, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 0, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 0, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 0, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 0, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 0, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 0, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 0, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 0, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 0, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 0, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 0, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 0, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 0, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 0, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 0, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 0, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 0, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ }, }; diff --git a/lib/compress/zstd_compress.c 
b/lib/compress/zstd_compress.c index d928b1d3ee3..b03f2d2e706 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -236,7 +236,7 @@ static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_Para /* Returns row matchfinder usage given an initial mode and cParams */ static ZSTD_ParamSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_ParamSwitch_e mode, - const ZSTD_compressionParameters* const cParams) { + const ZSTD_CParams* const cParams) { if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ mode = ZSTD_ps_disable; if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; @@ -246,7 +246,7 @@ static ZSTD_ParamSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_ParamSwitch_e mode /* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */ static ZSTD_ParamSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_ParamSwitch_e mode, - const ZSTD_compressionParameters* const cParams) { + const ZSTD_CParams* const cParams) { if (mode != ZSTD_ps_auto) return mode; return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable; } @@ -266,8 +266,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, * enable long distance matching (wlog >= 27, strategy >= btopt). * Returns ZSTD_ps_disable otherwise. */ -static ZSTD_ParamSwitch_e ZSTD_resolveEnableLdm(ZSTD_ParamSwitch_e mode, - const ZSTD_compressionParameters* const cParams) { +static ZSTD_ParamSwitch_e ZSTD_resolveEnableLdm( + ZSTD_ParamSwitch_e mode, + const ZSTD_CParams* const cParams) { if (mode != ZSTD_ps_auto) return mode; return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; } @@ -296,12 +297,12 @@ static ZSTD_ParamSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_ParamSwitch_e v /* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged. 
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */ -static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) { +static int ZSTD_CDictIndicesAreTagged(const ZSTD_CParams* const cParams) { return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast; } static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( - ZSTD_compressionParameters cParams) + ZSTD_CParams cParams) { ZSTD_CCtx_params cctxParams; /* should not matter, as all cParams are presumed properly defined */ @@ -321,7 +322,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize); cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes, cctxParams.compressionLevel); - assert(!ZSTD_checkCParams(cParams)); + assert(!ZSTD_checkCParams_internal(cParams)); return cctxParams; } @@ -371,10 +372,10 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) */ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, - const ZSTD_parameters* params, + const ZSTD_Params* params, int compressionLevel) { - assert(!ZSTD_checkCParams(params->cParams)); + assert(!ZSTD_checkCParams_internal(params->cParams)); ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); cctxParams->cParams = params->cParams; cctxParams->fParams = params->fParams; @@ -382,9 +383,9 @@ ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, * But, set it for tracing anyway. 
*/ cctxParams->compressionLevel = compressionLevel; - cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); - cctxParams->postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->postBlockSplitter, &params->cParams); - cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams); + cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &cctxParams->cParams); + cctxParams->postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->postBlockSplitter, &cctxParams->cParams); + cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &cctxParams->cParams); cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences); cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize); cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel); @@ -392,10 +393,11 @@ ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, cctxParams->useRowMatchFinder, cctxParams->postBlockSplitter, cctxParams->ldmParams.enableLdm); } -size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters publicParams) { + ZSTD_Params params = ZSTD_getParamsFromPublicParams(publicParams); RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams_internal(params.cParams) , ""); ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL); return 0; } @@ -405,9 +407,9 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete * @param params Validated zstd parameters. 
*/ static void ZSTD_CCtxParams_setZstdParams( - ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) + ZSTD_CCtx_params* cctxParams, const ZSTD_Params* params) { - assert(!ZSTD_checkCParams(params->cParams)); + assert(!ZSTD_checkCParams_internal(params->cParams)); cctxParams->cParams = params->cParams; cctxParams->fParams = params->fParams; /* Should not matter, as all cParams are presumed properly defined. @@ -432,6 +434,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = ZSTD_WINDOWLOG_MAX; return bounds; + case ZSTD_c_windowFrac: + bounds.lowerBound = 0; + bounds.upperBound = 7; + return bounds; + case ZSTD_c_hashLog: bounds.lowerBound = ZSTD_HASHLOG_MIN; bounds.upperBound = ZSTD_HASHLOG_MAX; @@ -671,6 +678,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_format: case ZSTD_c_windowLog: + case ZSTD_c_windowFrac: case ZSTD_c_contentSizeFlag: case ZSTD_c_checksumFlag: case ZSTD_c_dictIDFlag: @@ -724,6 +732,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_compressionLevel: case ZSTD_c_windowLog: + case ZSTD_c_windowFrac: case ZSTD_c_hashLog: case ZSTD_c_chainLog: case ZSTD_c_searchLog: @@ -792,8 +801,20 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, if (value!=0) /* 0 => use default */ BOUNDCHECK(ZSTD_c_windowLog, value); CCtxParams->cParams.windowLog = (U32)value; + if (value == ZSTD_WINDOWLOG_MAX) { + CCtxParams->cParams.windowFrac = 0; + } return CCtxParams->cParams.windowLog; + case ZSTD_c_windowFrac : + BOUNDCHECK(ZSTD_c_windowFrac, value); + if (CCtxParams->cParams.windowLog != ZSTD_WINDOWLOG_MAX) { + CCtxParams->cParams.windowFrac = (U32)value; + } else { + CCtxParams->cParams.windowFrac = 0; + } + return CCtxParams->cParams.windowFrac; + case ZSTD_c_hashLog : if (value!=0) /* 0 => use default */ BOUNDCHECK(ZSTD_c_hashLog, value); @@ -1039,6 +1060,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_windowLog : *value = 
(int)CCtxParams->cParams.windowLog; break; + case ZSTD_c_windowFrac : + *value = (int)CCtxParams->cParams.windowFrac; + break; case ZSTD_c_hashLog : *value = (int)CCtxParams->cParams.hashLog; break; @@ -1236,13 +1260,13 @@ size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSr return 0; } -static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( +static ZSTD_CParams ZSTD_dedicatedDictSearch_getCParams( int const compressionLevel, size_t const dictSize); static int ZSTD_dedicatedDictSearch_isSupported( - const ZSTD_compressionParameters* cParams); + const ZSTD_CParams* cParams); static void ZSTD_dedicatedDictSearch_revertCParams( - ZSTD_compressionParameters* cParams); + ZSTD_CParams* cParams); /** * Initializes the local dictionary using requested parameters. @@ -1381,27 +1405,35 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) return 0; } +size_t ZSTD_checkCParams_internal(ZSTD_CParams cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_windowFrac, (int)cParams.windowFrac); + RETURN_ERROR_IF( + cParams.windowLog + !!cParams.windowFrac > ZSTD_WINDOWLOG_MAX, + parameter_outOfBound, "Param out of bounds"); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength, (int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, (int)cParams.strategy); + return 0; +} /** ZSTD_checkCParams() : control CParam values remain within authorized range. 
@return : 0, or an error code if one value is beyond authorized range */ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) { - BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); - BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); - BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); - BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); - BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); - BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); - BOUNDCHECK(ZSTD_c_strategy, (int)cParams.strategy); - return 0; + return ZSTD_checkCParams_internal(ZSTD_getCParamsFromPublicCParams(cParams)); } /** ZSTD_clampCParams() : * make CParam values within valid range. * @return : valid CParams */ -static ZSTD_compressionParameters -ZSTD_clampCParams(ZSTD_compressionParameters cParams) +static ZSTD_CParams +ZSTD_clampCParams(ZSTD_CParams cParams) { # define CLAMP_TYPE(cParam, val, type) \ do { \ @@ -1410,54 +1442,55 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams) else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ } while (0) # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) - CLAMP(ZSTD_c_windowLog, cParams.windowLog); - CLAMP(ZSTD_c_chainLog, cParams.chainLog); - CLAMP(ZSTD_c_hashLog, cParams.hashLog); - CLAMP(ZSTD_c_searchLog, cParams.searchLog); - CLAMP(ZSTD_c_minMatch, cParams.minMatch); - CLAMP(ZSTD_c_targetLength,cParams.targetLength); - CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + if (ZSTD_c_windowLog == ZSTD_WINDOWLOG_MAX) { + cParams.windowFrac = 0; + } + CLAMP(ZSTD_c_windowFrac, cParams.windowFrac); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength, cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy, cParams.strategy, ZSTD_strategy); return cParams; } /** ZSTD_cycleLog() : * condition for correct 
operation : hashLog > 1 */ -U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +U32 ZSTD_cycleLog(U32 chainLog, ZSTD_strategy strat) { U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); - return hashLog - btScale; + return chainLog - btScale; } /** ZSTD_dictAndWindowLog() : - * Returns an adjusted window log that is large enough to fit the source and the dictionary. + * Adjusts the provided window params to be large enough to fit the source and the dictionary. * The zstd format says that the entire dictionary is valid if one byte of the dictionary * is within the window. So the hashLog and chainLog should be large enough to reference both * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing * the hashLog and windowLog. * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. */ -static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) +static void ZSTD_dictAndWindowLog(ZSTD_CParams* cParams, U64 srcSize, U64 dictSize) { - const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; /* No dictionary ==> No change */ if (dictSize == 0) { - return windowLog; + return; } - assert(windowLog <= ZSTD_WINDOWLOG_MAX); + assert(cParams->windowLog + !!cParams->windowFrac <= ZSTD_WINDOWLOG_MAX); assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ { - U64 const windowSize = 1ULL << windowLog; + U64 const windowSize = ZSTD_windowSize(cParams); U64 const dictAndWindowSize = dictSize + windowSize; /* If the window size is already large enough to fit both the source and the dictionary * then just use the window size. Otherwise adjust so that it fits the dictionary and * the window. 
*/ if (windowSize >= dictSize + srcSize) { - return windowLog; /* Window size large enough already */ - } else if (dictAndWindowSize >= maxWindowSize) { - return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ - } else { - return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + /* Window size large enough already */ + } else { + ZSTD_setMinimalWindowLogAndFrac(cParams, (U32)dictAndWindowSize, ZSTD_WINDOWLOG_MIN); } } } @@ -1469,16 +1502,16 @@ static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) * `mode` is the mode for parameter adjustment. See docs for `ZSTD_CParamMode_e`. * note : `srcSize==0` means 0! * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ -static ZSTD_compressionParameters -ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, +static ZSTD_CParams +ZSTD_adjustCParams_internal(ZSTD_CParams cPar, unsigned long long srcSize, size_t dictSize, ZSTD_CParamMode_e mode, ZSTD_ParamSwitch_e useRowMatchFinder) { const U64 minSrcSize = 513; /* (1<<9) + 1 */ - const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); - assert(ZSTD_checkCParams(cPar)==0); + const U64 maxWindowResize = 15ULL << (ZSTD_WINDOWLOG_MAX-4); + assert(ZSTD_checkCParams_internal(cPar)==0); /* Cascade the selected strategy down to the next-highest one built into * this binary. */ @@ -1554,20 +1587,43 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, && (dictSize <= maxWindowResize) ) { U32 const tSize = (U32)(srcSize + dictSize); static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; - U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : - ZSTD_highbit32(tSize-1) + 1; - if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + if (ZSTD_windowSize(&cPar) > tSize) { + if (tSize < hashSizeMin) { + cPar.windowLog = ZSTD_HASHLOG_MIN; + cPar.windowFrac = 0; + } else +#if !ZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES + /* Prevents adjustment in the scenario where we have explicitly + * selected a fractional window size that is slightly larger than + * the src size hint. If we allow picking without this check, we + * might end up growing the window to the next power of two. */ + if (ZSTD_windowSize(&cPar) > (1u << (ZSTD_highbit32(tSize - 1) + 1))) +#endif + { + const U32 tmpMinWindowLog = ZSTD_HASHLOG_MIN < ZSTD_WINDOWLOG_MIN ? ZSTD_HASHLOG_MIN : ZSTD_WINDOWLOG_MIN; + ZSTD_setMinimalWindowLogAndFrac(&cPar, tSize, tmpMinWindowLog); + } + } } if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { - U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + ZSTD_CParams dictWindowCParams = cPar; U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; - if (cycleLog > dictAndWindowLog) - cPar.chainLog -= (cycleLog - dictAndWindowLog); + ZSTD_dictAndWindowLog(&dictWindowCParams, (U64)srcSize, (U64)dictSize); + if (dictWindowCParams.windowFrac != 0) { + /* Round up to a whole power of two. 
*/ + dictWindowCParams.windowLog++; + dictWindowCParams.windowFrac = 0; + } + if (cPar.hashLog > dictWindowCParams.windowLog+1) + cPar.hashLog = dictWindowCParams.windowLog+1; + if (cycleLog > dictWindowCParams.windowLog) + cPar.chainLog -= (cycleLog - dictWindowCParams.windowLog); } - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) { cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + cPar.windowFrac = 0; + } /* We can't use more than 32 bits of hash in total, so that means that we require: * (hashLog + 8) <= 32 && (chainLog + 8) <= 32 @@ -1613,19 +1669,64 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) { - cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + ZSTD_CParams cParams = ZSTD_getCParamsFromPublicCParams(cPar); + cParams = ZSTD_clampCParams(cParams); /* resulting cPar is necessarily valid (all parameters within range) */ if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); -} - -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); + cParams = ZSTD_adjustCParams_internal(cParams, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); + return ZSTD_getPublicCParamsFromCParams(cParams); +} + +ZSTD_CParams ZSTD_getCParamsFromPublicCParams( + ZSTD_compressionParameters cParams) { + ZSTD_CParams ret; + ret.windowLog = cParams.windowLog; + ret.windowFrac = 0; + ret.chainLog = cParams.chainLog; + ret.hashLog = cParams.hashLog; + ret.searchLog = cParams.searchLog; + ret.minMatch = cParams.minMatch; + ret.targetLength = 
cParams.targetLength; + ret.strategy = cParams.strategy; + return ret; +} + +ZSTD_compressionParameters ZSTD_getPublicCParamsFromCParams( + ZSTD_CParams cParams) { + ZSTD_compressionParameters ret; + ret.windowLog = cParams.windowLog; + ret.chainLog = cParams.chainLog; + ret.hashLog = cParams.hashLog; + ret.searchLog = cParams.searchLog; + ret.minMatch = cParams.minMatch; + ret.targetLength = cParams.targetLength; + ret.strategy = cParams.strategy; + return ret; +} + +ZSTD_Params ZSTD_getParamsFromPublicParams(ZSTD_parameters params) { + ZSTD_Params ret; + ret.cParams = ZSTD_getCParamsFromPublicCParams(params.cParams); + ret.fParams = params.fParams; + return ret; +} + +ZSTD_parameters ZSTD_getPublicParamsFromParams(ZSTD_Params params) { + ZSTD_parameters ret; + ret.cParams = ZSTD_getPublicCParamsFromCParams(params.cParams); + ret.fParams = params.fParams; + return ret; +} + +static ZSTD_CParams ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); +static ZSTD_Params ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); static void ZSTD_overrideCParams( - ZSTD_compressionParameters* cParams, - const ZSTD_compressionParameters* overrides) + ZSTD_CParams* cParams, + const ZSTD_CParams* overrides) { if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + /* An explicit windowFrac only applies if windowLog is set explicitly. 
*/ + if (overrides->windowLog) cParams->windowFrac = overrides->windowFrac; if (overrides->hashLog) cParams->hashLog = overrides->hashLog; if (overrides->chainLog) cParams->chainLog = overrides->chainLog; if (overrides->searchLog) cParams->searchLog = overrides->searchLog; @@ -1634,24 +1735,27 @@ static void ZSTD_overrideCParams( if (overrides->strategy) cParams->strategy = overrides->strategy; } -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( +ZSTD_CParams ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) { - ZSTD_compressionParameters cParams; + ZSTD_CParams cParams; if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { assert(CCtxParams->srcSizeHint>=0); srcSizeHint = (U64)CCtxParams->srcSizeHint; } cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); - if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) { + cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + cParams.windowFrac = 0; + } ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); - assert(!ZSTD_checkCParams(cParams)); + assert(!ZSTD_checkCParams_internal(cParams)); /* srcSizeHint == 0 means 0 */ return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder); } static size_t -ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, +ZSTD_sizeof_matchState(const ZSTD_CParams* const cParams, const ZSTD_ParamSwitch_e useRowMatchFinder, const int enableDedicatedDictSearch, const U32 forCCtx) @@ -1700,7 +1804,7 @@ static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequence } static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( - const ZSTD_compressionParameters* cParams, + const ZSTD_CParams* cParams, const ldmParams_t* ldmParams, const int isStatic, const 
ZSTD_ParamSwitch_e useRowMatchFinder, @@ -1710,7 +1814,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( int useSequenceProducer, size_t maxBlockSize) { - size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); + size_t const windowSize = (size_t) BOUNDED(1ULL, ZSTD_windowSize(cParams), pledgedSrcSize); size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize); size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer); size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) @@ -1753,7 +1857,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { - ZSTD_compressionParameters const cParams = + ZSTD_CParams const cParams = ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &cParams); @@ -1766,7 +1870,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize); } -size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +static size_t ZSTD_estimateCCtxSize_usingCParams_internal(ZSTD_CParams cParams) { ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { @@ -1783,6 +1887,12 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) } } +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters publicParams) +{ + ZSTD_CParams cParams = ZSTD_getCParamsFromPublicCParams(publicParams); + return ZSTD_estimateCCtxSize_usingCParams_internal(cParams); +} + static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) { int tier = 0; @@ 
-1790,8 +1900,8 @@ static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN}; for (; tier < 4; ++tier) { /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict); - largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize); + ZSTD_CParams const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict); + largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams_internal(cParams), largestSize); } return largestSize; } @@ -1811,11 +1921,12 @@ size_t ZSTD_estimateCCtxSize(int compressionLevel) size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) { RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = + { ZSTD_CParams const cParams = ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); - size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog); + size_t const windowSize = ZSTD_windowSize(&cParams); + size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), windowSize); size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) - ? ((size_t)1 << cParams.windowLog) + blockSize + ? windowSize + blockSize : 0; size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) ? 
ZSTD_compressBound(blockSize) + 1 @@ -1828,7 +1939,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) } } -size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +static size_t ZSTD_estimateCStreamSize_usingCParams_internal(ZSTD_CParams cParams) { ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { @@ -1845,10 +1956,16 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) } } +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters publicParams) +{ + ZSTD_CParams cParams = ZSTD_getCParamsFromPublicCParams(publicParams); + return ZSTD_estimateCStreamSize_usingCParams_internal(cParams); +} + static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); - return ZSTD_estimateCStreamSize_usingCParams(cParams); + ZSTD_CParams const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCStreamSize_usingCParams_internal(cParams); } size_t ZSTD_estimateCStreamSize(int compressionLevel) @@ -1901,12 +2018,13 @@ size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ } -static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, - ZSTD_compressionParameters cParams2) +static void ZSTD_assertEqualCParams(ZSTD_CParams cParams1, + ZSTD_CParams cParams2) { (void)cParams1; (void)cParams2; assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.windowFrac == cParams2.windowFrac); assert(cParams1.chainLog == cParams2.chainLog); assert(cParams1.hashLog == cParams2.hashLog); 
assert(cParams1.searchLog == cParams2.searchLog); @@ -1984,7 +2102,7 @@ static void ZSTD_advanceHashSalt(ZSTD_MatchState_t* ms) { static size_t ZSTD_reset_matchState(ZSTD_MatchState_t* ms, ZSTD_cwksp* ws, - const ZSTD_compressionParameters* cParams, + const ZSTD_CParams* cParams, const ZSTD_ParamSwitch_e useRowMatchFinder, const ZSTD_compResetPolicy_e crp, const ZSTD_indexResetPolicy_e forceResetIndex, @@ -2107,7 +2225,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_cwksp* const ws = &zc->workspace; DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->postBlockSplitter); - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!ZSTD_isError(ZSTD_checkCParams_internal(params->cParams))); zc->isFirstBlock = 1; @@ -2128,7 +2246,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, assert(params->ldmParams.hashRateLog < 32); } - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); + { size_t const windowSize = MAX(1, (size_t)MIN(ZSTD_windowSize(&params->cParams), pledgedSrcSize)); size_t const blockSize = MIN(params->maxBlockSize, windowSize); size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params)); size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) @@ -2331,8 +2449,9 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", (unsigned long long)pledgedSrcSize); { - ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + ZSTD_CParams adjusted_cdict_cParams = cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; + unsigned const windowFrac = params.cParams.windowFrac; assert(windowLog != 0); /* Resize working context table params for input 
only, since the dict * has its own tables. */ @@ -2345,7 +2464,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, cdict->dictContentSize, ZSTD_cpm_attachDict, params.useRowMatchFinder); - params.cParams.windowLog = windowLog; + params.cParams.windowLog = windowLog; + params.cParams.windowFrac = windowFrac; params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, /* loadedDictSize */ 0, @@ -2384,7 +2504,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, } static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize, - ZSTD_compressionParameters const* cParams) { + ZSTD_CParams const* cParams) { if (ZSTD_CDictIndicesAreTagged(cParams)){ /* Remove tags from the CDict table if they are present. * See docs on "short cache" in zstd_compress_internal.h for context. */ @@ -2405,17 +2525,19 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + const ZSTD_CParams *cdict_cParams = &cdict->matchState.cParams; assert(!cdict->matchState.dedicatedDictSearch); DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", (unsigned long long)pledgedSrcSize); - { unsigned const windowLog = params.cParams.windowLog; + { unsigned const windowLog = params.cParams.windowLog; + unsigned const windowFrac = params.cParams.windowFrac; assert(windowLog != 0); /* Copy only compression parameters related to tables. 
*/ params.cParams = *cdict_cParams; - params.cParams.windowLog = windowLog; + params.cParams.windowLog = windowLog; + params.cParams.windowFrac = windowFrac; params.useRowMatchFinder = cdict->useRowMatchFinder; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, /* loadedDictSize */ 0, @@ -2535,6 +2657,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, /* loadedDictSize */ 0, ZSTDcrp_leaveDirty, zbuff); assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.windowFrac == srcCCtx->appliedParams.cParams.windowFrac); assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); @@ -3234,7 +3357,7 @@ static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seq /** * Function to validate sequences produced by a block compressor. 
*/ -static void ZSTD_validateSeqStore(const SeqStore_t* seqStore, const ZSTD_compressionParameters* cParams) +static void ZSTD_validateSeqStore(const SeqStore_t* seqStore, const ZSTD_CParams* cParams) { #if DEBUGLEVEL >= 1 const SeqDef* seq = seqStore->sequencesStart; @@ -3354,7 +3477,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); assert(zc->appliedParams.extSeqProdFunc != NULL); - { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; + { U32 const windowSize = ZSTD_windowSize(&zc->appliedParams.cParams); size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)( zc->appliedParams.extSeqProdState, @@ -4530,7 +4653,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_MatchState_t* ms, void const* iend) { U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); - U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const maxDist = ZSTD_windowSize(&params->cParams); if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); @@ -4598,10 +4721,10 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, const BYTE* ip = (const BYTE*)src; BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; - U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + U32 const maxDist = ZSTD_windowSize(&cctx->appliedParams.cParams); S64 savings = (S64)cctx->consumedSrcSize - (S64)cctx->producedCSize; - assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + assert(cctx->appliedParams.cParams.windowLog + !!cctx->appliedParams.cParams.windowFrac <= ZSTD_WINDOWLOG_MAX); DEBUGLOG(5, "ZSTD_compress_frameChunk (srcSize=%u, blockSizeMax=%u)", (unsigned)srcSize, (unsigned)blockSizeMax); if (cctx->appliedParams.fParams.checksumFlag && srcSize) @@ -4700,9 +4823,9 @@ static size_t 
ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ U32 const checksumFlag = params->fParams.checksumFlag>0; - U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const windowSize = ZSTD_windowSize(&params->cParams); U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + BYTE const windowByte = (BYTE)(((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3) | (params->cParams.windowFrac & 7)); U32 const fcsCode = params->fParams.contentSizeFlag ? (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); @@ -4718,7 +4841,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, pos = 4; } op[pos++] = frameHeaderDescriptionByte; - if (!singleSegment) op[pos++] = windowLogByte; + if (!singleSegment) op[pos++] = windowByte; switch(dictIDSizeCode) { default: @@ -4868,9 +4991,9 @@ size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx) { - ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; - assert(!ZSTD_checkCParams(cParams)); - return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog); + ZSTD_CParams const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams_internal(cParams)); + return MIN(cctx->appliedParams.maxBlockSize, (size_t)ZSTD_windowSize(&cParams)); } /* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */ @@ -4879,7 +5002,6 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) return ZSTD_getBlockSize_deprecated(cctx); } -/* NOTE: Must just 
wrap ZSTD_compressBlock_deprecated() */ size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); @@ -5249,7 +5371,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, #endif DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!ZSTD_isError(ZSTD_checkCParams_internal(params->cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if ( (cdict) && (cdict->dictContentSize > 0) @@ -5292,7 +5414,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, { DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams_internal(params->cParams) , ""); return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, @@ -5304,9 +5426,11 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, * @return : 0, or an error code */ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) + ZSTD_parameters publicParams, + unsigned long long pledgedSrcSize) { ZSTD_CCtx_params cctxParams; + ZSTD_Params params = ZSTD_getParamsFromPublicParams(publicParams); ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, @@ -5318,7 +5442,7 @@ static size_t ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_CCtx_params cctxParams; - { ZSTD_parameters const params = 
ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + { ZSTD_Params const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); } DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); @@ -5442,10 +5566,11 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_parameters params) + ZSTD_parameters publicParams) { + ZSTD_Params params = ZSTD_getParamsFromPublicParams(publicParams); DEBUGLOG(4, "ZSTD_compress_advanced"); - FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + FORWARD_IF_ERROR(ZSTD_checkCParams_internal(params.cParams), ""); ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, @@ -5476,7 +5601,7 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, int compressionLevel) { { - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + ZSTD_Params const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); assert(params.fParams.contentSizeFlag == 1); ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); } @@ -5516,10 +5641,8 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, /* ===== Dictionary API ===== */ -/*! 
ZSTD_estimateCDictSize_advanced() : - * Estimate amount of memory that will be needed to create a dictionary with following arguments */ -size_t ZSTD_estimateCDictSize_advanced( - size_t dictSize, ZSTD_compressionParameters cParams, +static size_t ZSTD_estimateCDictSize_internal( + size_t dictSize, ZSTD_CParams cParams, ZSTD_dictLoadMethod_e dictLoadMethod) { DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); @@ -5533,10 +5656,20 @@ size_t ZSTD_estimateCDictSize_advanced( : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); } +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters publicParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + ZSTD_CParams cParams = ZSTD_getCParamsFromPublicCParams(publicParams); + return ZSTD_estimateCDictSize_internal(dictSize, cParams, dictLoadMethod); +} + size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); - return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); + ZSTD_CParams const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + return ZSTD_estimateCDictSize_internal(dictSize, cParams, ZSTD_dlm_byCopy); } size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) @@ -5556,7 +5689,7 @@ static size_t ZSTD_initCDict_internal( ZSTD_CCtx_params params) { DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); - assert(!ZSTD_checkCParams(params.cParams)); + assert(!ZSTD_checkCParams_internal(params.cParams)); cdict->matchState.cParams = params.cParams; cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; if ((dictLoadMethod == 
ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { @@ -5604,7 +5737,7 @@ static size_t ZSTD_initCDict_internal( static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_compressionParameters cParams, + ZSTD_CParams cParams, ZSTD_ParamSwitch_e useRowMatchFinder, int enableDedicatedDictSearch, ZSTD_customMem customMem) @@ -5639,10 +5772,10 @@ ZSTD_createCDict_advanced_internal(size_t dictSize, } } -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, +ZSTD_CDict* ZSTD_createCDict_internal(const void* dictBuffer, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, + ZSTD_CParams cParams, ZSTD_customMem customMem) { ZSTD_CCtx_params cctxParams; @@ -5657,6 +5790,19 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, &cctxParams, customMem); } +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters publicParams, + ZSTD_customMem customMem) +{ + ZSTD_CParams cParams = ZSTD_getCParamsFromPublicCParams(publicParams); + return ZSTD_createCDict_internal( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + cParams, customMem); +} + ZSTD_CDict* ZSTD_createCDict_advanced2( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, @@ -5665,7 +5811,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( ZSTD_customMem customMem) { ZSTD_CCtx_params cctxParams = *originalCctxParams; - ZSTD_compressionParameters cParams; + ZSTD_CParams cParams; ZSTD_CDict* cdict; DEBUGLOG(3, "ZSTD_createCDict_advanced2, dictSize=%u, mode=%u", (unsigned)dictSize, (unsigned)dictContentType); @@ -5709,10 +5855,11 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { - 
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); - ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTD_defaultCMem); + ZSTD_CParams cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_internal( + dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); if (cdict) cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; return cdict; @@ -5720,10 +5867,11 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); - ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byRef, ZSTD_dct_auto, - cParams, ZSTD_defaultCMem); + ZSTD_CParams cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_internal( + dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); if (cdict) cdict->compressionLevel = (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel; return cdict; @@ -5760,8 +5908,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) + ZSTD_compressionParameters publicParams) { + ZSTD_CParams cParams = ZSTD_getCParamsFromPublicCParams(publicParams); ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); @@ -5801,7 +5950,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict( return cdict; } -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +ZSTD_CParams ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) { assert(cdict != NULL); return cdict->matchState.cParams; @@ -5829,16 +5978,20 @@ static size_t ZSTD_compressBegin_usingCDict_internal( RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); /* Initialize the cctxParams from the cdict */ { - ZSTD_parameters params; + ZSTD_Params params; params.fParams = fParams; - params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + if ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0 ) ? 
- ZSTD_getCParamsFromCDict(cdict) - : ZSTD_getCParams(cdict->compressionLevel, - pledgedSrcSize, - cdict->dictContentSize); + || cdict->compressionLevel == 0 ) { + params.cParams = ZSTD_getCParamsFromCDict(cdict); + } else { + params.cParams = ZSTD_getCParams_internal( + cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize, + ZSTD_cpm_unknown); + } ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel); } /* Increase window log to fit the entire dictionary and source if the @@ -5847,8 +6000,9 @@ static size_t ZSTD_compressBegin_usingCDict_internal( */ if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); - U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; - cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + if (limitedSrcSize > 1 && ZSTD_windowSize(&cctxParams.cParams) < limitedSrcSize) { + ZSTD_setMinimalWindowLogAndFrac(&cctxParams.cParams, limitedSrcSize, ZSTD_WINDOWLOG_MIN); + } } return ZSTD_compressBegin_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, @@ -5991,7 +6145,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, DEBUGLOG(4, "ZSTD_initCStream_internal"); FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!ZSTD_isError(ZSTD_checkCParams_internal(params->cParams))); zcs->requestedParams = *params; assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (dict) { @@ -6034,8 +6188,9 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. 
*/ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pss) + ZSTD_parameters publicParams, unsigned long long pss) { + ZSTD_Params params = ZSTD_getParamsFromPublicParams(publicParams); /* for compatibility with older programs relying on this behavior. * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. * This line will be removed in the future. @@ -6044,7 +6199,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, DEBUGLOG(4, "ZSTD_initCStream_advanced"); FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams_internal(params.cParams) , ""); ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params); FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); return 0; @@ -6418,7 +6573,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, } else #endif /* ZSTD_MULTITHREAD */ { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!ZSTD_isError(ZSTD_checkCParams_internal(params.cParams))); FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, cctx->cdict, @@ -6601,17 +6756,18 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, * @returns a ZSTD error code if sequence is not valid */ static size_t -ZSTD_validateSequence(U32 offBase, U32 matchLength, U32 minMatch, - size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer) +ZSTD_validateSequence( + U32 offBase, U32 matchLength, const ZSTD_CParams* cParams, + size_t posInSrc, size_t dictSize, int useSequenceProducer) { - U32 const windowSize = 1u << windowLog; + U32 const windowSize = ZSTD_windowSize(cParams); /* posInSrc represents the amount of data the decoder would 
decode up to this point. * As long as the amount of data decoded is less than or equal to window size, offsets may be * larger than the total length of output decoded in order to reference the dict, even larger than * window size. After output surpasses windowSize, we're limited to windowSize offsets again. */ size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; - size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4; + size_t const matchLenLowerBound = (cParams->minMatch == 3 || useSequenceProducer) ? 3 : 4; RETURN_ERROR_IF(offBase > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!"); /* Validate maxNbSeq is large enough for the given matchLength and minMatch */ RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch"); @@ -6681,11 +6837,10 @@ ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx, DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; - FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, - seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, - ZSTD_hasExtSeqProd(&cctx->appliedParams)), - "Sequence validation failed"); + FORWARD_IF_ERROR(ZSTD_validateSequence( + offBase, matchLength, &cctx->appliedParams.cParams, + seqPos->posInSrc, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), + "Sequence validation failed"); } RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); @@ -6836,9 +6991,10 @@ ZSTD_transferSequences_noDelim(ZSTD_CCtx* cctx, if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; - FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), - "Sequence validation failed"); + FORWARD_IF_ERROR(ZSTD_validateSequence( + offBase, matchLength, &cctx->appliedParams.cParams, + seqPos->posInSrc, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), + "Sequence validation failed"); } DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, @@ -7674,9 +7830,9 @@ int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } -static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +static ZSTD_CParams ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) { - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + ZSTD_CParams cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); switch (cParams.strategy) { case ZSTD_fast: case ZSTD_dfast: @@ -7696,7 +7852,7 @@ static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const } static int ZSTD_dedicatedDictSearch_isSupported( - ZSTD_compressionParameters const* cParams) + ZSTD_CParams const* cParams) { return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2) @@ -7710,7 +7866,7 @@ static int ZSTD_dedicatedDictSearch_isSupported( * context. (Otherwise, those tables would also grow.) 
*/ static void ZSTD_dedicatedDictSearch_revertCParams( - ZSTD_compressionParameters* cParams) { + ZSTD_CParams* cParams) { switch (cParams->strategy) { case ZSTD_fast: case ZSTD_dfast: @@ -7752,11 +7908,15 @@ static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_CParamMo } /*! ZSTD_getCParams_internal() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * @return ZSTD_CParams structure for a selected compression level, srcSize and dictSize. * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. * Use dictSize == 0 for unknown or unused. * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_CParamMode_e`. */ -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) +static ZSTD_CParams ZSTD_getCParams_internal( + int compressionLevel, + unsigned long long srcSizeHint, + size_t dictSize, + ZSTD_CParamMode_e mode) { U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); @@ -7769,7 +7929,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; else row = compressionLevel; - { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + { ZSTD_CParams cp = ZSTD_defaultCParameters[tableID][row]; DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy); /* acceleration factor */ if (compressionLevel < 0) { @@ -7782,26 +7942,25 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, } /*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 
+ * @return ZSTD_CParams structure for a selected compression level, srcSize and dictSize. * Size values are optional, provide 0 if not known or unused */ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); + return ZSTD_getPublicCParamsFromCParams(ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown)); } -/*! ZSTD_getParams() : +/*! ZSTD_getParams_internal() : * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * @return a `ZSTD_Params` structure (instead of `ZSTD_CParams`). * Fields of `ZSTD_frameParameters` are set to default values */ -static ZSTD_parameters +static ZSTD_Params ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) { - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); + ZSTD_Params params; DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); ZSTD_memset(&params, 0, sizeof(params)); - params.cParams = cParams; + params.cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); params.fParams.contentSizeFlag = 1; return params; } @@ -7813,7 +7972,7 @@ ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, si ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); + return ZSTD_getPublicParamsFromParams(ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown)); } void ZSTD_registerSequenceProducer( diff
--git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index ca5e2a4c5bf..d9f5a3b45ad 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -191,6 +191,29 @@ size_t ZSTD_buildBlockEntropyStats( * Compression internals structs * *********************************/ +/** + * Internal equivalent of public ZSTD_compressionParameters struct. + */ +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned windowFrac; /**< fractional component of the window size, in eighths : windowSize == ((8 + windowFrac) << windowLog) >> 3 */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_CParams; + +/** + * Internal equivalent of public ZSTD_parameters struct, wrapping the internal + * ZSTD_CParams struct rather than the public ZSTD_compressionParameters. + */ +typedef struct { + ZSTD_CParams cParams; + ZSTD_frameParameters fParams; +} ZSTD_Params; + typedef struct { U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */ U32 len; /* Raw length of match */ @@ -298,7 +321,7 @@ struct ZSTD_MatchState_t { */ optState_t opt; /* optimal parser state */ const ZSTD_MatchState_t* dictMatchState; - ZSTD_compressionParameters cParams; + ZSTD_CParams cParams; const RawSeqStore_t* ldmSeqStore; /* Controls prefetching in some dictMatchState matchfinders.
@@ -348,8 +371,9 @@ typedef struct { U32 hashLog; /* Log size of hashTable */ U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ U32 minMatchLength; /* Minimum match length */ - U32 hashRateLog; /* Log number of entries to skip */ + U32 hashRateLog; /* Log number of entries to skip */ U32 windowLog; /* Window log for the LDM */ + U32 windowFrac; /* Fractional component of the LDM window size, in eighths */ } ldmParams_t; typedef struct { @@ -361,7 +385,7 @@ typedef struct { struct ZSTD_CCtx_params_s { ZSTD_format_e format; - ZSTD_compressionParameters cParams; + ZSTD_CParams cParams; ZSTD_frameParameters fParams; int compressionLevel; @@ -1077,6 +1101,88 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms) ZSTD_noDict; } +/** + * Fractional window sizes can always be picked by the user explicitly + * setting ZSTD_c_windowFrac. This macro controls whether, when Zstd is + * picking a window size itself, it is allowed to pick a non-power-of-two + * window size. + * + * For now, this defaults to false. + */ +#ifndef ZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES +#define ZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES 0 +#endif + +/** + * Return the window size described by the windowLog and windowFrac in the + * provided CParams. + */ +MEM_STATIC U32 ZSTD_windowSize(const ZSTD_CParams* cParams) { + return (U32)(((8ull + cParams->windowFrac) << cParams->windowLog) >> 3); +} +MEM_STATIC U32 ZSTD_windowSizeLDM(const ldmParams_t* ldmParams) { + return (U32)(((8ull + ldmParams->windowFrac) << ldmParams->windowLog) >> 3); +} + +/** + * Checks that the selected windowSize satisfies the inequality + * `srcSize <= windowSize <= srcSize * margin`, in the range where the window + * isn't pressed up against one of the hard bounds, where `margin` is either + * 1.125 or 2, depending on whether we're allowed to pick fractional window + * sizes.
+ */ +MEM_STATIC int ZSTD_windowLogAndFracAreMinimal(ZSTD_CParams* cParams, const U32 srcSize) { + const U32 lowerBound = MIN(srcSize, 1u << ZSTD_WINDOWLOG_MAX); +#if ZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES + const U32 upperBound = MAX(lowerBound + (lowerBound >> 3), 1u << ZSTD_WINDOWLOG_ABSOLUTEMIN); +#else + const U32 upperBound = MAX(2 * lowerBound - 1, 1u << ZSTD_WINDOWLOG_ABSOLUTEMIN); +#endif + const U32 windowSize = ZSTD_windowSize(cParams); + if (windowSize < lowerBound) { + return 0; + } + if (windowSize > upperBound) { + return 0; + } + return 1; +} + +/** + * Calculates the minimum legal window log and fraction that contain the + * provided source size. + */ +MEM_STATIC void ZSTD_setMinimalWindowLogAndFrac(ZSTD_CParams* cParams, const U32 srcSize, const U32 minWindowLog) { + const U32 minSize = 1u << minWindowLog; +#if ZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES + if (srcSize < minSize) { + cParams->windowLog = minWindowLog; + cParams->windowFrac = 0; + } else { + const U32 srcSizeMinusOne = srcSize - 1; + cParams->windowLog = ZSTD_highbit32(srcSizeMinusOne); + cParams->windowFrac = ((srcSizeMinusOne >> (cParams->windowLog - 3)) & 7) + 1; + if (cParams->windowFrac == 8) { + cParams->windowFrac = 0; + cParams->windowLog++; + } + } +#else + if (srcSize < minSize) { + cParams->windowLog = minWindowLog; + cParams->windowFrac = 0; + } else { + cParams->windowLog = ZSTD_highbit32(srcSize - 1) + 1; + cParams->windowFrac = 0; + } +#endif + if (cParams->windowLog + !!cParams->windowFrac > ZSTD_WINDOWLOG_MAX) { + cParams->windowLog = ZSTD_WINDOWLOG_MAX; + cParams->windowFrac = 0; + } + assert(ZSTD_windowLogAndFracAreMinimal(cParams, srcSize)); +} + /* Defining this macro to non-zero tells zstd to run the overflow correction * code much more frequently. This is very inefficient, and should only be * used for tests and fuzzers. 
@@ -1100,10 +1206,12 @@ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, U32 loadedDictEnd, void const* src) { + /* overflow correction only handles power-of-two index moves. */ + U32 const roundedMaxDist = 1u << (ZSTD_highbit32(maxDist - 1) + 1); U32 const cycleSize = 1u << cycleLog; U32 const curr = (U32)((BYTE const*)src - window.base); U32 const minIndexToOverflowCorrect = cycleSize - + MAX(maxDist, cycleSize) + + MAX(roundedMaxDist, cycleSize) + ZSTD_WINDOW_START_INDEX; /* Adjust the min index to backoff the overflow correction frequency, @@ -1178,23 +1286,29 @@ U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32: * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32. */ + U32 const roundedMaxDist = 1u << (ZSTD_highbit32(maxDist - 1) + 1); U32 const cycleSize = 1u << cycleLog; U32 const cycleMask = cycleSize - 1; U32 const curr = (U32)((BYTE const*)src - window->base); U32 const currentCycle = curr & cycleMask; - /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ + /* Ensure newCurrent - roundedMaxDist >= ZSTD_WINDOW_START_INDEX. */ U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) : 0; U32 const newCurrent = currentCycle + currentCycleCorrection - + MAX(maxDist, cycleSize); + + MAX(roundedMaxDist, cycleSize); U32 const correction = curr - newCurrent; /* maxDist must be a power of two so that: * (newCurrent & cycleMask) == (curr & cycleMask) * This is required to not corrupt the chains / binary tree. + * + * Now that window sizes can be non-power-of-two, we round it up to the + * next power of two. */ - assert((maxDist & (maxDist - 1)) == 0); + assert(roundedMaxDist >= maxDist); + assert(roundedMaxDist < maxDist + 7 * (maxDist >> 3)); + assert((roundedMaxDist & (roundedMaxDist - 1)) == 0); assert((curr & cycleMask) == (newCurrent & cycleMask)); assert(curr > newCurrent); if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { @@ -1392,9 +1506,9 @@ U32 ZSTD_window_update(ZSTD_window_t* window, /** * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
*/ -MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr) { - U32 const maxDistance = 1U << windowLog; + U32 const maxDistance = ZSTD_windowSize(&ms->cParams); U32 const lowestValid = ms->window.lowLimit; U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; U32 const isDictionary = (ms->loadedDictEnd != 0); @@ -1409,9 +1523,9 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, u /** * Returns the lowest allowed match index in the prefix. */ -MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr) { - U32 const maxDistance = 1U << windowLog; + U32 const maxDistance = ZSTD_windowSize(&ms->cParams); U32 const lowestValid = ms->window.dictLimit; U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; U32 const isDictionary = (ms->loadedDictEnd != 0); @@ -1538,12 +1652,32 @@ BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs); * These prototypes shall only be called from within lib/compress * ============================================================== */ +/* ZSTD_getCParamsFromPublicCParams(), ZSTD_getPublicCParamsFromCParams() : + * Translates between the public and internal structs. + * + * Note: as these structs diverge, this may increasingly become a lossy + * translation. The only long-term justified use of these should be at the + * User API. + */ +ZSTD_CParams ZSTD_getCParamsFromPublicCParams(ZSTD_compressionParameters cParams); +ZSTD_compressionParameters ZSTD_getPublicCParamsFromCParams(ZSTD_CParams cParams); + +/* ZSTD_getParamsFromPublicParams(), ZSTD_getPublicParamsFromParams() : + * Translates between the public and internal structs. 
+ * + * Note: as these structs diverge, this may increasingly become a lossy + * translation. The only long-term justified use of these should be at the + * User API. + */ +ZSTD_Params ZSTD_getParamsFromPublicParams(ZSTD_parameters params); +ZSTD_parameters ZSTD_getPublicParamsFromParams(ZSTD_Params params); + /* ZSTD_getCParamsFromCCtxParams() : * cParams are built depending on compressionLevel, src size hints, * LDM and manually set compression parameters. * Note: srcSizeHint == 0 means 0! */ -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( +ZSTD_CParams ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); /*! ZSTD_initCStream_internal() : @@ -1560,7 +1694,17 @@ void ZSTD_resetSeqStore(SeqStore_t* ssPtr); /*! ZSTD_getCParamsFromCDict() : * as the name implies */ -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); +ZSTD_CParams ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +size_t ZSTD_checkCParams_internal(ZSTD_CParams cParams); + +/* ZSTD_createCDict_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +ZSTD_CDict* ZSTD_createCDict_internal(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CParams cParams, + ZSTD_customMem customMem); /* ZSTD_compressBegin_advanced_internal() : * Private use only. To be called from zstdmt_compress.c. 
*/ diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 1a266e7d955..8fdb3062de5 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -18,7 +18,7 @@ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms, void const* end, ZSTD_dictTableLoadMethod_e dtlm) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashLarge = ms->hashTable; U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; U32 const mls = cParams->minMatch; @@ -56,7 +56,7 @@ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms, void const* end, ZSTD_dictTableLoadMethod_e dtlm) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashLarge = ms->hashTable; U32 const hBitsL = cParams->hashLog; U32 const mls = cParams->minMatch; @@ -106,7 +106,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const mls /* template */) { - ZSTD_compressionParameters const* cParams = &ms->cParams; + ZSTD_CParams const* cParams = &ms->cParams; U32* const hashLong = ms->hashTable; const U32 hBitsL = cParams->hashLog; U32* const hashSmall = ms->chainTable; @@ -116,7 +116,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); /* presumes that, if there is a dictionary, it must be using Attach mode */ - const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex); const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ 
-157,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( ip += ((ip - prefixLowest) == 0); { U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current); U32 const maxRep = current - windowLow; if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; @@ -330,7 +330,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( void const* src, size_t srcSize, U32 const mls /* template */) { - ZSTD_compressionParameters const* cParams = &ms->cParams; + ZSTD_CParams const* cParams = &ms->cParams; U32* const hashLong = ms->hashTable; const U32 hBitsL = cParams->hashLog; U32* const hashSmall = ms->chainTable; @@ -341,14 +341,14 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); /* presumes that, if there is a dictionary, it must be using Attach mode */ - const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex); const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; const ZSTD_MatchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams; + const ZSTD_CParams* const dictCParams = &dms->cParams; const U32* const dictHashLong = dms->hashTable; const U32* const dictHashSmall = dms->chainTable; const U32 dictStartIndex = dms->window.dictLimit; @@ -363,7 +363,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); /* if a dictionary is attached, it must be within window range */ - 
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + assert(ms->window.dictLimit + ZSTD_windowSize(cParams) >= endIndex); if (ms->prefetchCDictTables) { size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); @@ -612,7 +612,7 @@ size_t ZSTD_compressBlock_doubleFast_extDict_generic( void const* src, size_t srcSize, U32 const mls /* template */) { - ZSTD_compressionParameters const* cParams = &ms->cParams; + ZSTD_CParams const* cParams = &ms->cParams; U32* const hashLong = ms->hashTable; U32 const hBitsL = cParams->hashLog; U32* const hashSmall = ms->chainTable; @@ -624,7 +624,7 @@ size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* const ilimit = iend - 8; const BYTE* const base = ms->window.base; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex); const U32 dictStartIndex = lowLimit; const U32 dictLimit = ms->window.dictLimit; const U32 prefixStartIndex = (dictLimit > lowLimit) ? 
dictLimit : lowLimit; diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index ee25bcbac8d..2145353f233 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -17,7 +17,7 @@ void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms, const void* const end, ZSTD_dictTableLoadMethod_e dtlm) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; U32 const mls = cParams->minMatch; @@ -54,7 +54,7 @@ void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms, const void* const end, ZSTD_dictTableLoadMethod_e dtlm) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hBits = cParams->hashLog; U32 const mls = cParams->minMatch; @@ -194,14 +194,14 @@ size_t ZSTD_compressBlock_fast_noDict_generic( void const* src, size_t srcSize, U32 const mls, int useCmov) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */ const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex); const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -237,7 +237,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic( DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); { U32 const curr = (U32)(ip0 - base); - U32 
const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr); U32 const maxRep = curr - windowLow; if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0; if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0; @@ -484,7 +484,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const mls, U32 const hasStep) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ @@ -501,7 +501,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( U32 offset_1=rep[0], offset_2=rep[1]; const ZSTD_MatchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const ZSTD_CParams* const dictCParams = &dms->cParams; const U32* const dictHashTable = dms->hashTable; const U32 dictStartIndex = dms->window.dictLimit; const BYTE* const dictBase = dms->window.base; @@ -513,7 +513,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( /* if a dictionary is still attached, it necessarily means that * it is within window size. So we just check it. 
*/ - const U32 maxDistance = 1U << cParams->windowLog; + const U32 maxDistance = ZSTD_windowSize(cParams); const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); assert(endIndex - prefixStartIndex <= maxDistance); (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ @@ -710,7 +710,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic( ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const mls, U32 const hasStep) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ @@ -720,7 +720,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex); const U32 dictStartIndex = lowLimit; const BYTE* const dictStart = dictBase + dictStartIndex; const U32 dictLimit = ms->window.dictLimit; diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 272ebe0ece7..f3cbc7f73ef 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -30,7 +30,7 @@ void ZSTD_updateDUBT(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend, U32 mls) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hashLog = cParams->hashLog; @@ -76,7 +76,7 @@ void ZSTD_insertDUBT1(const ZSTD_MatchState_t* ms, U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const bt = ms->chainTable; U32 const btLog = 
cParams->chainLog - 1; U32 const btMask = (1 << btLog) - 1; @@ -94,7 +94,7 @@ void ZSTD_insertDUBT1(const ZSTD_MatchState_t* ms, U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ U32 dummy32; /* to be nullified at the end */ U32 const windowValid = ms->window.lowLimit; - U32 const maxDistance = 1U << cParams->windowLog; + U32 const maxDistance = ZSTD_windowSize(cParams); U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; @@ -171,7 +171,7 @@ size_t ZSTD_DUBT_findBetterDictMatch ( const ZSTD_dictMode_e dictMode) { const ZSTD_MatchState_t * const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const ZSTD_CParams* const dmsCParams = &dms->cParams; const U32 * const dictHashTable = dms->hashTable; U32 const hashLog = dmsCParams->hashLog; size_t const h = ZSTD_hashPtr(ip, hashLog, mls); @@ -246,7 +246,7 @@ size_t ZSTD_DUBT_findBestMatch(ZSTD_MatchState_t* ms, U32 const mls, const ZSTD_dictMode_e dictMode) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hashLog = cParams->hashLog; size_t const h = ZSTD_hashPtr(ip, hashLog, mls); @@ -254,7 +254,7 @@ size_t ZSTD_DUBT_findBestMatch(ZSTD_MatchState_t* ms, const BYTE* const base = ms->window.base; U32 const curr = (U32)(ip-base); - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr); U32* const bt = ms->chainTable; U32 const btLog = cParams->chainLog - 1; @@ -631,7 +631,7 @@ FORCE_INLINE_TEMPLATE ZSTD_ALLOW_POINTER_OVERFLOW_ATTR U32 ZSTD_insertAndFindFirstIndex_internal( ZSTD_MatchState_t* ms, - const ZSTD_compressionParameters* const cParams, + const ZSTD_CParams* 
const cParams, const BYTE* ip, U32 const mls, U32 const lazySkipping) { U32* const hashTable = ms->hashTable; @@ -657,7 +657,7 @@ U32 ZSTD_insertAndFindFirstIndex_internal( } U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0); } @@ -670,7 +670,7 @@ size_t ZSTD_HcFindBestMatch( size_t* offsetPtr, const U32 mls, const ZSTD_dictMode_e dictMode) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const chainTable = ms->chainTable; const U32 chainSize = (1 << cParams->chainLog); const U32 chainMask = chainSize-1; @@ -680,7 +680,7 @@ size_t ZSTD_HcFindBestMatch( const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const U32 curr = (U32)(ip-base); - const U32 maxDistance = 1U << cParams->windowLog; + const U32 maxDistance = ZSTD_windowSize(cParams); const U32 lowestValid = ms->window.lowLimit; const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? 
curr - maxDistance : lowestValid; const U32 isDictionary = (ms->loadedDictEnd != 0); @@ -1149,14 +1149,14 @@ size_t ZSTD_RowFindBestMatch( BYTE* const tagTable = ms->tagTable; U32* const hashCache = ms->hashCache; const U32 hashLog = ms->rowHashLog; - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const U32 curr = (U32)(ip-base); - const U32 maxDistance = 1U << cParams->windowLog; + const U32 maxDistance = ZSTD_windowSize(cParams); const U32 lowestValid = ms->window.lowLimit; const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; const U32 isDictionary = (ms->loadedDictEnd != 0); @@ -1551,7 +1551,7 @@ size_t ZSTD_compressBlock_lazy_generic( ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { U32 const curr = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr); U32 const maxRep = curr - windowLow; if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; @@ -1951,7 +1951,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ms->window.lowLimit; - const U32 windowLog = ms->cParams.windowLog; const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); @@ -1982,7 +1981,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( U32 curr = (U32)(ip-base); /* check repCode */ - { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + { const 
U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1); const U32 repIndex = (U32)(curr+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; @@ -2023,7 +2022,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( curr++; /* check repCode */ if (offBase) { - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr); const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; @@ -2055,7 +2054,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( curr++; /* check repCode */ if (offBase) { - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr); const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; @@ -2109,7 +2108,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* check immediate repcode */ while (ip <= ilimit) { const U32 repCurrent = (U32)(ip-base); - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent); const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; const BYTE* const repMatch = repBase + repIndex; diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 070551cad81..894830d82fe 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -133,9 +133,10 @@ static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, } void ZSTD_ldm_adjustParameters(ldmParams_t* params, - const ZSTD_compressionParameters* cParams) + const ZSTD_CParams* cParams) { params->windowLog = cParams->windowLog; + params->windowFrac = cParams->windowFrac; ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); if (params->hashRateLog == 0) { @@ -527,7 +528,7 @@ size_t ZSTD_ldm_generateSequences( ldmState_t* ldmState, RawSeqStore_t* sequences, ldmParams_t const* params, void const* src, size_t srcSize) { - U32 const maxDist = 1U << params->windowLog; + U32 const maxDist = ZSTD_windowSizeLDM(params); BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; size_t const kMaxChunkSize = 1 << 20; @@ -683,7 +684,7 @@ size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore, ZSTD_ParamSwitch_e useRowMatchFinder, void const* src, size_t srcSize) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; unsigned const minMatch = cParams->minMatch; ZSTD_BlockCompressor_f const blockCompressor = ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 42736231aa8..11f36d6d17e 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -104,6 +104,6 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); * Ensures that the minMatchLength >= targetLength during optimal parsing. 
*/ void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams); + ZSTD_CParams const* cParams); #endif /* ZSTD_FAST_H */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 3d7171b755b..3b8584badae 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -445,7 +445,7 @@ U32 ZSTD_insertBt1( U32 const target, U32 const mls, const int extDict) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hashLog = cParams->hashLog; size_t const h = ZSTD_hashPtr(ip, hashLog, mls); @@ -468,7 +468,7 @@ U32 ZSTD_insertBt1( /* windowLow is based on target because * we only need positions that will be in the window at the end of the tree update. */ - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target); U32 matchEndIdx = curr+8+1; size_t bestLength = 8; U32 nbCompares = 1U << cParams->searchLog; @@ -598,7 +598,7 @@ ZSTD_insertBtAndGetAllMatches ( const U32 lengthToBeat, const U32 mls /* template */) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); const BYTE* const base = ms->window.base; U32 const curr = (U32)(ip-base); @@ -616,7 +616,7 @@ ZSTD_insertBtAndGetAllMatches ( const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr); U32 const matchLow = windowLow ? 
windowLow : 1; U32* smallerPtr = bt + 2*(curr&btMask); U32* largerPtr = bt + 2*(curr&btMask) + 1; @@ -626,7 +626,7 @@ ZSTD_insertBtAndGetAllMatches ( U32 nbCompares = 1U << cParams->searchLog; const ZSTD_MatchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; - const ZSTD_compressionParameters* const dmsCParams = + const ZSTD_CParams* const dmsCParams = dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; @@ -1089,7 +1089,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms, const BYTE* const ilimit = iend - 8; const BYTE* const base = ms->window.base; const BYTE* const prefixStart = base + ms->window.dictLimit; - const ZSTD_compressionParameters* const cParams = &ms->cParams; + const ZSTD_CParams* const cParams = &ms->cParams; ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode); diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 0f1fe6d7469..9168fefb7ac 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -494,7 +494,7 @@ ZSTDMT_serialState_reset(SerialState* serialState, { /* Adjust parameters */ if (params.ldmParams.enableLdm == ZSTD_ps_enable) { - DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10); + DEBUGLOG(4, "LDM window size = %u KB", ZSTD_windowSize(&params.cParams) >> 10); ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams); assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); assert(params.ldmParams.hashRateLog < 32); @@ -1095,13 +1095,15 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) * New parameters will be applied to next compression job.
*/ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) { - U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + U32 const saved_wfrac = mtctx->params.cParams.windowFrac; /* Do not modify windowFrac while compressing */ int const compressionLevel = cctxParams->compressionLevel; DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", compressionLevel); mtctx->params.compressionLevel = compressionLevel; - { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + { ZSTD_CParams cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); cParams.windowLog = saved_wlog; + cParams.windowFrac = saved_wfrac; mtctx->params.cParams = cParams; } } @@ -1256,7 +1258,7 @@ size_t ZSTDMT_initCStream_internal( (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx); /* params supposed partially fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!ZSTD_isError(ZSTD_checkCParams_internal(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ /* init */ @@ -1276,9 +1278,10 @@ size_t ZSTDMT_initCStream_internal( mtctx->frameContentSize = pledgedSrcSize; ZSTD_freeCDict(mtctx->cdictLocal); if (dict) { - mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */ - params.cParams, mtctx->cMem); + mtctx->cdictLocal = ZSTD_createCDict_internal( + dict, dictSize, + ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */ + params.cParams, mtctx->cMem); mtctx->cdict = mtctx->cdictLocal; if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation); } else 
{ @@ -1312,7 +1315,7 @@ size_t ZSTDMT_initCStream_internal( ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize)); { /* If ldm is enabled we need windowSize space. */ - size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0; + size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? ZSTD_windowSize(&mtctx->params.cParams) : 0; /* Two buffers of slack, plus extra space for the overlap * This is the minimum slack that LDM works with. One extra because * flush might waste up to targetSectionSize-1 bytes. Another extra @@ -1358,7 +1361,7 @@ size_t ZSTDMT_initCStream_internal( mtctx->inBuff.prefix.size = dictSize; } else { /* note : a loadPrefix becomes an internal CDict */ - mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + mtctx->cdictLocal = ZSTD_createCDict_internal(dict, dictSize, ZSTD_dlm_byRef, dictContentType, params.cParams, mtctx->cMem); mtctx->cdict = mtctx->cdictLocal; diff --git a/lib/zstd.h b/lib/zstd.h index b8c0644a7ec..222e1793dc3 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -372,7 +372,10 @@ typedef enum { * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. * Special: value 0 means "use default windowLog". * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such size at streaming decompression stage. */ + * requires explicitly allowing such size at streaming + * decompression stage. + * Note: The ZSTD_c_windowFrac parameter allows finer-grained + * tweaking of the window size set by this parameter. */ ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. * Resulting memory usage is (1 << (hashLog+2)). * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. 
@@ -538,7 +541,8 @@ typedef enum { ZSTD_c_experimentalParam17=1014, ZSTD_c_experimentalParam18=1015, ZSTD_c_experimentalParam19=1016, - ZSTD_c_experimentalParam20=1017 + ZSTD_c_experimentalParam20=1017, + ZSTD_c_experimentalParam21=1018 } ZSTD_cParameter; typedef struct { @@ -2355,6 +2359,19 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo #define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19 #define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */ +/* ZSTD_c_windowFrac + * The Zstandard format allows expressing window sizes in a more fine-grained + * way than just the power-of-two sizes captured by the windowLog parameter. + * The window size is `(1 + WF / 8) * 2 ^ WL` bytes, where WL is the windowLog + * and WF is the windowFrac. + * + * The valid range of values for this parameter is 0 to 7. The default value + * is 0. The value configured only takes effect if the windowLog has also + * explicitly been set. The windowFrac is ignored when the windowLog is + * ZSTD_WINDOWLOG_MAX (you can't select a window size larger than + * `2 ^ ZSTD_WINDOWLOG_MAX`). + */ +#define ZSTD_c_windowFrac ZSTD_c_experimentalParam21 /*! 
ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, diff --git a/programs/fileio.c b/programs/fileio.c index 0ecca40d2ab..593e9b402e7 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -308,6 +308,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->allowBlockDevices = 0; ret->asyncIO = AIO_supported(); ret->passThrough = -1; + ret->windowFrac = 0; return ret; } @@ -427,6 +428,10 @@ void FIO_setLiteralCompressionMode( prefs->literalCompressionMode = mode; } +void FIO_setWindowFrac(FIO_prefs_t* const prefs, int windowFrac) { + prefs->windowFrac = windowFrac; +} + void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) { #ifndef ZSTD_NOCOMPRESS @@ -1171,6 +1176,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder)); /* compression parameters */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowFrac, prefs->windowFrac) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) ); diff --git a/programs/fileio.h b/programs/fileio.h index cb53ef53781..bb36922cf96 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -91,6 +91,7 @@ void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode); void FIO_setLiteralCompressionMode( FIO_prefs_t* const prefs, ZSTD_ParamSwitch_e mode); +void FIO_setWindowFrac(FIO_prefs_t* const prefs, int windowFrac); void FIO_setProgressSetting(FIO_progressSetting_e progressSetting); void FIO_setNotificationLevel(int level); diff --git a/programs/fileio_types.h b/programs/fileio_types.h index 23bda4168d8..6b573da4837 100644 --- a/programs/fileio_types.h +++ 
b/programs/fileio_types.h @@ -54,6 +54,7 @@ typedef struct FIO_prefs_s { int srcSizeHint; int testMode; ZSTD_ParamSwitch_e literalCompressionMode; + int windowFrac; /* IO preferences */ int removeSrcFile; diff --git a/programs/zstd.1.md b/programs/zstd.1.md index e5c1b7fd215..e602c106990 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -387,6 +387,18 @@ The list of available _options_: Note: If `windowLog` is set to larger than 27, `--long=windowLog` or `--memory=windowSize` needs to be passed to the decompressor. +- `windowFrac`=_wfrac_, `wfrac`=_wfrac_: + Set the window size to a non-power-of-two value, as an adjustment to the + power-of-two window size set by the `windowLog` parameter. + + The Zstd format supports expressing window sizes as + `(1 + wfrac / 8) * (2 ^ wlog)` where `wfrac` has an integer value between + 0 and 7 inclusive and `wlog` has an integer value between 10 and 41. + (Although see the documentation on `windowLog` above.) + + This parameter has no effect unless the `windowLog` is also set. It also + has no effect when the `windowLog` is set to its maximum supported value. + - `hashLog`=_hlog_, `hlog`=_hlog_: Specify the maximum number of bits for a hash table. 
diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 83d9b881e50..03291d59879 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -616,10 +616,11 @@ static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, in * @return 1 means that compression parameters were correct * @return 0 in case of malformed parameters */ -static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressionParameters* params) +static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressionParameters* params, int* windowFrac) { for ( ; ;) { if (longCommandWArg(&stringPtr, "windowLog=") || longCommandWArg(&stringPtr, "wlog=")) { params->windowLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "windowFrac=") || longCommandWArg(&stringPtr, "wfrac=")) { *windowFrac = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "chainLog=") || longCommandWArg(&stringPtr, "clog=")) { params->chainLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "hashLog=") || longCommandWArg(&stringPtr, "hlog=")) { params->hashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "searchLog=") || longCommandWArg(&stringPtr, "slog=")) { params->searchLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } @@ -878,6 +879,7 @@ int main(int argCount, const char* argv[]) FIO_progressSetting_e progress = FIO_ps_auto; zstd_operation_mode operation = zom_compress; ZSTD_compressionParameters compressionParams; + int windowFrac = 0; int cLevel = init_cLevel(); int cLevelLast = MINCLEVEL - 1; /* lower than minimum */ unsigned recursive = 0; @@ -1076,7 +1078,7 @@ int main(int argCount, const char* argv[]) if 
(longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; } if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; } if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; } - if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; } + if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams, &windowFrac)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; } if (longCommandWArg(&argument, "--stream-size")) { NEXT_TSIZE(streamSrcSize); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size")) { NEXT_TSIZE(targetCBlockSize); continue; } if (longCommandWArg(&argument, "--size-hint")) { NEXT_TSIZE(srcSizeHint); continue; } @@ -1600,6 +1602,7 @@ int main(int argCount, const char* argv[]) FIO_setSrcSizeHint(prefs, srcSizeHint); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); FIO_setSparseWrite(prefs, 0); + FIO_setWindowFrac(prefs, windowFrac); if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMax < cLevel) cLevel = adaptMax; diff --git a/tests/cli-tests/compression/window-frac.sh b/tests/cli-tests/compression/window-frac.sh new file mode 100755 index 00000000000..507f6036480 --- /dev/null +++ b/tests/cli-tests/compression/window-frac.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +set -e + +zstd --zstd=wlog=21,wfrac=5 < file > file.zst +zstd -vv -l file.zst diff --git a/tests/cli-tests/compression/window-frac.sh.stderr.ignore b/tests/cli-tests/compression/window-frac.sh.stderr.ignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/cli-tests/compression/window-frac.sh.stdout.glob b/tests/cli-tests/compression/window-frac.sh.stdout.glob new file mode 100644 index 00000000000..d26dbedfa06 --- /dev/null +++ 
b/tests/cli-tests/compression/window-frac.sh.stdout.glob @@ -0,0 +1,5 @@ +file.zst +# Zstandard Frames: 1 +... +Window Size: 3407872 B (3407872 B) +... \ No newline at end of file diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index 9295d248ccd..f2ad92a7b11 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -142,10 +142,11 @@ static size_t decodeSequences(void* dst, size_t nbSequences, */ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, size_t literalsSizeLimit, size_t dictSize, - size_t windowLog, ZSTD_SequenceFormat_e mode) + uint32_t windowLog, uint32_t windowFrac, + ZSTD_SequenceFormat_e mode) { const uint32_t repCode = 0; /* not used by sequence ingestion api */ - size_t windowSize = 1ULL << windowLog; + uint64_t windowSize = ((8ULL + windowFrac) << windowLog) >> 3; size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; uint32_t bytesGenerated = 0; @@ -346,6 +347,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size) size_t dictSize = 0; unsigned hasDict; unsigned wLog; + unsigned wFrac; int cLevel; ZSTD_SequenceFormat_e mode; @@ -361,8 +363,12 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size) FUZZ_ASSERT(dctx); } - /* Generate window log first so we don't generate offsets too large */ + /* Generate window size first so we don't generate offsets too large */ wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + wFrac = FUZZ_dataProducer_uint32Range(producer, 0, 7); + if (wLog == ZSTD_WINDOWLOG_MAX) { + wFrac = 0; + } cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22); mode = (ZSTD_SequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1); @@ -370,6 +376,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size) ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0); ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 
cLevel); ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)wLog); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowFrac, (int)wFrac); ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN); ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1); ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, (int)mode); @@ -415,7 +422,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size) generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); } - nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode); + nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, wFrac, mode); generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode); /* Note : in explicit block delimiters mode, diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index f3b2e6fba4c..755b81b6c26 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -102,6 +102,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer { ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); set(cctx, ZSTD_c_windowLog, cParams.windowLog); + setRand(cctx, ZSTD_c_windowFrac, 0, 7, producer); set(cctx, ZSTD_c_hashLog, cParams.hashLog); set(cctx, ZSTD_c_chainLog, cParams.chainLog); set(cctx, ZSTD_c_searchLog, cParams.searchLog); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index b74460bb573..213cfd9ba90 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -43,6 +43,7 @@ #include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */ /* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */ #include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */ +#include "zstd_compress_internal.h" /* ZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES */ #include 
"threading.h" /* ZSTD_pthread_create, ZSTD_pthread_join */ @@ -707,8 +708,8 @@ static int basicUnitTests(U32 const seed, double compressibility) params.hashLog = 19; params.chainLog = 19; params = ZSTD_adjustCParams(params, 1000, 100000); - if (params.hashLog != 18) goto _output_error; - if (params.chainLog != 17) goto _output_error; + CHECK_EQ(params.chainLog, 17); + CHECK_EQ(params.hashLog, 18); } DISPLAYLEVEL(3, "OK \n"); @@ -1771,6 +1772,8 @@ static int basicUnitTests(U32 const seed, double compressibility) CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowLog, &value)); CHECK_EQ(value, 0); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowFrac, &value)); + CHECK_EQ(value, 0); CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_chainLog, &value)); CHECK_EQ(value, 0); CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); @@ -1789,6 +1792,8 @@ static int basicUnitTests(U32 const seed, double compressibility) CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowLog, &value)); CHECK_EQ(value, (int)cparams.windowLog); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowFrac, &value)); + CHECK_EQ(value, 0); CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_chainLog, &value)); CHECK_EQ(value, (int)cparams.chainLog); CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); @@ -1839,6 +1844,8 @@ static int basicUnitTests(U32 const seed, double compressibility) CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowLog, &value)); CHECK_EQ(value, 0); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowFrac, &value)); + CHECK_EQ(value, 0); CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_chainLog, &value)); CHECK_EQ(value, 0); CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); @@ -1866,6 +1873,8 @@ static int basicUnitTests(U32 const seed, double compressibility) CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowLog, &value)); CHECK_EQ(value, (int)params.cParams.windowLog); + CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_windowFrac, &value)); + CHECK_EQ(value, 0); 
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_chainLog, &value)); CHECK_EQ(value, (int)params.cParams.chainLog); CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value)); @@ -3377,6 +3386,108 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : check fractional window sizes : \n", testNb++); + { + int windowLog; + int windowFrac; + + for (windowLog = 0; windowLog <= ZSTD_WINDOWLOG_MAX; windowLog++) { + if (windowLog == 1) { + windowLog = ZSTD_WINDOWLOG_MIN; + } + for (windowFrac = 0; windowFrac <= 7; windowFrac++) { + int hint; + for (hint = 0; hint <= 1; hint++) { + unsigned long long inputSize; + for (inputSize = 100; inputSize < (3ull << ZSTD_WINDOWLOG_MAX); inputSize += (1ull << (ZSTD_highbit32((U32)(inputSize >> 4))))) { + ZSTD_inBuffer input = {CNBuffer, CNBuffSize, 0}; + ZSTD_outBuffer compressed = {compressedBuffer, compressedBufferSize, 0}; + ZSTD_FrameHeader zfh; + unsigned long long maxWindowSize; + + DISPLAYLEVEL(5, + "Checking %s input = 0x%16llx, windowLog = %2d, windowFrac = %d: ", + hint ? "hinted" : " fixed", + inputSize, windowLog, windowFrac); + + if (input.size > 16) { + /* We don't have to compress too much. */ + input.size = 16; + } + if (input.size > inputSize) { + input.size = (size_t)inputSize; + } + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, windowLog)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowFrac, windowFrac)); + if (hint) { + if (inputSize >= (1ull << 31)) { + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)((1ull << 31) - 1))); + } else { + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)inputSize)); + } + } else { + CHECK_Z(ZSTD_CCtx_setPledgedSrcSize(cctx, inputSize)); + } + + CHECK_Z(ZSTD_compressStream2(cctx, &compressed, &input, inputSize == 0 ? 
ZSTD_e_end : ZSTD_e_flush)); + CHECK_LT(0, compressed.pos); /* .size is the fixed buffer capacity; .pos is what was actually produced */ + + CHECK_Z(ZSTD_getFrameHeader(&zfh, compressed.dst, compressed.pos)); + if (!hint) { + CHECK_EQ(zfh.frameContentSize, inputSize); + } + + DISPLAYLEVEL(5, + "got frame size = 0x%16llx, window size = 0x%8llx, ", + zfh.frameContentSize, zfh.windowSize); + + maxWindowSize = 1ull << ZSTD_WINDOWLOG_MAX; + + if (windowLog != 0 && maxWindowSize > (1ull << windowLog)) { + maxWindowSize = ((8ull + windowFrac) << windowLog) >> 3; + } + + if (!hint) { + if (maxWindowSize > inputSize) { + maxWindowSize = inputSize; + } + } else { + int winLogAndFrac; + for (winLogAndFrac = ZSTD_WINDOWLOG_MIN << 3; winLogAndFrac < (ZSTD_WINDOWLOG_MAX << 3); winLogAndFrac++) { + unsigned long long candidateWindowSize = ((8ull + (winLogAndFrac & 7)) << (winLogAndFrac >> 3)) >> 3; + if (candidateWindowSize >= inputSize && (windowLog == 0 || maxWindowSize > candidateWindowSize)) { + maxWindowSize = candidateWindowSize; + break; + } +#if !ZSTD_WINDOW_ALLOW_PICKING_FRACTIONAL_SIZES + winLogAndFrac += 7; /* skip over fractional windows */ +#endif + } + } + + if (hint && (maxWindowSize < (1ull << ZSTD_WINDOWLOG_MIN))) { + maxWindowSize = 1ull << ZSTD_WINDOWLOG_MIN; + } + + DISPLAYLEVEL(5, + "expected window size = 0x%16llx\n", + maxWindowSize); + + if (windowLog != 0) { + CHECK_EQ(zfh.windowSize, maxWindowSize); + } else { + CHECK_LT(zfh.windowSize, maxWindowSize + 1); + } + } + } + } + } + } + DISPLAYLEVEL(3, "OK \n"); + ZSTD_freeCCtx(cctx); free(dictBuffer); free(samplesSizes); @@ -3670,7 +3781,7 @@ static int basicUnitTests(U32 const seed, double compressibility) { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); ZSTD_DCtx* const dctx = ZSTD_createDCtx(); static const size_t dictSize = 65 KB; - static const size_t blockSize = 100 KB; /* won't cause pb with small dict size */ + static const size_t blockSize = 71 KB; /* won't cause pb with small dict size */ size_t cSize2; assert(cctx != NULL); assert(dctx != NULL); @@ -3734,6 +3845,7 @@ static
int basicUnitTests(U32 const seed, double compressibility) { ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 3); if (cdict==NULL) goto _output_error; CHECK_Z( ZSTD_compressBegin_usingCDict(cctx, cdict) ); + CHECK_LT(blockSize - 1, ZSTD_getBlockSize(cctx)); /* block must fit; a 0/1 comparison handed to CHECK_Z is never an error code */ CHECK_Z( ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize) ); ZSTD_freeCDict(cdict); }