Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 136 additions & 26 deletions lib/compress/zstd_compress.c

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,9 @@ struct ZSTD_CCtx_params_s {

/* Controls repcode search in external sequence parsing */
ZSTD_ParamSwitch_e searchForExternalRepcodes;

/* Constrains window to comply with application protocol requirements. */
ZSTD_ConstrainWindow_e constrainWindowForProtocol;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
Expand Down
68 changes: 67 additions & 1 deletion lib/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ typedef enum {
* ZSTD_c_prefetchCDictTables
* ZSTD_c_enableSeqProducerFallback
* ZSTD_c_maxBlockSize
* ZSTD_c_constrainWindowForProtocol
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
Expand All @@ -538,7 +539,8 @@ typedef enum {
ZSTD_c_experimentalParam17=1014,
ZSTD_c_experimentalParam18=1015,
ZSTD_c_experimentalParam19=1016,
ZSTD_c_experimentalParam20=1017
ZSTD_c_experimentalParam20=1017,
ZSTD_c_experimentalParam21=1018
} ZSTD_cParameter;

typedef struct {
Expand Down Expand Up @@ -2355,6 +2357,70 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
#define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19
#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */

/**
* Used in conjunction with the `ZSTD_c_constrainWindowForProtocol` option
* described below.
*/
typedef enum {
/* Equivalent to disabling. */
ZSTD_ConstrainWindow_auto = ZSTD_ps_auto,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious, what is the difference between "auto" and "disable", they seem basically the same? Do you see a situation where they aren't the same in the future?

The command line option also only has "none".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're just here by analogy to the ZSTD_paramSwitch_e enum, which is used for most cctx param options. They behave identically.


/* No equivalent for ZSTD_ps_enable: you need to specify which protocol. */

/* No constraints imposed. */
ZSTD_ConstrainWindow_disable = ZSTD_ps_disable,

/* Constrains the window size to comply with the limits imposed on the
* `zstd` Content-Encoding, specified by RFCs 8878 and 9659. I.e., limits
* the window to <= 8MB. */
ZSTD_ConstrainWindow_HTTP_Zstd = 3,

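/* Constrains the window size to comply with the limits imposed on the
* `dcz` Content-Encoding, as specified by the Compression Dictionary
* Transport protocol. I.e., limits the window to the larger of 8MB and
* 1.25 times the dictionary size, capped at 128MB.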
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth mentioning what the actual limit is?

*
* Note: as of the writing of this version of zstd, this protocol has not
* been finalized and published as an RFC. This implementation is written to
* comply with draft-ietf-httpbis-compression-dictionary-19.
*
* The likelihood that the window sizing constraints change between this draft
* and the final document is judged to be low enough that we aren't bothering
* with an interim enum value representing this draft version, which would
* impose almost certainly needless migration burden once the RFC is published.
* However, the macro `ZSTD_CONSTRAINWINDOW_HTTP_DCZ_DRAFT_VERSION` is defined
* during the transition to help libraries validate that they have the
* behavior they expect. It will be updated and then removed once the draft
* is published as an RFC. */
ZSTD_ConstrainWindow_HTTP_DCZ = 4
} ZSTD_ConstrainWindow_e;

#define ZSTD_CONSTRAINWINDOW_HTTP_DCZ_DRAFT_VERSION 19

/**
* ZSTD_c_constrainWindowForProtocol
*
* This option constrains the window size of compressions to comply with limits
* imposed by protocols that use Zstandard. This option clamps the selected
* window size of a compression into the range allowed by the selected protocol.
* If the window size a compression would use is already inside that range,
* this option will not change the resolved window size. (E.g., if a compression
* would use a 1 MB window, selecting `ZSTD_ConstrainWindow_HTTP_Zstd` won't
* *raise* the window size to the 8 MB limit imposed by that protocol).
*
* This is intended to be a convenience option, and save users the burden of
* implementing these constraints themselves, because doing that properly would
* not be simple. In normal operation with only a compression level selected by
* the user, zstd has non-trivial internal logic to select a window size. The
* user would have to reimplement that logic and then apply the protocol's
* limits. This option lets zstd continue to use its internal logic to resolve
* an appropriate window size and then applies the protocol constraint.
*
* The currently understood protocols, and the values that should be used to
* indicate them, are defined and described in the `ZSTD_ConstrainWindow_e`
* enum.
*/
#define ZSTD_c_constrainWindowForProtocol ZSTD_c_experimentalParam21


/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
Expand Down
8 changes: 8 additions & 0 deletions programs/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->allowBlockDevices = 0;
ret->asyncIO = AIO_supported();
ret->passThrough = -1;
ret->constrainWindowForProtocol = ZSTD_ConstrainWindow_auto;
return ret;
}

Expand Down Expand Up @@ -490,6 +491,12 @@ void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_ParamSwitch_e value)
prefs->mmapDict = value;
}

/* FIO_setConstrainWindowForProtocol() :
 * Stores `constraint` in the preferences object. The stored value is what
 * FIO_createCResources() later hands to the compression context through
 * ZSTD_c_constrainWindowForProtocol. */
void FIO_setConstrainWindowForProtocol(FIO_prefs_t* const prefs,
                                       ZSTD_ConstrainWindow_e constraint)
{
    prefs->constrainWindowForProtocol = constraint;
}

/* FIO_ctx_t functions */

void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
Expand Down Expand Up @@ -1188,6 +1195,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
}
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_constrainWindowForProtocol, prefs->constrainWindowForProtocol) );
#endif
/* dictionary */
if (prefs->patchFromMode) {
Expand Down
3 changes: 3 additions & 0 deletions programs/fileio.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode);
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_ParamSwitch_e mode);
/* Selects the protocol (a ZSTD_ConstrainWindow_e value) whose window-size
 * limits should be recorded in `prefs` for subsequent compressions. */
void FIO_setConstrainWindowForProtocol(
FIO_prefs_t* const prefs,
ZSTD_ConstrainWindow_e constraint);

void FIO_setProgressSetting(FIO_progressSetting_e progressSetting);
void FIO_setNotificationLevel(int level);
Expand Down
1 change: 1 addition & 0 deletions programs/fileio_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ typedef struct FIO_prefs_s {
int srcSizeHint;
int testMode;
ZSTD_ParamSwitch_e literalCompressionMode;
ZSTD_ConstrainWindow_e constrainWindowForProtocol;

/* IO preferences */
int removeSrcFile;
Expand Down
5 changes: 5 additions & 0 deletions programs/zstd.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,11 @@ the last one takes effect.
This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller.
Enabling this feature can decrease compression speed by up to ~10% at level 1.
Higher levels will see smaller relative speed regression, becoming invisible at higher settings.
* `--constrain-window={none,http-zstd,http-dcz}`:
Constrains the window size to the limits set by the indicated protocol.
`none` (the default) doesn't set any constraints.
`http-zstd` refers to the `zstd` HTTP Content-Encoding specified by RFCs 8878 and 9659.
`http-dcz` refers to the `dcz` HTTP Content-Encoding specified by the Compression Dictionary Transport draft.
* `-f`, `--force`:
disable input and output checks. Allows overwriting existing files, input
from console, output to stdout, operating on links, block devices, etc.
Expand Down
24 changes: 23 additions & 1 deletion programs/zstdcli.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,9 @@ static void usageAdvanced(const char* programName)
DISPLAYOUT(" --stream-size=# Specify size of streaming input from STDIN.\n");
DISPLAYOUT(" --size-hint=# Optimize compression parameters for streaming input of approximately size #.\n");
DISPLAYOUT(" --target-compressed-block-size=#\n");
DISPLAYOUT(" Generate compressed blocks of approximately # size.\n\n");
DISPLAYOUT(" Generate compressed blocks of approximately # size.\n");
DISPLAYOUT(" --constrain-window={none,http-zstd,http-dcz}\n");
DISPLAYOUT(" Constrain window size to comply with limits set by protocol. [Default: none]\n\n");
DISPLAYOUT(" --no-dictID Don't write `dictID` into the header (dictionary compression only).\n");
DISPLAYOUT(" --[no-]compress-literals Force (un)compressed literals.\n");
DISPLAYOUT(" --[no-]row-match-finder Explicitly enable/disable the fast, row-based matchfinder for\n");
Expand Down Expand Up @@ -851,6 +853,7 @@ int main(int argCount, const char* argv[])
removeSrcFile=0;
ZSTD_ParamSwitch_e mmapDict=ZSTD_ps_auto;
ZSTD_ParamSwitch_e useRowMatchFinder = ZSTD_ps_auto;
ZSTD_ConstrainWindow_e constrainWindowForProtocol = ZSTD_ConstrainWindow_auto;
FIO_compressionType_t cType = FIO_zstdCompression;
int nbWorkers = -1; /* -1 means unset */
double compressibility = -1.0; /* lorem ipsum generator */
Expand Down Expand Up @@ -1135,6 +1138,23 @@ int main(int argCount, const char* argv[])
continue;
}


/* --constrain-window={none,http-zstd,http-dcz} :
 * select which protocol's window-size limits (if any) the compression
 * must respect. Any other value is a usage error. */
if (longCommandWArg(&argument, "--constrain-window")) {
    const char* protocol;
    NEXT_FIELD(protocol);
    if (!strcmp(protocol, "none")) {
        /* "none" explicitly requests that no constraint be imposed */
        constrainWindowForProtocol = ZSTD_ConstrainWindow_disable;
    } else if (!strcmp(protocol, "http-zstd")) {
        constrainWindowForProtocol = ZSTD_ConstrainWindow_HTTP_Zstd;
    } else if (!strcmp(protocol, "http-dcz")) {
        constrainWindowForProtocol = ZSTD_ConstrainWindow_HTTP_DCZ;
    } else {
        badUsage(programName, originalArgument);
        CLEAN_RETURN(1);
    }
    continue;
}

badUsage(programName, originalArgument);
CLEAN_RETURN(1);
}
Expand Down Expand Up @@ -1573,6 +1593,7 @@ int main(int argCount, const char* argv[])
FIO_setSrcSizeHint(prefs, srcSizeHint);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
FIO_setSparseWrite(prefs, 0);
FIO_setConstrainWindowForProtocol(prefs, constrainWindowForProtocol);
if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax;

Expand Down Expand Up @@ -1603,6 +1624,7 @@ int main(int argCount, const char* argv[])
(void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode;
(void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint;
(void)ZSTD_strategyMap; (void)useRowMatchFinder; (void)cType;
(void)constrainWindowForProtocol;
DISPLAYLEVEL(1, "Compression not supported \n");
#endif
} else { /* decompression or test */
Expand Down
14 changes: 14 additions & 0 deletions tests/cli-tests/compression/window-constraint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh

# Stop at the first failing command; otherwise a failed step would be hidden
# behind the exit status of the final cleanup.
set -e

# Input shared by every case below.
datagen -g256M > file

# http-zstd constraint, while requesting a much larger window via --long=30.
zstd --long=30 --single-thread --constrain-window=http-zstd -f < file > file.zst
zstd -l -v file.zst

# http-dcz constraint without any dictionary.
zstd --long=30 --single-thread --constrain-window=http-dcz -f < file > file.zst
zstd -l -v file.zst

# http-dcz constraint with a dictionary (a copy of the input) via --patch-from.
cp file dict
zstd --long=30 --single-thread --constrain-window=http-dcz --patch-from dict -f file
zstd -l -v file.zst

rm dict file file.zst
7 changes: 7 additions & 0 deletions tests/cli-tests/compression/window-constraint.sh.stdout.glob
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
...
Window Size: 8.00 MiB (8388608 B)
...
Window Size: 8.00 MiB (8388608 B)
...
Window Size: 128 MiB (134217728 B)
...
126 changes: 126 additions & 0 deletions tests/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,11 @@ static U32 FUZ_highbit32(U32 v32)
} \
}
/* Comparison shorthands : each one forwards its operator and both operands to CHECK_OP(). */
#define CHECK_EQ(lhs, rhs) CHECK_OP(==, lhs, rhs)
#define CHECK_NE(lhs, rhs) CHECK_OP(!=, lhs, rhs)
#define CHECK_LT(lhs, rhs) CHECK_OP(<, lhs, rhs)
#define CHECK_GT(lhs, rhs) CHECK_OP(>, lhs, rhs)
#define CHECK_LE(lhs, rhs) CHECK_OP(<=, lhs, rhs)
#define CHECK_GE(lhs, rhs) CHECK_OP(>=, lhs, rhs)


/*=============================================
Expand Down Expand Up @@ -3377,6 +3381,128 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : check window size when applying protocol constraints : \n", testNb++);
{
    /* For every combination of (constraint, pledged input size, dictionary
     * size, explicit windowLog), start a streaming compression, parse the
     * emitted frame header, and compare its window size against the limit
     * computed independently below.
     *
     * The dictionary sizes 13421772/13421773 and 107374182/107374183 sit on
     * either side of the points where dictSize + dictSize/4 reaches 16 MiB
     * and 128 MiB respectively. */
    const size_t dictSizes[] = { 0, 128, 128 KB, 4 MB, 13421772, 13421773, 107374182, 107374183, 150 MB};
    const size_t nbDictSizes = sizeof(dictSizes) / sizeof(dictSizes[0]);
    size_t dictSizeIdx;
    const size_t bigDictSize = dictSizes[nbDictSizes - 1];
    char* bigDictBuffer = malloc(bigDictSize);
    const ZSTD_ConstrainWindow_e constraints[] = {
        ZSTD_ConstrainWindow_disable,
        ZSTD_ConstrainWindow_HTTP_Zstd,
        ZSTD_ConstrainWindow_HTTP_DCZ
    };
    const char* constraintNames[] = {
        "ZSTD_ConstrainWindow_disable",
        "ZSTD_ConstrainWindow_HTTP_Zstd",
        "ZSTD_ConstrainWindow_HTTP_DCZ"
    };
    const size_t nbConstraints = sizeof(constraints) / sizeof(constraints[0]);
    size_t constraintIdx;
    const unsigned long long inputSizes[] = { 0, 1ull << 10, 1ull << 23, 1ull << 27, 1ull << 31, 1ull << 63, ZSTD_CONTENTSIZE_UNKNOWN };
    const size_t nbInputSizes = sizeof(inputSizes) / sizeof(inputSizes[0]);
    size_t inputSizeIdx;
    const int windowLogs[] = { 0, 15, 25, ZSTD_WINDOWLOG_MAX };
    const size_t nbWindowLogs = sizeof(windowLogs) / sizeof(windowLogs[0]);
    size_t windowLogIdx;

    CHECK_NE(bigDictBuffer, NULL);
    memset(bigDictBuffer, 0, bigDictSize);

    for (constraintIdx = 0; constraintIdx < nbConstraints; constraintIdx++) {
        const ZSTD_ConstrainWindow_e constraint = constraints[constraintIdx];
        for (inputSizeIdx = 0; inputSizeIdx < nbInputSizes; inputSizeIdx++) {
            const unsigned long long inputSize = inputSizes[inputSizeIdx];
            for (dictSizeIdx = 0; dictSizeIdx < nbDictSizes; dictSizeIdx++) {
                dictSize = dictSizes[dictSizeIdx];
                /* Only the DCZ expectation depends on the dictionary size, so the
                 * other constraints are exercised with just the two extremes
                 * (no dictionary, largest dictionary). */
                if (constraint != ZSTD_ConstrainWindow_HTTP_DCZ && (dictSize != 0 && dictSize != bigDictSize)) {
                    continue;
                }
                for (windowLogIdx = 0; windowLogIdx < nbWindowLogs; windowLogIdx++) {
                    const int windowLog = windowLogs[windowLogIdx];
                    ZSTD_inBuffer input = {CNBuffer, CNBuffSize, 0};
                    ZSTD_outBuffer compressed = {compressedBuffer, compressedBufferSize, 0};
                    ZSTD_FrameHeader zfh;
                    unsigned long long maxWindowSize;

                    DISPLAYLEVEL(5,
                        "Checking constraint = %-30s "
                        "with input size = %20llu and dict size = %9zu "
                        "and windowLog = %2d: ",
                        constraintNames[constraintIdx],
                        inputSize, dictSize, windowLog);

                    /* Only the frame header is inspected : feed at most 200 KB,
                     * and never more than the pledged size. */
                    if (input.size > 200 KB) {
                        input.size = 200 KB;
                    }
                    if (input.size > inputSize) {
                        input.size = (size_t)inputSize;
                    }

                    CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
                    CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_constrainWindowForProtocol, constraint));
                    CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1));
                    CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, windowLog));
                    CHECK_Z(ZSTD_CCtx_setPledgedSrcSize(cctx, inputSize));
                    if (dictSize != 0) {
                        CHECK_Z(ZSTD_CCtx_loadDictionary_advanced(cctx, bigDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent));
                    }
                    CHECK_Z(ZSTD_compressStream2(cctx, &compressed, &input, inputSize == 0 ? ZSTD_e_end : ZSTD_e_flush));

                    /* `pos` is the number of bytes actually produced;
                     * `size` is only the capacity of the output buffer. */
                    CHECK_GT(compressed.pos, 0);

                    CHECK_Z(ZSTD_getFrameHeader(&zfh, compressed.dst, compressed.pos));

                    DISPLAYLEVEL(5,
                        "got window size = %10llu, Frame size = %20llu, ",
                        zfh.windowSize, zfh.frameContentSize);

                    CHECK_EQ(zfh.frameContentSize, inputSize);

                    /* Upper bound allowed by the selected protocol. */
                    switch (constraint) {
                    case ZSTD_ConstrainWindow_auto:
                    case ZSTD_ConstrainWindow_disable:
                        maxWindowSize = 1ull << ZSTD_WINDOWLOG_MAX;
                        break;
                    case ZSTD_ConstrainWindow_HTTP_Zstd:
                        maxWindowSize = 8 MB;
                        break;
                    case ZSTD_ConstrainWindow_HTTP_DCZ:
                        /* 1.25x the dictionary size, clamped to [8 MB, 128 MB],
                         * then rounded down to a power of two. */
                        maxWindowSize = dictSize + (dictSize >> 2);
                        maxWindowSize = maxWindowSize < 8 MB ? 8 MB : maxWindowSize > 128 MB ? 128 MB : maxWindowSize;
                        maxWindowSize = 1ull << ZSTD_highbit32((U32)maxWindowSize);
                        break;
                    default:
                        CHECK(0);
                    }

                    /* An explicit windowLog can only lower the bound further. */
                    if (windowLog != 0 && maxWindowSize > (1ull << windowLog)) {
                        maxWindowSize = 1ull << windowLog;
                    }

                    /* The expected window is never larger than the pledged input. */
                    if (maxWindowSize > inputSize) {
                        maxWindowSize = inputSize;
                    }

                    DISPLAYLEVEL(5,
                        "expected window size = %20llu\n",
                        maxWindowSize);

                    /* With an explicit windowLog the result must match exactly;
                     * otherwise the level-derived window may be anything up to
                     * the bound. */
                    if (windowLog != 0) {
                        CHECK_EQ(zfh.windowSize, maxWindowSize);
                    } else {
                        CHECK_LE(zfh.windowSize, maxWindowSize);
                    }
                }
            }
        }
    }

    free(bigDictBuffer);
}
DISPLAYLEVEL(3, "OK \n");

ZSTD_freeCCtx(cctx);
free(dictBuffer);
free(samplesSizes);
Expand Down
Loading