diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index abc75fb2d0d..40ea19f3c0c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -630,6 +630,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; + case ZSTD_c_constrainWindowForProtocol: + bounds.lowerBound = (int)ZSTD_ConstrainWindow_auto; + bounds.upperBound = (int)ZSTD_ConstrainWindow_HTTP_DCZ; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -700,6 +705,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_enableSeqProducerFallback: case ZSTD_c_maxBlockSize: case ZSTD_c_repcodeResolution: + case ZSTD_c_constrainWindowForProtocol: default: return 0; } @@ -760,6 +766,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_enableSeqProducerFallback: case ZSTD_c_maxBlockSize: case ZSTD_c_repcodeResolution: + case ZSTD_c_constrainWindowForProtocol: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -1016,6 +1023,14 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->searchForExternalRepcodes = (ZSTD_ParamSwitch_e)value; return CCtxParams->searchForExternalRepcodes; + case ZSTD_c_constrainWindowForProtocol: + BOUNDCHECK(ZSTD_c_constrainWindowForProtocol, value); + RETURN_ERROR_IF( + value == (int)ZSTD_ps_enable, + parameter_outOfBound, "Param out of bounds"); + CCtxParams->constrainWindowForProtocol = (ZSTD_ConstrainWindow_e)value; + return CCtxParams->constrainWindowForProtocol; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -1163,6 +1178,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_repcodeResolution: *value = (int)CCtxParams->searchForExternalRepcodes; break; + case ZSTD_c_constrainWindowForProtocol: + *value = (int)CCtxParams->constrainWindowForProtocol; + break; default: 
RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -1462,6 +1480,35 @@ static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) } } +static U32 ZSTD_windowConstraintForProtocol( + ZSTD_ConstrainWindow_e protocol, + U64 srcSize, + size_t dictSize) { + (void)srcSize; + switch (protocol) { + case ZSTD_ConstrainWindow_auto: + case ZSTD_ConstrainWindow_disable: + default: + return ZSTD_WINDOWLOG_MAX; + case ZSTD_ConstrainWindow_HTTP_Zstd: + return 23; /* 8 MB */ + case ZSTD_ConstrainWindow_HTTP_DCZ: { + U32 wlog; + if (dictSize < 8 MB) { + wlog = 23; /* 8 MB */ + } else if (dictSize >= 128 MB) { + wlog = 27; /* 128 MB */ + } else { + dictSize += dictSize >> 2; /* dictSize *= 1.25 */ + wlog = ZSTD_highbit32((U32)dictSize); + } + assert(wlog >= 23); + assert(wlog <= 27); + return wlog; + } + } +} + /** ZSTD_adjustCParams_internal() : * optimize `cPar` for a specified input (`srcSize` and `dictSize`). * mostly downsize to reduce memory consumption and initialization latency. @@ -1474,7 +1521,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize, ZSTD_CParamMode_e mode, - ZSTD_ParamSwitch_e useRowMatchFinder) + ZSTD_ParamSwitch_e useRowMatchFinder, + ZSTD_ConstrainWindow_e constrainWindowForProtocol) { const U64 minSrcSize = 513; /* (1<<9) + 1 */ const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); @@ -1522,6 +1570,16 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, } #endif + /* Shrink window to comply with protocols that place restrictions on the + * window size. 
*/ + { + U32 const windowConstraint = ZSTD_windowConstraintForProtocol( + constrainWindowForProtocol, srcSize, dictSize); + if (cPar.windowLog > windowConstraint) { + cPar.windowLog = windowConstraint; + } + } + switch (mode) { case ZSTD_cpm_unknown: case ZSTD_cpm_noAttachDict: @@ -1615,11 +1673,23 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, { cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); -} - -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); + return ZSTD_adjustCParams_internal( + cPar, srcSize, dictSize, ZSTD_cpm_unknown, + ZSTD_ps_auto, ZSTD_ConstrainWindow_auto); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal( + int compressionLevel, + unsigned long long srcSizeHint, + size_t dictSize, + ZSTD_CParamMode_e mode, + ZSTD_ConstrainWindow_e constrainWindowForProtocol); +static ZSTD_parameters ZSTD_getParams_internal( + int compressionLevel, + unsigned long long srcSizeHint, + size_t dictSize, + ZSTD_CParamMode_e mode, + ZSTD_ConstrainWindow_e constrainWindowForProtocol); static void ZSTD_overrideCParams( ZSTD_compressionParameters* cParams, @@ -1642,12 +1712,17 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( assert(CCtxParams->srcSizeHint>=0); srcSizeHint = (U64)CCtxParams->srcSizeHint; } - cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); + cParams = ZSTD_getCParams_internal( + CCtxParams->compressionLevel, srcSizeHint, dictSize, + mode, CCtxParams->constrainWindowForProtocol); if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) 
cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); assert(!ZSTD_checkCParams(cParams)); /* srcSizeHint == 0 means 0 */ - return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder); + return ZSTD_adjustCParams_internal( + cParams, srcSizeHint, dictSize, mode, + CCtxParams->useRowMatchFinder, + CCtxParams->constrainWindowForProtocol); } static size_t @@ -1790,7 +1865,9 @@ static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN}; for (; tier < 4; ++tier) { /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal( + compressionLevel, srcSizeTiers[tier], 0, + ZSTD_cpm_noAttachDict, ZSTD_ConstrainWindow_auto); largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize); } return largestSize; @@ -1847,7 +1924,9 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal( + compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, + ZSTD_cpm_noAttachDict, ZSTD_ConstrainWindow_auto); return ZSTD_estimateCStreamSize_usingCParams(cParams); } @@ -2342,9 +2421,11 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); } - params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, - cdict->dictContentSize, ZSTD_cpm_attachDict, - 
params.useRowMatchFinder); + params.cParams = ZSTD_adjustCParams_internal( + adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict, + params.useRowMatchFinder, + params.constrainWindowForProtocol); params.cParams.windowLog = windowLog; params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, @@ -5315,7 +5396,9 @@ static size_t ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_CCtx_params cctxParams; - { ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + { ZSTD_parameters const params = ZSTD_getParams_internal( + compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, + ZSTD_cpm_noAttachDict, ZSTD_ConstrainWindow_auto); ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); } DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); @@ -5473,7 +5556,9 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, int compressionLevel) { { - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + ZSTD_parameters const params = ZSTD_getParams_internal( + compressionLevel, srcSize, dict ? dictSize : 0, + ZSTD_cpm_noAttachDict, ZSTD_ConstrainWindow_auto); assert(params.fParams.contentSizeFlag == 1); ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT: compressionLevel); } @@ -5532,7 +5617,9 @@ size_t ZSTD_estimateCDictSize_advanced( size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal( + compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, + ZSTD_cpm_createCDict, ZSTD_ConstrainWindow_auto); return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); } @@ -5703,7 +5790,9 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal( + compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, + ZSTD_cpm_createCDict, ZSTD_ConstrainWindow_auto); ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); @@ -5714,7 +5803,9 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal( + compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, + ZSTD_cpm_createCDict, ZSTD_ConstrainWindow_auto); ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); @@ -7671,7 +7762,9 @@ int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, 
size_t const dictSize) { - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal( + compressionLevel, 0, dictSize, + ZSTD_cpm_createCDict, ZSTD_ConstrainWindow_auto); switch (cParams.strategy) { case ZSTD_fast: case ZSTD_dfast: @@ -7751,7 +7844,12 @@ static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_CParamMo * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. * Use dictSize == 0 for unknown or unused. * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_CParamMode_e`. */ -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) +static ZSTD_compressionParameters ZSTD_getCParams_internal( + int compressionLevel, + unsigned long long srcSizeHint, + size_t dictSize, + ZSTD_CParamMode_e mode, + ZSTD_ConstrainWindow_e constrainWindowForProtocol) { U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); @@ -7772,7 +7870,9 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, cp.targetLength = (unsigned)(-clampedCompressionLevel); } /* refine parameters based on srcSize & dictSize */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto); + return ZSTD_adjustCParams_internal( + cp, srcSizeHint, dictSize, mode, + ZSTD_ps_auto, constrainWindowForProtocol); } } @@ -7782,18 +7882,26 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); + return 
ZSTD_getCParams_internal( + compressionLevel, srcSizeHint, dictSize, + ZSTD_cpm_unknown, ZSTD_ConstrainWindow_auto); } /*! ZSTD_getParams() : * same idea as ZSTD_getCParams() * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). * Fields of `ZSTD_frameParameters` are set to default values */ -static ZSTD_parameters -ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) +static ZSTD_parameters ZSTD_getParams_internal( + int compressionLevel, + unsigned long long srcSizeHint, + size_t dictSize, + ZSTD_CParamMode_e mode, + ZSTD_ConstrainWindow_e constrainWindowForProtocol) { ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal( + compressionLevel, srcSizeHint, dictSize, + mode, constrainWindowForProtocol); DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); ZSTD_memset(&params, 0, sizeof(params)); params.cParams = cParams; @@ -7808,7 +7916,9 @@ ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, si ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); + return ZSTD_getParams_internal( + compressionLevel, srcSizeHint, dictSize, + ZSTD_cpm_unknown, ZSTD_ConstrainWindow_auto); } void ZSTD_registerSequenceProducer( diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index ca5e2a4c5bf..8f314a138ed 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -437,6 +437,9 @@ struct ZSTD_CCtx_params_s { /* Controls repcode search in external sequence parsing */ ZSTD_ParamSwitch_e searchForExternalRepcodes; + + /* 
Constrains window to comply with application protocol requirements. */ + ZSTD_ConstrainWindow_e constrainWindowForProtocol; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/zstd.h b/lib/zstd.h index b8c0644a7ec..45562b93292 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -515,6 +515,7 @@ typedef enum { * ZSTD_c_prefetchCDictTables * ZSTD_c_enableSeqProducerFallback * ZSTD_c_maxBlockSize + * ZSTD_c_constrainWindowForProtocol * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -538,7 +539,8 @@ typedef enum { ZSTD_c_experimentalParam17=1014, ZSTD_c_experimentalParam18=1015, ZSTD_c_experimentalParam19=1016, - ZSTD_c_experimentalParam20=1017 + ZSTD_c_experimentalParam20=1017, + ZSTD_c_experimentalParam21=1018 } ZSTD_cParameter; typedef struct { @@ -2355,6 +2357,70 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo #define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19 #define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */ +/** + * Used in conjunction with the `ZSTD_c_constrainWindowForProtocol` option + * described below. + */ +typedef enum { + /* Equivalent to disabling. */ + ZSTD_ConstrainWindow_auto = ZSTD_ps_auto, + + /* No equivalent for ZSTD_ps_enable: you need to specify which protocol. */ + + /* No constraints imposed. */ + ZSTD_ConstrainWindow_disable = ZSTD_ps_disable, + + /* Constrains the window size to comply with the limits imposed on the + * `zstd` Content-Encoding, specified by RFCs 8878 and 9659. I.e., limits + * the window to <= 8MB. 
*/ + ZSTD_ConstrainWindow_HTTP_Zstd = 3, + + /* Constrains the window size to comply with the limits imposed on the + * `dcz` Content-Encoding, as specified by the Compression Dictionary + * Transport protocol. + * + * Note: as of the writing of this version of zstd, this protocol has not + * been finalized and published as an RFC. This implementation is written to + * comply with draft-ietf-httpbis-compression-dictionary-19. + * + * The likelihood that the window sizing constraints will change between this + * draft and the final document is judged to be low enough that we aren't + * bothering with an interim enum value representing this draft version, which + * would impose almost certainly needless migration burden once the RFC is + * published. However, the macro `ZSTD_CONSTRAINWINDOW_HTTP_DCZ_DRAFT_VERSION` + * is defined during the transition to help libraries validate that they have + * the behavior they expect. It will be updated and then removed once the + * draft is published as an RFC. */ + ZSTD_ConstrainWindow_HTTP_DCZ = 4 +} ZSTD_ConstrainWindow_e; + +#define ZSTD_CONSTRAINWINDOW_HTTP_DCZ_DRAFT_VERSION 19 + +/** + * ZSTD_c_constrainWindowForProtocol + * + * This option constrains the window size of compressions to comply with limits + * imposed by protocols that use Zstandard. This option clamps the selected + * window size of a compression into the range allowed by the selected protocol. + * If the window size a compression would use is already inside that range, + * this option will not change the resolved window size. (E.g., if a compression + * would use a 1 MB window, selecting `ZSTD_ConstrainWindow_HTTP_Zstd` won't + * *raise* the window size to the 8 MB limit imposed by that protocol). + * + * This is intended to be a convenience option, and save users the burden of + * implementing these constraints themselves, because doing that properly would + * not be simple. 
In normal operation with only a compression level selected by + * the user, zstd has non-trivial internal logic to select a window size. The + * user would have to reimplement that logic and then apply the protocol's + * limits. This option lets zstd continue to use its internal logic to resolve + * an appropriate window size and then applies the protocol constraint. + * + * The currently understood protocols, and the values that should be used to + * indicate them, are defined and described in the `ZSTD_ConstrainWindow_e` + * enum. + */ +#define ZSTD_c_constrainWindowForProtocol ZSTD_c_experimentalParam21 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, diff --git a/programs/fileio.c b/programs/fileio.c index 1b8aa8a9996..4f24c4c01da 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -308,6 +308,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->allowBlockDevices = 0; ret->asyncIO = AIO_supported(); ret->passThrough = -1; + ret->constrainWindowForProtocol = ZSTD_ConstrainWindow_auto; return ret; } @@ -490,6 +491,12 @@ void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_ParamSwitch_e value) prefs->mmapDict = value; } +void FIO_setConstrainWindowForProtocol( + FIO_prefs_t* const prefs, + ZSTD_ConstrainWindow_e constraint) { + prefs->constrainWindowForProtocol = constraint; +} + /* FIO_ctx_t functions */ void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) { @@ -1188,6 +1195,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) ); } CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); #endif + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_constrainWindowForProtocol, prefs->constrainWindowForProtocol) ); /* dictionary */ if (prefs->patchFromMode) { diff --git a/programs/fileio.h b/programs/fileio.h index cb53ef53781..3061aa993b2 100644 --- a/programs/fileio.h +++ 
b/programs/fileio.h @@ -91,6 +91,9 @@ void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode); void FIO_setLiteralCompressionMode( FIO_prefs_t* const prefs, ZSTD_ParamSwitch_e mode); +void FIO_setConstrainWindowForProtocol( + FIO_prefs_t* const prefs, + ZSTD_ConstrainWindow_e constraint); void FIO_setProgressSetting(FIO_progressSetting_e progressSetting); void FIO_setNotificationLevel(int level); diff --git a/programs/fileio_types.h b/programs/fileio_types.h index 23bda4168d8..901594f944e 100644 --- a/programs/fileio_types.h +++ b/programs/fileio_types.h @@ -54,6 +54,7 @@ typedef struct FIO_prefs_s { int srcSizeHint; int testMode; ZSTD_ParamSwitch_e literalCompressionMode; + ZSTD_ConstrainWindow_e constrainWindowForProtocol; /* IO preferences */ int removeSrcFile; diff --git a/programs/zstd.1.md b/programs/zstd.1.md index b4e848640fd..e2e929c1d09 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -232,6 +232,11 @@ the last one takes effect. This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller. Enabling this feature can decrease compression speed by up to ~10% at level 1. Higher levels will see smaller relative speed regression, becoming invisible at higher settings. +* `--constrain-window={none,http-zstd,http-dcz}`: + Constrains the window size to the limits set by the indicated protocol. + `none` (the default) doesn't set any constraints. + `http-zstd` refers to the `zstd` HTTP Content-Encoding specified by RFCs 8878 and 9659. + `http-dcz` refers to the `dcz` HTTP Content-Encoding specified by the Compression Dictionary Transport draft. * `-f`, `--force`: disable input and output checks. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc. 
diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 7d00a94b2fc..2da60de4280 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -244,7 +244,9 @@ static void usageAdvanced(const char* programName) DISPLAYOUT(" --stream-size=# Specify size of streaming input from STDIN.\n"); DISPLAYOUT(" --size-hint=# Optimize compression parameters for streaming input of approximately size #.\n"); DISPLAYOUT(" --target-compressed-block-size=#\n"); - DISPLAYOUT(" Generate compressed blocks of approximately # size.\n\n"); + DISPLAYOUT(" Generate compressed blocks of approximately # size.\n"); + DISPLAYOUT(" --constrain-window={none,http-zstd,http-dcz}\n"); + DISPLAYOUT(" Constrain window size to comply with limits set by protocol. [Default: none]\n\n"); DISPLAYOUT(" --no-dictID Don't write `dictID` into the header (dictionary compression only).\n"); DISPLAYOUT(" --[no-]compress-literals Force (un)compressed literals.\n"); DISPLAYOUT(" --[no-]row-match-finder Explicitly enable/disable the fast, row-based matchfinder for\n"); @@ -851,6 +853,7 @@ int main(int argCount, const char* argv[]) removeSrcFile=0; ZSTD_ParamSwitch_e mmapDict=ZSTD_ps_auto; ZSTD_ParamSwitch_e useRowMatchFinder = ZSTD_ps_auto; + ZSTD_ConstrainWindow_e constrainWindowForProtocol = ZSTD_ConstrainWindow_auto; FIO_compressionType_t cType = FIO_zstdCompression; int nbWorkers = -1; /* -1 means unset */ double compressibility = -1.0; /* lorem ipsum generator */ @@ -1135,6 +1138,23 @@ int main(int argCount, const char* argv[]) continue; } + + if (longCommandWArg(&argument, "--constrain-window")) { + const char* protocol; + NEXT_FIELD(protocol); + if (!strncmp(protocol, "none", strlen("none") + 1)) { + constrainWindowForProtocol = ZSTD_ConstrainWindow_disable; + } else if (!strncmp(protocol, "http-zstd", strlen("http-zstd") + 1)) { + constrainWindowForProtocol = ZSTD_ConstrainWindow_HTTP_Zstd; + } else if (!strncmp(protocol, "http-dcz", strlen("http-dcz") + 1)) { + constrainWindowForProtocol = 
ZSTD_ConstrainWindow_HTTP_DCZ; + } else { + badUsage(programName, originalArgument); + CLEAN_RETURN(1); + } + continue; + } + badUsage(programName, originalArgument); CLEAN_RETURN(1); } @@ -1573,6 +1593,7 @@ int main(int argCount, const char* argv[]) FIO_setSrcSizeHint(prefs, srcSizeHint); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); FIO_setSparseWrite(prefs, 0); + FIO_setConstrainWindowForProtocol(prefs, constrainWindowForProtocol); if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMax < cLevel) cLevel = adaptMax; @@ -1603,6 +1624,7 @@ int main(int argCount, const char* argv[]) (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; (void)cType; + (void)constrainWindowForProtocol; DISPLAYLEVEL(1, "Compression not supported \n"); #endif } else { /* decompression or test */ diff --git a/tests/cli-tests/compression/window-constraint.sh b/tests/cli-tests/compression/window-constraint.sh new file mode 100755 index 00000000000..c8b8e4229a1 --- /dev/null +++ b/tests/cli-tests/compression/window-constraint.sh @@ -0,0 +1,14 @@ +#!/bin/sh +datagen -g256M > file + +zstd --long=30 --single-thread --constrain-window=http-zstd -f < file > file.zst +zstd -l -v file.zst + +zstd --long=30 --single-thread --constrain-window=http-dcz -f < file > file.zst +zstd -l -v file.zst + +cp file dict +zstd --long=30 --single-thread --constrain-window=http-dcz --patch-from dict -f file +zstd -l -v file.zst + +rm dict file file.zst diff --git a/tests/cli-tests/compression/window-constraint.sh.stdout.glob b/tests/cli-tests/compression/window-constraint.sh.stdout.glob new file mode 100644 index 00000000000..ffafa85dc5f --- /dev/null +++ b/tests/cli-tests/compression/window-constraint.sh.stdout.glob @@ -0,0 +1,7 @@ +... +Window Size: 8.00 MiB (8388608 B) +... +Window Size: 8.00 MiB (8388608 B) +... +Window Size: 128 MiB (134217728 B) +... 
diff --git a/tests/fuzzer.c b/tests/fuzzer.c index b457c21710b..e5dfd0c5135 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -139,7 +139,11 @@ static U32 FUZ_highbit32(U32 v32) } \ } #define CHECK_EQ(lhs, rhs) CHECK_OP(==, lhs, rhs) +#define CHECK_NE(lhs, rhs) CHECK_OP(!=, lhs, rhs) #define CHECK_LT(lhs, rhs) CHECK_OP(<, lhs, rhs) +#define CHECK_GT(lhs, rhs) CHECK_OP(>, lhs, rhs) +#define CHECK_LE(lhs, rhs) CHECK_OP(<=, lhs, rhs) +#define CHECK_GE(lhs, rhs) CHECK_OP(>=, lhs, rhs) /*============================================= @@ -3377,6 +3381,128 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : check window size when applying protocol constraints : \n", testNb++); + { + const size_t dictSizes[] = { 0, 128, 128 KB, 4 MB, 13421772, 13421773, 107374182, 107374183, 150 MB}; + const size_t nbDictSizes = sizeof(dictSizes) / sizeof(dictSizes[0]); + size_t dictSizeIdx; + const size_t bigDictSize = dictSizes[nbDictSizes - 1]; + char* bigDictBuffer = malloc(bigDictSize); + const ZSTD_ConstrainWindow_e constraints[] = { + ZSTD_ConstrainWindow_disable, + ZSTD_ConstrainWindow_HTTP_Zstd, + ZSTD_ConstrainWindow_HTTP_DCZ + }; + const char* constraintNames[] = { + "ZSTD_ConstrainWindow_disable", + "ZSTD_ConstrainWindow_HTTP_Zstd", + "ZSTD_ConstrainWindow_HTTP_DCZ" + }; + const size_t nbConstraints = sizeof(constraints) / sizeof(constraints[0]); + size_t constraintIdx; + const unsigned long long inputSizes[] = { 0, 1ull << 10, 1ull << 23, 1ull << 27, 1ull << 31, 1ull << 63, ZSTD_CONTENTSIZE_UNKNOWN }; + const size_t nbInputSizes = sizeof(inputSizes) / sizeof(inputSizes[0]); + size_t inputSizeIdx; + const int windowLogs[] = { 0, 15, 25, ZSTD_WINDOWLOG_MAX }; + const size_t nbWindowLogs = sizeof(windowLogs) / sizeof(windowLogs[0]); + size_t windowLogIdx; + + CHECK_NE(bigDictBuffer, NULL); + memset(bigDictBuffer, 0, bigDictSize); + + for (constraintIdx = 0; constraintIdx < nbConstraints; constraintIdx++) { 
+ const ZSTD_ConstrainWindow_e constraint = constraints[constraintIdx]; + for (inputSizeIdx = 0; inputSizeIdx < nbInputSizes; inputSizeIdx++) { + const unsigned long long inputSize = inputSizes[inputSizeIdx]; + for (dictSizeIdx = 0; dictSizeIdx < nbDictSizes; dictSizeIdx++) { + dictSize = dictSizes[dictSizeIdx]; + if (constraint != ZSTD_ConstrainWindow_HTTP_DCZ && (dictSize != 0 && dictSize != dictSizes[nbDictSizes - 1])) { + continue; + } + for (windowLogIdx = 0; windowLogIdx < nbWindowLogs; windowLogIdx++) { + const int windowLog = windowLogs[windowLogIdx]; + ZSTD_inBuffer input = {CNBuffer, CNBuffSize, 0}; + ZSTD_outBuffer compressed = {compressedBuffer, compressedBufferSize, 0}; + ZSTD_FrameHeader zfh; + unsigned long long maxWindowSize; + + DISPLAYLEVEL(5, + "Checking constraint = %-30s " + "with input size = %20llu and dict size = %9zu " + "and windowLog = %2d: ", + constraintNames[constraintIdx], + inputSize, dictSize, windowLog); + + if (input.size > 200 KB) { + input.size = 200 KB; + } + if (input.size > inputSize) { + input.size = (size_t)inputSize; + } + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_constrainWindowForProtocol, constraint)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, windowLog)); + CHECK_Z(ZSTD_CCtx_setPledgedSrcSize(cctx, inputSize)); + if (dictSize != 0) { + CHECK_Z(ZSTD_CCtx_loadDictionary_advanced(cctx, bigDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent)); + } + CHECK_Z(ZSTD_compressStream2(cctx, &compressed, &input, inputSize == 0 ? 
ZSTD_e_end : ZSTD_e_flush)); + + CHECK_GT(compressed.pos, 0); + + CHECK_Z(ZSTD_getFrameHeader(&zfh, compressed.dst, compressed.pos)); + + DISPLAYLEVEL(5, + "got window size = %10llu, Frame size = %20llu, ", + zfh.windowSize, zfh.frameContentSize); + + CHECK_EQ(zfh.frameContentSize, inputSize); + + switch (constraint) { + case ZSTD_ConstrainWindow_auto: + case ZSTD_ConstrainWindow_disable: + maxWindowSize = 1ull << ZSTD_WINDOWLOG_MAX; + break; + case ZSTD_ConstrainWindow_HTTP_Zstd: + maxWindowSize = 8 MB; + break; + case ZSTD_ConstrainWindow_HTTP_DCZ: + maxWindowSize = dictSize + (dictSize >> 2); + maxWindowSize = maxWindowSize < 8 MB ? 8 MB : maxWindowSize > 128 MB ? 128 MB : maxWindowSize; + maxWindowSize = 1ull << ZSTD_highbit32((U32)maxWindowSize); + break; + default: + CHECK(0); + } + + if (windowLog != 0 && maxWindowSize > (1ull << windowLog)) { + maxWindowSize = 1ull << windowLog; + } + + if (maxWindowSize > inputSize) { + maxWindowSize = inputSize; + } + + DISPLAYLEVEL(5, + "expected window size = %20llu\n", + maxWindowSize); + + if (windowLog != 0) { + CHECK_EQ(zfh.windowSize, maxWindowSize); + } else { + CHECK_LE(zfh.windowSize, maxWindowSize); + } + } + } + } + } + + free(bigDictBuffer); + } + DISPLAYLEVEL(3, "OK \n"); + ZSTD_freeCCtx(cctx); free(dictBuffer); free(samplesSizes);