diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 9e2fb319332..f51ea0ebfbb 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -723,36 +723,36 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     case ZSTD_c_windowLog :
         if (value!=0)   /* 0 => use default */
             BOUNDCHECK(ZSTD_c_windowLog, value);
-        CCtxParams->cParams.windowLog = (U32)value;
+        CCtxParams->cParams.windowLog = (unsigned)value;
         return CCtxParams->cParams.windowLog;
 
     case ZSTD_c_hashLog :
         if (value!=0)   /* 0 => use default */
             BOUNDCHECK(ZSTD_c_hashLog, value);
-        CCtxParams->cParams.hashLog = (U32)value;
+        CCtxParams->cParams.hashLog = (unsigned)value;
         return CCtxParams->cParams.hashLog;
 
     case ZSTD_c_chainLog :
         if (value!=0)   /* 0 => use default */
             BOUNDCHECK(ZSTD_c_chainLog, value);
-        CCtxParams->cParams.chainLog = (U32)value;
+        CCtxParams->cParams.chainLog = (unsigned)value;
         return CCtxParams->cParams.chainLog;
 
     case ZSTD_c_searchLog :
         if (value!=0)   /* 0 => use default */
             BOUNDCHECK(ZSTD_c_searchLog, value);
-        CCtxParams->cParams.searchLog = (U32)value;
+        CCtxParams->cParams.searchLog = (unsigned)value;
         return (size_t)value;
 
     case ZSTD_c_minMatch :
         if (value!=0)   /* 0 => use default */
             BOUNDCHECK(ZSTD_c_minMatch, value);
-        CCtxParams->cParams.minMatch = value;
+        CCtxParams->cParams.minMatch = (unsigned)value;
         return CCtxParams->cParams.minMatch;
 
     case ZSTD_c_targetLength :
         BOUNDCHECK(ZSTD_c_targetLength, value);
-        CCtxParams->cParams.targetLength = value;
+        CCtxParams->cParams.targetLength = (unsigned)value;
         return CCtxParams->cParams.targetLength;
 
     case ZSTD_c_strategy :
@@ -765,12 +765,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         /* Content size written in frame header _when known_ (default:1) */
         DEBUGLOG(4, "set content size flag = %u", (value!=0));
         CCtxParams->fParams.contentSizeFlag = value != 0;
-        return CCtxParams->fParams.contentSizeFlag;
+        return (size_t)CCtxParams->fParams.contentSizeFlag;
 
     case ZSTD_c_checksumFlag :
         /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
         CCtxParams->fParams.checksumFlag = value != 0;
-        return CCtxParams->fParams.checksumFlag;
+        return (size_t)CCtxParams->fParams.checksumFlag;
 
     case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
         DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
@@ -779,11 +779,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
 
     case ZSTD_c_forceMaxWindow :
         CCtxParams->forceWindow = (value != 0);
-        return CCtxParams->forceWindow;
+        return (size_t)CCtxParams->forceWindow;
 
     case ZSTD_c_forceAttachDict : {
         const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
-        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
+        BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
         CCtxParams->attachDictPref = pref;
         return CCtxParams->attachDictPref;
     }
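
Annotation (usage sketch, not part of the patch): the explicit casts above matter because whatever value ZSTD_CCtxParams_setParameter() applies is echoed back to the caller through a size_t return, which the public API surfaces as "error code or informational value". A minimal sketch using only existing public identifiers:

    #include <zstd.h>

    /* sets the checksum flag, checking the size_t return the zstd way */
    int set_checksum_flag(ZSTD_CCtx* cctx)
    {
        size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
        return ZSTD_isError(r) ? -1 : 0;
    }
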
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 03b5e33b0aa..0e32382b671 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -676,7 +676,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
             return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
 #     endif
 #   elif defined(__GNUC__) && (__GNUC__ >= 4)
-        return (__builtin_ctzll((U64)val) >> 3);
+        return (unsigned)(__builtin_ctzll((U64)val) >> 3);
 #   else
         static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                  0, 3, 1, 3, 1, 4, 2, 7,
@@ -693,7 +693,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
             unsigned long r=0;
             return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)
-        return (__builtin_ctz((U32)val) >> 3);
+        return (unsigned)(__builtin_ctz((U32)val) >> 3);
 #   else
         static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                  3, 2, 2, 1, 3, 2, 0, 1,
@@ -712,7 +712,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
             return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
 #     endif
 #   elif defined(__GNUC__) && (__GNUC__ >= 4)
-        return (__builtin_clzll(val) >> 3);
+        return (unsigned)(__builtin_clzll(val) >> 3);
 #   else
         unsigned r;
         const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
@@ -726,7 +726,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
             unsigned long r = 0;
             return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)
-        return (__builtin_clz((U32)val) >> 3);
+        return (unsigned)(__builtin_clz((U32)val) >> 3);
 #   else
         unsigned r;
         if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c
index 52b0a8059ab..438aee80bb8 100644
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -93,7 +93,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
 
     /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
     {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
@@ -136,7 +135,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     switch(lhSize)
     {
     case 3: /* 2 - 2 - 10 - 10 */
-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+        {   U32 const lhc = hType + (((U32)!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
             MEM_writeLE24(ostart, lhc);
             break;
         }
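
Annotation (worked example, not part of the patch): the case 3 branch above packs the 3-byte literals section header as 2 bits of block type, 2 bits of size format (bit 2 doubles as the 4-streams flag), a 10-bit regenerated size, and a 10-bit compressed size, written little-endian by MEM_writeLE24(). With hypothetical sizes:

    #include <assert.h>

    static unsigned example_literalsHeader_2_2_10_10(void)
    {
        unsigned const hType        = 2;      /* set_compressed */
        unsigned const singleStream = 0;      /* 0 => 4 Huffman streams => bit 2 set */
        unsigned const srcSize      = 1000;   /* regenerated literals size, fits in 10 bits */
        unsigned const cLitSize     = 300;    /* compressed literals size, fits in 10 bits */
        unsigned const lhc = hType                   /* bits  0-1 : block type */
                           + ((!singleStream) << 2)  /* bits  2-3 : size format */
                           + (srcSize  << 4)         /* bits  4-13 */
                           + (cLitSize << 14);       /* bits 14-23 */
        assert(lhc == 2 + 4 + 16000 + 4915200);      /* == 4931206, stored over 3 bytes */
        return lhc;
    }
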
diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h
index 9775fb97cb7..94e06d9e3ab 100644
--- a/lib/compress/zstd_compress_literals.h
+++ b/lib/compress/zstd_compress_literals.h
@@ -14,6 +14,11 @@
 
 #include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
 
+/* Below this limit, the heuristic does not even attempt compression of literals,
+ * as the cost of the headers is expected to outweigh the benefits.
+ * This limit is not applicable when re-using statistics from a dictionary or a previous block. */
+#define COMPRESS_LITERALS_SIZE_MIN 63
+
 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 4c765128a15..159cd82bfe6 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -113,6 +113,21 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
 }
 
+static unsigned const k_baseLLfreqs[MaxLL+1] = {
+        4, 2, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1
+    };
+
+static unsigned const k_baseOFCfreqs[MaxOff+1] = {
+        6, 2, 1, 1, 2, 3, 4, 4,
+        4, 3, 2, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1
+    };
+
 /* ZSTD_rescaleFreqs() :
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
  *      take hints from dictionary if there is one
@@ -192,20 +207,13 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             assert(optPtr->litFreq != NULL);
             if (compressedLiterals) {
-                unsigned lit = MaxLit;
-                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+                unsigned maxlit = MaxLit;
+                HIST_count_simple(optPtr->litFreq, &maxlit, src, srcSize);   /* use raw first block to init statistics */
                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
             }
 
-            {   unsigned const baseLLfreqs[MaxLL+1] = {
-                    4, 2, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1
-                };
-                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));  optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
-            }
+            ZSTD_memcpy(optPtr->litLengthFreq, k_baseLLfreqs, sizeof(k_baseLLfreqs));
+            optPtr->litLengthSum = sum_u32(k_baseLLfreqs, MaxLL+1);
 
             {   unsigned ml;
                 for (ml=0; ml<=MaxML; ml++)
@@ -213,15 +221,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             }
             optPtr->matchLengthSum = MaxML+1;
 
-            {   unsigned const baseOFCfreqs[MaxOff+1] = {
-                    6, 2, 1, 1, 2, 3, 4, 4,
-                    4, 3, 2, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1
-                };
-                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));  optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
-            }
-
+            ZSTD_memcpy(optPtr->offCodeFreq, k_baseOFCfreqs, sizeof(k_baseOFCfreqs));
+            optPtr->offCodeSum = sum_u32(k_baseOFCfreqs, MaxOff+1);
 
         }
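
Annotation (simplified model, not part of the patch): the k_baseLLfreqs / k_baseOFCfreqs seeds and the various flattening passes all pursue the same goal: keep every symbol's frequency non-zero so its price stays finite, while more frequent symbols remain cheaper. In whole bits, a symbol seen freq times out of sum costs roughly log2(sum/freq); zstd's real pricing works in fractional-bit units, but the sketch below captures the idea, reusing the internal ZSTD_highbit32() helper already visible above:

    /* approximate cost in whole bits of a symbol with frequency `freq` out of `sum` */
    static unsigned approxSymbolCostInBits(unsigned freq, unsigned sum)
    {
        assert(1 <= freq && freq <= sum);   /* flattening guarantees freq >= 1 */
        return ZSTD_highbit32(sum) - ZSTD_highbit32(freq);
    }
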
@@ -1056,8 +1057,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     opt[pos].price = (int)sequencePrice;
             }   }
             last_pos = pos-1;
-        }
-    }
+    }   }
 
     /* check further positions */
     for (cur = 1; cur <= last_pos; cur++) {
@@ -1084,8 +1084,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                                     inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
                                     opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
-            }
-        }
+        }   }
 
         /* Set the repcodes of the current position. We must do it here
          * because we rely on the repcodes of the 2nd to last sequence being
@@ -1252,15 +1251,76 @@ size_t ZSTD_compressBlock_btopt(
 }
 
 
+#include "zstd_compress_literals.h"   /* COMPRESS_LITERALS_SIZE_MIN */
+
+static void transfer_literalStats(optState_t* opt, const seqStore_t* seqStore)
+{
+    size_t const nbLiterals = (size_t)(seqStore->lit - seqStore->litStart);
+    assert(seqStore->lit >= seqStore->litStart);
+
+    if (nbLiterals < COMPRESS_LITERALS_SIZE_MIN) {
+        /* literals won't be compressed, so give them an all-flat cost */
+        /* note : it would be better if it was also possible to extend this category
+         * to non-compressible literals, even when they are more numerous than the threshold.
+         * However, this requires additional cpu and memory workspace. */
+        unsigned u;
+        for (u=0; u<=MaxLit; u++) opt->litFreq[u] = 2;
+    } else {
+        unsigned maxlit = MaxLit;
+        HIST_count_simple(opt->litFreq, &maxlit, seqStore->litStart, nbLiterals);
+    }
+    opt->litSum = ZSTD_downscaleStats(opt->litFreq, MaxLit, 0);   /* flatten stats, by providing at least 1 to every symbol */
+}
+
+
+typedef enum { fix_greedy_bias, from_btultra } biasfix_e;
+static void transfer_seqStats(optState_t* opt, const seqStore_t* seqStore, biasfix_e biasfix)
+{
+    U32 const nbSeq = (U32)(seqStore->sequences - seqStore->sequencesStart);
+    assert(seqStore->sequences >= seqStore->sequencesStart);
+    ZSTD_seqToCodes(seqStore);
 
-/* ZSTD_initStats_ultra():
+    {   const BYTE* codePtr = seqStore->ofCode;
+        U32 u;
+        memset(opt->offCodeFreq, 0, sizeof(U32) * (MaxOff+1));
+        ZSTD_STATIC_ASSERT(MaxOff >= 17);
+        for (u=0; u<17; u++) opt->offCodeFreq[u] = 1;   /* flatten stats; some offcodes may not be produced by greedy but still be present */
+        for (u=0; u<nbSeq; u++) opt->offCodeFreq[codePtr[u]]++;
+        if (biasfix == fix_greedy_bias) {
+            assert(opt->offCodeFreq[1] == 1);   /* greedy can't find rep1/rep2 */
+            opt->offCodeFreq[1] = (opt->offCodeFreq[0] / 3) + 1;   /* bias correction */
+        }
+        opt->offCodeSum = sum_u32(opt->offCodeFreq, 18);
+    }
+
+    {   const BYTE* codePtr = seqStore->mlCode;
+        U32 u;
+        for (u=0; u<=MaxML; u++) opt->matchLengthFreq[u] = 1;   /* flatten stats; some match lengths not produced by greedy might end up present */
+        for (u=0; u<nbSeq; u++) opt->matchLengthFreq[codePtr[u]]++;
+        if (biasfix == fix_greedy_bias) {
+            assert(opt->matchLengthFreq[0] == 1);   /* greedy can't find mml=3 */
+            opt->matchLengthFreq[0] = opt->matchLengthFreq[1] + 1;   /* bias correction */
+        }
+        opt->matchLengthSum = sum_u32(opt->matchLengthFreq, MaxML+1);
+    }
+
+    {   const BYTE* codePtr = seqStore->llCode;
+        U32 u;
+        ZSTD_memcpy(opt->litLengthFreq, k_baseLLfreqs, sizeof(k_baseLLfreqs));
+        for (u=0; u<nbSeq; u++) opt->litLengthFreq[codePtr[u]]++;
+        opt->litLengthSum = sum_u32(opt->litLengthFreq, MaxLL+1);
+    }
+}
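
Annotation (reasoning sketch, not part of the patch, and specific to this point in zstd's history): the offCodeFreq[1] bias fix works because offset codes are derived as ZSTD_highbit32() of the stored offset value, where repcodes are stored as 1..3 and real offsets as offset+3:

    /*  rep0 stored as 1      -> ZSTD_highbit32(1) == 0
     *  rep1 stored as 2      -> ZSTD_highbit32(2) == 1
     *  rep2 stored as 3      -> ZSTD_highbit32(3) == 1
     *  real offset off >= 1  -> stored as off+3 (>= 4), so code >= 2
     * The greedy parser only ever tries rep0, so slot 1 keeps its flat value
     * of 1, and fix_greedy_bias re-seeds it from the rep0 count instead. */
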
+
+#include "zstd_lazy.h"
+/* ZSTD_initStats_greedy():
  * make a first compression pass, just to seed stats with more accurate starting values.
  * only works on first block, with no dictionary and no ldm.
  * this function cannot error, hence its contract must be respected.
  */
 static void
-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+ZSTD_initStats_greedy(ZSTD_matchState_t* ms,
                      seqStore_t* seqStore,
                      U32 rep[ZSTD_REP_NUM],
                      const void* src, size_t srcSize)
@@ -1268,13 +1328,22 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
     U32 tmpRep[ZSTD_REP_NUM];   /* updated rep codes will sink here */
     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
 
-    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    DEBUGLOG(4, "ZSTD_initStats_greedy (srcSize=%zu)", srcSize);
     assert(ms->opt.litLengthSum == 0);    /* first block */
     assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);   /* no prefix (note: intentional overflow, defined as 2-complement) */
 
-    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt */
+    {   size_t const lastLits = ZSTD_compressBlock_greedy(ms, seqStore, tmpRep, src, srcSize);   /* generate stats into seqStore */
+        /* add last lits into the literals buffer for proper accounting */
+        assert(lastLits <= srcSize);
+        ZSTD_memcpy(seqStore->lit, (const char*)src + srcSize - lastLits, lastLits);
+        seqStore->lit += lastLits;
+    }
+
+    /* transfer stats from seqStore into ms->opt */
+    transfer_literalStats(&ms->opt, seqStore);
+    transfer_seqStats(&ms->opt, seqStore, fix_greedy_bias);
 
     /* invalidate first scan from history */
     ZSTD_resetSeqStore(seqStore);
@@ -1289,10 +1358,80 @@ size_t ZSTD_compressBlock_btultra(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
+    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+
+    /* 2-pass strategy (when possible):
+     * this strategy makes a first pass over the first block to collect statistics,
+     * in order to seed the next round's statistics with them.
+     * After the 1st pass, the function forgets everything, and starts a new block.
+     * Consequently, this can only work if no data has been previously loaded in tables,
+     * aka, no dictionary, no prefix, no ldm preprocessing.
+     * The compression ratio gain is generally small (~0.4% on first block),
+     * the cost is +5% cpu time on first block. */
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    if ( (ms->opt.litLengthSum==0)                          /* first block */
+      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
+      && (ms->window.dictLimit == ms->window.lowLimit)      /* no dictionary */
+      && (curr == ms->window.dictLimit)                     /* start of frame, nothing already loaded nor skipped */
+      && (srcSize > ZSTD_PREDEF_THRESHOLD)
+      ) {
+        ZSTD_initStats_greedy(ms, seqStore, rep, src, srcSize);
+    }
+
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
 }
 
 
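Annotation (usage sketch, not part of the patch): the 2-pass seeding above can be exercised from the public API by selecting the strategy explicitly; ZSTD_c_strategy, ZSTD_btultra and ZSTD_compress2() are existing public identifiers, and the first-block conditions listed in the code still apply:

    #include <zstd.h>

    size_t compress_with_btultra(void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t result;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btultra);
        result = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return result;   /* compressed size, or an error code (test with ZSTD_isError) */
    }
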
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ */
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                     seqStore_t* seqStore,
+                     U32 rep[ZSTD_REP_NUM],
+                     const void* src, size_t srcSize)
+{
+    size_t lastLits;
+    U32 tmpRep[ZSTD_REP_NUM];   /* updated rep codes will sink here */
+    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
+
+    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    assert(ms->opt.litLengthSum == 0);    /* first block */
+    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
+    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
+    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);   /* no prefix (note: intentional overflow, defined as 2-complement) */
+
+    if (srcSize < 8 KB) {
+        /* use raw btultra, initialized with default starting stats;
+         * generally preferable for small blocks */
+        lastLits = ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt */
+    } else {
+        /* in this mode, btultra is itself initialized with the greedy strategy;
+         * it's generally better for larger block sizes */
+        lastLits = ZSTD_compressBlock_btultra(ms, seqStore, tmpRep, src, srcSize);   /* generate stats into ms->opt */
+    }
+
+    /* transfer stats from seqStore into ms->opt */
+    assert(lastLits <= srcSize);
+    ZSTD_memcpy(seqStore->lit, (const char*)src + srcSize - lastLits, lastLits);
+    seqStore->lit += lastLits;
+    transfer_literalStats(&ms->opt, seqStore);
+    transfer_seqStats(&ms->opt, seqStore, from_btultra);
+
+    /* invalidate first scan from history */
+    ZSTD_resetSeqStore(seqStore);
+    ms->window.base -= srcSize;
+    ms->window.dictLimit += (U32)srcSize;
+    ms->window.lowLimit = ms->window.dictLimit;
+    ms->nextToUpdate = ms->window.dictLimit;
+}
+
 size_t ZSTD_compressBlock_btultra2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index 627255f53de..2c68de867e5 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -20,6 +20,8 @@ extern "C" {
 /* used in ZSTD_loadDictionaryContent() */
 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
 
+/* All parsers @return the size of the "last literals" segment */
+
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 9edc77fe800..821f71f9570 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -627,7 +627,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
     if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
     {   size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
         if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
-
     }
     cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
     if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
@@ -637,7 +636,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
 
     /* literals stats */
     {   const BYTE* bytePtr;
-        for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
+        for (bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
             countLit[*bytePtr]++;
     }
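
Annotation (sketch with the patch's own identifiers, assuming a live matchState and seqStore as inside the block compressor): the one-line comment added to zstd_opt.h documents the contract the seeding code relies on twice above — every block parser returns the number of trailing input bytes it did not turn into sequences, and the caller must append them to the literals buffer:

    {   size_t const lastLLSize = ZSTD_compressBlock_btopt(ms, seqStore, rep, src, srcSize);
        assert(lastLLSize <= srcSize);
        /* the last lastLLSize bytes produced no sequence: append them as literals */
        ZSTD_memcpy(seqStore->lit, (const BYTE*)src + srcSize - lastLLSize, lastLLSize);
        seqStore->lit += lastLLSize;
    }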