Skip to content

Commit

Permalink
Merge pull request #4176 from facebook/sample11
Browse files Browse the repository at this point in the history
Added faster block splitter variants for levels 3-7
  • Loading branch information
Cyan4973 authored Oct 25, 2024
2 parents 2dddf09 + c63b5d2 commit 7fb5347
Show file tree
Hide file tree
Showing 4 changed files with 545 additions and 529 deletions.
8 changes: 6 additions & 2 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -4507,9 +4507,13 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
/* dynamic splitting has a cpu cost for analysis,
* due to that cost it's only used for higher levels */
if (strat >= ZSTD_btopt)
return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_lazy2)
return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_greedy)
return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_dfast)
return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
/* blind split strategy
* heuristic value, tested as being "generally better".
* no cpu cost, but can over-split homegeneous data.
Expand Down
99 changes: 56 additions & 43 deletions lib/compress/zstd_preSplit.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,20 @@
#define THRESHOLD_PENALTY 3

#define HASHLENGTH 2
#define HASHLOG 10
#define HASHTABLESIZE (1 << HASHLOG)
#define HASHLOG_MAX 10
#define HASHTABLESIZE (1 << HASHLOG_MAX)
#define HASHMASK (HASHTABLESIZE - 1)
#define KNUTH 0x9e3779b9

static unsigned hash2(const void *p)
/* for hashLog > 8, hash 2 bytes.
* for hashLog == 8, just take the byte, no hashing.
* The speed of this method relies on compile-time constant propagation */
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
{
return (U32)(MEM_read16(p)) * KNUTH >> (32 - HASHLOG);
assert(hashLog >= 8);
if (hashLog == 8) return (U32)((const BYTE*)p)[0];
assert(hashLog <= HASHLOG_MAX);
return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
}


Expand All @@ -46,45 +52,51 @@ static void initStats(FPStats* fpstats)
ZSTD_memset(fpstats, 0, sizeof(FPStats));
}

FORCE_INLINE_TEMPLATE void addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate)
FORCE_INLINE_TEMPLATE void
addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
const char* p = (const char*)src;
size_t limit = srcSize - HASHLENGTH + 1;
size_t n;
assert(srcSize >= HASHLENGTH);
for (n = 0; n < limit; n+=samplingRate) {
fp->events[hash2(p+n)]++;
fp->events[hash2(p+n, hashLog)]++;
}
fp->nbEvents += limit/samplingRate;
}

#define ADDEVENTS_RATE(_rate) ZSTD_addEvents_##_rate
FORCE_INLINE_TEMPLATE void
recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
fp->nbEvents = 0;
addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
}

typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);

#define ZSTD_GEN_ADDEVENTS_SAMPLE(_rate) \
static void ADDEVENTS_RATE(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate

#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
{ \
addEvents_generic(fp, src, srcSize, _rate); \
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
}

ZSTD_GEN_ADDEVENTS_SAMPLE(1)
ZSTD_GEN_ADDEVENTS_SAMPLE(5)

ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)

typedef void (*addEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);

static void recordFingerprint(Fingerprint* fp, const void* src, size_t s, addEvents_f addEvents)
{
ZSTD_memset(fp, 0, sizeof(*fp));
addEvents(fp, src, s);
}

static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }

static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2)
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
{
U64 distance = 0;
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
assert(hashLog <= HASHLOG_MAX);
for (n = 0; n < ((size_t)1 << hashLog); n++) {
distance +=
abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
}
Expand All @@ -96,12 +108,13 @@ static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2)
*/
static int compareFingerprints(const Fingerprint* ref,
const Fingerprint* newfp,
int penalty)
int penalty,
unsigned hashLog)
{
assert(ref->nbEvents > 0);
assert(newfp->nbEvents > 0);
{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
U64 deviation = fpDistance(ref, newfp);
U64 deviation = fpDistance(ref, newfp, hashLog);
U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
return deviation >= threshold;
}
Expand Down Expand Up @@ -137,45 +150,45 @@ static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
}

#define CHUNKSIZE (8 << 10)
/* Note: technically, we use CHUNKSIZE, so that's 8 KB */
static size_t ZSTD_splitBlock_byChunks(const void* src, size_t srcSize,
size_t blockSizeMax, addEvents_f f,
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
static const RecordEvents_f records_fs[] = {
FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
};
static const unsigned hashParams[] = { 8, 9, 10, 10 };
const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
FPStats* const fpstats = (FPStats*)workspace;
const char* p = (const char*)src;
const char* p = (const char*)blockStart;
int penalty = THRESHOLD_PENALTY;
size_t pos = 0;
if (srcSize <= blockSizeMax) return srcSize;
assert(blockSizeMax == (128 << 10));
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;

initStats(fpstats);
recordFingerprint(&fpstats->pastEvents, p, CHUNKSIZE, f);
for (pos = CHUNKSIZE; pos <= blockSizeMax - CHUNKSIZE; pos += CHUNKSIZE) {
recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE, f);
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty)) {
record_f(&fpstats->pastEvents, p, CHUNKSIZE);
for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
return pos;
} else {
mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
if (penalty > 0) penalty--;
}
}
assert(pos == blockSizeMax);
return blockSizeMax;
assert(pos == blockSize);
return blockSize;
(void)flushEvents; (void)removeEvents;
}

size_t ZSTD_splitBlock(const void* src, size_t srcSize,
size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
if (splitStrat == split_lvl2)
return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(1), workspace, wkspSize);

assert(splitStrat == split_lvl1);
return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(5), workspace, wkspSize);
assert(0<=level && level<=3);
return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize);
}
9 changes: 4 additions & 5 deletions lib/compress/zstd_preSplit.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,19 @@
extern "C" {
#endif

typedef enum { split_lvl1, split_lvl2 } ZSTD_SplitBlock_strategy_e;

#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208

/* note:
/* @level must be a value between 0 and 3.
* higher levels spend more energy to find block boundaries
* @workspace must be aligned on 8-bytes boundaries
* @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
* note2:
* for the time being, this function only accepts full 128 KB blocks,
* therefore @blockSizeMax must be == 128 KB.
* This could be extended to smaller sizes in the future.
*/
size_t ZSTD_splitBlock(const void* src, size_t srcSize,
size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize);

#if defined (__cplusplus)
Expand Down
Loading

0 comments on commit 7fb5347

Please sign in to comment.