Skip to content

Commit 7fb5347

Browse files
authored
Merge pull request #4176 from facebook/sample11
Added faster block splitter variants for levels 3-7
2 parents 2dddf09 + c63b5d2 commit 7fb5347

File tree

4 files changed

+545
-529
lines changed

4 files changed

+545
-529
lines changed

lib/compress/zstd_compress.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4507,9 +4507,13 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
45074507
/* dynamic splitting has a cpu cost for analysis,
45084508
* due to that cost it's only used for higher levels */
45094509
if (strat >= ZSTD_btopt)
4510-
return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
4510+
return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
45114511
if (strat >= ZSTD_lazy2)
4512-
return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
4512+
return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
4513+
if (strat >= ZSTD_greedy)
4514+
return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
4515+
if (strat >= ZSTD_dfast)
4516+
return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
45134517
/* blind split strategy
45144518
* heuristic value, tested as being "generally better".
45154519
* no cpu cost, but can over-split homogeneous data.

lib/compress/zstd_preSplit.c

Lines changed: 56 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,20 @@
2121
#define THRESHOLD_PENALTY 3
2222

2323
#define HASHLENGTH 2
24-
#define HASHLOG 10
25-
#define HASHTABLESIZE (1 << HASHLOG)
24+
#define HASHLOG_MAX 10
25+
#define HASHTABLESIZE (1 << HASHLOG_MAX)
2626
#define HASHMASK (HASHTABLESIZE - 1)
2727
#define KNUTH 0x9e3779b9
2828

29-
static unsigned hash2(const void *p)
29+
/* for hashLog > 8, hash 2 bytes.
30+
* for hashLog == 8, just take the byte, no hashing.
31+
* The speed of this method relies on compile-time constant propagation */
32+
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
3033
{
31-
return (U32)(MEM_read16(p)) * KNUTH >> (32 - HASHLOG);
34+
assert(hashLog >= 8);
35+
if (hashLog == 8) return (U32)((const BYTE*)p)[0];
36+
assert(hashLog <= HASHLOG_MAX);
37+
return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
3238
}
3339

3440

@@ -46,45 +52,51 @@ static void initStats(FPStats* fpstats)
4652
ZSTD_memset(fpstats, 0, sizeof(FPStats));
4753
}
4854

49-
FORCE_INLINE_TEMPLATE void addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate)
55+
FORCE_INLINE_TEMPLATE void
56+
addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
5057
{
5158
const char* p = (const char*)src;
5259
size_t limit = srcSize - HASHLENGTH + 1;
5360
size_t n;
5461
assert(srcSize >= HASHLENGTH);
5562
for (n = 0; n < limit; n+=samplingRate) {
56-
fp->events[hash2(p+n)]++;
63+
fp->events[hash2(p+n, hashLog)]++;
5764
}
5865
fp->nbEvents += limit/samplingRate;
5966
}
6067

61-
#define ADDEVENTS_RATE(_rate) ZSTD_addEvents_##_rate
68+
FORCE_INLINE_TEMPLATE void
69+
recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
70+
{
71+
ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
72+
fp->nbEvents = 0;
73+
addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
74+
}
75+
76+
typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);
6277

63-
#define ZSTD_GEN_ADDEVENTS_SAMPLE(_rate) \
64-
static void ADDEVENTS_RATE(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
78+
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
79+
80+
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
81+
static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
6582
{ \
66-
addEvents_generic(fp, src, srcSize, _rate); \
83+
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
6784
}
6885

69-
ZSTD_GEN_ADDEVENTS_SAMPLE(1)
70-
ZSTD_GEN_ADDEVENTS_SAMPLE(5)
71-
86+
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
87+
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
88+
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
89+
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
7290

73-
typedef void (*addEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);
74-
75-
static void recordFingerprint(Fingerprint* fp, const void* src, size_t s, addEvents_f addEvents)
76-
{
77-
ZSTD_memset(fp, 0, sizeof(*fp));
78-
addEvents(fp, src, s);
79-
}
8091

8192
static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
8293

83-
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2)
94+
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
8495
{
8596
U64 distance = 0;
8697
size_t n;
87-
for (n = 0; n < HASHTABLESIZE; n++) {
98+
assert(hashLog <= HASHLOG_MAX);
99+
for (n = 0; n < ((size_t)1 << hashLog); n++) {
88100
distance +=
89101
abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
90102
}
@@ -96,12 +108,13 @@ static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2)
96108
*/
97109
static int compareFingerprints(const Fingerprint* ref,
98110
const Fingerprint* newfp,
99-
int penalty)
111+
int penalty,
112+
unsigned hashLog)
100113
{
101114
assert(ref->nbEvents > 0);
102115
assert(newfp->nbEvents > 0);
103116
{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
104-
U64 deviation = fpDistance(ref, newfp);
117+
U64 deviation = fpDistance(ref, newfp, hashLog);
105118
U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
106119
return deviation >= threshold;
107120
}
@@ -137,45 +150,45 @@ static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
137150
}
138151

139152
#define CHUNKSIZE (8 << 10)
140-
/* Note: technically, we use CHUNKSIZE, so that's 8 KB */
141-
static size_t ZSTD_splitBlock_byChunks(const void* src, size_t srcSize,
142-
size_t blockSizeMax, addEvents_f f,
153+
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
154+
int level,
143155
void* workspace, size_t wkspSize)
144156
{
157+
static const RecordEvents_f records_fs[] = {
158+
FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
159+
};
160+
static const unsigned hashParams[] = { 8, 9, 10, 10 };
161+
const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
145162
FPStats* const fpstats = (FPStats*)workspace;
146-
const char* p = (const char*)src;
163+
const char* p = (const char*)blockStart;
147164
int penalty = THRESHOLD_PENALTY;
148165
size_t pos = 0;
149-
if (srcSize <= blockSizeMax) return srcSize;
150-
assert(blockSizeMax == (128 << 10));
166+
assert(blockSize == (128 << 10));
151167
assert(workspace != NULL);
152168
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
153169
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
154170
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
155171

156172
initStats(fpstats);
157-
recordFingerprint(&fpstats->pastEvents, p, CHUNKSIZE, f);
158-
for (pos = CHUNKSIZE; pos <= blockSizeMax - CHUNKSIZE; pos += CHUNKSIZE) {
159-
recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE, f);
160-
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty)) {
173+
record_f(&fpstats->pastEvents, p, CHUNKSIZE);
174+
for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
175+
record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
176+
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
161177
return pos;
162178
} else {
163179
mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
164180
if (penalty > 0) penalty--;
165181
}
166182
}
167-
assert(pos == blockSizeMax);
168-
return blockSizeMax;
183+
assert(pos == blockSize);
184+
return blockSize;
169185
(void)flushEvents; (void)removeEvents;
170186
}
171187

172-
size_t ZSTD_splitBlock(const void* src, size_t srcSize,
173-
size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
188+
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
189+
int level,
174190
void* workspace, size_t wkspSize)
175191
{
176-
if (splitStrat == split_lvl2)
177-
return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(1), workspace, wkspSize);
178-
179-
assert(splitStrat == split_lvl1);
180-
return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(5), workspace, wkspSize);
192+
assert(0<=level && level<=3);
193+
return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize);
181194
}

lib/compress/zstd_preSplit.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,19 @@
1717
extern "C" {
1818
#endif
1919

20-
typedef enum { split_lvl1, split_lvl2 } ZSTD_SplitBlock_strategy_e;
21-
2220
#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
2321

24-
/* note:
22+
/* @level must be a value between 0 and 3.
23+
* higher levels spend more energy to find block boundaries
2524
* @workspace must be aligned on 8-bytes boundaries
2625
* @wkspSize must be >= ZSTD_SLIPBLOCK_WORKSPACESIZE
2726
* note2:
2827
* for the time being, this function only accepts full 128 KB blocks,
2928
* therefore @blockSizeMax must be == 128 KB.
3029
* This could be extended to smaller sizes in the future.
3130
*/
32-
size_t ZSTD_splitBlock(const void* src, size_t srcSize,
33-
size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
31+
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
32+
int level,
3433
void* workspace, size_t wkspSize);
3534

3635
#if defined (__cplusplus)

0 commit comments

Comments
 (0)