21
21
/* Starting value of the `penalty` counter in ZSTD_splitBlock_byChunks() */
#define THRESHOLD_PENALTY 3

/* Width, in bytes, of one hashed sample */
#define HASHLENGTH 2
/* Largest hash log accepted by hash2() and fpDistance() */
#define HASHLOG_MAX 10
#define HASHTABLESIZE (1 << HASHLOG_MAX)
#define HASHMASK (HASHTABLESIZE - 1)
/* Multiplier used by hash2() */
#define KNUTH 0x9e3779b9
28
28
29
- static unsigned hash2 (const void * p )
29
+ /* for hashLog > 8, hash 2 bytes.
30
+ * for hashLog == 8, just take the byte, no hashing.
31
+ * The speed of this method relies on compile-time constant propagation */
32
+ FORCE_INLINE_TEMPLATE unsigned hash2 (const void * p , unsigned hashLog )
30
33
{
31
- return (U32 )(MEM_read16 (p )) * KNUTH >> (32 - HASHLOG );
34
+ assert (hashLog >= 8 );
35
+ if (hashLog == 8 ) return (U32 )((const BYTE * )p )[0 ];
36
+ assert (hashLog <= HASHLOG_MAX );
37
+ return (U32 )(MEM_read16 (p )) * KNUTH >> (32 - hashLog );
32
38
}
33
39
34
40
@@ -46,45 +52,51 @@ static void initStats(FPStats* fpstats)
46
52
ZSTD_memset (fpstats , 0 , sizeof (FPStats ));
47
53
}
48
54
49
- FORCE_INLINE_TEMPLATE void addEvents_generic (Fingerprint * fp , const void * src , size_t srcSize , size_t samplingRate )
55
+ FORCE_INLINE_TEMPLATE void
56
+ addEvents_generic (Fingerprint * fp , const void * src , size_t srcSize , size_t samplingRate , unsigned hashLog )
50
57
{
51
58
const char * p = (const char * )src ;
52
59
size_t limit = srcSize - HASHLENGTH + 1 ;
53
60
size_t n ;
54
61
assert (srcSize >= HASHLENGTH );
55
62
for (n = 0 ; n < limit ; n += samplingRate ) {
56
- fp -> events [hash2 (p + n )]++ ;
63
+ fp -> events [hash2 (p + n , hashLog )]++ ;
57
64
}
58
65
fp -> nbEvents += limit /samplingRate ;
59
66
}
60
67
61
- #define ADDEVENTS_RATE (_rate ) ZSTD_addEvents_##_rate
68
+ FORCE_INLINE_TEMPLATE void
69
+ recordFingerprint_generic (Fingerprint * fp , const void * src , size_t srcSize , size_t samplingRate , unsigned hashLog )
70
+ {
71
+ ZSTD_memset (fp , 0 , sizeof (unsigned ) * ((size_t )1 << hashLog ));
72
+ fp -> nbEvents = 0 ;
73
+ addEvents_generic (fp , src , srcSize , samplingRate , hashLog );
74
+ }
75
+
76
+ typedef void (* RecordEvents_f )(Fingerprint * fp , const void * src , size_t srcSize );
62
77
63
- #define ZSTD_GEN_ADDEVENTS_SAMPLE (_rate ) \
64
- static void ADDEVENTS_RATE(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
78
+ #define FP_RECORD (_rate ) ZSTD_recordFingerprint_##_rate
79
+
80
+ #define ZSTD_GEN_RECORD_FINGERPRINT (_rate , _hSize ) \
81
+ static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
65
82
{ \
66
- addEvents_generic (fp, src, srcSize, _rate); \
83
+ recordFingerprint_generic (fp, src, srcSize, _rate, _hSize); \
67
84
}
68
85
69
- ZSTD_GEN_ADDEVENTS_SAMPLE (1 )
70
- ZSTD_GEN_ADDEVENTS_SAMPLE (5 )
71
-
86
+ ZSTD_GEN_RECORD_FINGERPRINT (1 , 10 )
87
+ ZSTD_GEN_RECORD_FINGERPRINT (5 , 10 )
88
+ ZSTD_GEN_RECORD_FINGERPRINT (11 , 9 )
89
+ ZSTD_GEN_RECORD_FINGERPRINT (43 , 8 )
72
90
73
- typedef void (* addEvents_f )(Fingerprint * fp , const void * src , size_t srcSize );
74
-
75
- static void recordFingerprint (Fingerprint * fp , const void * src , size_t s , addEvents_f addEvents )
76
- {
77
- ZSTD_memset (fp , 0 , sizeof (* fp ));
78
- addEvents (fp , src , s );
79
- }
80
91
81
92
/* abs64() :
 * @return : the magnitude of @s64, as an unsigned 64-bit value.
 * The negation is carried out in unsigned arithmetic, so the most negative
 * input does not trigger signed overflow (undefined behavior). */
static U64 abs64(S64 s64)
{
    U64 const u64 = (U64)s64;
    return (s64 < 0) ? (U64)0 - u64 : u64;
}
82
93
83
- static U64 fpDistance (const Fingerprint * fp1 , const Fingerprint * fp2 )
94
+ static U64 fpDistance (const Fingerprint * fp1 , const Fingerprint * fp2 , unsigned hashLog )
84
95
{
85
96
U64 distance = 0 ;
86
97
size_t n ;
87
- for (n = 0 ; n < HASHTABLESIZE ; n ++ ) {
98
+ assert (hashLog <= HASHLOG_MAX );
99
+ for (n = 0 ; n < ((size_t )1 << hashLog ); n ++ ) {
88
100
distance +=
89
101
abs64 ((S64 )fp1 -> events [n ] * (S64 )fp2 -> nbEvents - (S64 )fp2 -> events [n ] * (S64 )fp1 -> nbEvents );
90
102
}
@@ -96,12 +108,13 @@ static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2)
96
108
*/
97
109
static int compareFingerprints (const Fingerprint * ref ,
98
110
const Fingerprint * newfp ,
99
- int penalty )
111
+ int penalty ,
112
+ unsigned hashLog )
100
113
{
101
114
assert (ref -> nbEvents > 0 );
102
115
assert (newfp -> nbEvents > 0 );
103
116
{ U64 p50 = (U64 )ref -> nbEvents * (U64 )newfp -> nbEvents ;
104
- U64 deviation = fpDistance (ref , newfp );
117
+ U64 deviation = fpDistance (ref , newfp , hashLog );
105
118
U64 threshold = p50 * (U64 )(THRESHOLD_BASE + penalty ) / THRESHOLD_PENALTY_RATE ;
106
119
return deviation >= threshold ;
107
120
}
@@ -137,45 +150,45 @@ static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
137
150
}
138
151
139
152
#define CHUNKSIZE (8 << 10)
140
- /* Note: technically, we use CHUNKSIZE, so that's 8 KB */
141
- static size_t ZSTD_splitBlock_byChunks (const void * src , size_t srcSize ,
142
- size_t blockSizeMax , addEvents_f f ,
153
+ static size_t ZSTD_splitBlock_byChunks (const void * blockStart , size_t blockSize ,
154
+ int level ,
143
155
void * workspace , size_t wkspSize )
144
156
{
157
+ static const RecordEvents_f records_fs [] = {
158
+ FP_RECORD (43 ), FP_RECORD (11 ), FP_RECORD (5 ), FP_RECORD (1 )
159
+ };
160
+ static const unsigned hashParams [] = { 8 , 9 , 10 , 10 };
161
+ const RecordEvents_f record_f = (assert (0 <=level && level <=3 ), records_fs [level ]);
145
162
FPStats * const fpstats = (FPStats * )workspace ;
146
- const char * p = (const char * )src ;
163
+ const char * p = (const char * )blockStart ;
147
164
int penalty = THRESHOLD_PENALTY ;
148
165
size_t pos = 0 ;
149
- if (srcSize <= blockSizeMax ) return srcSize ;
150
- assert (blockSizeMax == (128 << 10 ));
166
+ assert (blockSize == (128 << 10 ));
151
167
assert (workspace != NULL );
152
168
assert ((size_t )workspace % ZSTD_ALIGNOF (FPStats ) == 0 );
153
169
ZSTD_STATIC_ASSERT (ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof (FPStats ));
154
170
assert (wkspSize >= sizeof (FPStats )); (void )wkspSize ;
155
171
156
172
initStats (fpstats );
157
- recordFingerprint (& fpstats -> pastEvents , p , CHUNKSIZE , f );
158
- for (pos = CHUNKSIZE ; pos <= blockSizeMax - CHUNKSIZE ; pos += CHUNKSIZE ) {
159
- recordFingerprint (& fpstats -> newEvents , p + pos , CHUNKSIZE , f );
160
- if (compareFingerprints (& fpstats -> pastEvents , & fpstats -> newEvents , penalty )) {
173
+ record_f (& fpstats -> pastEvents , p , CHUNKSIZE );
174
+ for (pos = CHUNKSIZE ; pos <= blockSize - CHUNKSIZE ; pos += CHUNKSIZE ) {
175
+ record_f (& fpstats -> newEvents , p + pos , CHUNKSIZE );
176
+ if (compareFingerprints (& fpstats -> pastEvents , & fpstats -> newEvents , penalty , hashParams [ level ] )) {
161
177
return pos ;
162
178
} else {
163
179
mergeEvents (& fpstats -> pastEvents , & fpstats -> newEvents );
164
180
if (penalty > 0 ) penalty -- ;
165
181
}
166
182
}
167
- assert (pos == blockSizeMax );
168
- return blockSizeMax ;
183
+ assert (pos == blockSize );
184
+ return blockSize ;
169
185
(void )flushEvents ; (void )removeEvents ;
170
186
}
171
187
172
/* ZSTD_splitBlock() :
 * Checks that @level is within [0, 3] (debug builds only),
 * then forwards all arguments, unchanged, to ZSTD_splitBlock_byChunks().
 * @return : the value returned by ZSTD_splitBlock_byChunks().
 */
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
                    int level,
                    void* workspace, size_t wkspSize)
{
    assert(level >= 0 && level <= 3);
    return ZSTD_splitBlock_byChunks(blockStart, blockSize,
                                    level,
                                    workspace, wkspSize);
}
0 commit comments