26
26
27
27
#define MAX_STRING_MODE_EXTENDED_VALUES 512
28
28
29
- static inline void esmOutputBits (unsigned char * outValsT , const uint32_t nBits , const uint32_t bitVal , uint32_t * nextOutIx , uint32_t * nextOutBit )
29
/*
 * Flush the bits still pending in the 64-bit accumulator to the output buffer.
 *
 * outValsT    output byte buffer
 * thisOutIx   in/out: index of the next byte to write in outValsT; advanced by
 *             one for every byte written
 * nextOutBit  in/out: number of pending bits in *outBits; set to 0 on return
 * outBits     accumulator holding the pending bits; it is read but not modified
 *
 * Writes ceil(*nextOutBit / 8) bytes, least significant byte first, and does
 * nothing when no bits are pending. The shifts used here are only defined for
 * *nextOutBit <= 64.
 */
static inline void esmOutputRemainder(unsigned char *outValsT, uint32_t *thisOutIx, uint32_t *nextOutBit, uint64_t *outBits)
{
    if (*nextOutBit == 0)
        return; // no bits to output

    // Round the pending bit count up to whole bytes. Staying in unsigned
    // arithmetic avoids storing a wrapped (nextOutBit - 8) in a signed integer.
    const uint32_t nBytes = (*nextOutBit + 7) / 8;
    for (uint32_t byteIx = 0; byteIx < nBytes; byteIx++)
    {
        outValsT[(*thisOutIx)++] = (unsigned char)(*outBits >> (8 * byteIx));
    }
    *nextOutBit = 0;
} // end esmOutputRemainder
45
+
46
/*
 * Copy the full 64-bit accumulator to the output buffer as 8 bytes,
 * least significant byte first.
 *
 * outValsT   output byte buffer; 8 bytes are written starting at *thisOutIx
 * thisOutIx  in/out: index of the next byte to write; advanced by 8
 * outBits    accumulator to emit; it is read but not modified, so callers
 *            reinitialize it themselves afterwards
 */
static inline void esmOutputOutBits(unsigned char *outValsT, uint32_t *thisOutIx, uint64_t *outBits)
{
    const uint64_t bits = *outBits;
    for (uint32_t shift = 0; shift < 64; shift += 8)
    {
        outValsT[(*thisOutIx)++] = (unsigned char)(bits >> shift);
    }
} // end esmOutputOutBits
58
+
59
/*
 * Append nBits bits of bitVal to the 64-bit output accumulator, writing the
 * accumulator to outValsT whenever it fills.
 *
 * outValsT    output byte buffer
 * nBits       number of bits to append (1 to 64)
 * bitVal      value to append; bits above nBits are not masked off here, so
 *             the caller is expected to pass a value that fits in nBits
 * thisOutIx   in/out: index of the next byte to write in outValsT; advanced
 *             by 8 each time a full accumulator is written
 * nextOutBit  in/out: number of bits currently held in *outBits; must be
 *             below 64 on entry and is below 64 on return
 * outBits     in/out: accumulator; new bits are placed at bit position
 *             *nextOutBit
 *
 * NOTE(review): this function takes the place of the former esmOutputBits;
 * the name thisOutIx2 looks like a search-and-replace artifact. Confirm
 * before renaming, since callers use this name.
 */
static inline void thisOutIx2(unsigned char *outValsT, const uint32_t nBits, const uint64_t bitVal, uint32_t *thisOutIx, uint32_t *nextOutBit, uint64_t *outBits)
{
    // output 1 to 64 bits
    *outBits |= bitVal << *nextOutBit;
    *nextOutBit += nBits;
    if (*nextOutBit >= 64)
    {
        esmOutputOutBits(outValsT, thisOutIx, outBits);
        // init outBits with remainder of bits from current output
        *nextOutBit -= 64;
        // Number of bits of bitVal that went into the accumulator just written.
        // This is 64 when the accumulator was empty and nBits is 64; shifting a
        // 64-bit value by 64 is undefined, and nothing is carried over then.
        const uint32_t bitsWritten = nBits - *nextOutBit;
        *outBits = bitsWritten < 64 ? bitVal >> bitsWritten : 0;
    }
} // end thisOutIx2
40
72
41
73
int32_t encodeExtendedStringMode (const unsigned char * inVals , unsigned char * outVals , const uint32_t nValuesMax , uint32_t * nValuesOut )
42
74
{
@@ -52,18 +84,19 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
52
84
uint64_t twoVals [64 ]; // index is first unique val, with bit position of second unique value set to 1
53
85
uint32_t twoValsPoss [64 * 64 ]; // position in input of first occurrence of corresponding two unique values of up to 64
54
86
uint32_t twoValsPos ;
55
- uint32_t nextOutIx ;
87
+ uint32_t thisOutIx ;
56
88
uint32_t nextOutBit = 1 ; // start of encoding after first two inputs
57
89
unsigned char outValsT [MAX_STRING_MODE_EXTENDED_VALUES ];
58
90
uint32_t maxUniquesExceeded = 0 ;
59
91
uint32_t highBitClear ;
92
+ uint64_t outBits ; // accumulate 64 bits before output
60
93
// smaller values compress slightly better with string limit of 9 versus 17
61
94
const uint32_t string_limit = nValuesMax <=64 ? 9 : 17 ;
62
95
const uint32_t extended_string_length_bits = nValuesMax <=64 ? 3 : 4 ;
63
96
if (nValuesMax > MAX_STRING_MODE_EXTENDED_VALUES )
64
97
return -100 ;
65
98
outVals [1 ] = 0 ; // init second info byte
66
- nextOutIx = 0 ; // start of encoding in outValsT
99
+ thisOutIx = 0 ; // start of encoding in outValsT
67
100
// output encoding of first two values in outVals starting at third bit in second byte
68
101
// first bit is last bit of unique count, second is whether
69
102
// uniques are compressed
@@ -85,7 +118,7 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
85
118
twoValsPoss [1 ] = 3 ; // set position to two past second value
86
119
}
87
120
// output 1 to indicate first unique value repeated
88
- outValsT [ 0 ] = 1 ; // 1 for first encoding bit
121
+ outBits = 1 ; // 1 for first encoding bit
89
122
}
90
123
else
91
124
{
@@ -111,9 +144,9 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
111
144
// set up new two value in 2nd position
112
145
twoVals [1 ] = 1 << UOinPos2 ;
113
146
twoValsPoss [64 | UOinPos2 ] = 3 ; // set position to two past second value
114
- outValsT [ 0 ] = 0 ; // for first encoding bit
147
+ outBits = 0 ; // for first encoding bit
115
148
}
116
- uint32_t nUniqueBits = 1 ; // bits to encode current number of uniques
149
+ uint32_t nUniqueBits = 1 ; // bits to encode current number of uniques (1 or 2)
117
150
uint32_t nextInVal = inVals [2 ];
118
151
inPos = 2 ; // start loop after init of first two values
119
152
const uint32_t lastPos = nValuesMax - 1 ;
@@ -127,7 +160,7 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
127
160
{
128
161
// set up for new unique in this position
129
162
// uniques > 64 are output as uniques but are not considered for processing
130
- if (nextOutIx + nUniques > lastPos )
163
+ if (thisOutIx + nUniques > lastPos )
131
164
{
132
165
* nValuesOut = inPos - 1 ; // processed through last inPos
133
166
return 0 ;
@@ -168,10 +201,11 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
168
201
nUniques ++ ;
169
202
highBitClear |= inVal ;
170
203
// output a 0 to indicate new unique
171
- if (++ nextOutBit == 8 )
204
+ if (++ nextOutBit == 64 )
172
205
{
173
- // update out index and next out bit
174
- outValsT [++ nextOutIx ] = 0 ;
206
+ // output outBits and init for next output
207
+ esmOutputOutBits (outValsT , & thisOutIx , & outBits );
208
+ outBits = 0 ;
175
209
nextOutBit = 0 ;
176
210
}
177
211
outVals [nUniques + 1 ] = (unsigned char )inVal ; // save unique or any value encountered beyond 64 uniques in list at front of outVals starting in third position
@@ -189,7 +223,7 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
189
223
twoValsPoss [(UOinVal <<6 ) | UOinValsInPosP1 ] = inPos + 1 ;
190
224
}
191
225
// output repeated value: 01 plus unique occurrence
192
- esmOutputBits (outValsT , 2 + nUniqueBits , 1 |(UOinVal <<2 ), & nextOutIx , & nextOutBit );
226
+ thisOutIx2 (outValsT , 2 + nUniqueBits , ( uint64_t )( 1 |(UOinVal <<2 )) , & thisOutIx , & nextOutBit , & outBits );
193
227
continue ;
194
228
}
195
229
const uint64_t TVuniqueOccurrence = twoVals [UOinVal ];
@@ -203,7 +237,7 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
203
237
{
204
238
// two vals include first value so this is a repeat
205
239
// output repeated value: 01 plus unique occurrence
206
- esmOutputBits (outValsT , 2 + nUniqueBits , 1 |(UOinVal <<2 ), & nextOutIx , & nextOutBit );
240
+ thisOutIx2 (outValsT , 2 + nUniqueBits , ( uint64_t )( 1 |(UOinVal <<2 )) , & thisOutIx , & nextOutBit , & outBits );
207
241
continue ;
208
242
}
209
243
uint32_t strPos = inPos + 1 ;
@@ -218,18 +252,10 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
218
252
strPos ++ ;
219
253
twoValsPos ++ ;
220
254
}
221
- // output 11 plus string length bits
222
- esmOutputBits (outValsT , 2 + extended_string_length_bits , 3 | ((strCount - 1 )<<2 ), & nextOutIx , & nextOutBit );
223
- // output the position of string
224
- // output lowest bit and then remaining bits
225
- const uint32_t outVal9 = twoValsPos - strCount - 1 ;
226
- outValsT [nextOutIx ] |= (outVal9 & 1 ) << nextOutBit ;
227
- if (++ nextOutBit == 8 )
228
- {
229
- outValsT [++ nextOutIx ] = 0 ;
230
- nextOutBit = 0 ;
231
- }
232
- esmOutputBits (outValsT , encodingBits512 [inPos - 1 ]- 1 , outVal9 >>1 , & nextOutIx , & nextOutBit );
255
+ // output 11 plus string length bit then position of string
256
+ const uint32_t stringBits = 2 + extended_string_length_bits ;
257
+ const uint64_t outVal9 = twoValsPos - strCount - 1 ;
258
+ thisOutIx2 (outValsT , stringBits + encodingBits512 [inPos - 1 ], (3 | ((strCount - 1 )<<2 )) | (outVal9 <<stringBits ), & thisOutIx , & nextOutBit , & outBits );
233
259
inPos += strCount ;
234
260
nextInVal = inVals [inPos ];
235
261
}
@@ -240,22 +266,24 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
240
266
twoVals [UOinVal ] |= 1llu << UOinValsInPosP1 ;
241
267
twoValsPoss [(UOinVal <<6 ) | UOinValsInPosP1 ] = inPos + 1 ;
242
268
// output repeated value: 01 plus unique occurrence
243
- esmOutputBits (outValsT , 2 + nUniqueBits , 1 |(UOinVal <<2 ), & nextOutIx , & nextOutBit );
269
+ thisOutIx2 (outValsT , 2 + nUniqueBits , ( uint64_t )( 1 |(UOinVal <<2 )) , & thisOutIx , & nextOutBit , & outBits );
244
270
}
245
271
}
246
272
// output final bits
247
- if (nextOutBit > 0 )
248
- nextOutIx ++ ; // index past final bits
249
273
if (inPos < nValuesMax )
250
274
{
251
275
// occurs for both end of input on last pos -1 and for max uniques exceeded
252
276
if (maxUniquesExceeded )
253
- outValsT [ nextOutIx ++ ] = inVals [maxUniquesExceeded - 1 ]; // output last byte that is last unique encountered
277
+ thisOutIx2 ( outValsT , 8 , ( uint64_t ) inVals [maxUniquesExceeded - 1 ], & thisOutIx , & nextOutBit , & outBits );
254
278
else
255
- outValsT [nextOutIx ++ ] = inVals [lastPos ]; // output last input byte
279
+ thisOutIx2 (outValsT , 8 , (uint64_t )inVals [lastPos ], & thisOutIx , & nextOutBit , & outBits );
280
+ }
281
+ if (nextOutBit > 0 )
282
+ {
283
+ esmOutputRemainder (outValsT , & thisOutIx , & nextOutBit , & outBits ); // index past final bits
256
284
}
257
285
* nValuesOut = maxUniquesExceeded ? maxUniquesExceeded : nValuesMax ;
258
- if (nextOutIx + nUniques > * nValuesOut - 1 )
286
+ if (thisOutIx + nUniques > * nValuesOut - 1 )
259
287
return 0 ;
260
288
// use 7-bit encoding on uniques if all high bits set
261
289
int32_t uniqueOffset ;
@@ -279,10 +307,10 @@ int32_t encodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
279
307
{
280
308
uniqueOffset = nUniques + 2 ;
281
309
}
282
- memcpy (outVals + uniqueOffset , outValsT , nextOutIx );
310
+ memcpy (outVals + uniqueOffset , outValsT , thisOutIx );
283
311
outVals [0 ] = 0x7f ; // indicate external string mode
284
312
outVals [1 ] |= nUniques - 1 ; // number uniques in first 7 bits then compressed uniques bit
285
- return (int32_t )(nextOutIx + uniqueOffset ) * 8 ;
313
+ return (int32_t )(thisOutIx + uniqueOffset ) * 8 ;
286
314
} // end encodeExtendedStringMode
287
315
288
316
static inline void dsmGetBits (const unsigned char * inVals , const uint32_t nBitsToGet , uint32_t * thisInVal , uint32_t * thisVal , uint32_t * bitPos , int32_t * theBits )
@@ -452,14 +480,15 @@ int32_t decodeExtendedStringMode(const unsigned char *inVals, unsigned char *out
452
480
}
453
481
}
454
482
}
455
- if (bitPos > 0 )
456
- thisInVal ++ ; // inc past partial input value
457
483
if (nextOutVal == nOrigMinus1 )
458
484
{
459
485
// output last byte in input when not ending with a string
460
486
// string at end will catch last byte
461
- outVals [nOrigMinus1 ] = inVals [thisInVal ++ ];
487
+ dsmGetBits (inVals , 8 , & thisInVal , & thisVal , & bitPos , & theBits );
488
+ outVals [nOrigMinus1 ] = (unsigned char )theBits ;
462
489
}
490
+ if (bitPos > 0 )
491
+ thisInVal ++ ; // inc past partial input value
463
492
* bytesProcessed = thisInVal ;
464
493
return (int32_t )nOriginalValues ;
465
494
} // end decodeExtendedStringMode
0 commit comments