@@ -255,16 +255,39 @@ const INDEXED_PAYLOAD_KEYS: &[&str] = &[
255255 UUID_PAYLOAD_KEY ,
256256] ;
257257
258- /// Check that every indexed payload field's total (non_empty + empty) equals the unfiltered
259- /// point count. A divergence indicates index corruption after a crash.
258+ /// Tolerated overshoot of `non_empty + empty` above the point count, as a fraction of that count.
259+ /// See [`check_payload_indexes_consistency`] for why the sum can only ever drift upward.
260+ const PAYLOAD_INDEX_OVERCOUNT_MARGIN_RATIO : f64 = 0.06 ;
261+
262+ /// Check the indexed crasher payload fields for internal consistency. The workload sets all of
263+ /// them together on the same points (see `set payload` in the workload), so the assertions are
264+ /// crash-safe regardless of how many points have been set — they hold whether zero, some, or all
265+ /// points carry the payload, which matters because the check also runs after a crash that may have
266+ /// interrupted the set-payload phase. We deliberately do *not* assert the fully-set distribution
267+ /// (e.g. "exactly half are non-empty"), since a partially-applied set-payload is a valid recovered
268+ /// state. For every field we require:
260269///
261- /// Stronger than a pairwise inter-index agreement check: if a bug drops the same record
262- /// from every index, all indexes still agree with each other but disagree with the oracle.
270+ /// 1. all fields report an identical `non_empty` count, and
271+ /// 2. all fields report an identical `empty` count — they are set as a unit, so a per-field
272+ /// divergence is real index corruption, not benign lag; and
273+ /// 3. `non_empty + empty` matches the point count, tolerating an upward overshoot of up to
274+ /// [`PAYLOAD_INDEX_OVERCOUNT_MARGIN_RATIO`] but never a deficit.
275+ ///
276+ /// Why the sum can only overshoot, never fall short: `is_empty` and `must_not(is_empty)` are
277+ /// complements, so they should sum to the point count. Under copy-on-write, when `set_payload`
278+ /// touches a point in a non-appendable segment Qdrant copies it into an appendable segment, applies
279+ /// the payload, then retires the source version. After a crash that retirement can lag, leaving a
280+ /// live duplicate — the new copy carries the value, the stale copy is empty — counted once on each
281+ /// side. These duplicates are scrubbed lazily by the optimizer's deduplication pass, so a transient
282+ /// overshoot is expected and benign. A *deficit* cannot come from this and signals points genuinely
283+ /// lost from both indexes.
263284pub async fn check_payload_indexes_consistency (
264285 collection_name : & str ,
265286 client : & Qdrant ,
266287) -> Result < ( ) , CrasherError > {
267288 let total_count = get_exact_points_count ( client, collection_name) . await ? as u64 ;
289+ let overcount_margin =
290+ ( total_count as f64 * PAYLOAD_INDEX_OVERCOUNT_MARGIN_RATIO ) . ceil ( ) as u64 ;
268291
269292 let mut index_totals: Vec < ( & str , u64 , u64 ) > = Vec :: new ( ) ;
270293 for & field_name in INDEXED_PAYLOAD_KEYS {
@@ -293,26 +316,51 @@ pub async fn check_payload_indexes_consistency(
293316 index_totals. push ( ( field_name, non_empty_count, empty_count) ) ;
294317 }
295318
296- let mismatches: Vec < & ( & str , u64 , u64 ) > = index_totals
297- . iter ( )
298- . filter ( |( _, non_empty, empty) | non_empty + empty != total_count)
299- . collect ( ) ;
319+ let mut errors: Vec < String > = Vec :: new ( ) ;
300320
301- if mismatches. is_empty ( ) {
321+ // (1) & (2) All fields are set as a unit, so they must agree on both counts. A per-field
322+ // divergence is index corruption, not the uniform lag of copy-on-write deduplication.
323+ let ( _, first_non_empty, first_empty) = index_totals[ 0 ] ;
324+ if index_totals. iter ( ) . any ( |( _, ne, _) | * ne != first_non_empty) {
325+ let listing = index_totals
326+ . iter ( )
327+ . map ( |( name, ne, _) | format ! ( "'{name}': non_empty({ne})" ) )
328+ . collect :: < Vec < _ > > ( )
329+ . join ( ", " ) ;
330+ errors. push ( format ! (
331+ "non_empty counts disagree across fields (must be identical): {listing}"
332+ ) ) ;
333+ }
334+ if index_totals. iter ( ) . any ( |( _, _, e) | * e != first_empty) {
335+ let listing = index_totals
336+ . iter ( )
337+ . map ( |( name, _, e) | format ! ( "'{name}': empty({e})" ) )
338+ . collect :: < Vec < _ > > ( )
339+ . join ( ", " ) ;
340+ errors. push ( format ! (
341+ "empty counts disagree across fields (must be identical): {listing}"
342+ ) ) ;
343+ }
344+
345+ // (3) non_empty + empty must equal the point count, tolerating an upward overshoot from
346+ // not-yet-deduplicated copy-on-write duplicates, but never a deficit (points lost from both
347+ // indexes).
348+ for ( name, non_empty, empty) in & index_totals {
349+ let sum = non_empty + empty;
350+ if sum < total_count || sum > total_count + overcount_margin {
351+ errors. push ( format ! (
352+ "'{name}': non_empty({non_empty}) + empty({empty}) = {sum} outside [{total_count}, {}] (tolerated overcount +{overcount_margin})" ,
353+ total_count + overcount_margin
354+ ) ) ;
355+ }
356+ }
357+
358+ if errors. is_empty ( ) {
302359 Ok ( ( ) )
303360 } else {
304- let details: Vec < String > = mismatches
305- . iter ( )
306- . map ( |( name, non_empty, empty) | {
307- format ! (
308- "'{name}': non_empty({non_empty}) + empty({empty}) = {} (expected {total_count})" ,
309- non_empty + empty
310- )
311- } )
312- . collect ( ) ;
313361 Err ( Invariant ( format ! (
314- "Payload indexes disagree with total point count ({total_count}):\n {}" ,
315- details . join( "\n " )
362+ "Payload indexes disagree with point count ({total_count}):\n {}" ,
363+ errors . join( "\n " )
316364 ) ) )
317365 }
318366}
0 commit comments