@@ -18,12 +18,16 @@ package recsplit
1818
1919import (
2020 "context"
21+ "crypto/sha256"
2122 "fmt"
23+ "io"
24+ "os"
2225 "path/filepath"
2326 "testing"
2427
2528 "github.com/spaolacci/murmur3"
2629 "github.com/stretchr/testify/assert"
30+ "github.com/stretchr/testify/require"
2731
2832 "github.com/erigontech/erigon/common/log/v3"
2933)
@@ -526,6 +530,65 @@ func TestIndexLookupParallel(t *testing.T) {
526530 }
527531}
528532
533+ // TestParallelMatchesSequential checks that the index file produced by the parallel
534+ // build path is byte-for-byte identical to the one produced by the sequential path.
535+ func TestParallelMatchesSequential (t * testing.T ) {
536+ logger := log .New ()
537+ tmpDir := t .TempDir ()
538+ salt := uint32 (42 )
539+ const N = 10_000
540+
541+ keys := make ([][]byte , N )
542+ for i := range keys {
543+ keys [i ] = fmt .Appendf (nil , "key-%d" , i )
544+ }
545+
546+ fileChecksum := func (path string ) []byte {
547+ t .Helper ()
548+ f , err := os .Open (path )
549+ require .NoError (t , err )
550+ defer f .Close ()
551+ h := sha256 .New ()
552+ _ , err = io .Copy (h , f )
553+ require .NoError (t , err )
554+ return h .Sum (nil )
555+ }
556+
557+ build := func (workers int , indexFile string ) {
558+ t .Helper ()
559+ rs , err := NewRecSplit (RecSplitArgs {
560+ KeyCount : N ,
561+ BucketSize : 100 ,
562+ Salt : & salt ,
563+ TmpDir : tmpDir ,
564+ IndexFile : indexFile ,
565+ LeafSize : 8 ,
566+ NoFsync : true ,
567+ Workers : workers ,
568+ }, logger )
569+ require .NoError (t , err )
570+ defer rs .Close ()
571+ for i , k := range keys {
572+ require .NoError (t , rs .AddKey (k , uint64 (i * 17 )))
573+ }
574+ require .NoError (t , rs .Build (context .Background ()))
575+ }
576+
577+ seqFile := filepath .Join (tmpDir , "seq.idx" )
578+ build (1 , seqFile )
579+ seqSum := fileChecksum (seqFile )
580+
581+ for _ , workers := range []int {2 , 4 , 8 } {
582+ workers := workers
583+ t .Run (fmt .Sprintf ("workers=%d" , workers ), func (t * testing.T ) {
584+ parFile := filepath .Join (tmpDir , fmt .Sprintf ("par_w%d.idx" , workers ))
585+ build (workers , parFile )
586+ assert .Equal (t , seqSum , fileChecksum (parFile ),
587+ "parallel (workers=%d) index file differs from sequential" , workers )
588+ })
589+ }
590+ }
591+
529592func BenchmarkBuildParallel (b * testing.B ) {
530593 b .ReportAllocs ()
531594 logger := log .New ()
0 commit comments