@@ -61,7 +61,8 @@ type Batch struct {
61
61
// ids is a slice of length batchSize of record IDs
62
62
ids []uint64
63
63
64
- // rowIDs is a slice of length len(Batch.header) which contains slices of length batchSize
64
+ // rowIDs is a map of field names to slices of length batchSize
65
+ // which contain row IDs.
65
66
rowIDs map [string ][]uint64
66
67
67
68
// values holds the values for each record of an int field
@@ -70,17 +71,9 @@ type Batch struct {
70
71
// times holds a time for each record. (if any of the fields are time fields)
71
72
times []QuantizedTime
72
73
73
- // clearValues holds a slice of indices into b.ids for each
74
- // integer field which has nil values. After translation, these
75
- // slices will be filled out with the actual column IDs those
76
- // indices pertain to so that they can be cleared.
77
- //
78
- // TODO: This is actually a problem — a nil value doesn't mean
79
- // "clear this value", it should mean "don't touch this value", so
80
- // there is no way currently to update a record with int values
81
- // without knowing all the int values, clearing them, or setting
82
- // them to something else in the process.
83
- clearValues map [string ][]uint64
74
+ // nullIndices maps each integer field name to the indices into
75
+ // b.ids of the records whose value for that field is nil.
76
+ nullIndices map [string ][]uint64
84
77
85
78
// TODO: support timestamps, set fields with more than one value per record, mutex, and bool.
86
79
@@ -147,7 +140,7 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
147
140
ids : make ([]uint64 , 0 , size ),
148
141
rowIDs : rowIDs ,
149
142
values : values ,
150
- clearValues : make (map [string ][]uint64 ),
143
+ nullIndices : make (map [string ][]uint64 ),
151
144
toTranslate : tt ,
152
145
toTranslateID : make (map [string ][]int ),
153
146
transCache : NewMapTranslator (),
@@ -164,11 +157,7 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
164
157
return b , nil
165
158
}
166
159
167
- // Row represents a single record which can be added to a RecordBatch.
168
- //
169
- // Note: it is not named "Record" because there is a conflict with
170
- // another type in this package. This may be rectified by deprecating
171
- // something or splitting packages in the future.
160
+ // Row represents a single record which can be added to a Batch.
172
161
type Row struct {
173
162
ID interface {}
174
163
Values []interface {}
@@ -316,12 +305,12 @@ func (b *Batch) Add(rec Row) error {
316
305
case nil :
317
306
if field .Opts ().Type () == pilosa .FieldTypeInt {
318
307
b .values [field .Name ()] = append (b .values [field .Name ()], 0 )
319
- clearIndexes , ok := b .clearValues [field .Name ()]
308
+ nullIndices , ok := b .nullIndices [field .Name ()]
320
309
if ! ok {
321
- clearIndexes = make ([]uint64 , 0 )
310
+ nullIndices = make ([]uint64 , 0 )
322
311
}
323
- clearIndexes = append (clearIndexes , uint64 (len (b .ids )- 1 ))
324
- b .clearValues [field .Name ()] = clearIndexes
312
+ nullIndices = append (nullIndices , uint64 (len (b .ids )- 1 ))
313
+ b .nullIndices [field .Name ()] = nullIndices
325
314
326
315
} else {
327
316
b .rowIDs [field .Name ()] = append (b .rowIDs [field .Name ()], nilSentinel )
@@ -425,11 +414,6 @@ func (b *Batch) doTranslation() error {
425
414
}
426
415
}
427
416
428
- for _ , idIndexes := range b .clearValues {
429
- for i , index := range idIndexes {
430
- idIndexes [i ] = b .ids [index ]
431
- }
432
- }
433
417
return nil
434
418
}
435
419
@@ -511,77 +495,53 @@ func (b *Batch) importValueData() error {
511
495
if shardWidth == 0 {
512
496
shardWidth = pilosa .DefaultShardWidth
513
497
}
514
-
515
498
eg := errgroup.Group {}
516
- curShard := b .ids [0 ] / shardWidth
517
- startIdx := 0
518
- for i := 1 ; i <= len (b .ids ); i ++ {
519
- // when i==len(b.ids) we ensure that the import logic gets run
520
- // by making a fake shard once we're past the last ID
521
- recordID := (curShard + 2 ) * shardWidth
522
- if i < len (b .ids ) {
523
- recordID = b .ids [i ]
524
- }
525
- if recordID / shardWidth != curShard {
526
- endIdx := i
527
- ids := b .ids [startIdx :endIdx ]
528
- for field , values := range b .values {
529
- field := field
530
- shard := curShard
531
- vslice := values [startIdx :endIdx ]
532
- eg .Go (func () error {
533
- err := b .client .ImportValues (b .index .Name (), field , shard , vslice , ids , false )
534
- return errors .Wrapf (err , "importing values for %s" , field )
535
- })
536
- }
537
- startIdx = i
538
- curShard = recordID / shardWidth
539
- }
540
- }
541
-
542
- err := eg .Wait ()
543
- if err != nil {
544
- return errors .Wrap (err , "importing value data" )
545
- }
546
499
547
- // Now we clear any values for which we got a nil.
548
- //
549
- // TODO we need an endpoint which lets us set and clear
550
- // transactionally... this is kind of a hack.
551
- maxLen := 0
552
- for _ , ids := range b .clearValues {
553
- if len (ids ) > maxLen {
554
- maxLen = len (ids )
500
+ ids := make ([]uint64 , len (b .ids ))
501
+ for field , values := range b .values {
502
+ // grow our temp ids slice to full length
503
+ ids = ids [:len (b .ids )]
504
+ // copy orig ids back in
505
+ copy (ids , b .ids )
506
+
507
+ // trim out null values from ids and values.
508
+ nullIndices := b .nullIndices [field ]
509
+ for i , nullIndex := range nullIndices {
510
+ nullIndex -= uint64 (i ) // offset the index by the number of items removed so far
511
+ ids = append (ids [:nullIndex ], ids [nullIndex + 1 :]... )
512
+ values = append (values [:nullIndex ], values [nullIndex + 1 :]... )
555
513
}
556
- }
557
- eg = errgroup.Group {}
558
- values := make ([]int64 , 0 , maxLen )
559
- for field , ids := range b .clearValues {
560
- // TODO maybe sort ids here
561
- curShard := b .ids [0 ] / shardWidth
514
+
515
+ // now do imports by shard
516
+ curShard := ids [0 ] / shardWidth
562
517
startIdx := 0
563
518
for i := 1 ; i <= len (ids ); i ++ {
564
- recordID := ( curShard + 2 ) * shardWidth
519
+ var recordID uint64
565
520
if i < len (ids ) {
566
- recordID = b .ids [i ]
521
+ recordID = ids [i ]
522
+ } else {
523
+ recordID = (curShard + 2 ) * shardWidth
567
524
}
525
+
568
526
if recordID / shardWidth != curShard {
569
527
endIdx := i
570
- idSlice := ids [startIdx :endIdx ]
571
- values := values [:len (idSlice )]
572
- field := field
573
528
shard := curShard
529
+ field := field
530
+ path , data , err := b .client .EncodeImportValues (b .index .Name (), field , shard , values [startIdx :endIdx ], ids [startIdx :endIdx ], false )
531
+ if err != nil {
532
+ return errors .Wrap (err , "encoding import values" )
533
+ }
574
534
eg .Go (func () error {
575
- err := b .client .ImportValues (b .index .Name (), field , shard , values , idSlice , true )
576
- return errors .Wrap (err , "clearing values" )
535
+ err := b .client .DoImportValues (b .index .Name (), shard , path , data )
536
+ return errors .Wrapf (err , "importing values for %s" , field )
577
537
})
578
538
startIdx = i
579
539
curShard = recordID / shardWidth
580
540
}
581
541
}
582
542
}
583
-
584
- return errors .Wrap (eg . Wait () , "importing clear value data" )
543
+ err := eg . Wait ()
544
+ return errors .Wrap (err , "importing value data" )
585
545
}
586
546
587
547
// reset is called at the end of importing to ready the batch for the
@@ -602,8 +562,8 @@ func (b *Batch) reset() {
602
562
for k := range b .values {
603
563
delete (b .values , k ) // TODO pool these slices
604
564
}
605
- for k := range b .clearValues {
606
- delete (b .clearValues , k ) // TODO pool these slices
565
+ for k := range b .nullIndices {
566
+ delete (b .nullIndices , k ) // TODO pool these slices
607
567
}
608
568
}
609
569
0 commit comments