@@ -61,7 +61,8 @@ type Batch struct {
6161 // ids is a slice of length batchSize of record IDs
6262 ids []uint64
6363
64- // rowIDs is a slice of length len(Batch.header) which contains slices of length batchSize
64+ // rowIDs is a map of field names to slices of length batchSize
65+ // which contain row IDs.
6566 rowIDs map [string ][]uint64
6667
6768 // values holds the values for each record of an int field
@@ -70,17 +71,9 @@ type Batch struct {
7071 // times holds a time for each record (if any of the fields are time fields).
7172 times []QuantizedTime
7273
73- // clearValues holds a slice of indices into b.ids for each
74- // integer field which has nil values. After translation, these
75- // slices will be filled out with the actual column IDs those
76- // indices pertain to so that they can be cleared.
77- //
78- // TODO: This is actually a problem — a nil value doesn't mean
79- // "clear this value", it should mean "don't touch this value", so
80- // there is no way currently to update a record with int values
81- // without knowing all the int values, clearing them, or setting
82- // them to something else in the process.
83- clearValues map [string ][]uint64
74+ // nullIndices holds a slice of indices into b.ids for each
75+ // integer field which has nil values.
76+ nullIndices map [string ][]uint64
8477
8578 // TODO: support timestamp fields, set fields with more than one value per record, mutex fields, and bool fields.
8679
@@ -147,7 +140,7 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
147140 ids : make ([]uint64 , 0 , size ),
148141 rowIDs : rowIDs ,
149142 values : values ,
150- clearValues : make (map [string ][]uint64 ),
143+ nullIndices : make (map [string ][]uint64 ),
151144 toTranslate : tt ,
152145 toTranslateID : make (map [string ][]int ),
153146 transCache : NewMapTranslator (),
@@ -164,11 +157,7 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
164157 return b , nil
165158}
166159
167- // Row represents a single record which can be added to a RecordBatch.
168- //
169- // Note: it is not named "Record" because there is a conflict with
170- // another type in this package. This may be rectified by deprecating
171- // something or splitting packages in the future.
160+ // Row represents a single record which can be added to a Batch.
172161type Row struct {
173162 ID interface {}
174163 Values []interface {}
@@ -316,12 +305,12 @@ func (b *Batch) Add(rec Row) error {
316305 case nil :
317306 if field .Opts ().Type () == pilosa .FieldTypeInt {
318307 b .values [field .Name ()] = append (b .values [field .Name ()], 0 )
319- clearIndexes , ok := b .clearValues [field .Name ()]
308+ nullIndices , ok := b .nullIndices [field .Name ()]
320309 if ! ok {
321- clearIndexes = make ([]uint64 , 0 )
310+ nullIndices = make ([]uint64 , 0 )
322311 }
323- clearIndexes = append (clearIndexes , uint64 (len (b .ids )- 1 ))
324- b .clearValues [field .Name ()] = clearIndexes
312+ nullIndices = append (nullIndices , uint64 (len (b .ids )- 1 ))
313+ b .nullIndices [field .Name ()] = nullIndices
325314
326315 } else {
327316 b .rowIDs [field .Name ()] = append (b .rowIDs [field .Name ()], nilSentinel )
@@ -425,11 +414,6 @@ func (b *Batch) doTranslation() error {
425414 }
426415 }
427416
428- for _ , idIndexes := range b .clearValues {
429- for i , index := range idIndexes {
430- idIndexes [i ] = b .ids [index ]
431- }
432- }
433417 return nil
434418}
435419
@@ -511,77 +495,53 @@ func (b *Batch) importValueData() error {
511495 if shardWidth == 0 {
512496 shardWidth = pilosa .DefaultShardWidth
513497 }
514-
515498 eg := errgroup.Group {}
516- curShard := b .ids [0 ] / shardWidth
517- startIdx := 0
518- for i := 1 ; i <= len (b .ids ); i ++ {
519- // when i==len(b.ids) we ensure that the import logic gets run
520- // by making a fake shard once we're past the last ID
521- recordID := (curShard + 2 ) * shardWidth
522- if i < len (b .ids ) {
523- recordID = b .ids [i ]
524- }
525- if recordID / shardWidth != curShard {
526- endIdx := i
527- ids := b .ids [startIdx :endIdx ]
528- for field , values := range b .values {
529- field := field
530- shard := curShard
531- vslice := values [startIdx :endIdx ]
532- eg .Go (func () error {
533- err := b .client .ImportValues (b .index .Name (), field , shard , vslice , ids , false )
534- return errors .Wrapf (err , "importing values for %s" , field )
535- })
536- }
537- startIdx = i
538- curShard = recordID / shardWidth
539- }
540- }
541-
542- err := eg .Wait ()
543- if err != nil {
544- return errors .Wrap (err , "importing value data" )
545- }
546499
547- // Now we clear any values for which we got a nil.
548- //
549- // TODO we need an endpoint which lets us set and clear
550- // transactionally... this is kind of a hack.
551- maxLen := 0
552- for _ , ids := range b .clearValues {
553- if len (ids ) > maxLen {
554- maxLen = len (ids )
500+ ids := make ([]uint64 , len (b .ids ))
501+ for field , values := range b .values {
502+ // grow our temp ids slice to full length
503+ ids = ids [:len (b .ids )]
504+ // copy orig ids back in
505+ copy (ids , b .ids )
506+
507+ // trim out null values from ids and values.
508+ nullIndices := b .nullIndices [field ]
509+ for i , nullIndex := range nullIndices {
510+ nullIndex -= uint64 (i ) // offset the index by the number of items removed so far
511+ ids = append (ids [:nullIndex ], ids [nullIndex + 1 :]... )
512+ values = append (values [:nullIndex ], values [nullIndex + 1 :]... )
555513 }
556- }
557- eg = errgroup.Group {}
558- values := make ([]int64 , 0 , maxLen )
559- for field , ids := range b .clearValues {
560- // TODO maybe sort ids here
561- curShard := b .ids [0 ] / shardWidth
514+
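515+ // now do imports by shard. NOTE(review): ids is empty here when every record was null for this field, so ids[0] below would panic; confirm whether such fields should be skipped.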
516+ curShard := ids [0 ] / shardWidth
562517 startIdx := 0
563518 for i := 1 ; i <= len (ids ); i ++ {
564- recordID := ( curShard + 2 ) * shardWidth
519+ var recordID uint64
565520 if i < len (ids ) {
566- recordID = b .ids [i ]
521+ recordID = ids [i ]
522+ } else {
523+ recordID = (curShard + 2 ) * shardWidth
567524 }
525+
568526 if recordID / shardWidth != curShard {
569527 endIdx := i
570- idSlice := ids [startIdx :endIdx ]
571- values := values [:len (idSlice )]
572- field := field
573528 shard := curShard
529+ field := field
530+ path , data , err := b .client .EncodeImportValues (b .index .Name (), field , shard , values [startIdx :endIdx ], ids [startIdx :endIdx ], false )
531+ if err != nil {
532+ return errors .Wrap (err , "encoding import values" )
533+ }
574534 eg .Go (func () error {
575- err := b .client .ImportValues (b .index .Name (), field , shard , values , idSlice , true )
576- return errors .Wrap (err , "clearing values" )
535+ err := b .client .DoImportValues (b .index .Name (), shard , path , data )
536+ return errors .Wrapf (err , "importing values for %s" , field )
577537 })
578538 startIdx = i
579539 curShard = recordID / shardWidth
580540 }
581541 }
582542 }
583-
584- return errors .Wrap (eg . Wait () , "importing clear value data" )
543+ err := eg . Wait ()
544+ return errors .Wrap (err , "importing value data" )
585545}
586546
587547// reset is called at the end of importing to ready the batch for the
@@ -602,8 +562,8 @@ func (b *Batch) reset() {
602562 for k := range b .values {
603563 delete (b .values , k ) // TODO pool these slices
604564 }
605- for k := range b .clearValues {
606- delete (b .clearValues , k ) // TODO pool these slices
565+ for k := range b .nullIndices {
566+ delete (b .nullIndices , k ) // TODO pool these slices
607567 }
608568}
609569
0 commit comments