Skip to content
This repository was archived by the owner on Sep 28, 2022. It is now read-only.

Commit e40d84a

Browse files
committed
complete per-record timestamp support
1 parent b811c3b commit e40d84a

File tree

2 files changed

+139
-33
lines changed

2 files changed

+139
-33
lines changed

gpexp/importbatch.go

Lines changed: 83 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,19 @@ type Batch struct {
6767
// values holds the values for each record of an int field
6868
values map[string][]int64
6969

70+
// times holds a time for each record (allocated only if any of the fields are time fields).
71+
times []QuantizedTime
72+
7073
// clearValues holds a slice of indices into b.ids for each
7174
// integer field which has nil values. After translation, these
7275
// slices will be filled out with the actual column IDs those
7376
// indices pertain to so that they can be cleared.
77+
//
78+
// TODO: This is actually a problem — a nil value doesn't mean
79+
// "clear this value", it should mean "don't touch this value", so
80+
// there is no way currently to update a record with int values
81+
// without knowing all the int values, clearing them, or setting
82+
// them to something else in the process.
7483
clearValues map[string][]uint64
7584

7685
// TODO, support timestamps, set fields with more than one value per record, mutex, and bool.
@@ -90,8 +99,11 @@ type Batch struct {
9099
transCache Translator
91100
}
92101

102+
// BatchOption is a functional option for Batch objects.
93103
type BatchOption func(b *Batch) error
94104

105+
// OptTranslator allows one to pass in a custom Translator
106+
// implementation for mapping keys to IDs.
95107
func OptTranslator(t Translator) BatchOption {
96108
return func(b *Batch) error {
97109
b.transCache = t
@@ -112,15 +124,17 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
112124
rowIDs := make(map[string][]uint64)
113125
values := make(map[string][]int64)
114126
tt := make(map[string]map[string][]int)
127+
hasTime := false
115128
for _, field := range fields {
116129
headerMap[field.Name()] = field
117130
opts := field.Opts()
118-
switch opts.Type() {
119-
case pilosa.FieldTypeDefault, pilosa.FieldTypeSet:
131+
switch typ := opts.Type(); typ {
132+
case pilosa.FieldTypeDefault, pilosa.FieldTypeSet, pilosa.FieldTypeTime:
120133
if opts.Keys() {
121134
tt[field.Name()] = make(map[string][]int)
122135
}
123136
rowIDs[field.Name()] = make([]uint64, 0, size)
137+
hasTime = typ == pilosa.FieldTypeTime || hasTime
124138
case pilosa.FieldTypeInt:
125139
values[field.Name()] = make([]int64, 0, size)
126140
}
@@ -138,6 +152,9 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
138152
toTranslateID: make(map[string][]int),
139153
transCache: NewMapTranslator(),
140154
}
155+
if hasTime {
156+
b.times = make([]QuantizedTime, 0, size)
157+
}
141158
for _, opt := range opts {
142159
err := opt(b)
143160
if err != nil {
@@ -155,7 +172,7 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
155172
type Row struct {
156173
ID interface{}
157174
Values []interface{}
158-
Time *QuantizedTime
175+
Time QuantizedTime
159176
}
160177

161178
// QuantizedTime represents a moment in time down to some granularity
@@ -194,7 +211,20 @@ func (qt *QuantizedTime) SetHour(hour string) {
194211
copy(qt.ymdh[8:10], hour)
195212
}
196213

214+
// Reset sets the time to the zero value, which generates no time views.
215+
func (qt *QuantizedTime) Reset() {
216+
for i := range qt.ymdh {
217+
qt.ymdh[i] = 0
218+
}
219+
}
220+
221+
// views builds the list of Pilosa views for this particular time,
222+
// given a quantum.
197223
func (qt *QuantizedTime) views(q pilosa.TimeQuantum) ([]string, error) {
224+
zero := QuantizedTime{}
225+
if *qt == zero {
226+
return nil, nil
227+
}
198228
views := make([]string, 0, len(q))
199229
for _, unit := range q {
200230
switch unit {
@@ -256,6 +286,10 @@ func (b *Batch) Add(rec Row) error {
256286
return errors.Errorf("unsupported id type %T value %v", rid, rid)
257287
}
258288

289+
if b.times != nil {
290+
b.times = append(b.times, rec.Time)
291+
}
292+
259293
for i := 0; i < len(rec.Values); i++ {
260294
field := b.header[i]
261295
switch val := rec.Values[i].(type) {
@@ -402,14 +436,17 @@ func (b *Batch) doTranslation() error {
402436
func (b *Batch) doImport() error {
403437
eg := errgroup.Group{}
404438

405-
frags := b.makeFragments()
406-
for shard, viewMap := range frags {
407-
for fieldView, bitmap := range viewMap {
408-
fieldView := fieldView
409-
bitmap := bitmap
439+
frags, err := b.makeFragments()
440+
if err != nil {
441+
return errors.Wrap(err, "making fragments")
442+
}
443+
for shard, fieldMap := range frags {
444+
for field, viewMap := range fieldMap {
445+
field := field
446+
viewMap := viewMap
410447
eg.Go(func() error {
411-
err := b.client.ImportRoaringBitmap(b.index.Field(fieldView.field), shard, map[string]*roaring.Bitmap{"": bitmap}, false)
412-
return errors.Wrapf(err, "importing data for %s", fieldView.field)
448+
err := b.client.ImportRoaringBitmap(b.index.Field(field), shard, viewMap, false)
449+
return errors.Wrapf(err, "importing data for %s", field)
413450
})
414451
}
415452
}
@@ -425,13 +462,15 @@ func (b *Batch) doImport() error {
425462
// if needed though).
426463
var nilSentinel = ^uint64(0)
427464

428-
func (b *Batch) makeFragments() fragments {
465+
func (b *Batch) makeFragments() (fragments, error) {
429466
shardWidth := b.index.ShardWidth()
430467
if shardWidth == 0 {
431468
shardWidth = pilosa.DefaultShardWidth
432469
}
433470
frags := make(fragments)
434471
for fname, rowIDs := range b.rowIDs {
472+
field := b.headerMap[fname]
473+
opts := field.Opts()
435474
curShard := ^uint64(0) // impossible sentinel value for shard.
436475
var curBM *roaring.Bitmap
437476
for j := range b.ids {
@@ -443,12 +482,30 @@ func (b *Batch) makeFragments() fragments {
443482
curShard = col / shardWidth
444483
curBM = frags.GetOrCreate(curShard, fname, "")
445484
}
446-
curBM.DirectAdd(row*shardWidth + (col % shardWidth))
485+
// TODO this is super ugly, but we want to avoid setting
486+
// bits on the standard view in the specific case when
487+
// there isn't one. Should probably refactor this whole
488+
// loop to be more general w.r.t. views. Also... tests for
489+
// the NoStandardView case would be great.
490+
if !(opts.Type() == pilosa.FieldTypeTime && opts.NoStandardView()) {
491+
curBM.DirectAdd(row*shardWidth + (col % shardWidth))
492+
}
493+
if opts.Type() == pilosa.FieldTypeTime {
494+
views, err := b.times[j].views(opts.TimeQuantum())
495+
if err != nil {
496+
return nil, errors.Wrap(err, "calculating views")
497+
}
498+
for _, view := range views {
499+
tbm := frags.GetOrCreate(curShard, fname, view)
500+
tbm.DirectAdd(row*shardWidth + (col % shardWidth))
501+
}
502+
}
447503
}
448504
}
449-
return frags
505+
return frags, nil
450506
}
451507

508+
// importValueData imports data for int fields.
452509
func (b *Batch) importValueData() error {
453510
shardWidth := b.index.ShardWidth()
454511
if shardWidth == 0 {
@@ -531,6 +588,7 @@ func (b *Batch) importValueData() error {
531588
// next round. Where possible it does not re-allocate memory.
532589
func (b *Batch) reset() {
533590
b.ids = b.ids[:0]
591+
b.times = b.times[:0]
534592
for fieldName, rowIDs := range b.rowIDs {
535593
b.rowIDs[fieldName] = rowIDs[:0]
536594
m := b.toTranslate[fieldName]
@@ -549,24 +607,24 @@ func (b *Batch) reset() {
549607
}
550608
}
551609

552-
type fieldView struct {
553-
field string
554-
view string
555-
}
556-
557-
// map[shard][fieldview]fragmentData
558-
type fragments map[uint64]map[fieldView]*roaring.Bitmap
610+
// fragments holds fragment data as map[shard][field][view]fragmentData.
611+
type fragments map[uint64]map[string]map[string]*roaring.Bitmap
559612

560613
func (f fragments) GetOrCreate(shard uint64, field, view string) *roaring.Bitmap {
561-
viewMap, ok := f[shard]
614+
fieldMap, ok := f[shard]
615+
if !ok {
616+
fieldMap = make(map[string]map[string]*roaring.Bitmap)
617+
}
618+
viewMap, ok := fieldMap[field]
562619
if !ok {
563-
viewMap = make(map[fieldView]*roaring.Bitmap)
620+
viewMap = make(map[string]*roaring.Bitmap)
564621
}
565-
bm, ok := viewMap[fieldView{field: field, view: view}]
622+
bm, ok := viewMap[view]
566623
if !ok {
567624
bm = roaring.NewBTreeBitmap()
568-
viewMap[fieldView{field: field, view: view}] = bm
625+
viewMap[view] = bm
569626
}
570-
f[shard] = viewMap
627+
fieldMap[field] = viewMap
628+
f[shard] = fieldMap
571629
return bm
572630
}

0 commit comments

Comments
 (0)