@@ -522,13 +522,30 @@ func (b *Batch) importValueData() error {
522522
523523 // trim out null values from ids and values.
524524 nullIndices := b .nullIndices [field ]
525+ // TODO(jaffee) I think this may be very inefficient. It looks
526+ // like we're copying the `ids` and `values` slices over
527+ // themselves (an O(n) operation) for each nullIndex so this
528+ // is effectively O(n^2). What we could do is iterate through
529+ // ids and values each once, while simultaneously iterating
530+ // through nullIndices and keeping track of how many
531+ // nullIndices we've passed, and so how far back we need to
532+ // copy each item.
533+ //
534+ // It was a couple of weeks ago that I wrote this code, and I
535+ // vaguely remember thinking about this, so I may just be
536+ // missing something now. We should benchmark on what should
537+ // be a bad case (an int field which is mostly null), and see
538+ // if the improved implementation helps a lot.
525539 for i , nullIndex := range nullIndices {
526540 nullIndex -= uint64 (i ) // offset the index by the number of items removed so far
527541 ids = append (ids [:nullIndex ], ids [nullIndex + 1 :]... )
528542 values = append (values [:nullIndex ], values [nullIndex + 1 :]... )
529543 }
530544
531545 // now do imports by shard
546+ if len (ids ) == 0 {
547+ continue // TODO test this "all nil" case
548+ }
532549 curShard := ids [0 ] / shardWidth
533550 startIdx := 0
534551 for i := 1 ; i <= len (ids ); i ++ {
0 commit comments