Skip to content

Commit 857b8cd

Browse files
committed
backported apache#19023 expression indexing improvement
1 parent 8ef5bf3 commit 857b8cd

12 files changed

Lines changed: 157 additions & 35 deletions

File tree

benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -156,8 +156,11 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark
156156
// numeric no lhs null
157157
"SELECT NVL(long1, long3), SUM(double1) FROM expressions GROUP BY 1 ORDER BY 2",
158158
"SELECT NVL(long1, long5 + long3), SUM(double1) FROM expressions GROUP BY 1 ORDER BY 2",
159-
"SELECT CASE WHEN MOD(long1, 2) = 0 THEN -1 WHEN MOD(long1, 2) = 1 THEN long2 / MOD(long1, 2) ELSE long3 END FROM expressions GROUP BY 1"
160-
159+
"SELECT CASE WHEN MOD(long1, 2) = 0 THEN -1 WHEN MOD(long1, 2) = 1 THEN long2 / MOD(long1, 2) ELSE long3 END FROM expressions GROUP BY 1",
160+
// cast
161+
"SELECT CAST(string1 as BIGINT) + CAST(string3 as DOUBLE) + long3, COUNT(*) FROM expressions GROUP BY 1 ORDER BY 2",
162+
"SELECT COUNT(*), SUM(CAST(string1 as BIGINT) + CAST(string3 as BIGINT)) FROM expressions WHERE double3 < 1010.0 AND double3 > 100.0",
163+
"SELECT COUNT(*) FROM expressions WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00' AND (UPPER(COALESCE(string3,'')) LIKE '1%' OR TRIM(UPPER(COALESCE(string3,''))) LIKE '1%' OR SUBSTRING(UPPER(COALESCE(string3,'')),1,1) IN ('1','2','3','4','5') OR ('X' || UPPER(COALESCE(string3,''))) LIKE 'X1%') AND (UPPER(COALESCE(string5,'')) LIKE '2%' OR TRIM(UPPER(COALESCE(string5,''))) LIKE '2%' OR SUBSTRING(UPPER(COALESCE(string5,'')),1,1) IN ('1','2','3','4','5') OR ('Y' || UPPER(COALESCE(string5,''))) LIKE 'Y2%') AND CAST(double4 * 1000 AS BIGINT) BETWEEN -850000000 AND 850000000"
161164
);
162165

163166
@Param({
@@ -229,7 +232,10 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark
229232
"55",
230233
"56",
231234
"57",
232-
"58"
235+
"58",
236+
"59",
237+
"60",
238+
"61"
233239
})
234240
private String query;
235241

processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -225,13 +225,16 @@ boolean nextMatches(@Nullable Object nextValue)
225225
private abstract static class BitmapIterator implements Iterator<ImmutableBitmap>
226226
{
227227
private final DictionaryEncodedValueIndex<?> inputColumnIndexes;
228+
228229
int next;
229230
int index = 0;
230231
boolean nextSet = false;
232+
private final Iterator<?> valuesIterator;
231233

232234
private BitmapIterator(DictionaryEncodedValueIndex<?> inputColumnIndexes)
233235
{
234236
this.inputColumnIndexes = inputColumnIndexes;
237+
this.valuesIterator = inputColumnIndexes.getValueIterator();
235238
}
236239

237240
@Override
@@ -258,8 +261,8 @@ public ImmutableBitmap next()
258261

259262
private void findNext()
260263
{
261-
while (!nextSet && index < inputColumnIndexes.getCardinality()) {
262-
Object nextValue = inputColumnIndexes.getValue(index);
264+
while (!nextSet && valuesIterator.hasNext()) {
265+
final Object nextValue = valuesIterator.next();
263266
nextSet = nextMatches(nextValue);
264267
if (nextSet) {
265268
next = index;

processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@
5454
import javax.annotation.Nullable;
5555
import java.io.IOException;
5656
import java.util.EnumSet;
57+
import java.util.Iterator;
5758
import java.util.LinkedHashMap;
5859
import java.util.Map;
5960
import java.util.Objects;
@@ -212,11 +213,14 @@ private ColumnAnalysis analyzeStringColumn(
212213
if (valueIndex != null) {
213214
cardinality = valueIndex.getCardinality();
214215
if (analyzingSize()) {
215-
for (int i = 0; i < cardinality; ++i) {
216-
String value = valueIndex.getValue(i);
216+
final Iterator<String> valueIterator = valueIndex.getValueIterator();
217+
int i = 0;
218+
while (valueIterator.hasNext()) {
219+
final String value = valueIterator.next();
217220
if (value != null) {
218221
size += StringUtils.estimatedBinaryLengthAsUTF8(value) * ((long) valueIndex.getBitmap(i).size());
219222
}
223+
i++;
220224
}
221225
}
222226
if (analyzingMinMax() && cardinality > 0) {

processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@
5151
import java.util.ArrayList;
5252
import java.util.Arrays;
5353
import java.util.Collections;
54+
import java.util.Iterator;
5455
import java.util.List;
5556

5657
public class UseIndexesStrategy extends SearchStrategy
@@ -305,9 +306,12 @@ public Object2IntRBTreeMap<SearchHit> execute(int limit)
305306
// these were checked to be non-null in partitionDimensionList
306307
final DictionaryEncodedStringValueIndex bitmapIndex =
307308
indexSupplier.as(DictionaryEncodedStringValueIndex.class);
308-
for (int i = 0; i < bitmapIndex.getCardinality(); ++i) {
309-
String dimVal = extractionFn.apply(bitmapIndex.getValue(i));
309+
final Iterator<String> iterator = bitmapIndex.getValueIterator();
310+
int i = 0;
311+
while (iterator.hasNext()) {
312+
final String dimVal = extractionFn.apply(iterator.next());
310313
if (!searchQuerySpec.accept(dimVal)) {
314+
i++;
311315
continue;
312316
}
313317
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
@@ -320,6 +324,7 @@ public Object2IntRBTreeMap<SearchHit> execute(int limit)
320324
return retVal;
321325
}
322326
}
327+
i++;
323328
}
324329
}
325330
}

processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -581,8 +581,8 @@ protected IndexSeeker[] toIndexSeekers(
581581

582582
private boolean allNull(Indexed<T> dimValues)
583583
{
584-
for (int i = 0, size = dimValues.size(); i < size; i++) {
585-
if (dimValues.get(i) != null) {
584+
for (T dimValue : dimValues) {
585+
if (dimValue != null) {
586586
return false;
587587
}
588588
}

processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
2626

2727
import javax.annotation.Nullable;
28+
import java.util.Iterator;
2829

2930
public final class IndexedStringDictionaryEncodedStringValueIndex<TDictionary extends Indexed<String>>
3031
implements DictionaryEncodedStringValueIndex
@@ -63,6 +64,12 @@ public BitmapFactory getBitmapFactory()
6364
return bitmapFactory;
6465
}
6566

67+
@Override
68+
public Iterator<String> getValueIterator()
69+
{
70+
return dictionary.iterator();
71+
}
72+
6673
@Override
6774
public ImmutableBitmap getBitmap(int idx)
6875
{

processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import org.apache.druid.segment.column.DictionaryEncodedColumn;
2525

2626
import javax.annotation.Nullable;
27+
import java.util.Iterator;
2728

2829
/**
2930
* This exposes a 'raw' view into bitmap value indexes for {@link DictionaryEncodedColumn}. This allows callers
@@ -54,5 +55,10 @@ public interface DictionaryEncodedValueIndex<T>
5455
@Nullable
5556
T getValue(int index);
5657

58+
/**
59+
* Returns an {@link Iterator} over all the underlying values of the dictionary, in dictionary index order (the n-th value returned is the value at index n of {@link #getValue(int)})
60+
*/
61+
Iterator<T> getValueIterator();
62+
5763
BitmapFactory getBitmapFactory();
5864
}

processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java

Lines changed: 37 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -352,7 +352,7 @@ protected ImmutableBitmap getUnknownsBitmap()
352352
};
353353
}
354354

355-
private class NestedFieldDictionaryEncodedStringValueIndex implements DictionaryEncodedStringValueIndex
355+
private final class NestedFieldDictionaryEncodedStringValueIndex implements DictionaryEncodedStringValueIndex
356356
{
357357
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
358358
final Indexed<ByteBuffer> stringDictionary = globalStringDictionarySupplier.get();
@@ -369,14 +369,7 @@ public int getCardinality()
369369
@Override
370370
public String getValue(int index)
371371
{
372-
int globalIndex = localDictionary.get(index);
373-
if (globalIndex < adjustLongId) {
374-
return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex));
375-
} else if (globalIndex < adjustDoubleId) {
376-
return String.valueOf(longDictionary.get(globalIndex - adjustLongId));
377-
} else {
378-
return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId));
379-
}
372+
return getStringValueFromGlobalId(localDictionary.get(index));
380373
}
381374

382375
@Override
@@ -385,11 +378,46 @@ public BitmapFactory getBitmapFactory()
385378
return bitmapFactory;
386379
}
387380

381+
@Override
382+
public Iterator<String> getValueIterator()
383+
{
384+
final Iterator<Integer> localIterator = localDictionary.iterator();
385+
return new Iterator<>()
386+
{
387+
@Override
388+
public boolean hasNext()
389+
{
390+
return localIterator.hasNext();
391+
}
392+
393+
@Override
394+
public String next()
395+
{
396+
return getStringValueFromGlobalId(localIterator.next());
397+
}
398+
};
399+
}
400+
388401
@Override
389402
public ImmutableBitmap getBitmap(int idx)
390403
{
391404
return NestedFieldColumnIndexSupplier.this.getBitmap(idx);
392405
}
406+
407+
@Nullable
408+
private String getStringValueFromGlobalId(int globalIndex)
409+
{
410+
if (globalIndex == 0) {
411+
return null;
412+
}
413+
if (globalIndex < adjustLongId) {
414+
return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex));
415+
} else if (globalIndex < adjustDoubleId) {
416+
return String.valueOf(longDictionary.get(globalIndex - adjustLongId));
417+
} else {
418+
return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId));
419+
}
420+
}
393421
}
394422

395423
private class NestedStringValueSetIndexes implements StringValueSetIndexes

processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
import org.apache.druid.java.util.common.RE;
3636
import org.apache.druid.java.util.common.StringUtils;
3737
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
38+
import org.apache.druid.math.expr.Evals;
3839
import org.apache.druid.math.expr.ExprEval;
3940
import org.apache.druid.math.expr.ExpressionType;
4041
import org.apache.druid.query.BitmapResultFactory;
@@ -635,13 +636,33 @@ public int getCardinality()
635636
public String getValue(int index)
636637
{
637638
final Double value = dictionary.get(index);
638-
return value == null ? null : String.valueOf(value);
639+
return Evals.asString(value);
639640
}
640641

641642
@Override
642643
public BitmapFactory getBitmapFactory()
643644
{
644645
return bitmapFactory;
645646
}
647+
648+
@Override
649+
public Iterator<String> getValueIterator()
650+
{
651+
final Iterator<Double> delegate = dictionary.iterator();
652+
return new Iterator<>()
653+
{
654+
@Override
655+
public boolean hasNext()
656+
{
657+
return delegate.hasNext();
658+
}
659+
660+
@Override
661+
public String next()
662+
{
663+
return Evals.asString(delegate.next());
664+
}
665+
};
666+
}
646667
}
647668
}

processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
import org.apache.druid.java.util.common.RE;
3535
import org.apache.druid.java.util.common.StringUtils;
3636
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
37+
import org.apache.druid.math.expr.Evals;
3738
import org.apache.druid.math.expr.ExprEval;
3839
import org.apache.druid.math.expr.ExpressionType;
3940
import org.apache.druid.query.BitmapResultFactory;
@@ -646,13 +647,33 @@ public int getCardinality()
646647
public String getValue(int index)
647648
{
648649
final Long value = dictionary.get(index);
649-
return value == null ? null : String.valueOf(value);
650+
return Evals.asString(value);
650651
}
651652

652653
@Override
653654
public BitmapFactory getBitmapFactory()
654655
{
655656
return bitmapFactory;
656657
}
658+
659+
@Override
660+
public Iterator<String> getValueIterator()
661+
{
662+
final Iterator<Long> delegate = dictionary.iterator();
663+
return new Iterator<>()
664+
{
665+
@Override
666+
public boolean hasNext()
667+
{
668+
return delegate.hasNext();
669+
}
670+
671+
@Override
672+
public String next()
673+
{
674+
return Evals.asString(delegate.next());
675+
}
676+
};
677+
}
657678
}
658679
}

0 commit comments

Comments
 (0)