Skip to content

Commit 6944d36

Browse files
xin-zhang2 authored and ethanyzhang committed
feat(PartitionedOutput): Add VARCHAR support in ExchangeBenchmark
1 parent 7241c63 commit 6944d36

1 file changed

Lines changed: 55 additions & 4 deletions

File tree

velox/exec/benchmarks/ExchangeBenchmark.cpp

Lines changed: 55 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@ enum class SimpleColType {
123123
kHugeint,
124124
kLongDecimal,
125125
kDouble,
126+
kVarchar,
126127
};
127128

128129
TypePtr simpleColTypeToType(SimpleColType colType) {
@@ -141,6 +142,8 @@ TypePtr simpleColTypeToType(SimpleColType colType) {
141142
return DECIMAL(20, 3);
142143
case SimpleColType::kDouble:
143144
return DOUBLE();
145+
case SimpleColType::kVarchar:
146+
return VARCHAR();
144147
}
145148
VELOX_UNREACHABLE();
146149
}
@@ -161,6 +164,8 @@ std::string simpleColTypeName(SimpleColType colType) {
161164
return "LongDecimal";
162165
case SimpleColType::kDouble:
163166
return "Double";
167+
case SimpleColType::kVarchar:
168+
return "Varchar";
164169
}
165170
VELOX_UNREACHABLE();
166171
}
@@ -356,8 +361,14 @@ class ExchangeBenchmark : public VectorTestBase {
356361
/// Creates a single flat column of `type` with `numRows` rows.
357362
/// Approximately `nullPct` percent of rows are set to null, distributed
358363
/// uniformly (row % 100 < nullPct). Non-null values are sequential integers
359-
/// cast to the native type.
360-
VectorPtr makeColumn(const TypePtr& type, int32_t numRows, int32_t nullPct) {
364+
/// cast to the native type. VARCHAR values alternate by row parity between
365+
/// inline and non-inline strings; `vectorIndex` only shifts the fill character.
366+
/// NOTE(review): a 1-row constant base uses row 0, so it looks always inline.
367+
VectorPtr makeColumn(
368+
const TypePtr& type,
369+
int32_t numRows,
370+
int32_t nullPct,
371+
int32_t vectorIndex = 0) {
361372
std::function<bool(vector_size_t)> isNull;
362373
if (nullPct == 100) {
363374
isNull = [](auto) { return true; };
@@ -400,6 +411,17 @@ class ExchangeBenchmark : public VectorTestBase {
400411
[](auto row) { return static_cast<int128_t>(row); },
401412
isNull,
402413
type);
414+
case TypeKind::VARCHAR:
415+
return makeFlatVector<std::string>(
416+
numRows,
417+
[vectorIndex](auto row) {
418+
return std::string(
419+
row % 2 == 0 ? StringView::kInlineSize - 2
420+
: StringView::kInlineSize + 8,
421+
static_cast<char>('a' + ((row + vectorIndex) % 26)));
422+
},
423+
isNull,
424+
type);
403425
default:
404426
VELOX_NYI(
405427
"makeColumn does not support complex type {} yet",
@@ -415,7 +437,7 @@ class ExchangeBenchmark : public VectorTestBase {
415437
const auto baseNullPct =
416438
nullPct > 0 && (vectorIndex % 100) < nullPct ? 100 : 0;
417439
return BaseVector::wrapInConstant(
418-
numRows, 0, makeColumn(type, 1, baseNullPct));
440+
numRows, 0, makeColumn(type, 1, baseNullPct, vectorIndex));
419441
}
420442

421443
/// Generates input batches for the exchange benchmark.
@@ -450,7 +472,7 @@ class ExchangeBenchmark : public VectorTestBase {
450472
type->childAt(col), rowsPerVector, nullPct, i));
451473
} else {
452474
children.push_back(
453-
makeColumn(type->childAt(col), rowsPerVector, nullPct));
475+
makeColumn(type->childAt(col), rowsPerVector, nullPct, i));
454476
}
455477
}
456478
auto vector = makeRowVector(type->names(), children);
@@ -994,6 +1016,35 @@ EXCHANGE_BENCHMARK_DICTIONARY_CASE(
9941016
EXCHANGE_BENCHMARK_DICTIONARY_CASE(
9951017
10K_Double_col16,
9961018
makeInputSpec(SimpleColType::kDouble, 16));
1019+
EXCHANGE_BENCHMARK_CASE(
1020+
10K_Varchar_col1,
1021+
makeInputSpec(SimpleColType::kVarchar, 1));
1022+
EXCHANGE_BENCHMARK_CASE(
1023+
10K_Varchar_col4,
1024+
makeInputSpec(SimpleColType::kVarchar, 4));
1025+
EXCHANGE_BENCHMARK_CASE(
1026+
10K_Varchar_col16,
1027+
makeInputSpec(SimpleColType::kVarchar, 16));
1028+
EXCHANGE_BENCHMARK_CONSTANT_CASE(
1029+
10K_Varchar_col1,
1030+
makeInputSpec(SimpleColType::kVarchar, 1));
1031+
EXCHANGE_BENCHMARK_CONSTANT_CASE(
1032+
10K_Varchar_col4,
1033+
makeInputSpec(SimpleColType::kVarchar, 4));
1034+
EXCHANGE_BENCHMARK_CONSTANT_CASE(
1035+
10K_Varchar_col16,
1036+
makeInputSpec(SimpleColType::kVarchar, 16));
1037+
EXCHANGE_BENCHMARK_DICTIONARY_CASE(
1038+
10K_Varchar_col1,
1039+
makeInputSpec(SimpleColType::kVarchar, 1));
1040+
EXCHANGE_BENCHMARK_DICTIONARY_CASE(
1041+
10K_Varchar_col4,
1042+
makeInputSpec(SimpleColType::kVarchar, 4));
1043+
1044+
// Temporarily disable this benchmark because of large memory usage.
1045+
// EXCHANGE_BENCHMARK_DICTIONARY_CASE(
1046+
// 10K_Varchar_col16,
1047+
// makeInputSpec(SimpleColType::kVarchar, 16));
9971048

9981049
// The complex type benchmarks are temporarily disabled.
9991050
// EXCHANGE_BENCHMARK_CASE(Deep10K, makeInputSpec(ExchangeInputKind::kDeep10K));

0 commit comments

Comments
 (0)