Skip to content

Commit 24e6ab9

Browse files
jinchengchenghh authored and meta-codesync[bot] committed
fix(cudf): Fix complex data type name in format conversion and add tests(Part1) (facebookincubator#16818)
Summary: Fixes facebookincubator#16786. MAP types are not supported yet. Pull Request resolved: facebookincubator#16818. Reviewed By: bikramSingh91. Differential Revision: D98168787. Pulled By: kgpai. fbshipit-source-id: 9b35087baef05b5790d462fe08c286a4fbea40c8
1 parent d92b900 commit 24e6ab9

File tree

5 files changed

+518
-12
lines changed

5 files changed

+518
-12
lines changed

velox/experimental/cudf/connectors/hive/CudfHiveDataSource.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,7 @@ std::optional<RowVectorPtr> CudfHiveDataSource::next(
335335
? std::make_shared<CudfVector>(
336336
pool_, outputType_, nRows, std::move(cudfTable), stream_)
337337
: with_arrow::toVeloxColumn(
338-
cudfTable->view(),
339-
pool_,
340-
outputType_->names(),
341-
stream_,
342-
get_temp_mr());
338+
cudfTable->view(), pool_, outputType_, stream_, get_temp_mr());
343339
stream_.synchronize();
344340

345341
// Check if conversion yielded a nullptr

velox/experimental/cudf/exec/VeloxCudfInterop.cpp

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,38 @@ getMetadata(Iterator begin, Iterator end, const std::string& namePrefix) {
246246
return metadata;
247247
}
248248

249+
// Builds the cudf::column_metadata for a single column named `name`,
// recursing into `type` so that nested children are named as well.
//   - ROW:   one child entry per field, named with rowType->nameOf(i).
//   - ARRAY: two child entries, `name + "_offsets"` followed by the metadata
//            of the element type under the fixed name "element".
//   - Any other kind: no child entries are added by this function (there is
//     no MAP branch; the commit summary states map is not supported).
// Returns the populated metadata by value.
250+
cudf::column_metadata getMetadataWithName(
251+
const facebook::velox::TypePtr& type,
252+
const std::string& name) {
253+
cudf::column_metadata meta(name);
254+
if (type->kind() == facebook::velox::TypeKind::ROW) {
255+
// The cast result is dereferenced without a null check; it relies on the
// kind() == ROW test above.
auto rowType =
256+
std::dynamic_pointer_cast<const facebook::velox::RowType>(type);
257+
for (size_t i = 0; i < rowType->size(); ++i) {
258+
meta.children_meta.push_back(
259+
getMetadataWithName(rowType->childAt(i), rowType->nameOf(i)));
260+
}
261+
} else if (type->kind() == facebook::velox::TypeKind::ARRAY) {
262+
// cudf::lists_column_view::child_column_index is 1, the first metadata is
263+
// offsets
264+
meta.children_meta.emplace_back(cudf::column_metadata(name + "_offsets"));
265+
// The element metadata is appended second, after the offsets entry.
meta.children_meta.push_back(
266+
getMetadataWithName(type->childAt(0), "element"));
267+
}
268+
return meta;
269+
}
270+
271+
// Builds the metadata for every top-level field of `rowType`, in field order.
// Each entry comes from the (type, name) overload, called with the field's
// type and its name from rowType->nameOf(i). Returns one entry per field.
std::vector<cudf::column_metadata> getMetadataWithName(
272+
const RowTypePtr& rowType) {
273+
std::vector<cudf::column_metadata> metadata;
274+
for (size_t i = 0; i < rowType->size(); ++i) {
275+
metadata.push_back(
276+
getMetadataWithName(rowType->childAt(i), rowType->nameOf(i)));
277+
}
278+
return metadata;
279+
}
280+
249281
} // namespace
250282

251283
facebook::velox::RowVectorPtr toVeloxColumn(
@@ -269,18 +301,23 @@ facebook::velox::RowVectorPtr toVeloxColumn(
269301
return toVeloxColumn(table, pool, metadata, &outputType, stream, mr);
270302
}
271303

304+
// Overload that takes the Velox output type instead of a flat list of column
// names (in this diff the `type` parameter replaces `columnNames`).
// `type` must be a RowType: it is downcast with dynamic_pointer_cast and the
// result is checked with VELOX_CHECK_NOT_NULL. The metadata is generated from
// that RowType via getMetadataWithName, then the call is forwarded to the
// six-argument toVeloxColumn overload together with a pointer to `rowType`
// (the removed body passed nullptr in that position).
272305
RowVectorPtr toVeloxColumn(
273306
const cudf::table_view& table,
274307
memory::MemoryPool* pool,
275-
const std::vector<std::string>& columnNames,
308+
const TypePtr& type,
276309
rmm::cuda_stream_view stream,
277310
rmm::device_async_resource_ref mr) {
278-
std::vector<cudf::column_metadata> metadata;
279-
for (auto name : columnNames) {
280-
metadata.emplace_back(cudf::column_metadata(name));
281-
}
282-
return toVeloxColumn(table, pool, metadata, nullptr, stream, mr);
311+
// Recursively generate metadata using Velox type names for all columns.
312+
// This assumes 'type' is a RowType and its children match the cudf table
313+
// columns.
314+
auto rowType =
315+
std::dynamic_pointer_cast<const facebook::velox::RowType>(type);
316+
VELOX_CHECK_NOT_NULL(rowType);
317+
auto metadata = getMetadataWithName(rowType);
318+
return toVeloxColumn(table, pool, metadata, &rowType, stream, mr);
283319
}
284320

285321
} // namespace with_arrow
322+
286323
} // namespace facebook::velox::cudf_velox

velox/experimental/cudf/exec/VeloxCudfInterop.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ facebook::velox::RowVectorPtr toVeloxColumn(
4545
rmm::cuda_stream_view stream,
4646
rmm::device_async_resource_ref mr);
4747

48+
// Accepts a Velox TypePtr for recursive metadata construction.
4849
facebook::velox::RowVectorPtr toVeloxColumn(
4950
const cudf::table_view& table,
5051
facebook::velox::memory::MemoryPool* pool,
@@ -56,7 +57,7 @@ facebook::velox::RowVectorPtr toVeloxColumn(
5657
// Declaration of the overload that receives the Velox type of the result.
// In this diff the `type` parameter replaces the former `columnNames`
// parameter. The definition in VeloxCudfInterop.cpp requires `type` to be a
// RowType and derives the column metadata from it.
facebook::velox::RowVectorPtr toVeloxColumn(
5758
const cudf::table_view& table,
5859
facebook::velox::memory::MemoryPool* pool,
59-
const std::vector<std::string>& columnNames,
60+
const TypePtr& type,
6061
rmm::cuda_stream_view stream,
6162
rmm::device_async_resource_ref mr);
6263

velox/experimental/cudf/tests/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ add_executable(
3838
# add_executable(velox_cudf_table_write_test Main.cpp TableWriteTest.cpp)
3939
add_executable(velox_cudf_topn_test Main.cpp TopNTest.cpp)
4040
add_executable(velox_cudf_batch_concat_test Main.cpp BatchConcatTest.cpp)
41+
# Interop test binary, built from Main.cpp and InteropTest.cpp.
add_executable(velox_cudf_interop_test Main.cpp InteropTest.cpp)
4142

4243
add_test(
4344
NAME velox_cudf_aggregation_test
@@ -130,6 +131,12 @@ add_test(
130131
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
131132
)
132133

134+
# Register the interop test binary with CTest under the same name as the
# executable; it runs with this source directory as its working directory.
add_test(
135+
NAME velox_cudf_interop_test
136+
COMMAND velox_cudf_interop_test
137+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
138+
)
139+
133140
add_test(
134141
NAME velox_cudf_aggregation_selection_test
135142
COMMAND velox_cudf_aggregation_selection_test
@@ -179,6 +186,7 @@ set_tests_properties(
179186
PROPERTIES LABELS cuda_driver TIMEOUT 3000
180187
)
181188
set_tests_properties(velox_cudf_batch_concat_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
189+
# Same label and timeout as velox_cudf_batch_concat_test.
set_tests_properties(velox_cudf_interop_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
182190

183191
target_link_libraries(
184192
velox_cudf_aggregation_test
@@ -376,6 +384,16 @@ target_link_libraries(
376384
velox_cudf_exec
377385
)
378386

387+
# Libraries linked into the interop test executable.
# NOTE(review): gtest_main is linked while Main.cpp is also compiled into this
# target - confirm which of the two is meant to provide main().
target_link_libraries(
388+
velox_cudf_interop_test
389+
velox_cudf_exec
390+
velox_exec
391+
velox_exec_test_lib
392+
velox_test_util
393+
gtest
394+
gtest_main
395+
)
396+
379397
add_subdirectory(utils)
380398

381399
if(${VELOX_ENABLE_SPARK_FUNCTIONS})

0 commit comments

Comments
 (0)