diff --git a/cpp/meson.build b/cpp/meson.build
index f4d006d31f450..d8953bf130e23 100644
--- a/cpp/meson.build
+++ b/cpp/meson.build
@@ -59,14 +59,21 @@ endif
 needs_benchmarks = get_option('benchmarks').enabled()
 needs_compute = get_option('compute').enabled()
 needs_csv = get_option('csv').enabled()
+needs_dataset = get_option('dataset').enabled()
 needs_azure = get_option('azure').enabled()
 needs_gcs = get_option('gcs').enabled()
 needs_hdfs = get_option('hdfs').enabled()
 needs_s3 = get_option('s3').enabled()
-needs_filesystem = get_option('filesystem').enabled() or needs_azure or needs_gcs or needs_hdfs or needs_s3
+needs_filesystem = (get_option('filesystem').enabled()
+or needs_azure
+or needs_dataset
+or needs_gcs
+or needs_hdfs
+or needs_s3
+)
 needs_integration = get_option('integration').enabled()
 needs_tests = get_option('tests').enabled()
-needs_acero = get_option('acero').enabled()
+needs_acero = get_option('acero').enabled() or needs_dataset
 needs_ipc = get_option('ipc').enabled() or needs_tests or needs_acero or needs_benchmarks
 needs_fuzzing = get_option('fuzzing').enabled()
 needs_testing = (get_option('testing').enabled()
diff --git a/cpp/meson.options b/cpp/meson.options
index a3a969b8bbce3..1fdd97d0d083d 100644
--- a/cpp/meson.options
+++ b/cpp/meson.options
@@ -27,6 +27,7 @@ option('brotli', type: 'feature', description: 'Build with Brotli compression')
 option('bz2', type: 'feature', description: 'Build with BZ2 compression')
 option('compute', type: 'feature', description: 'Build all Arrow Compute kernels')
 option('csv', type: 'feature', description: 'Build the Arrow CSV Parser Module')
+option('dataset', type: 'feature', description: 'Build the Arrow Dataset Modules')
 option('filesystem', type: 'feature', description: 'Build the Arrow Filesystem Layer')
 
 option('fuzzing', type: 'feature', description: 'Build Arrow Fuzzing executables')
diff --git a/cpp/src/arrow/acero/meson.build b/cpp/src/arrow/acero/meson.build
index 6029610eb30f2..adb20369a4209 100644
--- a/cpp/src/arrow/acero/meson.build
+++ b/cpp/src/arrow/acero/meson.build
@@ -85,7 +85,7 @@ arrow_acero_lib = library(
 
 arrow_acero_dep = declare_dependency(link_with: [arrow_acero_lib])
 
-arrow_acero_testing_sources = ['test_nodes.cc', 'test_util_internal.cc'] + arrow_compute_testing_srcs
+arrow_acero_testing_sources = files('test_nodes.cc', 'test_util_internal.cc') + arrow_compute_testing_srcs
 
 arrow_acero_tests = {
     'plan-test': {'sources': ['plan_test.cc', 'test_nodes_test.cc']},
diff --git a/cpp/src/arrow/dataset/meson.build b/cpp/src/arrow/dataset/meson.build
new file mode 100644
index 0000000000000..d82c516644a2c
--- /dev/null
+++ b/cpp/src/arrow/dataset/meson.build
@@ -0,0 +1,150 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Public headers, installed into the 'arrow/dataset' include subdirectory.
+install_headers(
+    [
+        'api.h',
+        'dataset.h',
+        'dataset_writer.h',
+        'discovery.h',
+        'file_base.h',
+        'file_csv.h',
+        'file_ipc.h',
+        'file_json.h',
+        'file_orc.h',
+        'file_parquet.h',
+        'parquet_encryption_config.h',
+        'partition.h',
+        'plan.h',
+        'projector.h',
+        'scanner.h',
+        'type_fwd.h',
+        'visibility.h',
+    ],
+    subdir: 'arrow/dataset',
+)
+
+# Sources that are always compiled into the dataset library; format-specific
+# readers are appended below when the matching feature is enabled.
+arrow_dataset_srcs = [
+    'dataset.cc',
+    'dataset_writer.cc',
+    'discovery.cc',
+    'file_base.cc',
+    'file_ipc.cc',
+    'partition.cc',
+    'plan.cc',
+    'projector.cc',
+    'scanner.cc',
+    'scan_node.cc',
+]
+
+if needs_csv
+    arrow_dataset_srcs += ['file_csv.cc']
+endif
+
+if needs_json
+    arrow_dataset_srcs += ['file_json.cc']
+endif
+
+# requires https://github.com/apache/arrow/pull/46409
+#if needs_orc
+#    arrow_dataset_srcs += ['file_orc.cc']
+#endif
+
+# requires https://github.com/apache/arrow/issues/46410
+# if needs_parquet
+#     arrow_dataset_srcs += ['file_parquet.cc']
+# endif
+
+arrow_dataset_lib = library(
+    'arrow-dataset',
+    sources: arrow_dataset_srcs,
+    dependencies: [arrow_dep, arrow_acero_dep],
+)
+
+# Helper sources added to every dataset test executable in the loop below.
+arrow_dataset_testing_srcs = files('test_util_internal.cc')
+
+# Test name -> settings: 'sources' is required, 'dependencies' is optional.
+arrow_dataset_tests = {
+    'dataset-test': {'sources': ['dataset_test.cc']},
+    'dataset-writer-test': {'sources': ['dataset_writer_test.cc']},
+    'discovery-test': {'sources': ['discovery_test.cc']},
+    'file-ipc-test': {'sources': ['file_ipc_test.cc']},
+    'file-test': {'sources': ['file_test.cc'] + arrow_acero_testing_sources},
+    'partition-test': {'sources': ['partition_test.cc']},
+    'scanner-test': {'sources': ['scanner_test.cc']},
+    'subtree-test': {'sources': ['subtree_test.cc']},
+    'write-node-test': {'sources': ['write_node_test.cc']},
+}
+
+if needs_csv
+    arrow_dataset_tests += {'file-csv-test': {'sources': ['file_csv_test.cc']}}
+endif
+
+if needs_json
+    arrow_dataset_tests += {
+        'file-json-test': {
+            'sources': ['file_json_test.cc'],
+            'dependencies': [rapidjson_dep],
+        },
+    }
+endif
+
+
+# requires https://github.com/apache/arrow/pull/46409
+#if needs_orc
+#    arrow_dataset_tests += {'file-orc-test': {'sources': ['file_orc_test.cc']}}
+#endif
+
+# requires https://github.com/apache/arrow/issues/46410
+# if needs_parquet
+#     arrow_dataset_tests += {'file-parquet-test': {'sources': ['file_parquet_test.cc']}}
+#     if needs_parquet_encryption
+#         ...
+#     endif
+# endif
+
+foreach key, val : arrow_dataset_tests
+    exc = executable(
+        'arrow-dataset-@0@'.format(key),
+        sources: val['sources'] + arrow_dataset_testing_srcs,
+        dependencies: [
+            arrow_acero_dep,
+            arrow_test_dep,
+            val.get('dependencies', []),
+        ],
+        link_with: arrow_dataset_lib,
+    )
+    test('arrow-dataset-@0@'.format(key), exc)
+endforeach
+
+arrow_dataset_benchmarks = ['file_benchmark', 'scanner_benchmark']
+
+foreach arrow_dataset_benchmark : arrow_dataset_benchmarks
+    benchmark_name = 'arrow-dataset-@0@'.format(arrow_dataset_benchmark.replace('_', '-'))
+    exc = executable(
+        benchmark_name,
+        sources: '@0@.cc'.format(arrow_dataset_benchmark),
+        dependencies: [arrow_acero_dep, arrow_benchmark_dep, gmock_dep],
+        link_with: arrow_dataset_lib,
+    )
+    # Register the executable so that `meson test --benchmark` can run it.
+    benchmark(benchmark_name, exc)
+endforeach
diff --git a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build
index 7dc9994b6bf10..b4bc7bf1b2d10 100644
--- a/cpp/src/arrow/meson.build
+++ b/cpp/src/arrow/meson.build
@@ -717,6 +717,10 @@ if needs_acero
     subdir('acero')
 endif
 
+if needs_dataset
+    subdir('dataset')
+endif
+
 if needs_filesystem
     subdir('filesystem')
 endif