Skip to content

Commit 84ce20f

Browse files
committed
GH-46411: [C++] Add dataset option to Meson configuration
1 parent 5240670 commit 84ce20f

File tree

5 files changed

+159
-3
lines changed

5 files changed

+159
-3
lines changed

cpp/meson.build

+9-2
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,21 @@ endif
5959
needs_benchmarks = get_option('benchmarks').enabled()
6060
needs_compute = get_option('compute').enabled()
6161
needs_csv = get_option('csv').enabled()
62+
needs_dataset = get_option('dataset').enabled()
6263
needs_azure = get_option('azure').enabled()
6364
needs_gcs = get_option('gcs').enabled()
6465
needs_hdfs = get_option('hdfs').enabled()
6566
needs_s3 = get_option('s3').enabled()
66-
needs_filesystem = get_option('filesystem').enabled() or needs_azure or needs_gcs or needs_hdfs or needs_s3
67+
needs_filesystem = (get_option('filesystem').enabled()
68+
or needs_azure
69+
or needs_dataset
70+
or needs_gcs
71+
or needs_hdfs
72+
or needs_s3
73+
)
6774
needs_integration = get_option('integration').enabled()
6875
needs_tests = get_option('tests').enabled()
69-
needs_acero = get_option('acero').enabled()
76+
needs_acero = get_option('acero').enabled() or needs_dataset
7077
needs_ipc = get_option('ipc').enabled() or needs_tests or needs_acero or needs_benchmarks
7178
needs_fuzzing = get_option('fuzzing').enabled()
7279
needs_testing = (get_option('testing').enabled()

cpp/meson.options

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ option('brotli', type: 'feature', description: 'Build with Brotli compression')
2727
option('bz2', type: 'feature', description: 'Build with BZ2 compression')
2828
option('compute', type: 'feature', description: 'Build all Arrow Compute kernels')
2929
option('csv', type: 'feature', description: 'Build the Arrow CSV Parser Module')
30+
option('dataset', type: 'feature', description: 'Build the Arrow Dataset Module')
3031
option('filesystem', type: 'feature', description: 'Build the Arrow Filesystem Layer')
3132
option('fuzzing', type: 'feature', description: 'Build Arrow Fuzzing executables')
3233

cpp/src/arrow/acero/meson.build

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ arrow_acero_lib = library(
8585

8686
arrow_acero_dep = declare_dependency(link_with: [arrow_acero_lib])
8787

88-
arrow_acero_testing_sources = ['test_nodes.cc', 'test_util_internal.cc'] + arrow_compute_testing_srcs
88+
arrow_acero_testing_sources = files('test_nodes.cc', 'test_util_internal.cc') + arrow_compute_testing_srcs
8989

9090
arrow_acero_tests = {
9191
'plan-test': {'sources': ['plan_test.cc', 'test_nodes_test.cc']},

cpp/src/arrow/dataset/meson.build

+144
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
install_headers(
19+
[
20+
'api.h',
21+
'dataset.h',
22+
'dataset_writer.h',
23+
'discovery.h',
24+
'file_base.h',
25+
'file_csv.h',
26+
'file_ipc.h',
27+
'file_json.h',
28+
'file_orc.h',
29+
'file_parquet.h',
30+
'parquet_encryption_config.h',
31+
'partition.h',
32+
'plan.h',
33+
'projector.h',
34+
'scanner.h',
35+
'type_fwd.h',
36+
'visibility.h',
37+
],
38+
subdir: 'arrow/dataset',
39+
)
40+
41+
arrow_dataset_srcs = [
42+
'dataset.cc',
43+
'dataset_writer.cc',
44+
'discovery.cc',
45+
'file_base.cc',
46+
'file_ipc.cc',
47+
'partition.cc',
48+
'plan.cc',
49+
'projector.cc',
50+
'scanner.cc',
51+
'scan_node.cc',
52+
]
53+
54+
if needs_csv
55+
arrow_dataset_srcs += ['file_csv.cc']
56+
endif
57+
58+
if needs_json
59+
arrow_dataset_srcs += ['file_json.cc']
60+
endif
61+
62+
# requires https://github.com/apache/arrow/pull/46409
63+
#if needs_orc
64+
# arrow_dataset_srcs += ['file_orc.cc']
65+
#endif
66+
67+
# requires https://github.com/apache/arrow/issues/46410
68+
# if needs_parquet
69+
# arrow_dataset_srcs += ['file_parquet.cc']
70+
# endif
71+
72+
arrow_dataset_lib = library(
73+
'arrow-dataset',
74+
sources: arrow_dataset_srcs,
75+
dependencies: [arrow_dep, arrow_acero_dep],
76+
)
77+
78+
arrow_dataset_testing_srcs = files('test_util_internal.cc')
79+
80+
arrow_dataset_tests = {
81+
'dataset-test': {'sources': ['dataset_test.cc']},
82+
'dataset-writer-test': {'sources': ['dataset_writer_test.cc']},
83+
'discovery-test': {'sources': ['discovery_test.cc']},
84+
'file-ipc-test': {'sources': ['file_ipc_test.cc']},
85+
'file-test': {'sources': ['file_test.cc'] + arrow_acero_testing_sources},
86+
'partition-test': {'sources': ['partition_test.cc']},
87+
'scanner-test': {'sources': ['scanner_test.cc']},
88+
'subtree-test': {'sources': ['subtree_test.cc']},
89+
'write-node-test': {'sources': ['write_node_test.cc']},
90+
}
91+
arrow_dataset_test_deps = []
92+
93+
if needs_csv
94+
arrow_dataset_tests += {'file-csv-test': {'sources': ['file_csv_test.cc']}}
95+
endif
96+
97+
if needs_json
98+
arrow_dataset_tests += {
99+
'file-json-test': {
100+
'sources': ['file_json_test.cc'],
101+
'dependencies': [rapidjson_dep],
102+
},
103+
}
104+
endif
105+
106+
107+
# requires https://github.com/apache/arrow/pull/46409
108+
#if needs_orc
109+
# arrow_dataset_tests += {'file-orc-test': {'sources': ['file_orc_test.cc']}}
110+
# arrow_dataset_test_deps += orc_dep
111+
#endif
112+
113+
# requires https://github.com/apache/arrow/issues/46410
114+
# if needs_parquet
115+
# arrow_dataset_tests += {'file-parquet-test': {'sources': ['file_parquet_test.cc']}}
116+
# if needs_parquet_encryption
117+
# ...
118+
# endif
119+
# endif
120+
121+
foreach key, val : arrow_dataset_tests
122+
exc = executable(
123+
'arrow-dataset-@0@'.format(key),
124+
sources: val['sources'] + arrow_dataset_testing_srcs,
125+
dependencies: [
126+
arrow_acero_dep,
127+
arrow_test_dep,
128+
val.get('dependencies', []),
129+
],
130+
link_with: arrow_dataset_lib,
131+
)
132+
test('arrow-dataset-@0@'.format(key), exc)
133+
endforeach
134+
135+
arrow_dataset_benchmarks = ['file_benchmark', 'scanner_benchmark']
136+
137+
foreach arrow_dataset_benchmark : arrow_dataset_benchmarks
138+
exc = executable(
139+
'arrow-dataset-@0@'.format(arrow_dataset_benchmark.replace('_', '-')),
140+
sources: '@0@.cc'.format(arrow_dataset_benchmark),
141+
dependencies: [arrow_acero_dep, arrow_benchmark_dep, gmock_dep],
142+
link_with: arrow_dataset_lib,
143+
)
144+
endforeach

cpp/src/arrow/meson.build

+4
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,10 @@ if needs_acero
717717
subdir('acero')
718718
endif
719719

720+
if needs_dataset
721+
subdir('dataset')
722+
endif
723+
720724
if needs_filesystem
721725
subdir('filesystem')
722726
endif

0 commit comments

Comments
 (0)