Skip to content

Commit 9db7b0b

Browse files
Merge remote-tracking branch 'upstream/main' into pandas-metadata-field-name
2 parents 251cd97 + dfb6d0e commit 9db7b0b

20 files changed

+173
-45
lines changed

.github/workflows/dev_pr/helpers.js

+1-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ const https = require('https');
2020
/**
2121
* Given the title of a PullRequest return the Issue
2222
*
23-
* @param {String} title
23+
* @param {String} title
2424
* @returns {Issue} or null if no issue detected.
2525
*
2626
* @typedef {Object} Issue
@@ -62,6 +62,5 @@ function detectIssue(title) {
6262

6363
module.exports = {
6464
detectIssue,
65-
getJiraInfo,
6665
getGitHubInfo
6766
};

cpp/cmake_modules/ThirdpartyToolchain.cmake

+9-4
Original file line numberDiff line numberDiff line change
@@ -2061,10 +2061,14 @@ macro(build_substrait)
20612061

20622062
# Missing dll-interface:
20632063
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "/wd4251")
2064-
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
2065-
"Clang")
2066-
# Protobuf generated files trigger some errors on CLANG TSAN builds
2067-
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32")
2064+
else()
2065+
# GH-44954: silence [[deprecated]] declarations in protobuf-generated code
2066+
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-deprecated")
2067+
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
2068+
"Clang")
2069+
# Protobuf generated files trigger some errors on CLANG TSAN builds
2070+
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32")
2071+
endif()
20682072
endif()
20692073

20702074
set(SUBSTRAIT_SOURCES)
@@ -2116,6 +2120,7 @@ macro(build_substrait)
21162120

21172121
add_library(substrait STATIC ${SUBSTRAIT_SOURCES})
21182122
set_target_properties(substrait PROPERTIES POSITION_INDEPENDENT_CODE ON)
2123+
target_compile_options(substrait PRIVATE "${SUBSTRAIT_SUPPRESSED_FLAGS}")
21192124
target_include_directories(substrait PUBLIC ${SUBSTRAIT_INCLUDES})
21202125
target_link_libraries(substrait PUBLIC ${ARROW_PROTOBUF_LIBPROTOBUF})
21212126
add_dependencies(substrait substrait_gen)

cpp/src/arrow/engine/substrait/expression_internal.h

+4
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@
2727
#include "arrow/engine/substrait/type_fwd.h"
2828
#include "arrow/engine/substrait/visibility.h"
2929
#include "arrow/result.h"
30+
#include "arrow/util/macros.h"
3031

32+
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
33+
ARROW_SUPPRESS_DEPRECATION_WARNING
3134
#include "substrait/algebra.pb.h" // IWYU pragma: export
35+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
3236

3337
namespace arrow {
3438
namespace engine {

cpp/src/arrow/engine/substrait/extended_expression_internal.h

+4
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,12 @@
2828
#include "arrow/engine/substrait/visibility.h"
2929
#include "arrow/result.h"
3030
#include "arrow/status.h"
31+
#include "arrow/util/macros.h"
3132

33+
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
34+
ARROW_SUPPRESS_DEPRECATION_WARNING
3235
#include "substrait/extended_expression.pb.h" // IWYU pragma: export
36+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
3337

3438
namespace arrow {
3539
namespace engine {

cpp/src/arrow/engine/substrait/plan_internal.h

+4
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@
2727
#include "arrow/engine/substrait/visibility.h"
2828
#include "arrow/result.h"
2929
#include "arrow/status.h"
30+
#include "arrow/util/macros.h"
3031

32+
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
33+
ARROW_SUPPRESS_DEPRECATION_WARNING
3134
#include "substrait/plan.pb.h" // IWYU pragma: export
35+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
3236

3337
namespace arrow {
3438
namespace engine {

cpp/src/arrow/engine/substrait/relation_internal.h

+4
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,12 @@
2828
#include "arrow/engine/substrait/type_fwd.h"
2929
#include "arrow/engine/substrait/visibility.h"
3030
#include "arrow/result.h"
31+
#include "arrow/util/macros.h"
3132

33+
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
34+
ARROW_SUPPRESS_DEPRECATION_WARNING
3235
#include "substrait/algebra.pb.h" // IWYU pragma: export
36+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
3337

3438
namespace arrow {
3539
namespace engine {

cpp/src/arrow/engine/substrait/test_plan_builder.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,12 @@
3131
#include "arrow/status.h"
3232
#include "arrow/table.h"
3333
#include "arrow/type_fwd.h"
34+
#include "arrow/util/macros.h"
3435

35-
#include "substrait/algebra.pb.h"
36+
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
37+
ARROW_SUPPRESS_DEPRECATION_WARNING
38+
#include "substrait/algebra.pb.h" // IWYU pragma: export
39+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
3640

3741
namespace arrow {
3842
namespace engine {

cpp/src/arrow/engine/substrait/util_internal.h

+4
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,14 @@
2424
#include "arrow/engine/substrait/visibility.h"
2525
#include "arrow/result.h"
2626
#include "arrow/util/hashing.h"
27+
#include "arrow/util/macros.h"
2728
#include "arrow/util/unreachable.h"
2829

30+
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
31+
ARROW_SUPPRESS_DEPRECATION_WARNING
2932
#include "substrait/algebra.pb.h" // IWYU pragma: export
3033
#include "substrait/plan.pb.h" // IWYU pragma: export
34+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
3135

3236
namespace arrow {
3337
namespace engine {

cpp/src/arrow/flight/sql/client.cc

+4
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "arrow/ipc/reader.h"
3232
#include "arrow/result.h"
3333
#include "arrow/util/logging.h"
34+
#include "arrow/util/macros.h"
3435

3536
namespace flight_sql_pb = arrow::flight::protocol::sql;
3637

@@ -829,6 +830,8 @@ Status FlightSqlClient::Rollback(const FlightCallOptions& options,
829830
return results->Drain();
830831
}
831832

833+
// ActionCancelQuery{Request,Result} are deprecated
834+
ARROW_SUPPRESS_DEPRECATION_WARNING
832835
::arrow::Result<CancelResult> FlightSqlClient::CancelQuery(
833836
const FlightCallOptions& options, const FlightInfo& info) {
834837
flight_sql_pb::ActionCancelQueryRequest cancel_query;
@@ -855,6 +858,7 @@ ::arrow::Result<CancelResult> FlightSqlClient::CancelQuery(
855858
}
856859
return Status::IOError("Server returned unknown result ", result.result());
857860
}
861+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
858862

859863
Status FlightSqlClient::Close() { return impl_->Close(); }
860864

cpp/src/arrow/flight/sql/protocol_internal.cc

+5
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,15 @@
1414
// KIND, either express or implied. See the License for the
1515
// specific language governing permissions and limitations
1616

17+
#include "arrow/util/macros.h"
18+
19+
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
20+
ARROW_SUPPRESS_DEPRECATION_WARNING
1721
#include "arrow/flight/sql/protocol_internal.h"
1822

1923
// NOTE(lidavidm): Normally this is forbidden, but on Windows to get
2024
// the dllexport/dllimport macro in the right places, we need to
2125
// ensure our header gets included (and Protobuf will not insert the
2226
// include for you)
2327
#include "arrow/flight/sql/FlightSql.pb.cc" // NOLINT
28+
ARROW_UNSUPPRESS_DEPRECATION_WARNING

cpp/src/arrow/flight/sql/protocol_internal.h

+3
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818

1919
// This addresses platform-specific defines, e.g. on Windows
2020
#include "arrow/flight/platform.h" // IWYU pragma: keep
21+
#include "arrow/util/macros.h"
2122

2223
// This header holds the Flight SQL definitions.
2324

2425
#include "arrow/flight/sql/visibility.h"
2526

27+
ARROW_SUPPRESS_DEPRECATION_WARNING
2628
#include "arrow/flight/sql/FlightSql.pb.h" // IWYU pragma: export
29+
ARROW_UNSUPPRESS_DEPRECATION_WARNING

cpp/src/arrow/flight/sql/server.cc

+7
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "arrow/flight/sql/sql_info_internal.h"
3232
#include "arrow/type.h"
3333
#include "arrow/util/checked_cast.h"
34+
#include "arrow/util/macros.h"
3435

3536
#define PROPERTY_TO_OPTIONAL(COMMAND, PROPERTY) \
3637
COMMAND.has_##PROPERTY() ? std::make_optional(COMMAND.PROPERTY()) : std::nullopt
@@ -337,6 +338,8 @@ arrow::Result<ActionBeginTransactionRequest> ParseActionBeginTransactionRequest(
337338
return result;
338339
}
339340

341+
// ActionCancelQueryRequest is deprecated
342+
ARROW_SUPPRESS_DEPRECATION_WARNING
340343
arrow::Result<ActionCancelQueryRequest> ParseActionCancelQueryRequest(
341344
const Action& action) {
342345
pb::sql::ActionCancelQueryRequest command;
@@ -346,6 +349,7 @@ arrow::Result<ActionCancelQueryRequest> ParseActionCancelQueryRequest(
346349
ARROW_ASSIGN_OR_RAISE(result.info, FlightInfo::Deserialize(command.info()));
347350
return result;
348351
}
352+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
349353

350354
arrow::Result<ActionCreatePreparedStatementRequest>
351355
ParseActionCreatePreparedStatementRequest(const Action& action) {
@@ -468,6 +472,8 @@ arrow::Result<Result> PackActionResult(const FlightEndpoint& endpoint) {
468472
return endpoint.SerializeToBuffer();
469473
}
470474

475+
// ActionCancelQueryResult is deprecated
476+
ARROW_SUPPRESS_DEPRECATION_WARNING
471477
arrow::Result<Result> PackActionResult(CancelResult result) {
472478
pb::sql::ActionCancelQueryResult pb_result;
473479
switch (result) {
@@ -487,6 +493,7 @@ arrow::Result<Result> PackActionResult(CancelResult result) {
487493
}
488494
return PackActionResult(pb_result);
489495
}
496+
ARROW_UNSUPPRESS_DEPRECATION_WARNING
490497

491498
arrow::Result<Result> PackActionResult(ActionCreatePreparedStatementResult result) {
492499
pb::sql::ActionCreatePreparedStatementResult pb_result;

docs/source/python/api/arrays.rst

+10
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ may expose data type-specific methods or properties.
7373
DurationArray
7474
MonthDayNanoIntervalArray
7575
Decimal128Array
76+
Decimal256Array
7677
DictionaryArray
7778
ListArray
7879
FixedSizeListArray
@@ -86,6 +87,9 @@ may expose data type-specific methods or properties.
8687
ExtensionArray
8788
FixedShapeTensorArray
8889
OpaqueArray
90+
JsonArray
91+
UuidArray
92+
Bool8Array
8993

9094
.. _api.scalar:
9195

@@ -112,6 +116,7 @@ classes may expose data type-specific methods or properties.
112116
Int16Scalar
113117
Int32Scalar
114118
Int64Scalar
119+
NullScalar
115120
UInt8Scalar
116121
UInt16Scalar
117122
UInt32Scalar
@@ -134,9 +139,11 @@ classes may expose data type-specific methods or properties.
134139
DurationScalar
135140
MonthDayNanoIntervalScalar
136141
Decimal128Scalar
142+
Decimal256Scalar
137143
DictionaryScalar
138144
RunEndEncodedScalar
139145
ListScalar
146+
FixedSizeListScalar
140147
LargeListScalar
141148
ListViewScalar
142149
LargeListViewScalar
@@ -146,3 +153,6 @@ classes may expose data type-specific methods or properties.
146153
ExtensionScalar
147154
FixedShapeTensorScalar
148155
OpaqueScalar
156+
JsonScalar
157+
UuidScalar
158+
Bool8Scalar

docs/source/python/api/datatypes.rst

+17
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,13 @@ These should be used to create Arrow data types and schemas.
6868
dictionary
6969
run_end_encoded
7070
fixed_shape_tensor
71+
union
72+
dense_union
73+
sparse_union
7174
opaque
75+
bool8
76+
uuid
77+
json_
7278
field
7379
schema
7480
from_numpy_dtype
@@ -96,13 +102,19 @@ functions above.
96102
DataType
97103
DictionaryType
98104
ListType
105+
ListViewType
106+
FixedSizeListType
99107
LargeListType
108+
LargeListViewType
100109
MapType
101110
StructType
102111
UnionType
112+
DenseUnionType
113+
SparseUnionType
103114
TimestampType
104115
Time32Type
105116
Time64Type
117+
DurationType
106118
FixedSizeBinaryType
107119
Decimal128Type
108120
Decimal256Type
@@ -115,8 +127,10 @@ Specific classes and functions for extension types.
115127
.. autosummary::
116128
:toctree: ../generated/
117129

130+
BaseExtensionType
118131
ExtensionType
119132
PyExtensionType
133+
UnknownExtensionType
120134
register_extension_type
121135
unregister_extension_type
122136

@@ -128,6 +142,9 @@ implemented by PyArrow.
128142

129143
FixedShapeTensorType
130144
OpaqueType
145+
JsonType
146+
UuidType
147+
Bool8Type
131148

132149
.. _api.types.checking:
133150
.. currentmodule:: pyarrow.types

r/R/arrow-package.R

+4-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@ supported_dplyr_methods <- list(
6262
relocate = NULL,
6363
compute = NULL,
6464
collapse = NULL,
65-
distinct = "`.keep_all = TRUE` not supported",
65+
distinct = c(
66+
"`.keep_all = TRUE` returns a non-missing value if present,",
67+
"only returning missing values if all are missing."
68+
),
6669
left_join = "the `copy` argument is ignored",
6770
right_join = "the `copy` argument is ignored",
6871
inner_join = "the `copy` argument is ignored",

r/R/dplyr-distinct.R

+18-7
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,6 @@
1818
# The following S3 methods are registered on load if dplyr is present
1919

2020
distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) {
21-
if (.keep_all == TRUE) {
22-
# TODO(ARROW-14045): the function is called "hash_one" (from ARROW-13993)
23-
# May need to call it: `summarize(x = one(x), ...)` for x in non-group cols
24-
arrow_not_supported("`distinct()` with `.keep_all = TRUE`")
25-
}
26-
2721
original_gv <- dplyr::group_vars(.data)
2822
if (length(quos(...))) {
2923
# group_by() calls mutate() if there are any expressions in ...
@@ -33,11 +27,28 @@ distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) {
3327
.data <- dplyr::group_by(.data, !!!syms(names(.data)))
3428
}
3529

36-
out <- dplyr::summarize(.data, .groups = "drop")
30+
if (isTRUE(.keep_all)) {
31+
# Note: in regular dplyr, `.keep_all = TRUE` returns the first row's value.
32+
# However, Acero's `hash_one` function prefers returning non-null values.
33+
# So, you'll get the same shape of data, but the values may differ.
34+
keeps <- names(.data)[!(names(.data) %in% .data$group_by_vars)]
35+
exprs <- lapply(keeps, function(x) call2("one", sym(x)))
36+
names(exprs) <- keeps
37+
} else {
38+
exprs <- list()
39+
}
40+
41+
out <- dplyr::summarize(.data, !!!exprs, .groups = "drop")
42+
3743
# distinct() doesn't modify group by vars, so restore the original ones
3844
if (length(original_gv)) {
3945
out$group_by_vars <- original_gv
4046
}
47+
if (isTRUE(.keep_all)) {
48+
# Also ensure the column order matches the original
49+
# summarize() will put the group_by_vars first
50+
out <- dplyr::select(out, !!!syms(names(.data)))
51+
}
4152
out
4253
}
4354

r/R/dplyr-funcs-agg.R

+7
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,13 @@ register_bindings_aggregate <- function() {
150150
options = list(skip_nulls = na.rm, min_count = 0L)
151151
)
152152
})
153+
register_binding("arrow::one", function(...) {
154+
set_agg(
155+
fun = "one",
156+
data = ensure_one_arg(list2(...), "one"),
157+
options = list()
158+
)
159+
})
153160
}
154161

155162
set_agg <- function(...) {

0 commit comments

Comments
 (0)