Skip to content

Commit ea4cbf4

Browse files
Merge remote-tracking branch 'upstream/main' into apachegh-43683-pandas-string-dtype
2 parents 70a2c3c + 3752109 commit ea4cbf4

File tree

5 files changed

+63
-11
lines changed

5 files changed

+63
-11
lines changed

ci/scripts/python_wheel_windows_build.bat

+28-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@
2020
echo "Building windows wheel..."
2121

2222
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
23+
@echo on
24+
25+
@REM Install a more recent msvcp140.dll in C:\Windows\System32
26+
choco install -r -y --no-progress vcredist140
27+
choco upgrade -r -y --no-progress vcredist140
28+
dir C:\Windows\System32\msvcp140.dll
2329

2430
echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ==="
2531
del /s /q C:\arrow-build
@@ -121,7 +127,27 @@ set ARROW_HOME=C:\arrow-dist
121127
set CMAKE_PREFIX_PATH=C:\arrow-dist
122128

123129
pushd C:\arrow\python
124-
@REM bundle the msvc runtime
125-
cp "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Redist\MSVC\14.28.29325\x64\Microsoft.VC142.CRT\msvcp140.dll" pyarrow\
130+
131+
@REM Bundle the C++ runtime
132+
cp C:\Windows\System32\msvcp140.dll pyarrow\
133+
134+
@REM Build wheel
126135
python setup.py bdist_wheel || exit /B 1
136+
137+
@REM Repair the wheel with delvewheel
138+
@REM
139+
@REM Since we bundled the Arrow C++ libraries ourselves, we only need to
140+
@REM mangle msvcp140.dll so as to avoid ABI issues when msvcp140.dll is
141+
@REM required by multiple Python libraries in the same process.
142+
@REM
143+
@REM For now this requires a custom version of delvewheel:
144+
@REM https://github.com/adang1345/delvewheel/pull/59
145+
pip install https://github.com/pitrou/delvewheel/archive/refs/heads/fixes-for-arrow.zip || exit /B 1
146+
147+
for /f %%i in ('dir dist\pyarrow-*.whl /B') do (set WHEEL_NAME=%cd%\dist\%%i) || exit /B 1
148+
echo "Wheel name: %WHEEL_NAME%"
149+
150+
delvewheel repair -vv --mangle-only=msvcp140.dll --no-patch ^
151+
-w repaired_wheels %WHEEL_NAME% || exit /B 1
152+
127153
popd

ci/scripts/python_wheel_windows_test.bat

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ set PYTHON_CMD=py -%PYTHON%
4848
%PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1
4949

5050
@REM Install the built wheels
51-
%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1
51+
%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\repaired_wheels pyarrow || exit /B 1
5252

5353
@REM Test that the modules are importable
5454
%PYTHON_CMD% -c "import pyarrow" || exit /B 1
@@ -65,7 +65,7 @@ set PYTHON_CMD=py -%PYTHON%
6565
%PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1
6666

6767
@REM Validate wheel contents
68-
%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\dist || exit /B 1
68+
%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1
6969

7070
@rem Download IANA Timezone Database for ORC C++
7171
curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B

cpp/src/parquet/arrow/arrow_schema_test.cc

+21
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,27 @@ TEST_F(TestConvertParquetSchema, IllegalParquetNestedSchema) {
832832
Invalid, testing::HasSubstr("LIST-annotated groups must not be repeated."),
833833
ConvertSchema(parquet_fields));
834834
}
835+
// List<List<>>: outer list is two-level encoding, inner list is empty.
836+
//
837+
// optional group my_list (LIST) {
838+
// repeated group array (LIST) {
839+
// repeated group list {
840+
// }
841+
// }
842+
// }
843+
{
844+
auto list = GroupNode::Make("list", Repetition::REPEATED, {});
845+
auto array =
846+
GroupNode::Make("array", Repetition::REPEATED, {list}, ConvertedType::LIST);
847+
std::vector<NodePtr> parquet_fields;
848+
parquet_fields.push_back(
849+
GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, ConvertedType::LIST));
850+
851+
EXPECT_RAISES_WITH_MESSAGE_THAT(
852+
Invalid,
853+
testing::HasSubstr("LIST-annotated groups must have at least one child."),
854+
ConvertSchema(parquet_fields));
855+
}
835856
}
836857

837858
Status ArrowSchemaToParquetMetadata(std::shared_ptr<::arrow::Schema>& arrow_schema,

cpp/src/parquet/arrow/schema.cc

+8-3
Original file line numberDiff line numberDiff line change
@@ -676,9 +676,14 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
676676
return Status::Invalid("Group with one repeated child must be LIST-annotated.");
677677
}
678678
// LIST-annotated group with three-level encoding cannot be repeated.
679-
if (repeated_field->is_group() &&
680-
!static_cast<const GroupNode&>(*repeated_field).field(0)->is_repeated()) {
681-
return Status::Invalid("LIST-annotated groups must not be repeated.");
679+
if (repeated_field->is_group()) {
680+
auto& repeated_group_field = static_cast<const GroupNode&>(*repeated_field);
681+
if (repeated_group_field.field_count() == 0) {
682+
return Status::Invalid("LIST-annotated groups must have at least one child.");
683+
}
684+
if (!repeated_group_field.field(0)->is_repeated()) {
685+
return Status::Invalid("LIST-annotated groups must not be repeated.");
686+
}
682687
}
683688
RETURN_NOT_OK(
684689
NodeToSchemaField(*repeated_field, current_levels, ctx, out, child_field));

dev/tasks/python-wheels/github.windows.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -63,17 +63,17 @@ jobs:
6363
- uses: actions/upload-artifact@v4
6464
with:
6565
name: wheel
66-
path: arrow/python/dist/*.whl
66+
path: arrow/python/repaired_wheels/*.whl
6767

6868
- name: Test wheel
6969
shell: cmd
7070
run: |
7171
cd arrow
7272
archery docker run python-wheel-windows-test
7373
74-
{{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
75-
{{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
76-
{{ macros.github_upload_wheel_scientific_python("arrow/python/dist/*.whl")|indent }}
74+
{{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
75+
{{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
76+
{{ macros.github_upload_wheel_scientific_python("arrow/python/repaired_wheels/*.whl")|indent }}
7777

7878
{% if arrow.is_default_branch() %}
7979
- name: Push Docker Image

0 commit comments

Comments
 (0)