Skip to content

Commit 89ab6bb

Browse files
Switch to latin1 encoding for binary output for debug symbol checks in compiled objects (#309)
Co-authored-by: James Lamb <[email protected]>
1 parent 2b6cf8f commit 89ab6bb

File tree

4 files changed

+59
-4
lines changed

4 files changed

+59
-4
lines changed

Makefile

+9-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
NUMPY_WIN_DEBUG_WHL=tests/data/numpy-1.26.3-cp310-cp310-win_amd64.whl
2+
13
.PHONY: build
24
build:
35
rm -r ./dist || true
@@ -83,14 +85,19 @@ test-data-conda-dot-conda-packages:
8385
'./tests/data/*-0.tar.bz2' \
8486
'.conda'
8587

88+
$(NUMPY_WIN_DEBUG_WHL):
89+
curl \
90+
https://files.pythonhosted.org/packages/be/b0/611101990ddac767e54e2d27d1f4576ae1662cca64e2d55ef0e62558ec26/numpy-1.26.3-cp310-cp310-win_amd64.whl \
91+
-o $(NUMPY_WIN_DEBUG_WHL)
92+
8693
.PHONY: test
87-
test:
94+
test: $(NUMPY_WIN_DEBUG_WHL)
8895
pytest \
8996
--cov pydistcheck \
9097
./tests
9198

9299
.PHONY: test-local
93-
test-local:
100+
test-local: $(NUMPY_WIN_DEBUG_WHL)
94101
PYTHONPATH=src \
95102
pytest \
96103
--cov=src/pydistcheck \

src/pydistcheck/_shared_lib_utils.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
def _run_command(args: List[str]) -> str:
2020
try:
2121
stdout = subprocess.run(args, capture_output=True, check=True).stdout
22-
return stdout.decode("utf-8")
22+
# Use latin1 encoding, which can handle any byte value without data loss.
23+
# See https://github.com/jameslamb/pydistcheck/issues/206 for rationale.
24+
return stdout.decode("latin1")
2325
except subprocess.CalledProcessError:
2426
return _COMMAND_FAILED
2527
except FileNotFoundError:

tests/test_cli.py

+9
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@
4242
]
4343
TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
4444

45+
# see https://github.com/jameslamb/pydistcheck/issues/206
46+
NUMPY_WIN_DEBUG_WHL = "numpy-1.26.3-cp310-cp310-win_amd64.whl"
47+
if os.path.isfile(os.path.join(TEST_DATA_DIR, NUMPY_WIN_DEBUG_WHL)):
48+
PACKAGES_WITH_DEBUG_SYMBOLS.append(NUMPY_WIN_DEBUG_WHL)
49+
4550

4651
def _assert_log_matches_pattern(
4752
result: Result, pattern: str, num_times: int = 1
@@ -1074,6 +1079,10 @@ def test_debug_symbols_check_works(distro_file):
10741079
else:
10751080
# dsymutil works on both macOS and Linux
10761081
debug_cmd = r"'dsymutil \-s " + lib_file
1082+
elif NUMPY_WIN_DEBUG_WHL in distro_file:
1083+
# windows wheels
1084+
lib_file = r"\"numpy\.libs/libopenblas64__v0\.3\.23-293-gc2f4bdbb-gcc_10_3_0-2bde3a66a51006b2b53eb373ff767a3f\.dll\"'"
1085+
debug_cmd = r"'objdump \-\-all\-headers " + lib_file
10771086
else:
10781087
# linux wheels
10791088
debug_cmd = r"'objdump \-\-all\-headers \"lib/lib_baseballmetrics\.so\"'\."

tests/test_shared_lib_utils.py

+38-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,43 @@
1+
import subprocess
12
from unittest.mock import Mock, patch
23

3-
from pydistcheck._shared_lib_utils import _MACHO_STRIP_SYMBOL, _get_symbols
4+
from pydistcheck._shared_lib_utils import (
5+
_MACHO_STRIP_SYMBOL,
6+
_get_symbols,
7+
_run_command,
8+
)
9+
10+
11+
def test_run_command_handles_binary_output():
12+
"""Test that _run_command can handle binary output containing invalid UTF-8 bytes."""
13+
mock_output = bytes(
14+
[0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xD7, 0x77, 0x6F, 0x72, 0x6C, 0x64]
15+
) # i.e., "hello" + invalid UTF-8 + "world"
16+
17+
with patch("subprocess.run") as mock_run:
18+
mock_run.return_value = Mock(stdout=mock_output)
19+
20+
# Should not raise UnicodeDecodeError.
21+
result = _run_command(["some", "command"])
22+
# latin1 encoding should preserve all bytes.
23+
assert len(result) == len(mock_output)
24+
# Check valid parts of the string.
25+
assert "hello" in result
26+
assert "world" in result
27+
28+
29+
def test_run_command_handles_command_failure():
30+
with patch("subprocess.run") as mock_run:
31+
mock_run.side_effect = subprocess.CalledProcessError(1, ["cmd"])
32+
result = _run_command(["failing", "command"])
33+
assert result == "__command_failed__"
34+
35+
36+
def test_run_command_handles_missing_command():
37+
with patch("subprocess.run") as mock_run:
38+
mock_run.side_effect = FileNotFoundError()
39+
result = _run_command(["nonexistent"])
40+
assert result == "__tool_not_available__"
441

542

643
def test_get_symbols_filters_radr_symbol():

0 commit comments

Comments
 (0)