Skip to content

Commit d7381be

Browse files
Fix nested struct parsing in _parse_named_struct (Issue #627)
This commit fixes a critical bug where nested STRUCT (ROW) types were not being parsed correctly, causing nested fields to be lost during data conversion.

## Problem

The `_parse_named_struct` function in `pyathena/converter.py` was using simple comma-splitting, which failed for nested structures like:

`{header={stamp=2024-01-01, seq=123}, x=4.736}`

This caused:

1. Incorrect splitting at commas inside nested braces
2. Nested fields being skipped because any value containing braces was filtered out

## Solution

- Updated `_parse_named_struct` to use `_split_array_items` for proper brace-depth-aware splitting
- Added recursive parsing for nested struct values
- Updated docstring to document nested struct support

## Testing

Added comprehensive test cases:

- Converter tests: 7 nested struct patterns + 3 array patterns
- SQLAlchemy integration tests: Query execution with nested ROW types

All existing tests pass without regression.

Fixes #627

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent de0e332 commit d7381be

File tree

3 files changed

+202
-5
lines changed

3 files changed

+202
-5
lines changed

pyathena/converter.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,9 @@ def _parse_map_native(inner: str) -> Optional[Dict[str, Any]]:
336336

337337

338338
def _parse_named_struct(inner: str) -> Optional[Dict[str, Any]]:
339-
"""Parse named struct format: a=1, b=2.
339+
"""Parse named struct format: key1=value1, key2=value2.
340+
341+
Supports nested structs: outer={inner_key=inner_value}, field=value.
340342
341343
Args:
342344
inner: Interior content of struct without braces.
@@ -346,8 +348,8 @@ def _parse_named_struct(inner: str) -> Optional[Dict[str, Any]]:
346348
"""
347349
result = {}
348350

349-
# Simple split by comma for basic cases
350-
pairs = [pair.strip() for pair in inner.split(",")]
351+
# Use smart split to handle nested structures
352+
pairs = _split_array_items(inner)
351353

352354
for pair in pairs:
353355
if "=" not in pair:
@@ -357,10 +359,18 @@ def _parse_named_struct(inner: str) -> Optional[Dict[str, Any]]:
357359
key = key.strip()
358360
value = value.strip()
359361

360-
# Skip pairs with special characters (safety check)
361-
if any(char in key for char in '{}="') or any(char in value for char in '{}="'):
362+
# Skip if key contains special characters (safety check)
363+
if any(char in key for char in '{}="'):
362364
continue
363365

366+
# Handle nested struct values
367+
if value.startswith("{") and value.endswith("}"):
368+
# Try to parse as nested struct
369+
nested_struct = _to_struct(value)
370+
if nested_struct is not None:
371+
result[key] = nested_struct
372+
continue
373+
364374
# Convert value to appropriate type
365375
result[key] = _convert_value(value)
366376

tests/pyathena/sqlalchemy/test_base.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,118 @@ def test_json_type_with_cast(self, engine):
9292
).fetchone()
9393
assert result.json_types == {"str": "value", "num": 42, "bool": True, "nil": None}
9494

95+
def test_select_nested_struct_query(self, engine):
96+
"""Test SELECT query with nested STRUCT (ROW) types (Issue #627)."""
97+
engine, conn = engine
98+
99+
# Test single level nested struct (simulating Issue #627 scenario)
100+
query = sqlalchemy.text(
101+
"""
102+
SELECT
103+
CAST(ROW(
104+
ROW('2024-01-01', 123),
105+
CAST(4.736 AS DOUBLE),
106+
CAST(0.583 AS DOUBLE)
107+
) AS ROW(header ROW(stamp VARCHAR, seq INTEGER), x DOUBLE, y DOUBLE)) as positions
108+
"""
109+
)
110+
result = conn.execute(query).fetchone()
111+
assert result is not None
112+
assert result.positions is not None
113+
assert isinstance(result.positions, dict)
114+
assert "header" in result.positions
115+
assert isinstance(result.positions["header"], dict)
116+
assert result.positions["header"]["stamp"] == "2024-01-01"
117+
assert result.positions["header"]["seq"] == 123
118+
assert result.positions["x"] == 4.736
119+
assert result.positions["y"] == 0.583
120+
121+
# Test double nested struct
122+
query = sqlalchemy.text(
123+
"""
124+
SELECT
125+
CAST(ROW(
126+
ROW(ROW('value')),
127+
123
128+
) AS ROW(level1 ROW(level2 ROW(level3 VARCHAR)), field INTEGER)) as data
129+
"""
130+
)
131+
result = conn.execute(query).fetchone()
132+
assert result is not None
133+
assert result.data["level1"]["level2"]["level3"] == "value"
134+
assert result.data["field"] == 123
135+
136+
# Test multiple nested fields
137+
query = sqlalchemy.text(
138+
"""
139+
SELECT
140+
CAST(ROW(
141+
ROW(1, 2),
142+
ROW(CAST(0.5 AS DOUBLE), CAST(0.3 AS DOUBLE)),
143+
12345
144+
) AS ROW(
145+
pos ROW(x INTEGER, y INTEGER),
146+
vel ROW(x DOUBLE, y DOUBLE),
147+
timestamp INTEGER
148+
)) as data
149+
"""
150+
)
151+
result = conn.execute(query).fetchone()
152+
assert result is not None
153+
assert result.data["pos"]["x"] == 1
154+
assert result.data["pos"]["y"] == 2
155+
assert result.data["vel"]["x"] == 0.5
156+
assert result.data["vel"]["y"] == 0.3
157+
assert result.data["timestamp"] == 12345
158+
159+
def test_select_array_with_nested_struct(self, engine):
160+
"""Test SELECT query with ARRAY containing nested STRUCT (Issue #627)."""
161+
engine, conn = engine
162+
163+
# Array with nested structs (simulating Issue #627 scenario)
164+
query = sqlalchemy.text(
165+
"""
166+
SELECT
167+
CAST(ARRAY[
168+
ROW(
169+
ROW('2024-01-01', 123),
170+
CAST(4.736 AS DOUBLE)
171+
)
172+
] AS ARRAY<ROW(header ROW(stamp VARCHAR, seq INTEGER), x DOUBLE)>) as positions
173+
"""
174+
)
175+
result = conn.execute(query).fetchone()
176+
assert result is not None
177+
assert result.positions is not None
178+
assert isinstance(result.positions, list)
179+
assert len(result.positions) == 1
180+
assert isinstance(result.positions[0], dict)
181+
assert "header" in result.positions[0]
182+
assert isinstance(result.positions[0]["header"], dict)
183+
assert result.positions[0]["header"]["stamp"] == "2024-01-01"
184+
assert result.positions[0]["header"]["seq"] == 123
185+
assert result.positions[0]["x"] == 4.736
186+
187+
# Multiple elements with nested structs
188+
query = sqlalchemy.text(
189+
"""
190+
SELECT
191+
CAST(ARRAY[
192+
ROW(ROW(1, 2), ROW(CAST(0.5 AS DOUBLE))),
193+
ROW(ROW(3, 4), ROW(CAST(1.5 AS DOUBLE)))
194+
] AS ARRAY<ROW(pos ROW(x INTEGER, y INTEGER), vel ROW(x DOUBLE))>) as data
195+
"""
196+
)
197+
result = conn.execute(query).fetchone()
198+
assert result is not None
199+
assert len(result.data) == 2
200+
assert result.data[0]["pos"]["x"] == 1
201+
assert result.data[0]["pos"]["y"] == 2
202+
assert result.data[0]["vel"]["x"] == 0.5
203+
assert result.data[1]["pos"]["x"] == 3
204+
assert result.data[1]["pos"]["y"] == 4
205+
assert result.data[1]["vel"]["x"] == 1.5
206+
95207
def test_reflect_no_such_table(self, engine):
96208
engine, conn = engine
97209
pytest.raises(

tests/pyathena/test_converter.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,52 @@ def test_to_struct_athena_native_formats(input_value, expected):
4646
assert result == expected
4747

4848

49+
@pytest.mark.parametrize(
50+
"input_value,expected",
51+
[
52+
# Single level nesting (Issue #627)
53+
(
54+
"{header={stamp=2024-01-01, seq=123}, x=4.736, y=0.583}",
55+
{"header": {"stamp": "2024-01-01", "seq": 123}, "x": 4.736, "y": 0.583},
56+
),
57+
# Double nesting
58+
(
59+
"{outer={middle={inner=value}}, field=123}",
60+
{"outer": {"middle": {"inner": "value"}}, "field": 123},
61+
),
62+
# Multiple nested fields
63+
(
64+
"{pos={x=1, y=2}, vel={x=0.5, y=0.3}, timestamp=12345}",
65+
{"pos": {"x": 1, "y": 2}, "vel": {"x": 0.5, "y": 0.3}, "timestamp": 12345},
66+
),
67+
# Triple nesting
68+
(
69+
"{level1={level2={level3={value=deep}}}}",
70+
{"level1": {"level2": {"level3": {"value": "deep"}}}},
71+
),
72+
# Mixed types in nested struct
73+
(
74+
"{metadata={id=123, active=true, name=test}, count=5}",
75+
{"metadata": {"id": 123, "active": True, "name": "test"}, "count": 5},
76+
),
77+
# Nested struct with null value
78+
(
79+
"{data={value=null, status=ok}, flag=true}",
80+
{"data": {"value": None, "status": "ok"}, "flag": True},
81+
),
82+
# Complex nesting with multiple levels and fields
83+
(
84+
"{a={b={c=1, d=2}, e=3}, f=4, g={h=5}}",
85+
{"a": {"b": {"c": 1, "d": 2}, "e": 3}, "f": 4, "g": {"h": 5}},
86+
),
87+
],
88+
)
89+
def test_to_struct_athena_nested_formats(input_value, expected):
90+
"""Test STRUCT conversion for nested struct formats (Issue #627)."""
91+
result = _to_struct(input_value)
92+
assert result == expected
93+
94+
4995
@pytest.mark.parametrize(
5096
"input_value",
5197
[
@@ -106,6 +152,35 @@ def test_to_array_athena_unnamed_struct_elements():
106152
assert result == expected
107153

108154

155+
@pytest.mark.parametrize(
156+
"input_value,expected",
157+
[
158+
# Array with nested structs (Issue #627)
159+
(
160+
"[{header={stamp=2024-01-01, seq=123}, x=4.736}]",
161+
[{"header": {"stamp": "2024-01-01", "seq": 123}, "x": 4.736}],
162+
),
163+
# Multiple elements with nested structs
164+
(
165+
"[{pos={x=1, y=2}, vel={x=0.5}}, {pos={x=3, y=4}, vel={x=1.5}}]",
166+
[
167+
{"pos": {"x": 1, "y": 2}, "vel": {"x": 0.5}},
168+
{"pos": {"x": 3, "y": 4}, "vel": {"x": 1.5}},
169+
],
170+
),
171+
# Array with deeply nested structs
172+
(
173+
"[{data={meta={id=1, active=true}}}]",
174+
[{"data": {"meta": {"id": 1, "active": True}}}],
175+
),
176+
],
177+
)
178+
def test_to_array_athena_nested_struct_elements(input_value, expected):
179+
"""Test Athena array with nested struct elements (Issue #627)."""
180+
result = _to_array(input_value)
181+
assert result == expected
182+
183+
109184
@pytest.mark.parametrize(
110185
"input_value",
111186
[

0 commit comments

Comments
 (0)