Skip to content

Commit d488868

Browse files
committed
fix select without where fields
1 parent 3e4ea08 commit d488868

File tree

3 files changed

+77
-9
lines changed

3 files changed

+77
-9
lines changed

paimon-python/pypaimon/cli/cli_table.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,24 +63,37 @@ def cmd_table_read(args):
6363
# Build read pipeline
6464
read_builder = table.new_read_builder()
6565

66-
# Apply projection (select columns) if specified
66+
available_fields = set(field.name for field in table.table_schema.fields)
67+
68+
# Parse select and where options
6769
select_columns = args.select
70+
where_clause = args.where
71+
user_columns = None
72+
extra_where_columns = []
73+
6874
if select_columns:
6975
# Parse column names (comma-separated)
70-
columns = [col.strip() for col in select_columns.split(',')]
71-
76+
user_columns = [col.strip() for col in select_columns.split(',')]
77+
7278
# Validate that all columns exist in the table schema
73-
available_fields = set(field.name for field in table.table_schema.fields)
74-
invalid_columns = [col for col in columns if col not in available_fields]
75-
79+
invalid_columns = [col for col in user_columns if col not in available_fields]
7680
if invalid_columns:
7781
print(f"Error: Column(s) {invalid_columns} do not exist in table '{table_identifier}'.", file=sys.stderr)
7882
sys.exit(1)
79-
80-
read_builder = read_builder.with_projection(columns)
83+
84+
# When both select and where are specified, ensure where-referenced fields
85+
# are included in the projection so the filter can work correctly.
86+
if user_columns and where_clause:
87+
from pypaimon.cli.where_parser import extract_fields_from_where
88+
where_fields = extract_fields_from_where(where_clause, available_fields)
89+
user_column_set = set(user_columns)
90+
extra_where_columns = [f for f in where_fields if f not in user_column_set]
91+
projection_columns = user_columns + extra_where_columns
92+
read_builder = read_builder.with_projection(projection_columns)
93+
elif user_columns:
94+
read_builder = read_builder.with_projection(user_columns)
8195

8296
# Apply where filter if specified
83-
where_clause = args.where
8497
if where_clause:
8598
from pypaimon.cli.where_parser import parse_where_clause
8699
try:
@@ -107,6 +120,11 @@ def cmd_table_read(args):
107120
df = read.to_pandas(splits)
108121
if limit and len(df) > limit:
109122
df = df.head(limit)
123+
124+
# Drop extra columns that were added only for where-clause filtering
125+
if extra_where_columns:
126+
df = df.drop(columns=extra_where_columns, errors='ignore')
127+
110128
print(df.to_string(index=False))
111129

112130

paimon-python/pypaimon/cli/where_parser.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,27 @@
4848
from pypaimon.schema.data_types import AtomicType, DataField
4949

5050

51+
def extract_fields_from_where(where_string: str, available_fields: set) -> set:
    """Collect the schema field names that a WHERE clause mentions.

    The clause is split into tokens with ``_tokenize``; a token is reported
    only when it is a member of ``available_fields`` (exact match). All other
    tokens are ignored.

    Args:
        where_string: The WHERE clause text. A falsy value (``None``, empty
            string) or a whitespace-only string yields an empty set.
        available_fields: Valid field names from the table schema.

    Returns:
        The set of tokens from the clause that are also in
        ``available_fields``.
    """
    # Normalise once so the emptiness check and the tokenizer see the same text.
    clause = where_string.strip() if where_string else ""
    if not clause:
        return set()

    return {tok for tok in _tokenize(clause) if tok in available_fields}
70+
71+
5172
def parse_where_clause(where_string: str, fields: List[DataField]) -> Optional[Predicate]:
5273
"""Parse a SQL-like WHERE clause string into a Predicate.
5374

paimon-python/pypaimon/tests/cli_table_test.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,35 @@ def test_cli_table_read_with_where_and_select(self):
10201020
self.assertIn('Eve', output)
10211021
self.assertNotIn('Alice', output)
10221022

1023+
def test_cli_table_read_where_field_not_in_select(self):
    """Test that where filter works even when the filtered field is not in select."""
    cli_argv = [
        'paimon', '-c', self.config_file,
        'table', 'read', 'test_db.users',
        '--select', 'name,city',
        '--where', 'age > 30',
    ]
    with patch('sys.argv', cli_argv), \
            patch('sys.stdout', new_callable=StringIO) as captured:
        try:
            main()
        except SystemExit:
            # The exit status is not checked here; only printed output matters.
            pass

    output = captured.getvalue()

    # age > 30 matches Charlie (35, Guangzhou) and Eve (32, Hangzhou); they
    # must be printed even though 'age' is not one of the selected columns.
    for expected in ('Charlie', 'Eve', 'Guangzhou', 'Hangzhou'):
        self.assertIn(expected, output)

    # Rows that fail the filter must not appear.
    for excluded in ('Alice', 'Bob', 'David'):
        self.assertNotIn(excluded, output)

    # 'age' was only needed for filtering, so its values must not be printed.
    for age_text in (' 35', ' 32', ' 25'):
        self.assertNotIn(age_text, output)
1051+
10231052
def test_cli_table_read_with_invalid_where(self):
10241053
"""Test table read with invalid --where clause via CLI."""
10251054
with patch('sys.argv',

0 commit comments

Comments
 (0)