Skip to content

Commit 6245b5b

Browse files
committed
[python] Fix wrong result when using where and limit in CLI table read
1 parent b7d1af0 commit 6245b5b

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

paimon-python/pypaimon/cli/cli_table.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,11 @@ def cmd_table_read(args):
104104
print(f"Error: Invalid WHERE clause: {e}", file=sys.stderr)
105105
sys.exit(1)
106106

107-
# Apply limit if specified
107+
# Apply limit: only push down when there is no where clause,
108+
# because limit push-down may stop reading before enough rows
109+
# pass the filter, leading to fewer results than expected.
108110
limit = args.limit
109-
if limit:
111+
if limit and not where_clause:
110112
read_builder = read_builder.with_limit(limit)
111113

112114
# Scan and read

paimon-python/pypaimon/tests/cli_table_test.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,60 @@ def test_cli_table_read_where_field_not_in_select(self):
10491049
self.assertNotIn(' 32', output)
10501050
self.assertNotIn(' 25', output)
10511051

1052+
def test_cli_table_read_with_where_and_limit(self):
1053+
"""Test that where + limit returns correct filtered results without limit push-down.
1054+
1055+
Writes data in two batches to produce multiple splits, so that limit
1056+
push-down would actually take effect and potentially miss matching rows
1057+
in later splits.
1058+
"""
1059+
1060+
# Create a dedicated table for this test with two batches of data
1061+
pa_schema = pa.schema([
1062+
('id', pa.int32()),
1063+
('val', pa.string()),
1064+
('score', pa.int32()),
1065+
])
1066+
# Important: multiple splits are required for the limit to take effect
1067+
schema = Schema.from_pyarrow_schema(pa_schema, options={'source.split.target-size': '1b'})
1068+
self.catalog.create_table('test_db.limit_test', schema, True)
1069+
table = self.catalog.get_table('test_db.limit_test')
1070+
1071+
def write_batch():
1072+
write_builder = table.new_batch_write_builder()
1073+
table_write = write_builder.new_write()
1074+
table_commit = write_builder.new_commit()
1075+
batch = pa.Table.from_pydict({
1076+
'id': [1, 2, 3],
1077+
'val': ['a', 'b', 'c'],
1078+
'score': [10, 20, 30],
1079+
}, schema=pa_schema)
1080+
table_write.write_arrow(batch)
1081+
table_commit.commit(table_write.prepare_commit())
1082+
table_write.close()
1083+
table_commit.close()
1084+
1085+
write_batch()
1086+
write_batch()
1087+
write_batch()
1088+
1089+
with patch('sys.argv',
1090+
['paimon', '-c', self.config_file,
1091+
'table', 'read', 'test_db.limit_test',
1092+
'--where', 'score = 20',
1093+
'--limit', '2']):
1094+
with patch('sys.stdout', new_callable=StringIO) as mock_stdout:
1095+
try:
1096+
main()
1097+
except SystemExit:
1098+
pass
1099+
1100+
output = mock_stdout.getvalue()
1101+
lines = [line for line in output.strip().split('\n') if line.strip()]
1102+
self.assertEqual(len(lines), 3)
1103+
self.assertNotIn(' a ', output)
1104+
self.assertNotIn(' c ', output)
1105+
10521106
def test_cli_table_read_with_invalid_where(self):
10531107
"""Test table read with invalid --where clause via CLI."""
10541108
with patch('sys.argv',

0 commit comments

Comments
 (0)