@@ -63,24 +63,37 @@ def cmd_table_read(args):
6363 # Build read pipeline
6464 read_builder = table .new_read_builder ()
6565
66- # Apply projection (select columns) if specified
66+ available_fields = set (field .name for field in table .table_schema .fields )
67+
68+ # Parse select and where options
6769 select_columns = args .select
70+ where_clause = args .where
71+ user_columns = None
72+ extra_where_columns = []
73+
6874 if select_columns :
6975 # Parse column names (comma-separated)
70- columns = [col .strip () for col in select_columns .split (',' )]
71-
76+ user_columns = [col .strip () for col in select_columns .split (',' )]
77+
7278 # Validate that all columns exist in the table schema
73- available_fields = set (field .name for field in table .table_schema .fields )
74- invalid_columns = [col for col in columns if col not in available_fields ]
75-
79+ invalid_columns = [col for col in user_columns if col not in available_fields ]
7680 if invalid_columns :
7781 print (f"Error: Column(s) { invalid_columns } do not exist in table '{ table_identifier } '." , file = sys .stderr )
7882 sys .exit (1 )
79-
80- read_builder = read_builder .with_projection (columns )
83+
84+ # When both select and where are specified, ensure where-referenced fields
85+ # are included in the projection so the filter can work correctly.
86+ if user_columns and where_clause :
87+ from pypaimon .cli .where_parser import extract_fields_from_where
88+ where_fields = extract_fields_from_where (where_clause , available_fields )
89+ user_column_set = set (user_columns )
90+ extra_where_columns = [f for f in where_fields if f not in user_column_set ]
91+ projection_columns = user_columns + extra_where_columns
92+ read_builder = read_builder .with_projection (projection_columns )
93+ elif user_columns :
94+ read_builder = read_builder .with_projection (user_columns )
8195
8296 # Apply where filter if specified
83- where_clause = args .where
8497 if where_clause :
8598 from pypaimon .cli .where_parser import parse_where_clause
8699 try :
@@ -107,6 +120,11 @@ def cmd_table_read(args):
107120 df = read .to_pandas (splits )
108121 if limit and len (df ) > limit :
109122 df = df .head (limit )
123+
124+ # Drop extra columns that were added only for where-clause filtering
125+ if extra_where_columns :
126+ df = df .drop (columns = extra_where_columns , errors = 'ignore' )
127+
110128 print (df .to_string (index = False ))
111129
112130
0 commit comments