Skip to content

Commit 5938626

Browse files
committed
[python] Add list partitions support for Python CLI
1 parent 2807a22 commit 5938626

File tree

12 files changed

+530
-5
lines changed

12 files changed

+530
-5
lines changed

docs/content/pypaimon/cli.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,38 @@ Successfully imported 3 rows into 'mydb.users'.
309309
- Data types should be compatible with the table schema
310310
- The import operation appends data to the existing table
311311

312+
### Table List Partitions
313+
314+
List partitions of a Paimon table. Supports optional pattern filtering to match specific partitions.
315+
316+
```shell
317+
paimon table list-partitions mydb.orders
318+
```
319+
320+
**Options:**
321+
322+
- `--pattern, -p`: Partition name pattern to filter partitions
323+
324+
**Examples:**
325+
326+
```shell
327+
# List all partitions
328+
paimon table list-partitions mydb.orders
329+
330+
# List partitions matching a pattern
331+
paimon table list-partitions mydb.orders --pattern "dt=2024*"
332+
```
333+
334+
Output:
335+
```
336+
Partition RecordCount FileSizeInBytes FileCount LastFileCreationTime UpdatedAt UpdatedBy
337+
dt=2024-01-01,region=us 500 1048576 10 1704067200000 1704153600000 admin
338+
dt=2024-01-02,region=eu 300 524288 5 1704153600000 1704240000000 user1
339+
dt=2024-01-03,region=us 200 262144 3 1704240000000 1704326400000 admin
340+
```
341+
342+
**Note:** Both filesystem and REST catalogs support listing partitions.
343+
312344
### Table Rename
313345

314346
Rename a table in the catalog. Both source and target must be specified in `database.table` format.

paimon-python/pypaimon/api/api_response.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from pypaimon.common.json_util import T, json_field
2424
from pypaimon.common.options import Options
2525
from pypaimon.schema.schema import Schema
26+
from pypaimon.snapshot.snapshot_commit import PartitionStatistics
2627
from pypaimon.snapshot.table_snapshot import TableSnapshot
2728

2829

@@ -132,6 +133,37 @@ def get_next_page_token(self) -> str:
132133
return self.next_page_token
133134

134135

136+
@dataclass
137+
class Partition(PartitionStatistics):
138+
"""Partition data model matching Java org.apache.paimon.partition.Partition."""
139+
140+
FIELD_DONE = "done"
141+
FIELD_OPTIONS = "options"
142+
143+
done: bool = json_field(FIELD_DONE, default=False)
144+
created_at: Optional[int] = json_field("createdAt", default=None)
145+
created_by: Optional[str] = json_field("createdBy", default=None)
146+
updated_at: Optional[int] = json_field("updatedAt", default=None)
147+
updated_by: Optional[str] = json_field("updatedBy", default=None)
148+
options: Optional[Dict[str, str]] = json_field(FIELD_OPTIONS, default=None)
149+
150+
151+
@dataclass
152+
class ListPartitionsResponse(PagedResponse['Partition']):
153+
"""Response for listing partitions."""
154+
FIELD_PARTITIONS = "partitions"
155+
156+
partitions: Optional[List[Partition]] = json_field(FIELD_PARTITIONS)
157+
next_page_token: Optional[str] = json_field(
158+
PagedResponse.FIELD_NEXT_PAGE_TOKEN)
159+
160+
def data(self) -> Optional[List[Partition]]:
161+
return self.partitions
162+
163+
def get_next_page_token(self) -> Optional[str]:
164+
return self.next_page_token
165+
166+
135167
@dataclass
136168
class ListTablesResponse(PagedResponse[str]):
137169
FIELD_TABLES = "tables"

paimon-python/pypaimon/api/resource_paths.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class ResourcePaths:
2727
DATABASES = "databases"
2828
TABLES = "tables"
2929
TABLE_DETAILS = "table-details"
30+
PARTITIONS = "partitions"
3031

3132
def __init__(self, prefix: str):
3233
self.base_path = "/{}/{}".format(self.V1, prefix).rstrip("/")
@@ -78,3 +79,7 @@ def rollback_table(self, database_name: str, table_name: str) -> str:
7879
def table_snapshot(self, database_name: str, table_name: str) -> str:
7980
return ("{}/{}/{}/{}/{}/snapshot".format(self.base_path, self.DATABASES, RESTUtil.encode_string(database_name),
8081
self.TABLES, RESTUtil.encode_string(table_name)))
82+
83+
def partitions(self, database_name: str, table_name: str) -> str:
84+
return ("{}/{}/{}/{}/{}/{}".format(self.base_path, self.DATABASES, RESTUtil.encode_string(database_name),
85+
self.TABLES, RESTUtil.encode_string(table_name), self.PARTITIONS))

paimon-python/pypaimon/api/rest_api.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
GetDatabaseResponse, GetTableResponse,
2727
GetTableTokenResponse,
2828
ListDatabasesResponse,
29+
ListPartitionsResponse,
2930
ListTablesResponse, PagedList,
30-
PagedResponse, GetTableSnapshotResponse)
31+
PagedResponse, GetTableSnapshotResponse,
32+
Partition)
3133
from pypaimon.api.auth import AuthProviderFactory, RESTAuthFunction
3234
from pypaimon.api.client import HttpClient
3335
from pypaimon.api.resource_paths import ResourcePaths
@@ -48,6 +50,7 @@ class RESTApi:
4850
DATABASE_NAME_PATTERN = "databaseNamePattern"
4951
TABLE_NAME_PATTERN = "tableNamePattern"
5052
TABLE_TYPE = "tableType"
53+
PARTITION_NAME_PATTERN = "partitionNamePattern"
5154
TOKEN_EXPIRATION_SAFE_TIME_MILLIS = 3_600_000
5255

5356
def __init__(self, options: Union[Options, Dict[str, str]], config_required: bool = True):
@@ -399,6 +402,29 @@ def load_snapshot(self, identifier: Identifier) -> Optional['TableSnapshot']:
399402
return None
400403
return response.get_snapshot()
401404

405+
def list_partitions_paged(
406+
self,
407+
identifier: Identifier,
408+
max_results: Optional[int] = None,
409+
page_token: Optional[str] = None,
410+
partition_name_pattern: Optional[str] = None,
411+
) -> PagedList[Partition]:
412+
database_name, table_name = self.__validate_identifier(identifier)
413+
414+
response = self.client.get_with_params(
415+
self.resource_paths.partitions(database_name, table_name),
416+
self.__build_paged_query_params(
417+
max_results,
418+
page_token,
419+
{self.PARTITION_NAME_PATTERN: partition_name_pattern},
420+
),
421+
ListPartitionsResponse,
422+
self.rest_auth_function,
423+
)
424+
425+
partitions = response.data() or []
426+
return PagedList(partitions, response.get_next_page_token())
427+
402428
@staticmethod
403429
def __validate_identifier(identifier: Identifier):
404430
if not identifier:

paimon-python/pypaimon/catalog/catalog.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,3 +200,28 @@ def drop_partitions(
200200
raise NotImplementedError(
201201
"drop_partitions is not supported by this catalog. Use REST catalog for partition drop."
202202
)
203+
204+
def list_partitions_paged(
205+
self,
206+
identifier: Union[str, Identifier],
207+
max_results: Optional[int] = None,
208+
page_token: Optional[str] = None,
209+
partition_name_pattern: Optional[str] = None,
210+
):
211+
"""List partitions of a table with pagination.
212+
213+
Args:
214+
identifier: Path of the table.
215+
max_results: Maximum number of results to return per page.
216+
page_token: Token for pagination.
217+
partition_name_pattern: Optional pattern to filter partition names.
218+
219+
Returns:
220+
PagedList of Partition objects.
221+
222+
Raises:
223+
NotImplementedError: If the catalog does not support listing partitions.
224+
"""
225+
raise NotImplementedError(
226+
"list_partitions_paged is not supported by this catalog."
227+
)

paimon-python/pypaimon/catalog/filesystem_catalog.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,3 +254,77 @@ def load_snapshot(self, identifier: Identifier):
254254
NotImplementedError: FileSystemCatalog does not support version management
255255
"""
256256
raise NotImplementedError("Filesystem catalog does not support load_snapshot")
257+
258+
def list_partitions_paged(
259+
self,
260+
identifier: Union[str, Identifier],
261+
max_results: Optional[int] = None,
262+
page_token: Optional[str] = None,
263+
partition_name_pattern: Optional[str] = None,
264+
):
265+
from pypaimon.api.api_response import Partition, PagedList
266+
from pypaimon.manifest.manifest_list_manager import ManifestListManager
267+
from pypaimon.manifest.manifest_file_manager import ManifestFileManager
268+
269+
if not isinstance(identifier, Identifier):
270+
identifier = Identifier.from_string(identifier)
271+
272+
table = self.get_table(identifier)
273+
snapshot = table.snapshot_manager().get_latest_snapshot()
274+
if snapshot is None:
275+
return PagedList(elements=[])
276+
277+
# Read all manifest entries (ADD - DELETE merged)
278+
manifest_list_manager = ManifestListManager(table)
279+
manifest_file_manager = ManifestFileManager(table)
280+
manifest_files = manifest_list_manager.read_all(snapshot)
281+
entries = manifest_file_manager.read_entries_parallel(manifest_files, drop_stats=True)
282+
283+
# Group entries by partition spec
284+
partition_map = {} # spec_key -> aggregated stats
285+
for entry in entries:
286+
spec = {field.name: str(v) for field, v in
287+
zip(entry.partition.fields, entry.partition.values)}
288+
spec_key = tuple(sorted(spec.items()))
289+
290+
if spec_key not in partition_map:
291+
partition_map[spec_key] = {
292+
'spec': spec,
293+
'record_count': 0,
294+
'file_size_in_bytes': 0,
295+
'file_count': 0,
296+
'last_file_creation_time': 0,
297+
'buckets': set(),
298+
}
299+
stats = partition_map[spec_key]
300+
stats['record_count'] += entry.file.row_count
301+
stats['file_size_in_bytes'] += entry.file.file_size
302+
stats['file_count'] += 1
303+
if entry.file.creation_time is not None:
304+
ct = entry.file.creation_time.get_millisecond()
305+
if ct > stats['last_file_creation_time']:
306+
stats['last_file_creation_time'] = ct
307+
stats['buckets'].add(entry.bucket)
308+
309+
# Convert to Partition objects
310+
partitions = []
311+
for stats in partition_map.values():
312+
partitions.append(Partition(
313+
spec=stats['spec'],
314+
record_count=stats['record_count'],
315+
file_size_in_bytes=stats['file_size_in_bytes'],
316+
file_count=stats['file_count'],
317+
last_file_creation_time=stats['last_file_creation_time'],
318+
total_buckets=len(stats['buckets']),
319+
))
320+
321+
# Apply pattern filter
322+
if partition_name_pattern:
323+
import re
324+
regex = re.compile(partition_name_pattern.replace('*', '.*'))
325+
partitions = [
326+
p for p in partitions
327+
if regex.fullmatch(','.join(f'{k}={v}' for k, v in p.spec.items()))
328+
]
329+
330+
return PagedList(elements=partitions)

paimon-python/pypaimon/catalog/rest/rest_catalog.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,27 @@ def drop_partitions(
267267
finally:
268268
commit.close()
269269

270+
def list_partitions_paged(
271+
self,
272+
identifier: Union[str, Identifier],
273+
max_results: Optional[int] = None,
274+
page_token: Optional[str] = None,
275+
partition_name_pattern: Optional[str] = None,
276+
):
277+
if not isinstance(identifier, Identifier):
278+
identifier = Identifier.from_string(identifier)
279+
try:
280+
return self.rest_api.list_partitions_paged(
281+
identifier,
282+
max_results,
283+
page_token,
284+
partition_name_pattern
285+
)
286+
except NoSuchResourceException as e:
287+
raise TableNotExistException(identifier) from e
288+
except ForbiddenException as e:
289+
raise TableNoPermissionException(identifier) from e
290+
270291
def alter_table(
271292
self,
272293
identifier: Union[str, Identifier],

paimon-python/pypaimon/cli/cli_table.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,70 @@ def cmd_table_alter(args):
573573
sys.exit(1)
574574

575575

576+
def cmd_table_list_partitions(args):
577+
"""
578+
Execute the 'table list-partitions' command.
579+
580+
Lists partitions of a Paimon table with optional pattern filtering.
581+
582+
Args:
583+
args: Parsed command line arguments.
584+
"""
585+
from pypaimon.cli.cli import load_catalog_config, create_catalog
586+
587+
# Load catalog configuration
588+
config_path = args.config
589+
config = load_catalog_config(config_path)
590+
591+
# Create catalog
592+
catalog = create_catalog(config)
593+
594+
# Parse table identifier
595+
table_identifier = args.table
596+
parts = table_identifier.split('.')
597+
if len(parts) != 2:
598+
print(f"Error: Invalid table identifier '{table_identifier}'. "
599+
f"Expected format: 'database.table'", file=sys.stderr)
600+
sys.exit(1)
601+
602+
# List partitions with pagination
603+
pattern = getattr(args, 'pattern', None)
604+
try:
605+
paged_list = catalog.list_partitions_paged(
606+
table_identifier,
607+
partition_name_pattern=pattern,
608+
)
609+
import pandas as pd
610+
611+
partitions = paged_list.elements
612+
if not partitions:
613+
print("No partitions found.")
614+
return
615+
616+
data = []
617+
for p in partitions:
618+
spec_str = ",".join(f"{k}={v}" for k, v in p.spec.items())
619+
data.append({
620+
'Partition': spec_str,
621+
'RecordCount': p.record_count,
622+
'FileSizeInBytes': p.file_size_in_bytes,
623+
'FileCount': p.file_count,
624+
'LastFileCreationTime': p.last_file_creation_time,
625+
'UpdatedAt': p.updated_at,
626+
'UpdatedBy': p.updated_by or '',
627+
})
628+
629+
df = pd.DataFrame(data)
630+
print(df.to_string(index=False))
631+
632+
except NotImplementedError as e:
633+
print(f"Error: {e}", file=sys.stderr)
634+
sys.exit(1)
635+
except Exception as e:
636+
print(f"Error: Failed to list partitions: {e}", file=sys.stderr)
637+
sys.exit(1)
638+
639+
576640
def add_table_subcommands(table_parser):
577641
"""
578642
Add table subcommands to the parser.
@@ -669,6 +733,20 @@ def add_table_subcommands(table_parser):
669733
)
670734
import_parser.set_defaults(func=cmd_table_import)
671735

736+
# table list-partitions command
737+
list_partitions_parser = table_subparsers.add_parser('list-partitions', help='List partitions of a table')
738+
list_partitions_parser.add_argument(
739+
'table',
740+
help='Table identifier in format: database.table'
741+
)
742+
list_partitions_parser.add_argument(
743+
'--pattern', '-p',
744+
type=str,
745+
default=None,
746+
help='Partition name pattern to filter partitions (e.g., "dt=2024*")'
747+
)
748+
list_partitions_parser.set_defaults(func=cmd_table_list_partitions)
749+
672750
# table rename command
673751
rename_parser = table_subparsers.add_parser('rename', help='Rename a table')
674752
rename_parser.add_argument(

paimon-python/pypaimon/common/json_util.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,12 @@ def __from_dict(data: Dict[str, Any], target_class: Type[T]) -> T:
101101
field_type = field_info.type
102102
if origin_type is Union and len(args) == 2:
103103
field_type = args[0]
104+
origin_type = getattr(field_type, '__origin__', None)
105+
args = getattr(field_type, '__args__', None)
104106
if is_dataclass(field_type):
105107
type_mapping[json_name] = field_type
106-
elif origin_type in (list, List) and is_dataclass(args[0]):
107-
type_mapping[json_name] = field_info.type
108+
elif origin_type in (list, List) and args and is_dataclass(args[0]):
109+
type_mapping[json_name] = field_type
108110

109111
# Map JSON data to field names
110112
kwargs = {}

0 commit comments

Comments
 (0)