Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
393b940
:alien: Remove workaround now that upstream change has been merged
jemrobinson Feb 26, 2026
a7529f1
:sparkles: Check dataset status before deciding what to do with it
jemrobinson Feb 26, 2026
fb6a7cd
:sparkles: Add simple function to load in chunks using native Anemoi …
jemrobinson Feb 26, 2026
f4543f2
:sparkles: Add an initialise function to wrap Anemoi Init
jemrobinson Feb 26, 2026
71f060e
:recycle: Replace download Create with download in parts
jemrobinson Feb 26, 2026
7ca0ea4
:coffin: Remove load_in_parts and associated tracking functionality a…
jemrobinson Feb 26, 2026
ba1980b
:coffin: Remove init command and CLI
jemrobinson Feb 26, 2026
aa800cc
:coffin: Remove load command and CLI
jemrobinson Feb 26, 2026
4d36449
:truck: Alphabetise DataDownloader methods
jemrobinson Feb 26, 2026
25f00ca
:children_crossing: Also check whether statistics are ready before fi…
jemrobinson Feb 26, 2026
aa1f278
:bug: Remove invalid datasets whether or not they appear to be complete
jemrobinson Feb 26, 2026
3e0e0b5
:coffin: Removed tests for removed code
jemrobinson Feb 26, 2026
5ac32f9
:alembic: Test whether chunk-checking is sufficient to cause download…
jemrobinson Feb 26, 2026
7adc822
:wrench: Default to statistics=False when inspecting
jemrobinson Feb 26, 2026
bdfb08b
:goal_net: Exit gracefully when inspect fails during download
jemrobinson Feb 26, 2026
3b53ac5
:wrench: Do not set progress when detailed is set or the same informa…
jemrobinson Feb 26, 2026
5dd9c36
:bug: If only the statistics are missing we need to finalise
jemrobinson Feb 26, 2026
1c9a4bb
:rotating_light: Perform explicit type checking to avoid linting errors
jemrobinson Feb 26, 2026
1e2e500
:bug: Finalise when statistics are *not* ready
jemrobinson Feb 27, 2026
b95c965
:bug: Ensure that typer.Exit is raised
jemrobinson Feb 27, 2026
5208621
:goal_net: Raise exception if status cannot be retrieved, rather than…
jemrobinson Feb 27, 2026
3f49369
:coffin: Removed the unused finalise CLI command
jemrobinson Feb 27, 2026
7fad64f
Merge branch 'main' into 229-fix-download-logic
jemrobinson Feb 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 5 additions & 80 deletions icenet_mp/cli/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,93 +32,18 @@ def create(

@datasets_cli.command("inspect")
@hydra_adaptor
def inspect(config: DictConfig) -> None:
"""Inspect all datasets."""
factory = DataDownloaderFactory(config)
for downloader in factory.downloaders:
logger.info("Working on %s.", downloader.name)
downloader.inspect()


@datasets_cli.command("init")
@hydra_adaptor
def init(
def inspect(
config: DictConfig,
*,
overwrite: Annotated[
bool, typer.Option(help="Specify whether to overwrite existing datasets")
statistics: Annotated[
bool, typer.Option(help="Specify whether to show dataset statistics")
] = False,
) -> None:
"""Create all datasets."""
factory = DataDownloaderFactory(config)
for downloader in factory.downloaders:
logger.info("Working on %s.", downloader.name)
downloader.init(overwrite=overwrite)


@datasets_cli.command("load")
@hydra_adaptor
def load(
config: DictConfig,
parts: Annotated[str, typer.Option(help="The part to process, specified as 'i/n'")],
) -> None:
"""Load dataset in parts."""
factory = DataDownloaderFactory(config)
for downloader in factory.downloaders:
logger.info("Working on %s.", downloader.name)
downloader.load(parts=parts)


@datasets_cli.command("load_in_parts")
@hydra_adaptor
def load_in_parts(
config: DictConfig,
*,
continue_on_error: Annotated[
bool, typer.Option(help="Continue to next part on error")
] = True,
force_reset: Annotated[
bool,
typer.Option(
help="Clear existing progress part_tracker file and start from part 1"
),
] = False,
dataset: Annotated[
str | None, typer.Option(help="Run only a single dataset by name")
] = None,
total_parts: Annotated[
int, typer.Option(help="Override default total parts (10)")
] = 10,
overwrite: Annotated[
bool,
typer.Option(help="Delete the dataset directory before loading"),
] = False,
) -> None:
"""Load all parts for all datasets in parts, recording progress so runs can be resumed."""
factory = DataDownloaderFactory(config)
for downloader in factory.downloaders:
if dataset and downloader.name != dataset:
logger.info("Not loading %s.", downloader.name)
continue
logger.info("Working on %s.", downloader.name)
downloader.load_in_parts(
continue_on_error=continue_on_error,
force_reset=force_reset,
total_parts=total_parts,
overwrite=overwrite,
)


@datasets_cli.command("finalise")
@hydra_adaptor
def finalise(
config: DictConfig,
) -> None:
"""Finalise loaded dataset."""
"""Inspect all datasets."""
factory = DataDownloaderFactory(config)
for downloader in factory.downloaders:
logger.info("Working on %s.", downloader.name)
downloader.finalise()
downloader.inspect(statistics=statistics)


if __name__ == "__main__":
Expand Down
Loading
Loading