From 341b11c90ae3de7a56f03b3c8072212222ea7e6a Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 9 Aug 2024 16:05:27 -0400 Subject: [PATCH 1/5] ENH: add alembic revision --- ...7_change_path_to_dataset_in_hdf5_assets.py | 39 +++++++++++++++++++ ...49ddc_add_table_to_structurefamily_enum.py | 2 +- 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py diff --git a/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py new file mode 100644 index 000000000..858964098 --- /dev/null +++ b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py @@ -0,0 +1,39 @@ +"""Change 'path' to 'dataset' in HDF5 assets + +Revision ID: 562203c724c7 +Revises: ed3a4223a600 +Create Date: 2024-08-09 10:13:36.384838 + +""" +from alembic import op +import sqlalchemy as sa +from tiled.catalog.orm import JSONVariant + +# revision identifiers, used by Alembic. +revision = '562203c724c7' +down_revision = 'ed3a4223a600' +branch_labels = None +depends_on = None + +def upgrade(): + connection = op.get_bind() + nodes = sa.Table( + "nodes", + sa.MetaData(), + sa.Column("id", sa.Integer), + sa.Column("metadata", JSONVariant), + ) + + # Loop over all nodes that have 'parameters' field, choose and update those related to hdf5 files + condition = sa.text("CAST(nodes.metadata->>'parameters' AS TEXT) != ''") + cursor = connection.execute(sa.select(nodes.c.id, nodes.c.metadata).filter(condition).select_from(nodes)) + for _id, _md in cursor: + if 'hdf5' in (_md.get('mimetype', '') + _md.get('spec', '')).lower(): + if isinstance(_md['parameters'], dict) and ('path' in _md['parameters'].keys()): + _md['parameters']['dataset'] = _md['parameters'].pop('path') + connection.execute(nodes.update().where(nodes.c.id == _id).values(metadata=_md)) + +def downgrade(): + # This _could_ be implemented but we will wait for a need since we are + # still in alpha releases. + raise NotImplementedError diff --git a/tiled/catalog/migrations/versions/83889e049ddc_add_table_to_structurefamily_enum.py b/tiled/catalog/migrations/versions/83889e049ddc_add_table_to_structurefamily_enum.py index 4d769b020..5d0648a15 100644 --- a/tiled/catalog/migrations/versions/83889e049ddc_add_table_to_structurefamily_enum.py +++ b/tiled/catalog/migrations/versions/83889e049ddc_add_table_to_structurefamily_enum.py @@ -31,7 +31,7 @@ def upgrade(): connection = op.get_bind() if connection.engine.dialect.name == "postgresql": - # This change must be committed befor ethe new 'table' enum value can be used. + # This change must be committed before the new 'table' enum value can be used. with op.get_context().autocommit_block(): op.execute( sa.text( From 95c99c76c57a919ffe2a19648ee2e6e0f1362d6f Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 9 Aug 2024 16:11:22 -0400 Subject: [PATCH 2/5] DOC: Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e68e29f4..bb0619ce7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Write the date in place of the "Unreleased" in the case a new version is release ### Added - Add method to `TableAdapter` which accepts a Python dictionary. - Added an `Arrow` adapter which supports reading/writing arrow tables via `RecordBatchFileReader`/`RecordBatchFileWriter`. +- Add an alembic catalog migration script to rename `path` parameters of HDF5 nodes to `dataset`. ### Changed - Make `tiled.client` accept a Python dictionary when fed to `write_dataframe()`. From 161fea5c44821b6399f1b3071b58abcec529f1f1 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 9 Aug 2024 16:22:37 -0400 Subject: [PATCH 3/5] MNT: format and lint --- ...7_change_path_to_dataset_in_hdf5_assets.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py index 858964098..bc3b72bdb 100644 --- a/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py +++ b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py @@ -5,16 +5,18 @@ Create Date: 2024-08-09 10:13:36.384838 """ -from alembic import op import sqlalchemy as sa +from alembic import op + from tiled.catalog.orm import JSONVariant # revision identifiers, used by Alembic. -revision = '562203c724c7' -down_revision = 'ed3a4223a600' +revision = "562203c724c7" +down_revision = "ed3a4223a600" branch_labels = None depends_on = None + def upgrade(): connection = op.get_bind() nodes = sa.Table( @@ -26,12 +28,19 @@ def upgrade(): # Loop over all nodes that have 'parameters' field, choose and update those related to hdf5 files condition = sa.text("CAST(nodes.metadata->>'parameters' AS TEXT) != ''") - cursor = connection.execute(sa.select(nodes.c.id, nodes.c.metadata).filter(condition).select_from(nodes)) + cursor = connection.execute( + sa.select(nodes.c.id, nodes.c.metadata).filter(condition).select_from(nodes) + ) for _id, _md in cursor: - if 'hdf5' in (_md.get('mimetype', '') + _md.get('spec', '')).lower(): - if isinstance(_md['parameters'], dict) and ('path' in _md['parameters'].keys()): - _md['parameters']['dataset'] = _md['parameters'].pop('path') - connection.execute(nodes.update().where(nodes.c.id == _id).values(metadata=_md)) + if "hdf5" in (_md.get("mimetype", "") + _md.get("spec", "")).lower(): + if isinstance(_md["parameters"], dict) and ( + "path" in _md["parameters"].keys() + ): + _md["parameters"]["dataset"] = _md["parameters"].pop("path") + connection.execute( + nodes.update().where(nodes.c.id == _id).values(metadata=_md) + ) + def downgrade(): # This _could_ be implemented but we will wait for a need since we are From d3f1b934483d1ea57e5b5bf9565863404a01e6e2 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 9 Aug 2024 16:52:15 -0400 Subject: [PATCH 4/5] FIX: parse json metadata outside of SQL engine --- .gitignore | 3 +++ .../562203c724c7_change_path_to_dataset_in_hdf5_assets.py | 7 ++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index b10648632..53bf5d928 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,9 @@ target/ .DS_Store *~ +#alembic +alembic.ini + #vim *.swp *.swo diff --git a/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py index bc3b72bdb..38efa09a6 100644 --- a/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py +++ b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py @@ -27,13 +27,10 @@ def upgrade(): ) # Loop over all nodes that have 'parameters' field, choose and update those related to hdf5 files - condition = sa.text("CAST(nodes.metadata->>'parameters' AS TEXT) != ''") - cursor = connection.execute( - sa.select(nodes.c.id, nodes.c.metadata).filter(condition).select_from(nodes) - ) + cursor = connection.execute(sa.select(nodes.c.id, nodes.c.metadata).select_from(nodes)) for _id, _md in cursor: if "hdf5" in (_md.get("mimetype", "") + _md.get("spec", "")).lower(): - if isinstance(_md["parameters"], dict) and ( + if isinstance(_md.get("parameters"), dict) and ( "path" in _md["parameters"].keys() ): _md["parameters"]["dataset"] = _md["parameters"].pop("path") From 4b0c3b525b936a73786fa8a8c68d59a44c0bed05 Mon Sep 17 00:00:00 2001 From: Eugene M Date: Fri, 9 Aug 2024 16:57:48 -0400 Subject: [PATCH 5/5] MNT: format and lint --- .../562203c724c7_change_path_to_dataset_in_hdf5_assets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py index 38efa09a6..e4c8939b1 100644 --- a/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py +++ b/tiled/catalog/migrations/versions/562203c724c7_change_path_to_dataset_in_hdf5_assets.py @@ -27,7 +27,9 @@ def upgrade(): ) # Loop over all nodes that have 'parameters' field, choose and update those related to hdf5 files - cursor = connection.execute(sa.select(nodes.c.id, nodes.c.metadata).select_from(nodes)) + cursor = connection.execute( + sa.select(nodes.c.id, nodes.c.metadata).select_from(nodes) + ) for _id, _md in cursor: if "hdf5" in (_md.get("mimetype", "") + _md.get("spec", "")).lower(): if isinstance(_md.get("parameters"), dict) and (