diff --git a/cli/macrostrat/cli/database/migrations/file_storage_updates/__init__.py b/cli/macrostrat/cli/database/migrations/file_storage_updates/__init__.py index cc621e673..770532d19 100644 --- a/cli/macrostrat/cli/database/migrations/file_storage_updates/__init__.py +++ b/cli/macrostrat/cli/database/migrations/file_storage_updates/__init__.py @@ -1,47 +1,74 @@ -from macrostrat.core.migrations import ( - Migration, - column_type_is, - exists, - has_columns, - not_exists, - view_exists, -) +from macrostrat.core.migrations import Migration, _any, _not, exists, has_columns -class FileStorageUpdates(Migration): - name = "file-storage-updates" +class MapFiles(Migration): + name = "map-files" subsystem = "maps" description = "Update storage schema for better file management." depends_on = ["api-v3"] preconditions = [ - exists("storage", "object"), - exists("storage", "object_group"), exists("maps_metadata", "ingest_process"), + exists("storage", "object"), ] postconditions = [ # storage.object no longer has object_group_id - has_columns( - "storage", - "object", - "scheme", - "host", - "bucket", - "key", - "source", - "mime_type", - "sha256_hash", - "created_on", - "updated_on", - "deleted_on", - ), # intersection table exists in storage schema exists("storage", "map_files"), # intersection table columns exist has_columns( - "storage", + "maps_metadata", "map_files", "id", "ingest_process_id", "object_id", ), ] + + +class MapFilesChangeSchema(Migration): + name = "map-files-change-schema" + subsystem = "maps" + description = "Change map_files table to use the map_metadata schema" + depends_on = ["map-files"] + preconditions = [ + exists("storage", "map_files"), + exists("maps_metadata", "map_files"), + ] + postconditions = [_not(exists("storage", "map_files"))] + + def apply(self, db): + db.run_sql( + """ + INSERT INTO maps_metadata.map_files (ingest_process_id, object_id) + SELECT ingest_process_id, object_id FROM storage.map_files + ON CONFLICT (ingest_process_id, object_id) DO NOTHING; + + DROP TABLE IF EXISTS storage.map_files; + """ + ) + + +has_object_group = _any( + [ + exists("storage", "object_group"), + has_columns("storage", "object", "object_group_id"), + ] +) + + +class StorageAddColumns(Migration): + name = "storage-add-columns" + + def apply(self, db): + db.run_sql( + """ + ALTER TABLE storage.object DROP COLUMN IF EXISTS object_group_id; + DROP TABLE IF EXISTS storage.object_group; + """ + ) + + preconditions = [has_object_group] + + postconditions = [ + _not(has_object_group), + ] diff --git a/cli/macrostrat/cli/database/migrations/file_storage_updates/file_storage_updates.sql b/cli/macrostrat/cli/database/migrations/file_storage_updates/file_storage_updates.sql deleted file mode 100644 index b7e4df094..000000000 --- a/cli/macrostrat/cli/database/migrations/file_storage_updates/file_storage_updates.sql +++ /dev/null @@ -1,10 +0,0 @@ -ALTER TABLE storage.object DROP COLUMN IF EXISTS object_group_id; -DROP TABLE IF EXISTS storage.object_group CASCADE; - -CREATE TABLE IF NOT EXISTS storage.map_files ( - id serial PRIMARY KEY, - ingest_process_id integer NOT NULL - REFERENCES maps_metadata.ingest_process(id) ON DELETE CASCADE, - object_id integer NOT NULL - REFERENCES storage.object(id) ON DELETE CASCADE - ); \ No newline at end of file diff --git a/cli/macrostrat/cli/database/migrations/file_storage_updates/map-files-table.sql b/cli/macrostrat/cli/database/migrations/file_storage_updates/map-files-table.sql new file mode 100644 index 000000000..32a819580 --- /dev/null +++ b/cli/macrostrat/cli/database/migrations/file_storage_updates/map-files-table.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS maps_metadata.map_files ( + id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + ingest_process_id integer NOT NULL + REFERENCES maps_metadata.ingest_process(id) ON DELETE CASCADE, + object_id integer NOT NULL REFERENCES storage.object(id), + -- No cascade: don't allow deletion of files in use without explicit dereferencing + UNIQUE (ingest_process_id, object_id) -- prevent duplicate entries for the same file in one ingest +); diff --git a/core/macrostrat/core/migrations/__init__.py b/core/macrostrat/core/migrations/__init__.py index dae7e71d9..3da2e1b33 100644 --- a/core/macrostrat/core/migrations/__init__.py +++ b/core/macrostrat/core/migrations/__init__.py @@ -18,7 +18,7 @@ from ..config import settings from ..database import get_database -""" Higher-order functions that return a function that evaluates whether a condition is met on the database """ +"""Higher-order functions that return a function that evaluates whether a condition is met on the database """ DbEvaluator = Callable[[Database], bool] PathDependency = Callable[["Migration"], Path] | Path diff --git a/map-integration/macrostrat/map_integration/__init__.py b/map-integration/macrostrat/map_integration/__init__.py index 798643323..7e665a302 100644 --- a/map-integration/macrostrat/map_integration/__init__.py +++ b/map-integration/macrostrat/map_integration/__init__.py @@ -158,12 +158,12 @@ def delete_sources( print("Ingest Process ID", ingest_process_id) if file_name is None: rows = db.run_query( - "select f.object_id from storage.map_files f where ingest_process_id = :ingest_process_id", + "select f.object_id from maps_metadata.map_files f where ingest_process_id = :ingest_process_id", dict(ingest_process_id=ingest_process_id), ).fetchall() object_ids = [r[0] for r in rows] db.run_sql( - "DELETE FROM storage.map_files WHERE ingest_process_id = :ingest_process_id", + "DELETE FROM maps_metadata.map_files WHERE ingest_process_id = :ingest_process_id", dict(ingest_process_id=ingest_process_id), ) if object_ids: @@ -432,7 +432,7 @@ def staging( for object in object_ids: db.run_sql( """ - INSERT INTO storage.map_files (ingest_process_id, object_id) + INSERT INTO maps_metadata.map_files (ingest_process_id, object_id) VALUES (:ingest_process_id, :object_id) """, dict(ingest_process_id=ingest_id, object_id=object), @@ -674,7 +674,7 @@ def staging_bulk( for object in object_ids: db.run_sql( """ - INSERT INTO storage.map_files (ingest_process_id, object_id) + INSERT INTO maps_metadata.map_files (ingest_process_id, object_id) VALUES (:ingest_process_id, :object_id) """, dict(ingest_process_id=ingest_id, object_id=object),