Skip to content

Commit e81b583

Browse files
authored
Merge pull request #1416 from microbiomedata/issue-1415-workflow-type-migration
Migrate `workflow_type` values in `data_object` table
2 parents fd8ff76 + 5b1a995 commit e81b583

File tree

5 files changed

+87
-9
lines changed

5 files changed

+87
-9
lines changed

nmdc_server/alembic.ini

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ keys = console
6262
keys = generic
6363

6464
[logger_root]
65-
level = WARN
6665
handlers = console
6766
qualname =
6867

nmdc_server/cli.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from nmdc_server.config import Settings
1313
from nmdc_server.database import SessionLocalIngest
1414
from nmdc_server.ingest import errors
15-
from nmdc_server.logger import get_logger
1615

1716

1817
@click.group()
@@ -75,9 +74,7 @@ def ingest(verbose, function_limit, skip_annotation, swap_rancher_secrets):
7574
level = logging.INFO
7675
elif verbose > 1:
7776
level = logging.DEBUG
78-
logger = get_logger(__name__)
7977
logging.basicConfig(level=level, format="%(message)s")
80-
logger.setLevel(logging.INFO)
8178

8279
jobs.do_ingest(function_limit, skip_annotation)
8380

@@ -269,3 +266,7 @@ def load_db(key_file, user, host, list_backups, backup_file):
269266
sys.exit(1)
270267

271268
click.secho(f"\nSuccessfully loaded {settings.current_db_uri}", fg="green")
269+
270+
271+
if __name__ == "__main__":
272+
cli()

nmdc_server/ingest/all.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,5 +253,3 @@ def load(db: Session, function_limit=None, skip_annotation=False):
253253
logger.info("Loading search indices")
254254
search_index.load(db)
255255
db.commit()
256-
257-
logger.info("Ingest finished successfully")

nmdc_server/jobs.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,6 @@ def ping():
2424

2525
def update_nmdc_functions():
2626
"""Update NMDC custom functions for both databases."""
27-
logger = get_logger(__name__)
28-
logging.basicConfig(level=logging.INFO, format="%(message)s")
29-
logger.setLevel(logging.INFO)
3027
for db_info in [(database.SessionLocal, "active"), (database.SessionLocalIngest, "ingest")]:
3128
db_to_update, db_type = db_info
3229
with db_to_update() as db:
@@ -84,10 +81,15 @@ def do_ingest(function_limit, skip_annotation):
8481
load(ingest_db, function_limit=function_limit, skip_annotation=skip_annotation)
8582

8683
# copy persistent data from the production db to the ingest db
84+
logger.info("Merging file_download")
8785
maybe_merge_download_artifact(ingest_db, prod_db.query(models.FileDownload))
86+
logger.info("Merging bulk_download")
8887
maybe_merge_download_artifact(ingest_db, prod_db.query(models.BulkDownload))
88+
logger.info("Merging bulk_download_data_object")
8989
maybe_merge_download_artifact(ingest_db, prod_db.query(models.BulkDownloadDataObject))
9090

91+
logger.info("Ingest finished successfully")
92+
9193

9294
@celery_app.task
9395
def ingest(function_limit=None, skip_annotation=False):
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""Rename data object workflow types
2+
3+
The Berkeley schema migration changed a number of workflow type names. Even though
4+
the `data_object` table is truncated at the start of the ingest process, the old
5+
workflow types were being introduced by a merge operation from the live database
6+
to the ingest database.
7+
8+
See also: https://github.com/microbiomedata/nmdc-server/issues/1415
9+
10+
Revision ID: e54d37bfb90b
11+
Revises: 2ec2d0b4f840
12+
Create Date: 2024-10-11 18:06:08.521445
13+
14+
"""
15+
16+
from typing import Optional
17+
18+
import sqlalchemy as sa
19+
from alembic import op
20+
from sqlalchemy.sql import column, table
21+
22+
# revision identifiers, used by Alembic.
23+
revision: str = "e54d37bfb90b"
24+
down_revision: Optional[str] = "2ec2d0b4f840"
25+
branch_labels: Optional[str] = None
26+
depends_on: Optional[str] = None
27+
28+
29+
# Pairs of (pre-Berkeley workflow type, post-Berkeley workflow type). Each pair
# is expanded into a {"old": ..., "new": ...} dict; order is preserved so the
# UPDATE statements run in the same sequence in both migration directions.
WORKFLOW_TYPE_MAP = [
    {"old": old_name, "new": new_name}
    for old_name, new_name in (
        ("nmdc:MAGsAnalysisActivity", "nmdc:MagsAnalysis"),
        ("nmdc:MetabolomicsAnalysisActivity", "nmdc:MetabolomicsAnalysis"),
        ("nmdc:MetaProteomicAnalysis", "nmdc:MetaproteomicAnalysis"),
        ("nmdc:metaT", "nmdc:MetatranscriptomeAnalysis"),
        ("nmdc:NomAnalysisActivity", "nmdc:NomAnalysis"),
        ("nmdc:ReadbasedAnalysis", "nmdc:ReadBasedTaxonomyAnalysis"),
        ("nmdc:ReadQCAnalysisActivity", "nmdc:ReadQcAnalysis"),
    )
]
59+
60+
61+
def upgrade():
    """Rewrite legacy ``workflow_type`` values in ``data_object`` to their new names.

    Executes one UPDATE per entry of ``WORKFLOW_TYPE_MAP``, replacing every row
    whose ``workflow_type`` equals the entry's "old" value with its "new" value.
    """
    # Ad-hoc table stub: only the single column this migration touches is declared.
    data_object = table("data_object", column("workflow_type", sa.String))
    workflow_type_col = data_object.c.workflow_type

    for entry in WORKFLOW_TYPE_MAP:
        previous_name = entry["old"]
        current_name = entry["new"]
        statement = (
            data_object.update()
            .where(workflow_type_col == previous_name)
            .values(workflow_type=current_name)
        )
        op.execute(statement)
69+
70+
71+
def downgrade():
    """Restore the legacy ``workflow_type`` values in ``data_object``.

    Executes one UPDATE per entry of ``WORKFLOW_TYPE_MAP``, replacing every row
    whose ``workflow_type`` equals the entry's "new" value with its "old" value.
    """
    # Ad-hoc table stub: only the single column this migration touches is declared.
    data_object = table("data_object", column("workflow_type", sa.String))
    workflow_type_col = data_object.c.workflow_type

    for entry in WORKFLOW_TYPE_MAP:
        current_name = entry["new"]
        previous_name = entry["old"]
        statement = (
            data_object.update()
            .where(workflow_type_col == current_name)
            .values(workflow_type=previous_name)
        )
        op.execute(statement)

0 commit comments

Comments
 (0)