diff --git a/.github/workflows/run-checks.yml b/.github/workflows/run-checks.yml index 7d4bad63..f45f7872 100644 --- a/.github/workflows/run-checks.yml +++ b/.github/workflows/run-checks.yml @@ -33,7 +33,7 @@ jobs: run: pip install -r ./catalog/build/py/requirements.txt - name: Run linkml-lint # Run linting on the LinkML schemas, to enforce conventions such as in naming, and to catch simple errors. - run: linkml-lint ./catalog/schema --validate --verbose + run: npm run lint-schema - name: Test LinkML Python generation # Generate Python code from the main LinkML schemas, discarding the output; this will catch more subtle errors such as references to nonexistent elements. run: npm run test-gen-python diff --git a/README.md b/README.md index 413c1db2..cde310f0 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ These values will be substituted with assembly-specific values at runtime. ## Editing the LinkML schemas -If the LinkML schemas in `catalog/schema` are edited, the derived JSON schemas and TypeScript definitions should be +If the LinkML schemas in `catalog/py_package/catalog_build/schema` are edited, the derived JSON schemas and TypeScript definitions should be updated as follows: diff --git a/catalog/README.md b/catalog/README.md index 5a21158d..e273b2d6 100644 --- a/catalog/README.md +++ b/catalog/README.md @@ -7,5 +7,6 @@ This directory provides the catalog data (information on genome assemblies, orga - `py` - Python scripts. - `ts` - Typescript scripts. - `output` - JSON files output by the catalog build process, to be consumed by the app. -- `schema` - LinkML schemas for source files. +- `py_package` - Python package used to share catalog features, such as the schemas and build process, with other projects. +- `schema` - Schema-related scripts and derived models. - `source` - YAML files providing data used as input for building the catalog. diff --git a/catalog/build/py/build-files-from-ncbi.py b/catalog/build/py/build_files_from_ncbi.py similarity index 97% rename from catalog/build/py/build-files-from-ncbi.py rename to catalog/build/py/build_files_from_ncbi.py index 39e70e29..6b0912fd 100644 --- a/catalog/build/py/build-files-from-ncbi.py +++ b/catalog/build/py/build_files_from_ncbi.py @@ -1,4 +1,4 @@ -from package.catalog_build import build_files +from ...py_package.catalog_build import build_files ASSEMBLIES_PATH = "catalog/source/assemblies.yml" ORGANISMS_PATH = "catalog/source/organisms.yml" diff --git a/catalog/build/py/package/catalog_build/__init__.py b/catalog/py_package/catalog_build/__init__.py similarity index 100% rename from catalog/build/py/package/catalog_build/__init__.py rename to catalog/py_package/catalog_build/__init__.py diff --git a/catalog/build/py/package/catalog_build/build.py b/catalog/py_package/catalog_build/build.py similarity index 100% rename from catalog/build/py/package/catalog_build/build.py rename to catalog/py_package/catalog_build/build.py diff --git a/catalog/build/py/generated_schema/schema.py b/catalog/py_package/catalog_build/generated_schema/schema.py similarity index 94% rename from catalog/build/py/generated_schema/schema.py rename to catalog/py_package/catalog_build/generated_schema/schema.py index 1836806d..d9d6b418 100644 --- a/catalog/build/py/generated_schema/schema.py +++ b/catalog/py_package/catalog_build/generated_schema/schema.py @@ -44,9 +44,9 @@ def __contains__(self, key: str) -> bool: linkml_meta = LinkMLMeta( { - "default_prefix": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/schema.yaml#", + "default_prefix": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/schema.yaml#", "description": "Combined source data schemas.", - "id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/schema.yaml#", + "id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/schema.yaml#", "imports": [ "./assemblies", "./organisms", @@ -61,7 +61,7 @@ def __contains__(self, key: str) -> bool: "prefix_reference": "https://w3id.org/linkml/", } }, - "source_file": "./catalog/schema/schema.yaml", + "source_file": "/Users/hunter/git-repos/brc-analytics/catalog/py_package/catalog_build/schema_utils/../schema/schema.yaml", } ) @@ -146,7 +146,7 @@ class Assemblies(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml#", "tree_root": True, } ) @@ -167,7 +167,7 @@ class Assembly(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml#" } ) @@ -187,7 +187,7 @@ class Organisms(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml#", "tree_root": True, } ) @@ -208,7 +208,7 @@ class Organism(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml#" } ) @@ -238,7 +238,7 @@ class Outbreaks(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#", "tree_root": True, } ) @@ -259,7 +259,7 @@ class Outbreak(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#" } ) @@ -333,7 +333,7 @@ class OutbreakResource(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#" } ) @@ -370,7 +370,7 @@ class MarkdownFileReference(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#" } ) @@ -402,7 +402,7 @@ class WorkflowCategories(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml#", "tree_root": True, } ) @@ -426,7 +426,7 @@ class WorkflowCategory(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml#" } ) @@ -476,7 +476,7 @@ class Workflows(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#", "tree_root": True, } ) @@ -497,7 +497,7 @@ class Workflow(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#" } ) @@ -569,7 +569,7 @@ class WorkflowParameter(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#" } ) @@ -610,7 +610,7 @@ class WorkflowUrlSpec(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#" } ) diff --git a/catalog/build/py/iwc_manifest_to_workflows_yaml.py b/catalog/py_package/catalog_build/iwc_manifest_to_workflows_yaml.py similarity index 90% rename from catalog/build/py/iwc_manifest_to_workflows_yaml.py rename to catalog/py_package/catalog_build/iwc_manifest_to_workflows_yaml.py index 37dc927c..2e87bdbc 100644 --- a/catalog/build/py/iwc_manifest_to_workflows_yaml.py +++ b/catalog/py_package/catalog_build/iwc_manifest_to_workflows_yaml.py @@ -6,7 +6,8 @@ import requests import yaml -from generated_schema.schema import ( + +from .generated_schema.schema import ( Workflow, WorkflowCategoryId, WorkflowParameter, @@ -15,7 +16,6 @@ ) URL = "https://iwc.galaxyproject.org/workflow_manifest.json" -WORKFLOWS_PATH = "catalog/source/workflows.yml" DOCKSTORE_COLLECTION_TO_CATEGORY = { "Variant Calling": WorkflowCategoryId.VARIANT_CALLING, "Transcriptomics": WorkflowCategoryId.TRANSCRIPTOMICS, @@ -31,9 +31,9 @@ ) -def read_existing_yaml(): - if os.path.exists(WORKFLOWS_PATH): - with open(WORKFLOWS_PATH) as fh: +def read_existing_yaml(workflows_path): + if os.path.exists(workflows_path): + with open(workflows_path) as fh: workflows = Workflows.model_validate(yaml.safe_load(fh)).workflows else: # start from scratch @@ -116,8 +116,8 @@ def generate_current_workflows(): return by_trs_id -def merge_into_existing(): - existing = read_existing_yaml() +def merge_into_existing(workflows_path): + existing = read_existing_yaml(workflows_path) current = generate_current_workflows() merged: Dict[str, Workflow] = {} for versionless_trs_id, current_workflow_input in current.items(): @@ -144,8 +144,8 @@ def merge_into_existing(): return merged -def to_workflows_yaml(exclude_other: bool): - by_trs_id = merge_into_existing() +def to_workflows_yaml(workflows_path: str, exclude_other: bool): + by_trs_id = merge_into_existing(workflows_path) # sort by trs id, should play nicer with git diffs sorted_workflows = list(dict(sorted(by_trs_id.items())).values()) if exclude_other: @@ -160,7 +160,7 @@ def to_workflows_yaml(exclude_other: bool): final_workflows.append(workflow) sorted_workflows = final_workflows final_workflows = sorted_workflows - with open(WORKFLOWS_PATH, "w") as out: + with open(workflows_path, "w") as out: yaml.safe_dump( Workflows(workflows=final_workflows).model_dump(exclude_none=True), out, @@ -168,17 +168,20 @@ def to_workflows_yaml(exclude_other: bool): sort_keys=False, ) # Turns out the YAML style prettier likes is really hard to create in python ... - subprocess.run(["npx", "prettier", "--write", WORKFLOWS_PATH]) + subprocess.run(["npx", "prettier", "--write", workflows_path]) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Build workflows.yaml file from latest IWC JSON manifest." ) + parser.add_argument( + "workflows_path", help="Path of workflows YAML file to read/write." + ) parser.add_argument( "--exclude-other", action="store_true", help="Exclude other items from processing.", ) args = parser.parse_args() - to_workflows_yaml(exclude_other=args.exclude_other) + to_workflows_yaml(args.workflows_path, exclude_other=args.exclude_other) diff --git a/catalog/schema/assemblies.yaml b/catalog/py_package/catalog_build/schema/assemblies.yaml similarity index 94% rename from catalog/schema/assemblies.yaml rename to catalog/py_package/catalog_build/schema/assemblies.yaml index c017ad68..437130e2 100644 --- a/catalog/schema/assemblies.yaml +++ b/catalog/py_package/catalog_build/schema/assemblies.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml# name: assemblies description: Schema for defining genomic assemblies available in the BRC Analytics platform. diff --git a/catalog/schema/enums/organism_ploidy.yaml b/catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml similarity index 78% rename from catalog/schema/enums/organism_ploidy.yaml rename to catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml index 7d5f4ce2..807d3cd5 100644 --- a/catalog/schema/enums/organism_ploidy.yaml +++ b/catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/organism_ploidy.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml# name: enums_organism_ploidy enums: diff --git a/catalog/schema/enums/outbreak_priority.yaml b/catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml similarity index 80% rename from catalog/schema/enums/outbreak_priority.yaml rename to catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml index 7dc944ef..60346a40 100644 --- a/catalog/schema/enums/outbreak_priority.yaml +++ b/catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/outbreak_priority.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml# name: enums_outbreak_priority enums: diff --git a/catalog/schema/enums/outbreak_resource_type.yaml b/catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml similarity index 81% rename from catalog/schema/enums/outbreak_resource_type.yaml rename to catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml index bc5ade2a..23d09198 100644 --- a/catalog/schema/enums/outbreak_resource_type.yaml +++ b/catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/outbreak_resource_type.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml# name: enums_outbreak_resource_type enums: diff --git a/catalog/schema/enums/workflow_category_id.yaml b/catalog/py_package/catalog_build/schema/enums/workflow_category_id.yaml similarity index 87% rename from catalog/schema/enums/workflow_category_id.yaml rename to catalog/py_package/catalog_build/schema/enums/workflow_category_id.yaml index 473193e2..18cd25fd 100644 --- a/catalog/schema/enums/workflow_category_id.yaml +++ b/catalog/py_package/catalog_build/schema/enums/workflow_category_id.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/workflow_type.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/workflow_type.yaml# name: enums_workflow_category_id description: Definition of the workflow category ID enum. diff --git a/catalog/schema/enums/workflow_parameter_variable.yaml b/catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml similarity index 81% rename from catalog/schema/enums/workflow_parameter_variable.yaml rename to catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml index 5261d2d3..ae1bb3fd 100644 --- a/catalog/schema/enums/workflow_parameter_variable.yaml +++ b/catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/workflow_parameter_variable.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml# name: enums_workflow_parameter_variable enums: diff --git a/catalog/schema/enums/workflow_ploidy.yaml b/catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml similarity index 79% rename from catalog/schema/enums/workflow_ploidy.yaml rename to catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml index 48d699b0..8177b53f 100644 --- a/catalog/schema/enums/workflow_ploidy.yaml +++ b/catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/workflow_ploidy.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml# name: enums_workflow_ploidy enums: diff --git a/catalog/schema/organisms.yaml b/catalog/py_package/catalog_build/schema/organisms.yaml similarity index 95% rename from catalog/schema/organisms.yaml rename to catalog/py_package/catalog_build/schema/organisms.yaml index 376cbe14..99631823 100644 --- a/catalog/schema/organisms.yaml +++ b/catalog/py_package/catalog_build/schema/organisms.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml# name: organisms description: Schema for defining source organism information used in the BRC Analytics platform. diff --git a/catalog/schema/outbreaks.yaml b/catalog/py_package/catalog_build/schema/outbreaks.yaml similarity index 98% rename from catalog/schema/outbreaks.yaml rename to catalog/py_package/catalog_build/schema/outbreaks.yaml index 5a41234d..7f9f1063 100644 --- a/catalog/schema/outbreaks.yaml +++ b/catalog/py_package/catalog_build/schema/outbreaks.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml# name: outbreaks description: Schema for defining outbreak and pathogen information used in the BRC Analytics platform. diff --git a/catalog/schema/schema.yaml b/catalog/py_package/catalog_build/schema/schema.yaml similarity index 84% rename from catalog/schema/schema.yaml rename to catalog/py_package/catalog_build/schema/schema.yaml index 0b342712..dffb5a39 100644 --- a/catalog/schema/schema.yaml +++ b/catalog/py_package/catalog_build/schema/schema.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/schema.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/schema.yaml# name: schema description: Combined source data schemas. diff --git a/catalog/schema/workflow_categories.yaml b/catalog/py_package/catalog_build/schema/workflow_categories.yaml similarity index 96% rename from catalog/schema/workflow_categories.yaml rename to catalog/py_package/catalog_build/schema/workflow_categories.yaml index 7ba52215..0e1ca156 100644 --- a/catalog/schema/workflow_categories.yaml +++ b/catalog/py_package/catalog_build/schema/workflow_categories.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml# name: workflow_categories description: Schema for defining workflow categories used to organize Galaxy workflows in the BRC Analytics platform. diff --git a/catalog/schema/workflows.yaml b/catalog/py_package/catalog_build/schema/workflows.yaml similarity index 98% rename from catalog/schema/workflows.yaml rename to catalog/py_package/catalog_build/schema/workflows.yaml index 6177a6e0..bfd71987 100644 --- a/catalog/schema/workflows.yaml +++ b/catalog/py_package/catalog_build/schema/workflows.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml# name: workflows description: Schema for defining Galaxy workflows available in the BRC Analytics platform. diff --git a/catalog/py_package/catalog_build/schema_utils/gen_schema.py b/catalog/py_package/catalog_build/schema_utils/gen_schema.py new file mode 100644 index 00000000..ab550b8c --- /dev/null +++ b/catalog/py_package/catalog_build/schema_utils/gen_schema.py @@ -0,0 +1,103 @@ +import os.path +from argparse import ArgumentParser + +from linkml.generators import JsonSchemaGenerator, PydanticGenerator + +from .gen_typescript import TypescriptGeneratorFixed + +SCHEMA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../schema") + +# Mapping from name to tuple of generator class and output file extension +GENERATOR_TYPES = { + "Pydantic": (PydanticGenerator, "py"), + "TypeScript": (TypescriptGeneratorFixed, "ts"), + "JSON Schema": (JsonSchemaGenerator, "json"), +} + + +def gen_schema_type(type_name, out_dir, schema_names, default_schema_names): + if out_dir is None: + if schema_names is not None: + print(f"No output path specified for {type_name} generator\n") + return + + if not schema_names: + if not default_schema_names: + print(f"No schemas specified for {type_name} generator\n") + return + schema_names = default_schema_names + + print(f"Generating {type_name}") + + generator, extension = GENERATOR_TYPES[type_name] + + for name in schema_names: + result_text = generator(os.path.join(SCHEMA_DIR, f"{name}.yaml")).serialize() + out_path = os.path.join(out_dir, f"{name}.{extension}") + with open(out_path, "w") as file: + file.write(result_text + "\n") + print(f"Wrote to {out_path}") + + print("") + + +def gen_schema( + default_schema_names, + *, + py_path=None, + py_names=None, + ts_path=None, + ts_names=None, + json_path=None, + json_names=None, +): + gen_schema_type("Pydantic", py_path, py_names, default_schema_names) + gen_schema_type("TypeScript", ts_path, ts_names, default_schema_names) + gen_schema_type("JSON Schema", json_path, json_names, default_schema_names) + + +def cli(): + parser = ArgumentParser() + parser.add_argument( + "schema_name", + nargs="*", + help="name of a default schema to generate from if generator-specific schemas aren't specified", + ) + parser.add_argument( + "--py-path", help="path of directory to output pydantic files to" + ) + parser.add_argument( + "--py-name", + action="append", + help="name of a schema to generate pydantic models from", + ) + parser.add_argument( + "--ts-path", help="path of directory to output typescript files to" + ) + parser.add_argument( + "--ts-name", + action="append", + help="name of a schema to generate typescript definitions from", + ) + parser.add_argument( + "--json-path", help="path of directory to output json schema files to" + ) + parser.add_argument( + "--json-name", + action="append", + help="name of a schema to generate json schema from", + ) + args = parser.parse_args() + gen_schema( + args.schema_name, + py_path=args.py_path, + py_names=args.py_name, + ts_path=args.ts_path, + ts_names=args.ts_name, + json_path=args.json_path, + json_names=args.json_name, + ) + + +if __name__ == "__main__": + cli() diff --git a/catalog/schema/scripts/gen-typescript.py b/catalog/py_package/catalog_build/schema_utils/gen_typescript.py similarity index 90% rename from catalog/schema/scripts/gen-typescript.py rename to catalog/py_package/catalog_build/schema_utils/gen_typescript.py index 6ebe7041..91a5ea75 100644 --- a/catalog/schema/scripts/gen-typescript.py +++ b/catalog/py_package/catalog_build/schema_utils/gen_typescript.py @@ -24,4 +24,5 @@ def range(self, slot): return base_result if slot.required else f"{base_result} | null" -print(TypescriptGeneratorFixed(sys.argv[1]).serialize()) +if __name__ == "__main__": + print(TypescriptGeneratorFixed(sys.argv[1]).serialize()) diff --git a/catalog/py_package/catalog_build/schema_utils/validate_catalog.py b/catalog/py_package/catalog_build/schema_utils/validate_catalog.py new file mode 100644 index 00000000..e9fde2d5 --- /dev/null +++ b/catalog/py_package/catalog_build/schema_utils/validate_catalog.py @@ -0,0 +1,52 @@ +import os.path +import sys +from argparse import ArgumentParser + +from linkml.validator import ( + JsonschemaValidationPlugin, + Validator, + default_loader_for_file, +) +from linkml.validator.report import Severity + +SCHEMA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../schema") + + +def validate_catalog(source_dir, source_types): + source_dir = os.path.abspath(source_dir) + found_errors = False + for name in source_types: + print(f"{name}:") + validator = Validator( + os.path.join(SCHEMA_DIR, f"{name}.yaml"), + validation_plugins=[JsonschemaValidationPlugin(closed=True)], + ) + loader = default_loader_for_file(os.path.join(source_dir, f"{name}.yml")) + severities = set() + for result in validator.iter_results_from_source(loader): + severities.add(result.severity) + print(f"[{result.severity.value}] {result.message}") + if not severities: + print("No issues found") + elif Severity.ERROR in severities: + found_errors = True + print("") + if found_errors: + print("Validation failed for one or more schemas.") + sys.exit(1) + + +def cli(): + parser = ArgumentParser() + parser.add_argument( + "source_dir", help="path of directory to validate catalog source files from" + ) + parser.add_argument( + "source_type", nargs="+", help="name of a schema/entity type to validate" + ) + args = parser.parse_args() + validate_catalog(args.source_dir, args.source_type) + + +if __name__ == "__main__": + cli() diff --git a/catalog/build/py/package/setup.py b/catalog/py_package/setup.py similarity index 100% rename from catalog/build/py/package/setup.py rename to catalog/py_package/setup.py diff --git a/catalog/schema/generated/assemblies.json b/catalog/schema/generated/assemblies.json index 0de25805..b6281d68 100644 --- a/catalog/schema/generated/assemblies.json +++ b/catalog/schema/generated/assemblies.json @@ -34,7 +34,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Root object containing a collection of genomic assembly definitions for the BRC Analytics platform.", diff --git a/catalog/schema/generated/organisms.json b/catalog/schema/generated/organisms.json index 84456d21..9a46eae4 100644 --- a/catalog/schema/generated/organisms.json +++ b/catalog/schema/generated/organisms.json @@ -52,7 +52,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Root object containing a collection of organism definitions for the BRC Analytics platform.", diff --git a/catalog/schema/generated/outbreaks.json b/catalog/schema/generated/outbreaks.json index 81080d98..841d033b 100644 --- a/catalog/schema/generated/outbreaks.json +++ b/catalog/schema/generated/outbreaks.json @@ -139,7 +139,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Root object containing a collection of pathogen definitions for the BRC Analytics platform to highlight as outbreaks/priority pathogens.", diff --git a/catalog/schema/generated/workflow_categories.json b/catalog/schema/generated/workflow_categories.json index f4e9b8cc..0d790c56 100644 --- a/catalog/schema/generated/workflow_categories.json +++ b/catalog/schema/generated/workflow_categories.json @@ -64,7 +64,7 @@ "type": "string" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Root object containing a collection of workflow category definitions used to organize workflows in the BRC Analytics platform.", diff --git a/catalog/schema/generated/workflows.json b/catalog/schema/generated/workflows.json index 6512d435..d06bdda1 100644 --- a/catalog/schema/generated/workflows.json +++ b/catalog/schema/generated/workflows.json @@ -192,7 +192,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Root object containing a collection of Galaxy workflow definitions for the BRC Analytics platform.", diff --git a/catalog/schema/scripts/gen-schema.sh b/catalog/schema/scripts/gen-schema.sh index 74195e92..91420e7c 100755 --- a/catalog/schema/scripts/gen-schema.sh +++ b/catalog/schema/scripts/gen-schema.sh @@ -2,14 +2,7 @@ source ./catalog/schema/scripts/source-file-schema-names.sh -# Generate Pydantic models for all source data types -gen-pydantic ./catalog/schema/schema.yaml > ./catalog/build/py/generated_schema/schema.py - -# Generate TypeScript definitions for all source data types -python3 ./catalog/schema/scripts/gen-typescript.py ./catalog/schema/schema.yaml > ./catalog/schema/generated/schema.ts - -# Generate a JSON schema for each source file -for name in ${SOURCE_FILE_SCHEMA_NAMES[@]} -do - gen-json-schema "./catalog/schema/$name.yaml" > "./catalog/schema/generated/$name.json" -done +python3 -m catalog.py_package.catalog_build.schema_utils.gen_schema "${SOURCE_FILE_SCHEMA_NAMES[@]}" \ + --json-path ./catalog/schema/generated \ + --py-path ./catalog/py_package/catalog_build/generated_schema --py-name schema \ + --ts-path ./catalog/schema/generated --ts-name schema diff --git a/catalog/schema/scripts/test-gen-python.sh b/catalog/schema/scripts/test-gen-python.sh index 6a8e3d22..dbf0dd27 100755 --- a/catalog/schema/scripts/test-gen-python.sh +++ b/catalog/schema/scripts/test-gen-python.sh @@ -6,7 +6,7 @@ validation_failed=false for name in schema ${SOURCE_FILE_SCHEMA_NAMES[@]} do - gen-python "./catalog/schema/$name.yaml" > /dev/null + gen-python "./catalog/py_package/catalog_build/schema/$name.yaml" > /dev/null if [ $? -ne 0 ]; then validation_failed=true fi diff --git a/catalog/schema/scripts/validate-catalog.sh b/catalog/schema/scripts/validate-catalog.sh index 434da4bc..5eee3f4a 100755 --- a/catalog/schema/scripts/validate-catalog.sh +++ b/catalog/schema/scripts/validate-catalog.sh @@ -2,19 +2,4 @@ source ./catalog/schema/scripts/source-file-schema-names.sh -validation_failed=false - -for name in ${SOURCE_FILE_SCHEMA_NAMES[@]} -do - echo "$name:" - linkml-validate -s "./catalog/schema/$name.yaml" "./catalog/source/$name.yml" - if [ $? -ne 0 ]; then - validation_failed=true - fi - echo "" -done - -if [ "$validation_failed" = true ]; then - echo "Validation failed for one or more schemas." - exit 1 -fi +python3 -m catalog.py_package.catalog_build.schema_utils.validate_catalog "./catalog/source" "${SOURCE_FILE_SCHEMA_NAMES[@]}" diff --git a/package.json b/package.json index 47013764..398d1712 100644 --- a/package.json +++ b/package.json @@ -15,8 +15,9 @@ "prepare": "husky", "test": "jest --runInBand", "build-brc-db": "esrun catalog/build/ts/build-catalog.ts", - "build-files-from-ncbi": "python3 ./catalog/build/py/build-files-from-ncbi.py", - "iwc-manifest-to-workflows-yaml": "python3 ./catalog/build/py/iwc_manifest_to_workflows_yaml.py --exclude-other", + "build-files-from-ncbi": "python3 -m catalog.build.py.build_files_from_ncbi", + "iwc-manifest-to-workflows-yaml": "python3 -m catalog.py_package.catalog_build.iwc_manifest_to_workflows_yaml ./catalog/source/workflows.yml --exclude-other", + "lint-schema": "linkml-lint ./catalog/py_package/catalog_build/schema --validate --verbose", "gen-schema": "./catalog/schema/scripts/gen-schema.sh", "test-gen-python": "./catalog/schema/scripts/test-gen-python.sh", "validate-catalog": "./catalog/schema/scripts/validate-catalog.sh"