From 281902a0a3bbde6a0e0c897620374fd35d90c2be Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 12:18:32 -0700 Subject: [PATCH 1/8] refactor: move catalog build package to be directly under catalog directory (#540) --- .../py/{build-files-from-ncbi.py => build_files_from_ncbi.py} | 2 +- .../{build/py/package => py_package}/catalog_build/__init__.py | 0 catalog/{build/py/package => py_package}/catalog_build/build.py | 0 catalog/{build/py/package => py_package}/setup.py | 0 package.json | 2 +- 5 files changed, 2 insertions(+), 2 deletions(-) rename catalog/build/py/{build-files-from-ncbi.py => build_files_from_ncbi.py} (96%) rename catalog/{build/py/package => py_package}/catalog_build/__init__.py (100%) rename catalog/{build/py/package => py_package}/catalog_build/build.py (100%) rename catalog/{build/py/package => py_package}/setup.py (100%) diff --git a/catalog/build/py/build-files-from-ncbi.py b/catalog/build/py/build_files_from_ncbi.py similarity index 96% rename from catalog/build/py/build-files-from-ncbi.py rename to catalog/build/py/build_files_from_ncbi.py index 30c70f6e..279a84e6 100644 --- a/catalog/build/py/build-files-from-ncbi.py +++ b/catalog/build/py/build_files_from_ncbi.py @@ -1,4 +1,4 @@ -from package.catalog_build import build_files +from ...py_package.catalog_build import build_files ASSEMBLIES_PATH = "catalog/source/assemblies.yml" ORGANISMS_PATH = "catalog/source/organisms.yml" diff --git a/catalog/build/py/package/catalog_build/__init__.py b/catalog/py_package/catalog_build/__init__.py similarity index 100% rename from catalog/build/py/package/catalog_build/__init__.py rename to catalog/py_package/catalog_build/__init__.py diff --git a/catalog/build/py/package/catalog_build/build.py b/catalog/py_package/catalog_build/build.py similarity index 100% rename from catalog/build/py/package/catalog_build/build.py rename to catalog/py_package/catalog_build/build.py diff --git a/catalog/build/py/package/setup.py b/catalog/py_package/setup.py similarity index 100% rename from catalog/build/py/package/setup.py rename to catalog/py_package/setup.py diff --git a/package.json b/package.json index fa913355..4083e02a 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "prepare": "husky", "test": "jest --runInBand", "build-brc-db": "esrun catalog/build/ts/build-catalog.ts", - "build-files-from-ncbi": "python3 ./catalog/build/py/build-files-from-ncbi.py", + "build-files-from-ncbi": "python3 -m catalog.build.py.build_files_from_ncbi", "iwc-manifest-to-workflows-yaml": "python3 ./catalog/build/py/iwc_manifest_to_workflows_yaml.py --exclude-other", "gen-schema": "./catalog/schema/scripts/gen-schema.sh", "test-gen-python": "./catalog/schema/scripts/test-gen-python.sh", From a5cb1ef98a92b0ee332c3c02edce7f48f14ff780 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 12:38:52 -0700 Subject: [PATCH 2/8] refactor: move schema source files to catalog build package (#540) --- .../{ => py_package/catalog_build}/schema/assemblies.yaml | 2 +- .../catalog_build}/schema/enums/organism_ploidy.yaml | 2 +- .../catalog_build}/schema/enums/outbreak_priority.yaml | 2 +- .../catalog_build}/schema/enums/outbreak_resource_type.yaml | 2 +- .../catalog_build}/schema/enums/workflow_category_id.yaml | 2 +- .../schema/enums/workflow_parameter_variable.yaml | 2 +- .../catalog_build}/schema/enums/workflow_ploidy.yaml | 2 +- .../{ => py_package/catalog_build}/schema/organisms.yaml | 2 +- .../{ => py_package/catalog_build}/schema/outbreaks.yaml | 2 +- catalog/{ => py_package/catalog_build}/schema/schema.yaml | 2 +- .../catalog_build}/schema/workflow_categories.yaml | 2 +- .../{ => py_package/catalog_build}/schema/workflows.yaml | 2 +- catalog/schema/scripts/gen-schema.sh | 6 +++--- catalog/schema/scripts/test-gen-python.sh | 2 +- catalog/schema/scripts/validate-catalog.sh | 2 +- 15 files changed, 17 insertions(+), 17 deletions(-) rename catalog/{ => py_package/catalog_build}/schema/assemblies.yaml (91%) rename catalog/{ => py_package/catalog_build}/schema/enums/organism_ploidy.yaml (78%) rename catalog/{ => py_package/catalog_build}/schema/enums/outbreak_priority.yaml (80%) rename catalog/{ => py_package/catalog_build}/schema/enums/outbreak_resource_type.yaml (81%) rename catalog/{ => py_package/catalog_build}/schema/enums/workflow_category_id.yaml (87%) rename catalog/{ => py_package/catalog_build}/schema/enums/workflow_parameter_variable.yaml (81%) rename catalog/{ => py_package/catalog_build}/schema/enums/workflow_ploidy.yaml (79%) rename catalog/{ => py_package/catalog_build}/schema/organisms.yaml (93%) rename catalog/{ => py_package/catalog_build}/schema/outbreaks.yaml (97%) rename catalog/{ => py_package/catalog_build}/schema/schema.yaml (84%) rename catalog/{ => py_package/catalog_build}/schema/workflow_categories.yaml (94%) rename catalog/{ => py_package/catalog_build}/schema/workflows.yaml (98%) diff --git a/catalog/schema/assemblies.yaml b/catalog/py_package/catalog_build/schema/assemblies.yaml similarity index 91% rename from catalog/schema/assemblies.yaml rename to catalog/py_package/catalog_build/schema/assemblies.yaml index 30122042..c2e3d9b0 100644 --- a/catalog/schema/assemblies.yaml +++ b/catalog/py_package/catalog_build/schema/assemblies.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml# name: assemblies description: Schema for source assembly list. diff --git a/catalog/schema/enums/organism_ploidy.yaml b/catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml similarity index 78% rename from catalog/schema/enums/organism_ploidy.yaml rename to catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml index 7d5f4ce2..807d3cd5 100644 --- a/catalog/schema/enums/organism_ploidy.yaml +++ b/catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/organism_ploidy.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/organism_ploidy.yaml# name: enums_organism_ploidy enums: diff --git a/catalog/schema/enums/outbreak_priority.yaml b/catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml similarity index 80% rename from catalog/schema/enums/outbreak_priority.yaml rename to catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml index 7dc944ef..60346a40 100644 --- a/catalog/schema/enums/outbreak_priority.yaml +++ b/catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/outbreak_priority.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/outbreak_priority.yaml# name: enums_outbreak_priority enums: diff --git a/catalog/schema/enums/outbreak_resource_type.yaml b/catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml similarity index 81% rename from catalog/schema/enums/outbreak_resource_type.yaml rename to catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml index bc5ade2a..23d09198 100644 --- a/catalog/schema/enums/outbreak_resource_type.yaml +++ b/catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/outbreak_resource_type.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/outbreak_resource_type.yaml# name: enums_outbreak_resource_type enums: diff --git a/catalog/schema/enums/workflow_category_id.yaml b/catalog/py_package/catalog_build/schema/enums/workflow_category_id.yaml similarity index 87% rename from catalog/schema/enums/workflow_category_id.yaml rename to catalog/py_package/catalog_build/schema/enums/workflow_category_id.yaml index 473193e2..18cd25fd 100644 --- a/catalog/schema/enums/workflow_category_id.yaml +++ b/catalog/py_package/catalog_build/schema/enums/workflow_category_id.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/workflow_type.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/workflow_type.yaml# name: enums_workflow_category_id description: Definition of the workflow category ID enum. diff --git a/catalog/schema/enums/workflow_parameter_variable.yaml b/catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml similarity index 81% rename from catalog/schema/enums/workflow_parameter_variable.yaml rename to catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml index 5261d2d3..ae1bb3fd 100644 --- a/catalog/schema/enums/workflow_parameter_variable.yaml +++ b/catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/workflow_parameter_variable.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/workflow_parameter_variable.yaml# name: enums_workflow_parameter_variable enums: diff --git a/catalog/schema/enums/workflow_ploidy.yaml b/catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml similarity index 79% rename from catalog/schema/enums/workflow_ploidy.yaml rename to catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml index 48d699b0..8177b53f 100644 --- a/catalog/schema/enums/workflow_ploidy.yaml +++ b/catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/enums/workflow_ploidy.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/enums/workflow_ploidy.yaml# name: enums_workflow_ploidy enums: diff --git a/catalog/schema/organisms.yaml b/catalog/py_package/catalog_build/schema/organisms.yaml similarity index 93% rename from catalog/schema/organisms.yaml rename to catalog/py_package/catalog_build/schema/organisms.yaml index 80dcdc55..f4161b68 100644 --- a/catalog/schema/organisms.yaml +++ b/catalog/py_package/catalog_build/schema/organisms.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml# name: organisms description: Schema for source organism info. diff --git a/catalog/schema/outbreaks.yaml b/catalog/py_package/catalog_build/schema/outbreaks.yaml similarity index 97% rename from catalog/schema/outbreaks.yaml rename to catalog/py_package/catalog_build/schema/outbreaks.yaml index 8903a84c..cb2db45d 100644 --- a/catalog/schema/outbreaks.yaml +++ b/catalog/py_package/catalog_build/schema/outbreaks.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml# name: outbreaks description: Schema for source outbreak info. diff --git a/catalog/schema/schema.yaml b/catalog/py_package/catalog_build/schema/schema.yaml similarity index 84% rename from catalog/schema/schema.yaml rename to catalog/py_package/catalog_build/schema/schema.yaml index 0b342712..dffb5a39 100644 --- a/catalog/schema/schema.yaml +++ b/catalog/py_package/catalog_build/schema/schema.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/schema.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/schema.yaml# name: schema description: Combined source data schemas. diff --git a/catalog/schema/workflow_categories.yaml b/catalog/py_package/catalog_build/schema/workflow_categories.yaml similarity index 94% rename from catalog/schema/workflow_categories.yaml rename to catalog/py_package/catalog_build/schema/workflow_categories.yaml index 54472d3c..7b4aca8c 100644 --- a/catalog/schema/workflow_categories.yaml +++ b/catalog/py_package/catalog_build/schema/workflow_categories.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml# name: workflow_categories description: Schema for source workflow categories list. diff --git a/catalog/schema/workflows.yaml b/catalog/py_package/catalog_build/schema/workflows.yaml similarity index 98% rename from catalog/schema/workflows.yaml rename to catalog/py_package/catalog_build/schema/workflows.yaml index 37c9cd91..e4ea274e 100644 --- a/catalog/schema/workflows.yaml +++ b/catalog/py_package/catalog_build/schema/workflows.yaml @@ -1,4 +1,4 @@ -id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml# +id: https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml# name: workflows description: Schema for source workflows list. diff --git a/catalog/schema/scripts/gen-schema.sh b/catalog/schema/scripts/gen-schema.sh index 74195e92..f066f6c4 100755 --- a/catalog/schema/scripts/gen-schema.sh +++ b/catalog/schema/scripts/gen-schema.sh @@ -3,13 +3,13 @@ source ./catalog/schema/scripts/source-file-schema-names.sh # Generate Pydantic models for all source data types -gen-pydantic ./catalog/schema/schema.yaml > ./catalog/build/py/generated_schema/schema.py +gen-pydantic ./catalog/py_package/catalog_build/schema/schema.yaml > ./catalog/build/py/generated_schema/schema.py # Generate TypeScript definitions for all source data types -python3 ./catalog/schema/scripts/gen-typescript.py ./catalog/schema/schema.yaml > ./catalog/schema/generated/schema.ts +python3 ./catalog/schema/scripts/gen-typescript.py ./catalog/py_package/catalog_build/schema/schema.yaml > ./catalog/schema/generated/schema.ts # Generate a JSON schema for each source file for name in ${SOURCE_FILE_SCHEMA_NAMES[@]} do - gen-json-schema "./catalog/schema/$name.yaml" > "./catalog/schema/generated/$name.json" + gen-json-schema "./catalog/py_package/catalog_build/schema/$name.yaml" > "./catalog/schema/generated/$name.json" done diff --git a/catalog/schema/scripts/test-gen-python.sh b/catalog/schema/scripts/test-gen-python.sh index 6a8e3d22..dbf0dd27 100755 --- a/catalog/schema/scripts/test-gen-python.sh +++ b/catalog/schema/scripts/test-gen-python.sh @@ -6,7 +6,7 @@ validation_failed=false for name in schema ${SOURCE_FILE_SCHEMA_NAMES[@]} do - gen-python "./catalog/schema/$name.yaml" > /dev/null + gen-python "./catalog/py_package/catalog_build/schema/$name.yaml" > /dev/null if [ $? -ne 0 ]; then validation_failed=true fi diff --git a/catalog/schema/scripts/validate-catalog.sh b/catalog/schema/scripts/validate-catalog.sh index 434da4bc..2f8432e2 100755 --- a/catalog/schema/scripts/validate-catalog.sh +++ b/catalog/schema/scripts/validate-catalog.sh @@ -7,7 +7,7 @@ validation_failed=false for name in ${SOURCE_FILE_SCHEMA_NAMES[@]} do echo "$name:" - linkml-validate -s "./catalog/schema/$name.yaml" "./catalog/source/$name.yml" + linkml-validate -s "./catalog/py_package/catalog_build/schema/$name.yaml" "./catalog/source/$name.yml" if [ $? -ne 0 ]; then validation_failed=true fi From 278459cc485fae8cb225113b244204642c2d9886 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 14:25:17 -0700 Subject: [PATCH 3/8] feat: move validation script logic to package (#540) --- .../schema_utils/validate_catalog.py | 48 +++++++++++++++++++ catalog/schema/scripts/validate-catalog.sh | 17 +------ 2 files changed, 49 insertions(+), 16 deletions(-) create mode 100644 catalog/py_package/catalog_build/schema_utils/validate_catalog.py diff --git a/catalog/py_package/catalog_build/schema_utils/validate_catalog.py b/catalog/py_package/catalog_build/schema_utils/validate_catalog.py new file mode 100644 index 00000000..eff0a959 --- /dev/null +++ b/catalog/py_package/catalog_build/schema_utils/validate_catalog.py @@ -0,0 +1,48 @@ +import os.path +import sys +from argparse import ArgumentParser + +from linkml.validator import ( + JsonschemaValidationPlugin, + Validator, + default_loader_for_file, +) +from linkml.validator.report import Severity + +SCHEMA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../schema") + + +def validate_catalog(source_dir, source_types): + source_dir = os.path.abspath(source_dir) + found_errors = False + for name in source_types: + print(f"{name}:") + validator = Validator( + os.path.join(SCHEMA_DIR, f"{name}.yaml"), + validation_plugins=[JsonschemaValidationPlugin(closed=True)], + ) + loader = default_loader_for_file(os.path.join(source_dir, f"{name}.yml")) + severities = set() + for result in validator.iter_results_from_source(loader): + severities.add(result.severity) + print(f"[{result.severity.value}] {result.message}") + if not severities: + print("No issues found") + elif Severity.ERROR in severities: + found_errors = True + print("") + if found_errors: + print("Validation failed for one or more schemas.") + sys.exit(1) + + +def cli(): + parser = ArgumentParser() + parser.add_argument("source_dir") + parser.add_argument("source_type", nargs="+") + args = parser.parse_args() + validate_catalog(args.source_dir, args.source_type) + + +if __name__ == "__main__": + cli() diff --git a/catalog/schema/scripts/validate-catalog.sh b/catalog/schema/scripts/validate-catalog.sh index 2f8432e2..5eee3f4a 100755 --- a/catalog/schema/scripts/validate-catalog.sh +++ b/catalog/schema/scripts/validate-catalog.sh @@ -2,19 +2,4 @@ source ./catalog/schema/scripts/source-file-schema-names.sh -validation_failed=false - -for name in ${SOURCE_FILE_SCHEMA_NAMES[@]} -do - echo "$name:" - linkml-validate -s "./catalog/py_package/catalog_build/schema/$name.yaml" "./catalog/source/$name.yml" - if [ $? -ne 0 ]; then - validation_failed=true - fi - echo "" -done - -if [ "$validation_failed" = true ]; then - echo "Validation failed for one or more schemas." - exit 1 -fi +python3 -m catalog.py_package.catalog_build.schema_utils.validate_catalog "./catalog/source" "${SOURCE_FILE_SCHEMA_NAMES[@]}" From 129eb0ed040e79176bc49615365b4c1cfb2c654f Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 17:00:52 -0700 Subject: [PATCH 4/8] feat: move schema generation logic to package (#540) --- catalog/build/py/generated_schema/schema.py | 34 ++++---- .../catalog_build/schema_utils/gen_schema.py | 81 +++++++++++++++++++ .../schema_utils/gen_typescript.py} | 3 +- catalog/schema/generated/assemblies.json | 2 +- catalog/schema/generated/organisms.json | 2 +- catalog/schema/generated/outbreaks.json | 2 +- .../schema/generated/workflow_categories.json | 2 +- catalog/schema/generated/workflows.json | 2 +- catalog/schema/scripts/gen-schema.sh | 15 +--- 9 files changed, 109 insertions(+), 34 deletions(-) create mode 100644 catalog/py_package/catalog_build/schema_utils/gen_schema.py rename catalog/{schema/scripts/gen-typescript.py => py_package/catalog_build/schema_utils/gen_typescript.py} (90%) diff --git a/catalog/build/py/generated_schema/schema.py b/catalog/build/py/generated_schema/schema.py index 36611da4..ed9fc1db 100644 --- a/catalog/build/py/generated_schema/schema.py +++ b/catalog/build/py/generated_schema/schema.py @@ -44,9 +44,9 @@ def __contains__(self, key: str) -> bool: linkml_meta = LinkMLMeta( { - "default_prefix": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/schema.yaml#", + "default_prefix": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/schema.yaml#", "description": "Combined source data schemas.", - "id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/schema.yaml#", + "id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/schema.yaml#", "imports": [ "./assemblies", "./organisms", @@ -61,7 +61,7 @@ def __contains__(self, key: str) -> bool: "prefix_reference": "https://w3id.org/linkml/", } }, - "source_file": "./catalog/schema/schema.yaml", + "source_file": "/Users/hunter/git-repos/brc-analytics/catalog/py_package/catalog_build/schema_utils/../schema/schema.yaml", } ) @@ -146,7 +146,7 @@ class Assemblies(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml#", "tree_root": True, } ) @@ -167,7 +167,7 @@ class Assembly(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml#" } ) @@ -187,7 +187,7 @@ class Organisms(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml#", "tree_root": True, } ) @@ -208,7 +208,7 @@ class Organism(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml#" } ) @@ -238,7 +238,7 @@ class Outbreaks(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#", "tree_root": True, } ) @@ -259,7 +259,7 @@ class Outbreak(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#" } ) @@ -333,7 +333,7 @@ class OutbreakResource(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#" } ) @@ -370,7 +370,7 @@ class MarkdownFileReference(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#" } ) @@ -402,7 +402,7 @@ class WorkflowCategories(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml#", "tree_root": True, } ) @@ -426,7 +426,7 @@ class WorkflowCategory(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml#" } ) @@ -476,7 +476,7 @@ class Workflows(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#", + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#", "tree_root": True, } ) @@ -497,7 +497,7 @@ class Workflow(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#" } ) @@ -569,7 +569,7 @@ class WorkflowParameter(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#" } ) @@ -610,7 +610,7 @@ class WorkflowUrlSpec(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( { - "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#" + "from_schema": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#" } ) diff --git a/catalog/py_package/catalog_build/schema_utils/gen_schema.py b/catalog/py_package/catalog_build/schema_utils/gen_schema.py new file mode 100644 index 00000000..95b86d86 --- /dev/null +++ b/catalog/py_package/catalog_build/schema_utils/gen_schema.py @@ -0,0 +1,81 @@ +import os.path +from argparse import ArgumentParser + +from linkml.generators import JsonSchemaGenerator, PydanticGenerator + +from .gen_typescript import TypescriptGeneratorFixed + +SCHEMA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../schema") + +# Mapping from name to tuple of generator class and output file extension +GENERATOR_TYPES = { + "Pydantic": (PydanticGenerator, "py"), + "TypeScript": (TypescriptGeneratorFixed, "ts"), + "JSON Schema": (JsonSchemaGenerator, "json"), +} + + +def gen_schema_type(type_name, out_dir, schema_names, default_schema_names): + if out_dir is None: + if schema_names is not None: + print(f"No output path specified for {type_name} generator\n") + return + + if not schema_names: + if not default_schema_names: + print(f"No schemas specified for {type_name} generator\n") + return + schema_names = default_schema_names + + print(f"Generating {type_name}") + + generator, extension = GENERATOR_TYPES[type_name] + + for name in schema_names: + result_text = generator(os.path.join(SCHEMA_DIR, f"{name}.yaml")).serialize() + out_path = os.path.join(out_dir, f"{name}.{extension}") + with open(out_path, "w") as file: + file.write(result_text + "\n") + print(f"Wrote to {out_path}") + + print("") + + +def gen_schema( + default_schema_names, + *, + py_path=None, + py_names=None, + ts_path=None, + ts_names=None, + json_path=None, + json_names=None, +): + gen_schema_type("Pydantic", py_path, py_names, default_schema_names) + gen_schema_type("TypeScript", ts_path, ts_names, default_schema_names) + gen_schema_type("JSON Schema", json_path, json_names, default_schema_names) + + +def cli(): + parser = ArgumentParser() + parser.add_argument("schema_name", nargs="*") + parser.add_argument("--py-path") + parser.add_argument("--py-name", action="append") + parser.add_argument("--ts-path") + parser.add_argument("--ts-name", action="append") + parser.add_argument("--json-path") + parser.add_argument("--json-name", action="append") + args = parser.parse_args() + gen_schema( + args.schema_name, + py_path=args.py_path, + py_names=args.py_name, + ts_path=args.ts_path, + ts_names=args.ts_name, + json_path=args.json_path, + json_names=args.json_name, + ) + + +if __name__ == "__main__": + cli() diff --git a/catalog/schema/scripts/gen-typescript.py b/catalog/py_package/catalog_build/schema_utils/gen_typescript.py similarity index 90% rename from catalog/schema/scripts/gen-typescript.py rename to catalog/py_package/catalog_build/schema_utils/gen_typescript.py index 6ebe7041..91a5ea75 100644 --- a/catalog/schema/scripts/gen-typescript.py +++ b/catalog/py_package/catalog_build/schema_utils/gen_typescript.py @@ -24,4 +24,5 @@ def range(self, slot): return base_result if slot.required else f"{base_result} | null" -print(TypescriptGeneratorFixed(sys.argv[1]).serialize()) +if __name__ == "__main__": + print(TypescriptGeneratorFixed(sys.argv[1]).serialize()) diff --git a/catalog/schema/generated/assemblies.json b/catalog/schema/generated/assemblies.json index b991ac72..a57b0d01 100644 --- a/catalog/schema/generated/assemblies.json +++ b/catalog/schema/generated/assemblies.json @@ -34,7 +34,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/assemblies.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/assemblies.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Object containing list of assemblies.", diff --git a/catalog/schema/generated/organisms.json b/catalog/schema/generated/organisms.json index ef4cf5a3..9ba0ed2e 100644 --- a/catalog/schema/generated/organisms.json +++ b/catalog/schema/generated/organisms.json @@ -52,7 +52,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/organisms.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/organisms.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Object containing list of organisms.", diff --git a/catalog/schema/generated/outbreaks.json b/catalog/schema/generated/outbreaks.json index 3e9795bf..d4d099f7 100644 --- a/catalog/schema/generated/outbreaks.json +++ b/catalog/schema/generated/outbreaks.json @@ -139,7 +139,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/outbreaks.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/outbreaks.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Object containing list of outbreaks.", diff --git a/catalog/schema/generated/workflow_categories.json b/catalog/schema/generated/workflow_categories.json index fc1ec740..dc182646 100644 --- a/catalog/schema/generated/workflow_categories.json +++ b/catalog/schema/generated/workflow_categories.json @@ -64,7 +64,7 @@ "type": "string" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflow_categories.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflow_categories.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Object containing list of workflow categories.", diff --git a/catalog/schema/generated/workflows.json b/catalog/schema/generated/workflows.json index a5d20190..bd99bbea 100644 --- a/catalog/schema/generated/workflows.json +++ b/catalog/schema/generated/workflows.json @@ -192,7 +192,7 @@ "type": "object" } }, - "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#", + "$id": "https://github.com/galaxyproject/brc-analytics/blob/main/catalog/py_package/catalog_build/schema/workflows.yaml#", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "description": "Object containing list of workflows.", diff --git a/catalog/schema/scripts/gen-schema.sh b/catalog/schema/scripts/gen-schema.sh index f066f6c4..57e2ff65 100755 --- a/catalog/schema/scripts/gen-schema.sh +++ b/catalog/schema/scripts/gen-schema.sh @@ -2,14 +2,7 @@ source ./catalog/schema/scripts/source-file-schema-names.sh -# Generate Pydantic models for all source data types -gen-pydantic ./catalog/py_package/catalog_build/schema/schema.yaml > ./catalog/build/py/generated_schema/schema.py - -# Generate TypeScript definitions for all source data types -python3 ./catalog/schema/scripts/gen-typescript.py ./catalog/py_package/catalog_build/schema/schema.yaml > ./catalog/schema/generated/schema.ts - -# Generate a JSON schema for each source file -for name in ${SOURCE_FILE_SCHEMA_NAMES[@]} -do - gen-json-schema "./catalog/py_package/catalog_build/schema/$name.yaml" > "./catalog/schema/generated/$name.json" -done +python3 -m catalog.py_package.catalog_build.schema_utils.gen_schema "${SOURCE_FILE_SCHEMA_NAMES[@]}" \ + --json-path ./catalog/schema/generated \ + --py-path ./catalog/build/py/generated_schema --py-name schema \ + --ts-path ./catalog/schema/generated --ts-name schema From 09e6e30a44b1dccf777fe0703def13d5e38569f2 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 17:19:02 -0700 Subject: [PATCH 5/8] feat: add iwc workflows script to package (#540) --- .../catalog_build}/generated_schema/schema.py | 0 .../iwc_manifest_to_workflows_yaml.py | 27 ++++++++++--------- catalog/schema/scripts/gen-schema.sh | 2 +- package.json | 2 +- 4 files changed, 17 insertions(+), 14 deletions(-) rename catalog/{build/py => py_package/catalog_build}/generated_schema/schema.py (100%) rename catalog/{build/py => py_package/catalog_build}/iwc_manifest_to_workflows_yaml.py (90%) diff --git a/catalog/build/py/generated_schema/schema.py b/catalog/py_package/catalog_build/generated_schema/schema.py similarity index 100% rename from catalog/build/py/generated_schema/schema.py rename to catalog/py_package/catalog_build/generated_schema/schema.py diff --git a/catalog/build/py/iwc_manifest_to_workflows_yaml.py b/catalog/py_package/catalog_build/iwc_manifest_to_workflows_yaml.py similarity index 90% rename from catalog/build/py/iwc_manifest_to_workflows_yaml.py rename to catalog/py_package/catalog_build/iwc_manifest_to_workflows_yaml.py index 37dc927c..2e87bdbc 100644 --- a/catalog/build/py/iwc_manifest_to_workflows_yaml.py +++ b/catalog/py_package/catalog_build/iwc_manifest_to_workflows_yaml.py @@ -6,7 +6,8 @@ import requests import yaml -from generated_schema.schema import ( + +from .generated_schema.schema import ( Workflow, WorkflowCategoryId, WorkflowParameter, @@ -15,7 +16,6 @@ ) URL = "https://iwc.galaxyproject.org/workflow_manifest.json" -WORKFLOWS_PATH = "catalog/source/workflows.yml" DOCKSTORE_COLLECTION_TO_CATEGORY = { "Variant Calling": WorkflowCategoryId.VARIANT_CALLING, "Transcriptomics": WorkflowCategoryId.TRANSCRIPTOMICS, @@ -31,9 +31,9 @@ ) -def read_existing_yaml(): - if os.path.exists(WORKFLOWS_PATH): - with open(WORKFLOWS_PATH) as fh: +def read_existing_yaml(workflows_path): + if os.path.exists(workflows_path): + with open(workflows_path) as fh: workflows = Workflows.model_validate(yaml.safe_load(fh)).workflows else: # start from scratch @@ -116,8 +116,8 @@ def generate_current_workflows(): return by_trs_id -def merge_into_existing(): - existing = read_existing_yaml() +def merge_into_existing(workflows_path): + existing = read_existing_yaml(workflows_path) current = generate_current_workflows() merged: Dict[str, Workflow] = {} for versionless_trs_id, current_workflow_input in current.items(): @@ -144,8 +144,8 @@ def merge_into_existing(): return merged -def to_workflows_yaml(exclude_other: bool): - by_trs_id = merge_into_existing() +def to_workflows_yaml(workflows_path: str, exclude_other: bool): + by_trs_id = merge_into_existing(workflows_path) # sort by trs id, should play nicer with git diffs sorted_workflows = list(dict(sorted(by_trs_id.items())).values()) if exclude_other: @@ -160,7 +160,7 @@ def to_workflows_yaml(exclude_other: bool): final_workflows.append(workflow) sorted_workflows = final_workflows final_workflows = sorted_workflows - with open(WORKFLOWS_PATH, "w") as out: + with open(workflows_path, "w") as out: yaml.safe_dump( Workflows(workflows=final_workflows).model_dump(exclude_none=True), out, @@ -168,17 +168,20 @@ def to_workflows_yaml(exclude_other: bool): sort_keys=False, ) # Turns out the YAML style prettier likes is really hard to create in python ... - subprocess.run(["npx", "prettier", "--write", WORKFLOWS_PATH]) + subprocess.run(["npx", "prettier", "--write", workflows_path]) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Build workflows.yaml file from latest IWC JSON manifest." ) + parser.add_argument( + "workflows_path", help="Path of workflows YAML file to read/write." + ) parser.add_argument( "--exclude-other", action="store_true", help="Exclude other items from processing.", ) args = parser.parse_args() - to_workflows_yaml(exclude_other=args.exclude_other) + to_workflows_yaml(args.workflows_path, exclude_other=args.exclude_other) diff --git a/catalog/schema/scripts/gen-schema.sh b/catalog/schema/scripts/gen-schema.sh index 57e2ff65..91420e7c 100755 --- a/catalog/schema/scripts/gen-schema.sh +++ b/catalog/schema/scripts/gen-schema.sh @@ -4,5 +4,5 @@ source ./catalog/schema/scripts/source-file-schema-names.sh python3 -m catalog.py_package.catalog_build.schema_utils.gen_schema "${SOURCE_FILE_SCHEMA_NAMES[@]}" \ --json-path ./catalog/schema/generated \ - --py-path ./catalog/build/py/generated_schema --py-name schema \ + --py-path ./catalog/py_package/catalog_build/generated_schema --py-name schema \ --ts-path ./catalog/schema/generated --ts-name schema diff --git a/package.json b/package.json index 4083e02a..d4dac911 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,7 @@ "test": "jest --runInBand", "build-brc-db": "esrun catalog/build/ts/build-catalog.ts", "build-files-from-ncbi": "python3 -m catalog.build.py.build_files_from_ncbi", - "iwc-manifest-to-workflows-yaml": "python3 ./catalog/build/py/iwc_manifest_to_workflows_yaml.py --exclude-other", + "iwc-manifest-to-workflows-yaml": "python3 -m catalog.py_package.catalog_build.iwc_manifest_to_workflows_yaml ./catalog/source/workflows.yml --exclude-other", "gen-schema": "./catalog/schema/scripts/gen-schema.sh", "test-gen-python": "./catalog/schema/scripts/test-gen-python.sh", "validate-catalog": "./catalog/schema/scripts/validate-catalog.sh" From 4a77baf65ea1b84843ec46fe302103ba4fd70951 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 17:44:32 -0700 Subject: [PATCH 6/8] docs: update readmes to reflect new directory structure (#540) --- README.md | 2 +- catalog/README.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 413c1db2..cde310f0 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ These values will be substituted with assembly-specific values at runtime. ## Editing the LinkML schemas -If the LinkML schemas in `catalog/schema` are edited, the derived JSON schemas and TypeScript definitions should be +If the LinkML schemas in `catalog/py_package/catalog_build/schema` are edited, the derived JSON schemas and TypeScript definitions should be updated as follows: diff --git a/catalog/README.md b/catalog/README.md index 5a21158d..e273b2d6 100644 --- a/catalog/README.md +++ b/catalog/README.md @@ -7,5 +7,6 @@ This directory provides the catalog data (information on genome assemblies, orga - `py` - Python scripts. - `ts` - Typescript scripts. - `output` - JSON files output by the catalog build process, to be consumed by the app. -- `schema` - LinkML schemas for source files. +- `py_package` - Python package used to share catalog features, such as the schemas and build process, with other projects. +- `schema` - Schema-related scripts and derived models. - `source` - YAML files providing data used as input for building the catalog. From 0211191db16212214bf2b5ccf93e5dc2b2a5c938 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 17:52:19 -0700 Subject: [PATCH 7/8] fix: update linting in checks workflow for new directory structure (#540) --- .github/workflows/run-checks.yml | 2 +- package.json | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-checks.yml b/.github/workflows/run-checks.yml index 7d4bad63..f45f7872 100644 --- a/.github/workflows/run-checks.yml +++ b/.github/workflows/run-checks.yml @@ -33,7 +33,7 @@ jobs: run: pip install -r ./catalog/build/py/requirements.txt - name: Run linkml-lint # Run linting on the LinkML schemas, to enforce conventions such as in naming, and to catch simple errors. - run: linkml-lint ./catalog/schema --validate --verbose + run: npm run lint-schema - name: Test LinkML Python generation # Generate Python code from the main LinkML schemas, discarding the output; this will catch more subtle errors such as references to nonexistent elements. run: npm run test-gen-python diff --git a/package.json b/package.json index d4dac911..4b2b5c3f 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "build-brc-db": "esrun catalog/build/ts/build-catalog.ts", "build-files-from-ncbi": "python3 -m catalog.build.py.build_files_from_ncbi", "iwc-manifest-to-workflows-yaml": "python3 -m catalog.py_package.catalog_build.iwc_manifest_to_workflows_yaml ./catalog/source/workflows.yml --exclude-other", + "lint-schema": "linkml-lint ./catalog/py_package/catalog_build/schema --validate --verbose", "gen-schema": "./catalog/schema/scripts/gen-schema.sh", "test-gen-python": "./catalog/schema/scripts/test-gen-python.sh", "validate-catalog": "./catalog/schema/scripts/validate-catalog.sh" From b749ad34e4a6ac1bfe7e20c92f118a03ac985134 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Mon, 26 May 2025 18:41:57 -0700 Subject: [PATCH 8/8] docs: add descriptions to schema scripts' arguments (#540) --- .../catalog_build/schema_utils/gen_schema.py | 36 +++++++++++++++---- .../schema_utils/validate_catalog.py | 8 +++-- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/catalog/py_package/catalog_build/schema_utils/gen_schema.py b/catalog/py_package/catalog_build/schema_utils/gen_schema.py index 95b86d86..ab550b8c 100644 --- a/catalog/py_package/catalog_build/schema_utils/gen_schema.py +++ b/catalog/py_package/catalog_build/schema_utils/gen_schema.py @@ -58,13 +58,35 @@ def gen_schema( def cli(): parser = ArgumentParser() - parser.add_argument("schema_name", nargs="*") - parser.add_argument("--py-path") - parser.add_argument("--py-name", action="append") - parser.add_argument("--ts-path") - parser.add_argument("--ts-name", action="append") - parser.add_argument("--json-path") - parser.add_argument("--json-name", action="append") + parser.add_argument( + "schema_name", + nargs="*", + help="name of a default schema to generate from if generator-specific schemas aren't specified", + ) + parser.add_argument( + "--py-path", help="path of directory to output pydantic files to" + ) + parser.add_argument( + "--py-name", + action="append", + help="name of a schema to generate pydantic models from", + ) + parser.add_argument( + "--ts-path", help="path of directory to output typescript files to" + ) + parser.add_argument( + "--ts-name", + action="append", + help="name of a schema to generate typescript definitions from", + ) + parser.add_argument( + "--json-path", help="path of directory to output json schema files to" + ) + parser.add_argument( + "--json-name", + action="append", + help="name of a schema to generate json schema from", + ) args = parser.parse_args() gen_schema( args.schema_name, diff --git a/catalog/py_package/catalog_build/schema_utils/validate_catalog.py b/catalog/py_package/catalog_build/schema_utils/validate_catalog.py index eff0a959..e9fde2d5 100644 --- a/catalog/py_package/catalog_build/schema_utils/validate_catalog.py +++ b/catalog/py_package/catalog_build/schema_utils/validate_catalog.py @@ -38,8 +38,12 @@ def validate_catalog(source_dir, source_types): def cli(): parser = ArgumentParser() - parser.add_argument("source_dir") - parser.add_argument("source_type", nargs="+") + parser.add_argument( + "source_dir", help="path of directory to validate catalog source files from" + ) + parser.add_argument( + "source_type", nargs="+", help="name of a schema/entity type to validate" + ) args = parser.parse_args() validate_catalog(args.source_dir, args.source_type)