Skip to content
Draft
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions scripts/write_jsonschemas_to_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import synapseclient
from synapseclient.models import Table

import pandas as pd


# Organizations scanned when the caller does not pass any.
_DEFAULT_ORGANIZATIONS = ("sage.schemas.v2571", "sage.schemas.v2581")

# Synapse table that receives the rows when the caller does not pass one.
_DEFAULT_TABLE_ID = "syn69735275"

# HTAN1, HTAN2, and NF schemas are excluded because they have their own
# JSONschema organizations.
_EXCLUDED_DCCS = frozenset({"htan", "htan2", "nf"})

# (name prefix, semantic version) pairs that are never written to the table.
# NOTE(review): presumably these are superseded initial releases -- confirm.
_SKIPPED_PREFIX_VERSIONS = (("ad", "0.1.0"), ("el", "0.0.1"))

_REGISTERED_SCHEMA_URL = (
    "https://repo-prod.prod.sagebase.org/repo/v1/schema/type/registered/"
)


def _is_skipped_version(name, semantic_version):
    """Return True when a schema version must not be written to the table."""
    if ".validation." in name:
        return True
    return any(
        name.startswith(prefix) and semantic_version == skipped
        for prefix, skipped in _SKIPPED_PREFIX_VERSIONS
    )


def _collect_schema_rows(js, organizations):
    """Build one row dict per JSONschema version found in ``organizations``."""
    rows = []
    for organization_name in organizations:
        org = js.JsonSchemaOrganization(organization_name)
        for schema in org.list_json_schemas():
            print(schema)
            versions = schema.list_versions()
            try:
                for version in versions:
                    name = version.name
                    if _is_skipped_version(name, version.semantic_version):
                        continue
                    # Names are expected to look like "<dcc>.<datatype>...".
                    # Skip a malformed name on its own instead of letting an
                    # IndexError abandon the schema's remaining versions.
                    parts = name.split(".")
                    if len(parts) < 2:
                        print(f"Skipping {name!r}: no datatype in name")
                        continue
                    dcc, datatype = parts[0], parts[1]
                    if dcc in _EXCLUDED_DCCS:
                        continue
                    rows.append(
                        {
                            "org": organization_name,
                            "name": name,
                            "dcc": dcc,
                            "datatype": datatype,
                            "uri": version.uri,
                            "version": version.semantic_version,
                            "link": f"{_REGISTERED_SCHEMA_URL}{version.uri}",
                        }
                    )
            except Exception as e:
                # Deliberately best-effort: one unreadable schema must not
                # stop the other schemas from being collected.
                print(f"Failed while reading versions of {schema}: {e}")
    return rows


def main(organizations=_DEFAULT_ORGANIZATIONS, table_id=_DEFAULT_TABLE_ID):
    """
    Write the JSONschema versions of the given organizations to a Synapse table.

    This script logs in to Synapse, finds all the JSONschema versions
    registered under each organization, and upserts them into the Synapse
    table, using the ``uri`` column as the primary key.

    The table will have the following columns:
    - org: the name of the JSONschema organization
    - name: the full name of the JSONschema version
    - dcc: the name of the DCC
    - datatype: the name of the datatype
    - uri: the URI of the JSONschema version
    - version: the semantic version of the JSONschema version
    - link: a link to the JSONschema version in the Synapse repo

    Args:
        organizations: Names of the JSONschema organizations to scan. A single
            name may be passed as a plain string. Defaults to
            ``sage.schemas.v2571`` and ``sage.schemas.v2581``.
        table_id: Synapse ID of the table to upsert into. Defaults to
            ``syn69735275``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(organizations, str):
        organizations = [organizations]

    syn = synapseclient.login()
    js = syn.service("json_schema")
    rows = _collect_schema_rows(js, organizations)

    # An empty DataFrame has no columns, so there is nothing to upsert.
    if not rows:
        print("No JSONschema versions to write.")
        return

    df = pd.DataFrame(rows)
    table = Table(id=table_id).get(include_columns=True)
    table.upsert_rows(values=df, primary_keys=["uri"])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great to see this upsert functionality getting some use!



# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()