@@ -4021,9 +4021,11 @@ def export_schema(schema: dict, file_path: str, logger: Logger) -> None:
40214021 file_path, str: path to store the schema
40224022 """
40234023 file_path = os .path .expanduser (file_path )
4024- json_schema_dirname = os .path .dirname (file_path )
4025- if json_schema_dirname != "" :
4026- os .makedirs (json_schema_dirname , exist_ok = True )
4024+ # Don't create directories if the path looks like a URL
4025+ if not (file_path .startswith ("http://" ) or file_path .startswith ("https://" )):
4026+ json_schema_dirname = os .path .dirname (file_path )
4027+ if json_schema_dirname != "" :
4028+ os .makedirs (json_schema_dirname , exist_ok = True )
40274029 with open (file_path , "w" , encoding = "utf-8" ) as json_file :
40284030 json .dump (schema , json_file , sort_keys = True , indent = 4 , ensure_ascii = False )
40294031
@@ -5558,6 +5560,14 @@ def get_json_schema_log_file_path(data_model_path: str, source_node: str) -> str
55585560 Returns:
55595561 json_schema_log_file_path: str, file name for the log file
55605562 """
5563+ # If it's a URL, extract just the filename
5564+ if data_model_path .startswith ("http://" ) or data_model_path .startswith ("https://" ):
5565+ from urllib .parse import urlparse
5566+
5567+ parsed_url = urlparse (data_model_path )
5568+ # Get the last part of the path (filename)
5569+ data_model_path = os .path .basename (parsed_url .path )
5570+
55615571 data_model_path_root , _ = os .path .splitext (data_model_path )
55625572 prefix = data_model_path_root
55635573 prefix_root , prefix_ext = os .path .splitext (prefix )
@@ -5682,7 +5692,11 @@ def _write_data_model(
56825692 data_model_path = jsonld_path , source_node = name
56835693 )
56845694 json_schema_dirname = os .path .dirname (json_schema_path )
5685- if json_schema_dirname != "" :
5695+ # Don't create directories if the path looks like a URL
5696+ if json_schema_dirname != "" and not (
5697+ json_schema_path .startswith ("http://" )
5698+ or json_schema_path .startswith ("https://" )
5699+ ):
56865700 os .makedirs (json_schema_dirname , exist_ok = True )
56875701
56885702 logger .info (
@@ -5761,7 +5775,20 @@ def _build_output_path(self, output_directory: Path) -> Path:
57615775 """
57625776
57635777 stripped_component = self .component .replace (" " , "" )
5764- data_model_basename = Path (self .data_model_source ).stem
5778+
5779+ # Handle URL by extracting just the filename
5780+ if self .data_model_source .startswith (
5781+ "http://"
5782+ ) or self .data_model_source .startswith ("https://" ):
5783+ from urllib .parse import urlparse
5784+
5785+ parsed_url = urlparse (self .data_model_source )
5786+ # Get the last part of the path (filename)
5787+ filename = os .path .basename (parsed_url .path )
5788+ data_model_basename = Path (filename ).stem
5789+ else :
5790+ data_model_basename = Path (self .data_model_source ).stem
5791+
57655792 return Path (
57665793 output_directory ,
57675794 data_model_basename ,
@@ -5815,6 +5842,7 @@ def get_component_json_schema(
58155842 schema_name = self .component + "_validation" ,
58165843 jsonld_path = metadata_model .inputMModelLocation ,
58175844 use_property_display_names = use_display_names ,
5845+ write_schema = False , # Don't write intermediate files; write_json_schema_to_file() will handle final output
58185846 )
58195847 self .component_json_schema = json_schema
58205848
@@ -5876,12 +5904,12 @@ def generate_jsonschema(
58765904 in your validation rules. This allows different validation behavior per manifest type.
58775905
58785906 Arguments:
5879- data_model_source: Path to the data model file (CSV or JSONLD) or URL to the raw
5880- JSONLD. Can accept:
5907+ data_model_source: Path or URL to the data model file (CSV or JSONLD). Can accept:
58815908
5882- - A CSV file with your data model specification (will be parsed automatically)
5883- - A JSONLD file generated from `generate_jsonld()` or equivalent
5884- - A URL pointing to a raw JSONLD data model
5909+ - A local CSV file with your data model specification (will be parsed automatically)
5910+ - A local JSONLD file generated from `generate_jsonld()` or equivalent
5911+ - A URL pointing to a raw CSV data model (e.g., from GitHub)
5912+ - A URL pointing to a raw JSONLD data model (e.g., from GitHub)
58855913 output_directory: Directory path where JSON Schema files will be saved. Each
58865914 component will generate a separate `<Component>_validation_schema.json` file.
58875915 data_type: List of specific component names (data types) to generate schemas for.
@@ -5931,7 +5959,7 @@ def generate_jsonschema(
59315959 )
59325960 ```
59335961
5934- Generate schema for specific components:
5962+ Generate schema for specific components from URL:
59355963
59365964 ```python
59375965 schemas, file_paths = generate_jsonschema(
@@ -5942,6 +5970,18 @@ def generate_jsonschema(
59425970 synapse_client=syn
59435971 )
59445972 ```
5973+
5974+ Generate schema from CSV URL:
5975+
5976+ ```python
5977+ schemas, file_paths = generate_jsonschema(
5978+ data_model_source="https://raw.githubusercontent.com/org/repo/main/model.csv",
5979+ output_directory="./schemas",
5980+ data_type=None,
5981+ data_model_labels="class_label",
5982+ synapse_client=syn
5983+ )
5984+ ```
59455985 """
59465986
59475987 synapse_client = Synapse .get_client (synapse_client = synapse_client )
@@ -5996,8 +6036,9 @@ def generate_jsonld(
59966036 - Verifies the graph structure is a valid directed acyclic graph (DAG)
59976037
59986038 Arguments:
5999- schema: Path to your data model CSV file. This file should contain your complete
6000- data model specification with all attributes, validation rules, and relationships.
6039+ schema: Path or URL to your data model CSV file. Can be a local file path or a URL
6040+ (e.g., from GitHub). This file should contain your complete data model
6041+ specification with all attributes, validation rules, and relationships.
60016042 data_model_labels: Label format for the JSON-LD output:
60026043
60036044 - `"class_label"` (default, recommended): Uses standard attribute names as labels
@@ -6058,6 +6099,16 @@ class labels. Use cautiously as this can affect downstream compatibility.
60586099 synapse_client=syn
60596100 )
60606101 ```
6102+
6103+ Load from URL:
6104+ ```python
6105+ jsonld_model = generate_jsonld(
6106+ schema="https://raw.githubusercontent.com/org/repo/main/model.csv",
6107+ data_model_labels="class_label",
6108+ output_jsonld="downloaded_model.jsonld",
6109+ synapse_client=syn
6110+ )
6111+ ```
60616112 """
60626113 syn = Synapse .get_client (synapse_client = synapse_client )
60636114
@@ -6109,7 +6160,14 @@ class labels. Use cautiously as this can affect downstream compatibility.
61096160 # output JSON-LD file alongside CSV file by default, get path.
61106161 if output_jsonld is None :
61116162 if ".jsonld" not in schema :
6112- csv_no_ext = re .sub ("[.]csv$" , "" , schema )
6163+ # If schema is a URL, extract just the filename for local output
6164+ schema_path = schema
6165+ if schema .startswith ("http://" ) or schema .startswith ("https://" ):
6166+ from urllib .parse import urlparse
6167+
6168+ parsed_url = urlparse (schema )
6169+ schema_path = os .path .basename (parsed_url .path )
6170+ csv_no_ext = re .sub ("[.]csv$" , "" , schema_path )
61136171 output_jsonld = csv_no_ext + ".jsonld"
61146172 else :
61156173 output_jsonld = schema
0 commit comments