1212import shutil
1313import tempfile
1414import unittest
15+ from typing import Any
1516from unittest .mock import Mock , mock_open , patch
1617
1718import pandas as pd
2627)
2728from synapseclient .extensions .curator .file_based_metadata_task import (
2829 _create_columns_from_json_schema ,
30+ _create_synapse_column_from_js_property ,
2931 _get_column_type_from_js_one_of_list ,
3032 _get_column_type_from_js_property ,
3133 _get_list_column_type_from_js_property ,
5052 SchemaRegistryColumnConfig ,
5153 get_latest_schema_uri ,
5254)
53- from synapseclient .models import ColumnType
55+ from synapseclient .models import Column , ColumnType
5456from synapseclient .models .curation import (
5557 FileBasedMetadataTaskProperties ,
5658 RecordBasedMetadataTaskProperties ,
@@ -1670,50 +1672,6 @@ def test_create_columns_from_json_schema_success(self):
16701672 assert all (hasattr (col , "name" ) for col in columns )
16711673 assert all (hasattr (col , "column_type" ) for col in columns )
16721674
1673- def test_get_column_type_from_js_property_enum (self ):
1674- """Test getting column type for enum property."""
1675- # GIVEN a JSON schema property with an enum
1676- js_property = {"enum" : ["option1" , "option2" , "option3" ]}
1677-
1678- # WHEN I get the column type
1679- result = _get_column_type_from_js_property (js_property )
1680-
1681- # THEN it should return STRING type
1682- assert result == ColumnType .STRING
1683-
1684- def test_get_column_type_from_js_property_array (self ):
1685- """Test getting column type for array property."""
1686- # GIVEN a JSON schema property with array type
1687- js_property = {"type" : "array" , "items" : {"type" : "string" }}
1688-
1689- # WHEN I get the column type
1690- result = _get_column_type_from_js_property (js_property )
1691-
1692- # THEN it should return a list type
1693- assert result == ColumnType .STRING_LIST
1694-
1695- def test_get_column_type_from_js_property_one_of (self ):
1696- """Test getting column type for oneOf property."""
1697- # GIVEN a JSON schema property with oneOf
1698- js_property = {"oneOf" : [{"type" : "string" }, {"type" : "null" }]}
1699-
1700- # WHEN I get the column type
1701- result = _get_column_type_from_js_property (js_property )
1702-
1703- # THEN it should return STRING type
1704- assert result == ColumnType .STRING
1705-
1706- def test_get_column_type_from_js_property_fallback (self ):
1707- """Test getting column type fallback to STRING."""
1708- # GIVEN a JSON schema property without recognizable type
1709- js_property = {"description" : "some property" }
1710-
1711- # WHEN I get the column type
1712- result = _get_column_type_from_js_property (js_property )
1713-
1714- # THEN it should return STRING type as fallback
1715- assert result == ColumnType .STRING
1716-
17171675 def test_get_column_type_from_js_one_of_list_with_enum (self ):
17181676 """Test getting column type from oneOf list containing enum."""
17191677 # GIVEN a oneOf list with an enum
@@ -1722,8 +1680,8 @@ def test_get_column_type_from_js_one_of_list_with_enum(self):
17221680 # WHEN I get the column type
17231681 result = _get_column_type_from_js_one_of_list (js_one_of_list )
17241682
1725- # THEN it should return STRING type
1726- assert result == ColumnType .STRING
1683+ # THEN it should return MEDIUMTEXT type
1684+ assert result == ColumnType .MEDIUMTEXT
17271685
17281686 def test_get_column_type_from_js_one_of_list_single_type (self ):
17291687 """Test getting column type from oneOf list with single non-null type."""
@@ -1758,8 +1716,8 @@ def test_get_column_type_from_js_one_of_list_fallback(self):
17581716 # WHEN I get the column type
17591717 result = _get_column_type_from_js_one_of_list (js_one_of_list )
17601718
1761- # THEN it should return STRING type as fallback
1762- assert result == ColumnType .STRING
1719+ # THEN it should return MEDIUMTEXT type as fallback
1720+ assert result == ColumnType .MEDIUMTEXT
17631721
17641722 def test_get_list_column_type_from_js_property_with_enum (self ):
17651723 """Test getting list column type for property with enum items."""
@@ -1795,6 +1753,157 @@ def test_get_list_column_type_from_js_property_fallback(self):
17951753 assert result == ColumnType .STRING_LIST
17961754
17971755
@pytest.mark.parametrize(
    "json_schema, expected_columns",
    [
        (
            {
                "properties": {
                    "string_col": {"type": "string"},
                }
            },
            [
                Column(name="string_col", column_type=ColumnType.MEDIUMTEXT),
            ],
        ),
        (
            {
                "properties": {
                    "string_col": {"type": "string"},
                    "int_col": {"type": "integer"},
                    "bool_col": {"type": "boolean"},
                    "number_col": {"type": "number"},
                }
            },
            [
                Column(name="string_col", column_type=ColumnType.MEDIUMTEXT),
                Column(name="int_col", column_type=ColumnType.INTEGER),
                Column(name="bool_col", column_type=ColumnType.BOOLEAN),
                Column(name="number_col", column_type=ColumnType.DOUBLE),
            ],
        ),
    ],
    # The second case declares four properties (string, integer, boolean,
    # number), so its id must say "four columns" to match what it exercises.
    ids=["one column", "four columns"],
)
def test_create_columns_from_json_schema(
    json_schema: dict[str, Any], expected_columns: list[Column]
):
    """Test successful column creation from JSON schema.

    Each parametrized case supplies a schema with a ``properties`` mapping and
    the exact list of ``Column`` objects expected back, in the same order as
    the properties are declared.

    Arguments:
        json_schema: The JSON schema passed to
            ``_create_columns_from_json_schema``.
        expected_columns: The columns the call is expected to return.
    """
    assert _create_columns_from_json_schema(json_schema) == expected_columns
1793+
1794+
@pytest.mark.parametrize(
    "json_schema",
    [
        pytest.param({}, id="empty schema"),
        pytest.param({"properties": []}, id="properties is not a dict"),
    ],
)
def test_create_columns_from_json_schema_exceptions(json_schema: dict[str, Any]):
    """Check that an invalid JSON schema is rejected with ``ValueError``.

    Arguments:
        json_schema: A schema that either has no ``properties`` key or whose
            ``properties`` value is a list instead of a dict.
    """
    # The call itself must raise; no return value is inspected.
    with pytest.raises(ValueError):
        _create_columns_from_json_schema(json_schema)
1804+
1805+
@pytest.mark.parametrize(
    "json_schema_property, property_name, expected_column_type",
    [
        pytest.param(
            {"type": "array", "items": {"type": "string"}},
            "string_list_col",
            ColumnType.STRING_LIST,
            id="string_list",
        ),
        pytest.param(
            {"type": "array", "items": {"type": "integer"}},
            "int_list_col",
            ColumnType.INTEGER_LIST,
            id="integer_list",
        ),
        pytest.param(
            {"type": "array", "items": {"type": "boolean"}},
            "bool_list_col",
            ColumnType.BOOLEAN_LIST,
            id="boolean_list",
        ),
        pytest.param({"type": "number"}, "number_col", ColumnType.DOUBLE, id="number"),
        pytest.param(
            {"type": "integer"}, "integer_col", ColumnType.INTEGER, id="integer"
        ),
        pytest.param(
            {"type": "boolean"}, "boolean_col", ColumnType.BOOLEAN, id="boolean"
        ),
        pytest.param(
            {"type": "string"}, "string_col", ColumnType.MEDIUMTEXT, id="string"
        ),
    ],
)
def test_create_synapse_column_from_js_property(
    json_schema_property: dict[str, Any],
    property_name: str,
    expected_column_type: ColumnType,
):
    """Verify a single JSON schema property is turned into a matching ``Column``.

    Arguments:
        json_schema_property: The JSON schema fragment describing one property.
        property_name: The name the resulting column is expected to carry.
        expected_column_type: The ``ColumnType`` the resulting column is
            expected to have.
    """
    column = _create_synapse_column_from_js_property(
        json_schema_property, property_name
    )

    # The helper must hand back a Column whose name and type mirror the inputs.
    assert isinstance(column, Column)
    assert column.name == property_name
    assert column.column_type == expected_column_type
1867+
1868+
@pytest.mark.parametrize(
    "json_schema_property, expected_column_type",
    [
        pytest.param(
            {"enum": ["a", "b", "c"]}, ColumnType.MEDIUMTEXT, id="enum_property"
        ),
        pytest.param({"type": "string"}, ColumnType.MEDIUMTEXT, id="type_string"),
        pytest.param({"type": "integer"}, ColumnType.INTEGER, id="type_integer"),
        pytest.param({"type": "number"}, ColumnType.DOUBLE, id="type_number"),
        pytest.param({"type": "boolean"}, ColumnType.BOOLEAN, id="type_boolean"),
        pytest.param(
            {"type": ["integer", "null"]},
            ColumnType.INTEGER,
            id="type_list_nullable",
        ),
        pytest.param(
            {"type": ["integer", "string"]},
            ColumnType.MEDIUMTEXT,
            id="type_list_multiple_types",
        ),
        pytest.param(
            {"type": "array", "items": {"type": "integer"}},
            ColumnType.INTEGER_LIST,
            id="type_array",
        ),
        pytest.param(
            {"oneOf": [{"type": "integer"}, {"type": "null"}]},
            ColumnType.INTEGER,
            id="one_of_list",
        ),
        pytest.param({"type": "unknown"}, ColumnType.MEDIUMTEXT, id="unknown_type"),
        pytest.param({}, ColumnType.MEDIUMTEXT, id="empty_property"),
    ],
)
def test_get_column_type_from_js_property(
    json_schema_property: dict[str, Any], expected_column_type: ColumnType
):
    """Verify the column type resolved for a JSON schema property.

    Arguments:
        json_schema_property: The JSON schema fragment describing one property.
        expected_column_type: The ``ColumnType`` the lookup is expected to
            return for that fragment.
    """
    actual_column_type = _get_column_type_from_js_property(json_schema_property)
    assert actual_column_type == expected_column_type
1905+
1906+
17981907class TestGetLatestSchemaUri (unittest .TestCase ):
17991908 """Test cases for get_latest_schema_uri function."""
18001909
0 commit comments