def get_properties_metadata(schema, key_properties, parent=()):
    """Build per-field ``inclusion`` metadata for a JSON schema, recursively.

    For an object schema, one entry is written for every property, keyed by
    the breadcrumb ``parent + ('properties', field_name)``, and the property's
    own schema is then walked under that breadcrumb. For an array schema no
    entry is written for the array itself; its ``items`` schema is walked
    under ``parent + ('items',)``. Any other schema contributes nothing.

    Args:
        schema: JSON-schema dict. ``type`` may be a single string or a list
            of strings. Anything that is not a dict (including ``None`` or a
            list-valued ``items``) yields no metadata instead of raising.
        key_properties: collection of field names that form the key, or
            ``None``/empty. Only consulted for properties of the root schema
            (``parent == ()``).
        parent: breadcrumb tuple of the enclosing schema; ``()`` for the
            root schema.

    Returns:
        dict mapping breadcrumb tuples to ``{'inclusion': ...}`` dicts, where
        the value is ``'automatic'`` for root-level key properties and
        ``'available'`` for every other field.
    """
    mdata = {}

    # Missing / list-valued sub-schemas have no properties to describe.
    if not isinstance(schema, dict):
        return mdata

    # Normalise ``type`` so that a bare string is not scanned by substring
    # and an absent ``type`` (e.g. ``anyOf`` schemas) does not raise KeyError.
    schema_types = schema.get('type') or ()
    if isinstance(schema_types, str):
        schema_types = (schema_types,)

    if 'object' in schema_types:
        # The caller records key_properties as 'table-key-properties' on the
        # root breadcrumb, so they name root-level fields; a nested field that
        # merely shares a name with a key must stay 'available'.
        at_root = parent == ()
        properties = schema.get('properties') or {}

        for field_name, field_props in properties.items():
            breadcrumb = parent + ('properties', field_name)
            is_key = bool(at_root and key_properties
                          and field_name in key_properties)
            inclusion = 'automatic' if is_key else 'available'

            mdata = write(mdata, breadcrumb, 'inclusion', inclusion)
            mdata.update(
                get_properties_metadata(
                    field_props,
                    key_properties,
                    parent=breadcrumb
                )
            )
    elif 'array' in schema_types:
        # Arrays get no entry of their own here; only their element schema
        # is descended into.
        mdata.update(
            get_properties_metadata(
                schema.get('items'),
                key_properties,
                parent=parent + ('items',)
            )
        )

    return mdata
get_standard_metadata -def make_expected_metadata(base_obj, dict_of_extras): +def make_expected_metadata(base_obj, dict_of_extras, test_kp=False): metadata_value = {**base_obj} metadata_value.update(dict_of_extras) @@ -13,7 +13,7 @@ def make_expected_metadata(base_obj, dict_of_extras): }, { 'metadata': { - 'inclusion': 'available', + 'inclusion': 'available' if test_kp is False else 'automatic', }, 'breadcrumb': ('properties', 'id') }, @@ -23,6 +23,36 @@ def make_expected_metadata(base_obj, dict_of_extras): }, 'breadcrumb': ('properties', 'name') }, + { + 'metadata': { + 'inclusion': 'available', + }, + 'breadcrumb': ('properties', 'location') + }, + { + 'metadata': { + 'inclusion': 'available', + }, + 'breadcrumb': ('properties', 'location', 'properties', 'country') + }, + { + 'metadata': { + 'inclusion': 'available', + }, + 'breadcrumb': ('properties', 'amounts') + }, + { + 'metadata': { + 'inclusion': 'available', + }, + 'breadcrumb': ('properties', 'amounts', 'items', 'properties', 'value') + }, + { + 'metadata': { + 'inclusion': 'available', + }, + 'breadcrumb': ('properties', 'ratings') + }, { 'metadata': { 'inclusion': 'available', @@ -44,7 +74,7 @@ def test_standard_metadata(self): test_rk = ['id', 'created'] metadata_kp = {'table-key-properties': ['id']} metadata_rm = {'forced-replication-method': 'INCREMENTAL'} - metadata_rk = {'valid_replication_keys': ['id','created']} + metadata_rk = {'valid-replication-keys': ['id','created']} schema_present_base_obj = {'inclusion': 'available'} test_schema = { 'type': ['null', 'object'], @@ -52,6 +82,30 @@ def test_standard_metadata(self): 'properties': { 'id': {'type': ['null', 'string']}, 'name': {'type': ['null', 'string']}, + # test nested object + 'location': { + 'type': ['null', 'object'], + 'properties': { + 'country': {'type': ['null', 'string']} + } + }, + # test array of objects + 'amounts' : { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'object'], + 'properties': { + 'value': {'type': 
['null', 'number']}, + } + } + }, + # test array of simple types + 'ratings': { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'number'], + } + }, 'created': {'type': ['null', 'string'], 'format': 'date-time'}, } @@ -84,7 +138,7 @@ def test_standard_metadata(self): }, make_expected_metadata( schema_present_base_obj, - {'valid_replication_keys': ['id','created'], + {'valid-replication-keys': ['id','created'], 'schema-name':tap_stream_id} ) ), @@ -112,7 +166,7 @@ def test_standard_metadata(self): }, make_expected_metadata( schema_present_base_obj, - {'valid_replication_keys': ['id','created'], + {'valid-replication-keys': ['id','created'], 'forced-replication-method': 'INCREMENTAL', 'schema-name':tap_stream_id} ) @@ -128,7 +182,8 @@ def test_standard_metadata(self): make_expected_metadata( schema_present_base_obj, {'table-key-properties': ['id'], - 'schema-name':tap_stream_id} + 'schema-name':tap_stream_id}, + test_kp=True ) ), ( @@ -143,8 +198,9 @@ def test_standard_metadata(self): schema_present_base_obj, {'table-key-properties': ['id'], - 'valid_replication_keys': ['id','created'], - 'schema-name':tap_stream_id} + 'valid-replication-keys': ['id','created'], + 'schema-name':tap_stream_id}, + test_kp=True ) ), ( @@ -159,7 +215,8 @@ def test_standard_metadata(self): schema_present_base_obj, {'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'schema-name':tap_stream_id} + 'schema-name':tap_stream_id}, + test_kp=True ) ), ( @@ -174,8 +231,9 @@ def test_standard_metadata(self): schema_present_base_obj, {'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': ['id','created'], - 'schema-name':tap_stream_id} + 'valid-replication-keys': ['id','created'], + 'schema-name':tap_stream_id}, + test_kp=True ) ), ( @@ -188,7 +246,7 @@ def test_standard_metadata(self): [ { 'metadata': {}, - 'breadcrumb': [] + 'breadcrumb': () } ] ), @@ -202,10 +260,9 @@ def test_standard_metadata(self): [ { 
'metadata': { - 'inclusion': 'available', - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] }, - 'breadcrumb': [] + 'breadcrumb': () } ] ), @@ -219,10 +276,9 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'forced-replication-method': 'INCREMENTAL' }, - 'breadcrumb': [] + 'breadcrumb': () } ] ), @@ -236,11 +292,10 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] }, - 'breadcrumb': [] + 'breadcrumb': () } ] ), @@ -254,10 +309,9 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'table-key-properties': ['id'], }, - 'breadcrumb': [] + 'breadcrumb': () } ] ), @@ -271,11 +325,10 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'table-key-properties': ['id'], - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] }, - 'breadcrumb': [] + 'breadcrumb': () } ] ), @@ -289,12 +342,11 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] }, - 'breadcrumb': [] + 'breadcrumb': () } ] ) @@ -307,8 +359,7 @@ def test_standard_metadata(self): test_value = get_standard_metadata(**function_params) for obj in expected_metadata: - if obj in test_value: - self.assertIn(obj, test_value) + self.assertIn(obj, test_value) # Test one function call where the parameters are not splat in test_value = get_standard_metadata(test_schema, @@ -320,11 +371,11 @@ def test_standard_metadata(self): expected_metadata = make_expected_metadata(schema_present_base_obj, {'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': 
['id','created'], - 'schema-name':tap_stream_id}) + 'valid-replication-keys': ['id','created'], + 'schema-name':tap_stream_id}, + test_kp=True) for obj in expected_metadata: - if obj in test_value: - self.assertIn(obj, test_value) + self.assertIn(obj, test_value) def test_empty_key_properties_are_written(self): mdata = get_standard_metadata(key_properties=[])