From 6c7291a9c060e950dfec01624d99cc51bb389949 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20J=C3=BAnior?= Date: Thu, 5 Oct 2023 10:56:15 -0300 Subject: [PATCH 1/6] add failing test for update of field name in non-primary key field when primary keys exists --- tests/steps/field/test_field_update.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/steps/field/test_field_update.py b/tests/steps/field/test_field_update.py index 9d32b3e979..34d8dbef02 100644 --- a/tests/steps/field/test_field_update.py +++ b/tests/steps/field/test_field_update.py @@ -89,6 +89,7 @@ def test_step_field_update_field_name_with_primary_key(): pipeline = Pipeline( steps=[ steps.field_update(name="id", descriptor={"name": "pkey"}), + steps.field_update(name="population", descriptor={"name": "pop"}), ], ) target = source.transform(pipeline) From 4616a7f43d99062b96d0a6fc932a1d1bd33d9831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20J=C3=BAnior?= Date: Thu, 24 Aug 2023 19:30:24 -0300 Subject: [PATCH 2/6] Fix x not in list error if self.name is not part of the primary key, it can't be dropped from the pk --- frictionless/steps/field/field_update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frictionless/steps/field/field_update.py b/frictionless/steps/field/field_update.py index e93097601d..831d530776 100644 --- a/frictionless/steps/field/field_update.py +++ b/frictionless/steps/field/field_update.py @@ -68,7 +68,7 @@ def transform_resource(self, resource: Resource): resource.data = table.update(self.name, self.value) # type: ignore elif new_name: resource.data = table.rename({self.name: new_name}) # type: ignore - if new_name and resource.schema.primary_key: + if new_name and self.name in resource.schema.primary_key: resource.schema.primary_key.remove(self.name) resource.schema.primary_key.append(new_name) resources = resource.package.resources if resource.package else [] From 6f5f9e424944dd61a52c8dac9105a691c4857263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20J=C3=BAnior?= Date: Thu, 5 Oct 2023 11:49:03 -0300 Subject: [PATCH 3/6] add failing test for update of field name of foreign key --- data/transform-fk.csv | 4 ++++ tests/steps/field/test_field_update.py | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 data/transform-fk.csv diff --git a/data/transform-fk.csv b/data/transform-fk.csv new file mode 100644 index 0000000000..93ff113196 --- /dev/null +++ b/data/transform-fk.csv @@ -0,0 +1,4 @@ +id,address,country_name +1,a,germany +2,b,france +3,c,spain diff --git a/tests/steps/field/test_field_update.py b/tests/steps/field/test_field_update.py index 34d8dbef02..922e6ba38a 100644 --- a/tests/steps/field/test_field_update.py +++ b/tests/steps/field/test_field_update.py @@ -98,7 +98,7 @@ def test_step_field_update_field_name_with_primary_key(): def test_step_field_update_referenced_as_foreign_key(): resource1 = TableResource(name="resource1", path="data/transform.csv") - resource2 = TableResource(name="resource2") + resource2 = TableResource(name="resource2", path="data/transform-fk.csv") resource1.schema = Schema.from_descriptor( { "fields": [ @@ -135,13 +135,24 @@ def test_step_field_update_referenced_as_foreign_key(): ) ], ) + + transform( + package, + steps=[ + steps.resource_transform( + name="resource2", + steps=[steps.field_update(name="country_name", descriptor={"name": "country"})], + ) + ], + ) + assert ( package.get_resource("resource1").validate().flatten(["title", "message"]) == [] ) assert package.get_resource("resource1").schema.primary_key == ["pkey"] assert package.get_resource("resource2").schema.foreign_keys == [ { - "fields": ["country_name"], + "fields": ["country"], "reference": {"fields": ["pkey"], "resource": "resource1"}, } ] From 268ee638c099b06f31e4e6691575653a590008e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20J=C3=BAnior?= Date: Thu, 24 Aug 2023 19:47:58 -0300 Subject: [PATCH 4/6] Update foreignKeys regardless of primaryKey --- frictionless/steps/field/field_update.py | 49 +++++++++++++----------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/frictionless/steps/field/field_update.py b/frictionless/steps/field/field_update.py index 831d530776..0cf55a20fa 100644 --- a/frictionless/steps/field/field_update.py +++ b/frictionless/steps/field/field_update.py @@ -71,31 +71,34 @@ def transform_resource(self, resource: Resource): if new_name and self.name in resource.schema.primary_key: resource.schema.primary_key.remove(self.name) resource.schema.primary_key.append(new_name) - resources = resource.package.resources if resource.package else [] - # update name in all the resources where it is referenced - for package_resource in resources: - for index, fk in enumerate(package_resource.schema.foreign_keys): - fields = fk["reference"]["fields"] - if isinstance(fields, list): - if self.name in fk["reference"]["fields"]: - package_resource.schema.foreign_keys[index]["reference"][ - "fields" - ].remove(self.name) - package_resource.schema.foreign_keys[index]["reference"][ - "fields" - ].append(new_name) - else: + resources = resource.package.resources if resource.package else [] + # update name in all the resources where it is referenced + for package_resource in resources: + for index, fk in enumerate(package_resource.schema.foreign_keys): + if new_name and self.name in fk["fields"]: + package_resource.schema.foreign_keys[index]["fields"].remove(self.name) + package_resource.schema.foreign_keys[index]["fields"].append(new_name) + fields = fk["reference"]["fields"] + if isinstance(fields, list): + if self.name in fk["reference"]["fields"]: package_resource.schema.foreign_keys[index]["reference"][ "fields" - ] = new_name - - package_resource.schema.foreign_keys = ( - package_resource.schema.foreign_keys - ) - if resource.package: - resource.package.metadata_descriptor_initial = ( - resource.package.to_descriptor() - ) + ].remove(self.name) + package_resource.schema.foreign_keys[index]["reference"][ + "fields" + ].append(new_name) + else: + package_resource.schema.foreign_keys[index]["reference"][ + "fields" + ] = new_name + + package_resource.schema.foreign_keys = ( + package_resource.schema.foreign_keys + ) + if resource.package: + resource.package.metadata_descriptor_initial = ( + resource.package.to_descriptor() + ) # Metadata From 1d697931b8e2e9e83920e06a280e12b8b2701728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20J=C3=BAnior?= Date: Thu, 5 Oct 2023 12:13:03 -0300 Subject: [PATCH 5/6] run make format --- frictionless/steps/field/field_update.py | 8 ++++---- tests/steps/field/test_field_update.py | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/frictionless/steps/field/field_update.py b/frictionless/steps/field/field_update.py index 0cf55a20fa..06815eddec 100644 --- a/frictionless/steps/field/field_update.py +++ b/frictionless/steps/field/field_update.py @@ -76,7 +76,9 @@ def transform_resource(self, resource: Resource): for package_resource in resources: for index, fk in enumerate(package_resource.schema.foreign_keys): if new_name and self.name in fk["fields"]: - package_resource.schema.foreign_keys[index]["fields"].remove(self.name) + package_resource.schema.foreign_keys[index]["fields"].remove( + self.name + ) package_resource.schema.foreign_keys[index]["fields"].append(new_name) fields = fk["reference"]["fields"] if isinstance(fields, list): @@ -92,9 +94,7 @@ def transform_resource(self, resource: Resource): "fields" ] = new_name - package_resource.schema.foreign_keys = ( - package_resource.schema.foreign_keys - ) + package_resource.schema.foreign_keys = package_resource.schema.foreign_keys if resource.package: resource.package.metadata_descriptor_initial = ( resource.package.to_descriptor() diff --git a/tests/steps/field/test_field_update.py b/tests/steps/field/test_field_update.py index 922e6ba38a..f838b74955 100644 --- a/tests/steps/field/test_field_update.py +++ b/tests/steps/field/test_field_update.py @@ -141,7 +141,11 @@ def test_step_field_update_referenced_as_foreign_key(): steps=[ steps.resource_transform( name="resource2", - steps=[steps.field_update(name="country_name", descriptor={"name": "country"})], + steps=[ + steps.field_update( + name="country_name", descriptor={"name": "country"} + ) + ], ) ], ) From 0e98f66d96e56afbd5873d476d4876e4e268b1de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20J=C3=BAnior?= Date: Mon, 9 Oct 2023 15:24:27 -0300 Subject: [PATCH 6/6] add failing test when foreign key and referenced column have same name --- tests/steps/field/test_field_update.py | 66 ++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/steps/field/test_field_update.py b/tests/steps/field/test_field_update.py index f838b74955..acd67b19cc 100644 --- a/tests/steps/field/test_field_update.py +++ b/tests/steps/field/test_field_update.py @@ -160,3 +160,69 @@ def test_step_field_update_referenced_as_foreign_key(): "reference": {"fields": ["pkey"], "resource": "resource1"}, } ] + + +def test_step_field_update_referenced_as_foreign_key_with_same_name(): + resource1 = TableResource.from_descriptor( + { + "name": "resource1", + "data": [ + ["id", "name", "population"], + [1, "germany", 83], + [2, "france", 66], + [3, "spain", 47], + ], + "schema": { + "fields": [ + {"name": "id", "type": "integer"}, + {"name": "name", "type": "string"}, + {"name": "population", "type": "integer"}, + ], + "primaryKey": ["id"], + "foreignKeys": [ + { + "fields": ["name"], + "reference": {"fields": ["name"], "resource": "resource2"}, + } + ], + }, + } + ) + resource2 = TableResource.from_descriptor( + { + "name": "resource2", + "data": [ + ["name", "continent"], + ["germany", "europe"], + ["france", "europe"], + ["spain", "europe"], + ], + "schema": { + "fields": [ + {"name": "name", "type": "string"}, + {"name": "continent", "type": "string"}, + ], + "primaryKey": ["name"], + }, + } + ) + package = Package(name="test-package", resources=[resource1, resource2]) + + transform( + package, + steps=[ + steps.resource_transform( + name="resource1", + steps=[ + steps.field_update(name="name", descriptor={"name": "country_name"}) + ], + ) + ], + ) + + assert package.get_resource("resource1").schema.foreign_keys == [ + { + "fields": ["country_name"], + "reference": {"fields": ["name"], "resource": "resource2"}, + } + ]