Skip to content

Commit 69d8454

Browse files
authored
feat: ✨ check foreign keys fields exist (#227)
# Description

This PR checks that foreign key source and destination fields exist. #218 should be reviewed first.

Closes #217

Needs an in-depth review.

## Checklist

- [x] Formatted Markdown
- [x] Ran `just run-all`
1 parent 5f2bc06 commit 69d8454

File tree

2 files changed

+404
-27
lines changed

2 files changed

+404
-27
lines changed

src/check_datapackage/check.py

Lines changed: 143 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,17 @@ def _check_keys(properties: dict[str, Any], issues: list[Issue]) -> list[Issue]:
153153
key_issues = _flat_map(resources_with_pk, _check_primary_key)
154154

155155
# Foreign keys
156-
156+
resources_with_fk = _get_fields_at_jsonpath(
157+
"$.resources[?(length(@.schema.foreignKeys) > 0)]",
158+
properties,
159+
)
160+
resources_with_fk = _keep_resources_with_no_issue_at_property(
161+
resources_with_fk, issues, "schema.foreignKeys"
162+
)
163+
key_issues += _flat_map(
164+
resources_with_fk,
165+
lambda resource: _check_foreign_keys(resource, properties),
166+
)
157167
return key_issues
158168

159169

@@ -197,6 +207,35 @@ def _check_primary_key(resource: PropertyField) -> list[Issue]:
197207
]
198208

199209

210+
def _check_foreign_keys(
    resource: PropertyField, properties: dict[str, Any]
) -> list[Issue]:
    """Check the source and destination fields of a resource's foreign keys.

    Every foreign key gets its source fields checked. Destination fields are
    checked against this resource when the key's reference names no resource
    (or an empty one), and against the named resource otherwise.

    Args:
        resource: The resource whose `schema.foreignKeys` are checked.
        properties: The full package properties, passed on to the
            destination check for keys that reference another resource.

    Returns:
        The issues found, in the order: source fields, same-resource
        destination fields, different-resource destination fields.
    """

    def _names_other_resource(fk: dict[str, Any]) -> bool:
        # A missing or empty `resource` means the key refers to this resource.
        reference = fk["reference"]
        return "resource" in reference and reference["resource"] != ""

    # Safe, as only FKs of the correct type here
    all_keys = cast(
        list[dict[str, Any]], resolve("/schema/foreignKeys", resource.value)
    )
    keys_to_other = _filter(all_keys, _names_other_resource)
    keys_to_self = _filter(all_keys, lambda fk: not _names_other_resource(fk))

    source_issues = _flat_map(
        all_keys, lambda fk: _check_fk_source_fields(fk, resource)
    )
    self_dest_issues = _flat_map(
        keys_to_self,
        lambda fk: _check_fk_dest_fields_same_resource(fk, resource),
    )
    other_dest_issues = _flat_map(
        keys_to_other,
        lambda fk: _check_fk_dest_fields_diff_resource(fk, resource, properties),
    )

    return [*source_issues, *self_dest_issues, *other_dest_issues]
237+
238+
200239
def _key_fields_as_str_list(key_fields: Any) -> list[str]:
201240
"""Returns the list representation of primary and foreign key fields.
202241
@@ -220,6 +259,109 @@ def _get_unknown_key_fields(
220259
return ", ".join(unknown_fields)
221260

222261

262+
def _check_fk_source_fields(
    foreign_key: dict[str, Any], resource: PropertyField
) -> list[Issue]:
    """Check a foreign key's source fields against its own resource.

    Two independent checks are made, and each can add one issue:

    1. Every source field must be known to the resource (as decided by
       `_get_unknown_key_fields`).
    2. The key must list as many source fields as destination fields.

    Args:
        foreign_key: A single entry of the resource's `schema.foreignKeys`.
        resource: The resource that declares the foreign key.

    Returns:
        Zero, one or two issues, all of type `foreign-key-source-fields`.
    """
    raw_source = resolve("/fields", foreign_key)
    source_names = _key_fields_as_str_list(raw_source)
    # NOTE(review): this path carries no index into the `foreignKeys` array,
    # so issues from different foreign keys share one path. Confirm intended.
    issue_path = f"{resource.jsonpath}.schema.foreignKeys.fields"
    found: list[Issue] = []

    missing = _get_unknown_key_fields(source_names, resource.value)
    if missing:
        not_found_issue = Issue(
            jsonpath=issue_path,
            type="foreign-key-source-fields",
            message=(
                "No fields found in resource for foreign key source fields: "
                f"{missing}."
            ),
            instance=raw_source,
        )
        found.append(not_found_issue)

    dest_names = _key_fields_as_str_list(resolve("/reference/fields", foreign_key))
    if len(dest_names) != len(source_names):
        count_issue = Issue(
            jsonpath=issue_path,
            type="foreign-key-source-fields",
            message=(
                "The number of foreign key source fields must be the same as "
                "the number of foreign key destination fields."
            ),
            instance=raw_source,
        )
        found.append(count_issue)

    return found
297+
298+
299+
def _check_fk_dest_fields_same_resource(
    foreign_key: dict[str, Any],
    resource: PropertyField,
) -> list[Issue]:
    """Check a self-referencing foreign key's destination fields.

    The destination fields (`reference.fields`) are looked up in the same
    resource that declares the foreign key.

    Args:
        foreign_key: A single entry of the resource's `schema.foreignKeys`.
        resource: The resource that declares the foreign key and is also
            its destination.

    Returns:
        A single `foreign-key-destination-fields` issue if any destination
        field is unknown, otherwise an empty list.
    """
    raw_dest = resolve("/reference/fields", foreign_key)
    missing = _get_unknown_key_fields(
        _key_fields_as_str_list(raw_dest), resource.value
    )
    if missing:
        return [
            Issue(
                jsonpath=f"{resource.jsonpath}.schema.foreignKeys.reference.fields",
                type="foreign-key-destination-fields",
                message=(
                    "No fields found in resource for foreign key "
                    f"destination fields: {missing}."
                ),
                instance=raw_dest,
            )
        ]

    return []
321+
322+
323+
def _check_fk_dest_fields_diff_resource(
    foreign_key: dict[str, Any], resource: PropertyField, properties: dict[str, Any]
) -> list[Issue]:
    """Check that foreign key destination fields exist on the destination resource.

    The destination resource is looked up in the package by the name given in
    `reference.resource`. If no resource has that name, a single
    `foreign-key-destination-resource` issue is returned and the fields are
    not checked. Otherwise the destination fields are checked against the
    named resource.

    Args:
        foreign_key: A single entry of the resource's `schema.foreignKeys`
            whose reference names another resource.
        resource: The resource that declares the foreign key.
        properties: The full package properties, searched for the
            destination resource.

    Returns:
        A list with at most one issue.
    """
    dest_fields = resolve("/reference/fields", foreign_key)
    dest_field_list = _key_fields_as_str_list(dest_fields)
    # Safe, as only keys of the correct type here
    dest_resource_name = cast(str, resolve("/reference/resource", foreign_key))

    # The name is embedded in a single-quoted JSONPath string literal, so
    # backslashes and single quotes must be escaped. Otherwise a name such as
    # "bob's" gives a malformed query instead of a reported issue.
    # Backslashes go first so the ones added for quotes are not doubled.
    escaped_name = dest_resource_name.replace("\\", "\\\\").replace("'", "\\'")
    dest_resource_path = f"resources[?(@.name == '{escaped_name}')]"
    if not findall(dest_resource_path, properties):
        return [
            Issue(
                jsonpath=f"{resource.jsonpath}.schema.foreignKeys.reference.resource",
                type="foreign-key-destination-resource",
                message=(
                    f"The destination resource {dest_resource_name!r} of this foreign "
                    "key doesn't exist in the package."
                ),
                instance=dest_resource_name,
            )
        ]

    unknown_fields = _get_unknown_key_fields(
        dest_field_list, properties, f"{dest_resource_path}."
    )
    if not unknown_fields:
        return []

    return [
        Issue(
            jsonpath=f"{resource.jsonpath}.schema.foreignKeys.reference.fields",
            type="foreign-key-destination-fields",
            message=(
                f"No fields found in destination resource {dest_resource_name!r} "
                f"for foreign key destination fields: {unknown_fields}."
            ),
            instance=dest_fields,
        )
    ]
363+
364+
223365
def _set_should_fields_to_required(schema: dict[str, Any]) -> dict[str, Any]:
224366
"""Set 'SHOULD' fields to 'REQUIRED' in the schema."""
225367
should_fields = ("name", "id", "licenses")

0 commit comments

Comments
 (0)