33from dataclasses import dataclass , field
44from functools import reduce
55from types import TracebackType
6- from typing import Any , Callable , Iterator , Optional
6+ from typing import Any , Callable , Iterator , Optional , cast
77
8+ from jsonpath import findall , resolve
89from jsonschema import Draft7Validator , FormatChecker , ValidationError
910
1011from check_datapackage .config import Config
1617from check_datapackage .exclusion import exclude
1718from check_datapackage .extensions import apply_extensions
1819from check_datapackage .internals import (
20+ PropertyField ,
1921 _filter ,
2022 _flat_map ,
23+ _get_fields_at_jsonpath ,
2124 _map ,
2225)
2326from check_datapackage .issue import Issue
@@ -126,6 +129,7 @@ class for more details, especially about the default values.
126129 _set_should_fields_to_required (schema )
127130
128131 issues = _check_object_against_json_schema (properties , schema )
132+ issues += _check_keys (properties , issues )
129133 issues += apply_extensions (properties , config .extensions )
130134 issues = exclude (issues , config .exclusions , properties )
131135 issues = sorted (set (issues ))
@@ -136,6 +140,86 @@ class for more details, especially about the default values.
136140 return issues
137141
138142
def _check_keys(properties: dict[str, Any], issues: list[Issue]) -> list[Issue]:
    """Check that primary and foreign keys exist.

    Only primary keys are checked at the moment; the foreign key check has
    not been implemented yet.

    Args:
        properties: The properties to search for resources with a primary key.
        issues: Issues that were already found. Resources that have an issue
            at or under `schema.primaryKey` are not checked again here.

    Returns:
        The issues found for primary keys that refer to fields which are not
        present on their resource.
    """
    # Primary keys
    # The filter selects resources whose primary key has a length greater
    # than zero, plus those whose primary key is the empty string.
    resources_with_pk = _get_fields_at_jsonpath(
        "$.resources[?(length(@.schema.primaryKey) > 0 || @.schema.primaryKey == '')]",
        properties,
    )
    # Skip primary keys that were already flagged, so that the same property
    # is not reported twice and `_check_primary_key` only sees keys without
    # an earlier issue.
    resources_with_pk = _keep_resources_with_no_issue_at_property(
        resources_with_pk, issues, "schema.primaryKey"
    )
    key_issues = _flat_map(resources_with_pk, _check_primary_key)

    # Foreign keys
    # Not checked yet.

    return key_issues
158+
159+
def _issues_at_property(
    resource: PropertyField, issues: list[Issue], jsonpath: str
) -> list[Issue]:
    """Return the issues located at or under a property of the resource.

    Args:
        resource: The resource whose JSON path is used as the prefix.
        issues: The issues to search through.
        jsonpath: The path of the property relative to the resource, without
            a leading dot (e.g. `schema.primaryKey`).

    Returns:
        The issues whose JSON path contains `<resource path>.<jsonpath>`.
    """
    # Build the target path once instead of once per issue. The literal must
    # not contain any whitespace, otherwise it can never match an issue path.
    property_path = f"{resource.jsonpath}.{jsonpath}"
    return _filter(
        issues,
        lambda issue: property_path in issue.jsonpath,
    )
167+
168+
def _keep_resources_with_no_issue_at_property(
    resources: list[PropertyField], issues: list[Issue], jsonpath: str
) -> list[PropertyField]:
    """Filter out resources that have an issue at or under the given `jsonpath`.

    Args:
        resources: The resources to filter.
        issues: The issues to look through for each resource.
        jsonpath: The path of the property relative to each resource, without
            a leading dot (e.g. `schema.primaryKey`).

    Returns:
        The resources for which no issue was found at or under `jsonpath`.
    """
    return _filter(
        resources,
        # An empty list of matching issues means the resource is kept.
        lambda resource: not _issues_at_property(resource, issues, jsonpath),
    )
177+
178+
def _check_primary_key(resource: PropertyField) -> list[Issue]:
    """Check that primary key fields exist in the resource.

    Args:
        resource: The resource to check. Its value is expected to contain
            `schema.primaryKey`.

    Returns:
        An empty list when every primary key field is found among the
        resource's fields, otherwise a list with a single `primary-key` issue
        located at the resource's `schema.primaryKey`.
    """
    pk_fields = resolve("/schema/primaryKey", resource.value)
    # The primary key may be a single name or a list of names; normalise it.
    pk_fields_list = _key_fields_as_str_list(pk_fields)
    # A comma-separated string of the missing names; empty when none are missing.
    unknown_fields = _get_unknown_key_fields(pk_fields_list, resource.value)

    if not unknown_fields:
        return []

    return [
        Issue(
            jsonpath=f"{resource.jsonpath}.schema.primaryKey",
            type="primary-key",
            message=(
                f"No fields found in resource for primary key fields: {unknown_fields}."
            ),
            # Report the primary key exactly as it was written.
            instance=pk_fields,
        )
    ]
198+
199+
200+ def _key_fields_as_str_list (key_fields : Any ) -> list [str ]:
201+ """Returns the list representation of primary and foreign key fields.
202+
203+ Key fields can be represented either as a string (containing one field name)
204+ or a list of strings.
205+
206+ The input should contain a correctly typed `key_fields` object.
207+ """
208+ if not isinstance (key_fields , list ):
209+ key_fields = [key_fields ]
210+ return cast (list [str ], key_fields )
211+
212+
def _get_unknown_key_fields(
    key_fields: list[str], properties: dict[str, Any], resource_path: str = ""
) -> str:
    """Return the key fields that don't exist on the specified resource.

    Args:
        key_fields: The field names used by the key.
        properties: The object in which the resource's fields are looked up.
        resource_path: Prefix placed directly in front of
            `schema.fields[*].name`. No separator is added, so a non-empty
            prefix has to end with its own separator. Defaults to an empty
            string.

    Returns:
        The missing field names, each formatted with `repr` and joined by
        `", "`. An empty string when every key field is known.
    """
    known_fields = findall(f"{resource_path}schema.fields[*].name", properties)
    unknown_fields = _filter(key_fields, lambda field: field not in known_fields)
    # Quote each name so that empty or whitespace-only names stay visible.
    unknown_fields = _map(unknown_fields, lambda field: f"{field!r}")
    return ", ".join(unknown_fields)
221+
222+
139223def _set_should_fields_to_required (schema : dict [str , Any ]) -> dict [str , Any ]:
140224 """Set 'SHOULD' fields to 'REQUIRED' in the schema."""
141225 should_fields = ("name" , "id" , "licenses" )
0 commit comments