Skip to content

refactor: proper class for field info #1730

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 23 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
bef5fdc
fix: parallel file validation for a datapackage
pierrecamilleri Dec 13, 2024
b0554a2
🔵 rename local variable
pierrecamilleri Jan 27, 2025
f20aab1
🔵 improve documentation
pierrecamilleri Jan 27, 2025
bc27f9b
Mv `validate` methods & soft deprecate Validator
pierrecamilleri Jan 27, 2025
0498e9b
fix: repair tests of parallel validation
pierrecamilleri Jan 27, 2025
e2f2a33
Dispatching tests according to method change
pierrecamilleri Jan 27, 2025
ee4ad2f
first attempt
pierrecamilleri Jan 17, 2025
39aa0f2
squash! first attempt
pierrecamilleri Jan 24, 2025
70aadac
🔵 Mv to resources/table
pierrecamilleri Jan 24, 2025
add8dac
🔵 rename
pierrecamilleri Jan 27, 2025
e26393b
Schema sync functionality inside FieldsInfo
pierrecamilleri Jan 27, 2025
8b72ae8
🔵 remove empty / unused file
pierrecamilleri Jan 29, 2025
c7479c6
🟢 Test passes
pierrecamilleri Jan 29, 2025
9b7d0ad
🟢 get rid of FieldInfo
pierrecamilleri Feb 7, 2025
a80e79a
remove unnecessary review noise
pierrecamilleri Feb 7, 2025
b7f693d
remove unused function
pierrecamilleri Feb 7, 2025
77a2c9f
remove unused FieldInfo
pierrecamilleri Feb 7, 2025
305d2b9
linting
pierrecamilleri Feb 7, 2025
91d3ffb
Information on processing for Row.__str__ and Row.__repr__
pierrecamilleri Feb 7, 2025
575885b
typo
pierrecamilleri Feb 7, 2025
c41590f
unintended rename
pierrecamilleri Feb 7, 2025
37fd709
Remove __repr__ change as it is used for tests
pierrecamilleri Feb 7, 2025
c6c90a2
fix: oopsie
pierrecamilleri Feb 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions data/invalid/datapackage_no_foreign_key.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"name": "testing",
"resources": [
{
"name": "data",
"path": "data.csv",
"schema": {
"fields": [
{
"name": "id",
"type": "string",
"constraints": {
"required": true
}
},
{
"name": "name",
"type": "string"
},
{
"name": "description",
"type": "string"
},
{
"name": "amount",
"type": "number"
}
],
"primaryKey": "id"
}
},
{
"name": "data2",
"path": "data2.csv",
"schema": {
"fields": [
{
"type": "string",
"name": "parent"
},
{
"type": "string",
"name": "comment"
}
]
}
}
]
}
2 changes: 2 additions & 0 deletions frictionless/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,6 @@
from .table import Lookup as Lookup
from .table import Row as Row
from .transformer import Transformer as Transformer

# Deprecated
from .validator import Validator as Validator
82 changes: 0 additions & 82 deletions frictionless/detector/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from .. import helpers, settings
from ..dialect import Dialect
from ..exception import FrictionlessException
from ..fields import AnyField
from ..metadata import Metadata
from ..platform import platform
Expand Down Expand Up @@ -403,33 +402,6 @@ def detect_schema(
fields[index] = AnyField(name=name, schema=schema) # type: ignore
schema.fields = fields # type: ignore

# Sync schema
if self.schema_sync:
if labels:
case_sensitive = options["header_case"]

if not case_sensitive:
labels = [label.lower() for label in labels]

if len(labels) != len(set(labels)):
note = '"schema_sync" requires unique labels in the header'
raise FrictionlessException(note)

mapped_fields = self.mapped_schema_fields_names(
schema.fields, # type: ignore
case_sensitive,
)

self.rearrange_schema_fields_given_labels(
mapped_fields,
schema,
labels,
)

self.add_missing_required_labels_to_schema_fields(
mapped_fields, schema, labels, case_sensitive
)

# Patch schema
if self.schema_patch:
patch = deepcopy(self.schema_patch)
Expand All @@ -443,57 +415,3 @@ def detect_schema(
schema = Schema.from_descriptor(descriptor)

return schema

@staticmethod
def mapped_schema_fields_names(
    fields: List[Field], case_sensitive: bool
) -> Dict[str, Field]:
    """Index schema fields by their name.

    Keys are the field names taken as-is when `case_sensitive` is true,
    and the lowercased field names otherwise. When several fields end up
    with the same key, the last one in `fields` is kept.
    """
    index: Dict[str, Field] = {}
    for item in fields:
        key = item.name if case_sensitive else item.name.lower()
        index[key] = item
    return index

@staticmethod
def rearrange_schema_fields_given_labels(
    fields_mapping: Dict[str, Field],
    schema: Schema,
    labels: List[str],
):
    """Rebuild the schema fields so that they follow the order of `labels`.

    The schema is first emptied with `clear_fields()`. Then, for each label
    in order, one field is added: the field registered under that label in
    `fields_mapping` when there is one, otherwise a new field of type "any"
    named after the label. Fields of the mapping whose key does not appear
    in `labels` are therefore dropped from the schema.
    """
    schema.clear_fields()

    for name in labels:
        if name in fields_mapping:
            field = fields_mapping[name]
        else:
            # Build the placeholder only for labels without a known field,
            # instead of creating (and discarding) one for every label.
            field = Field.from_descriptor({"name": name, "type": "any"})
        schema.add_field(field)

def add_missing_required_labels_to_schema_fields(
    self,
    fields_mapping: Dict[str, Field],
    schema: Schema,
    labels: List[str],
    case_sensitive: bool,
):
    """Append to the schema the required fields that have no matching label.

    A field from `fields_mapping` is added to `schema` when its mapping key
    is absent from `labels` and `self.field_is_required` returns a truthy
    value for it (with the given `schema` and `case_sensitive` flag).
    Fields are added in the iteration order of `fields_mapping`.
    """
    # Build the lookup once: testing membership against the list would be
    # a linear scan repeated for every mapped field.
    known_labels = set(labels)

    for name, field in fields_mapping.items():
        if name in known_labels:
            continue
        if self.field_is_required(field, schema, case_sensitive):
            schema.add_field(field)

@staticmethod
def field_is_required(
    field: Field,
    schema: Schema,
    case_sensitive: bool,
) -> bool:
    """Tell whether a field is required or belongs to the primary key.

    The result is truthy when `field.required` is truthy, or when the field
    name is listed in `schema.primary_key`. With `case_sensitive` false,
    both the field name and the primary key names are lowercased before
    being compared.
    """
    if not case_sensitive:
        lowered_keys = [key.lower() for key in schema.primary_key]
        return field.required or field.name.lower() in lowered_keys

    return field.required or field.name in schema.primary_key
Loading