Conversation
| origin = "origin" | ||
| language = "language" | ||
| other_nutritional_substance = "other_nutritional_substance" | ||
| dataset_filename: str |
There was a problem hiding this comment.
added to avoid mypy error
src/openfoodfacts/types.py:901: error: "TaxonomyType" has no attribute "dataset_filename" [attr-defined]
Freso
left a comment
There was a problem hiding this comment.
One comment/suggestion, but overall LGTM!
| def __new__(cls, value: str, dataset_filename: str): | ||
| """ | ||
| Override __new__ to allow storing the dataset filename | ||
| associated with each taxonomy type. | ||
| """ | ||
| obj = str.__new__(cls, value) | ||
| obj._value_ = value | ||
| obj.dataset_filename = dataset_filename | ||
| return obj | ||
|
|
||
| category = ("category", "categories.full.json") | ||
| ingredient = ("ingredient", "ingredients.full.json") | ||
| label = ("label", "labels.full.json") | ||
| brand = ("brand", "brands.full.json") | ||
| packaging_shape = ("packaging_shape", "packaging_shapes.full.json") | ||
| packaging_material = ("packaging_material", "packaging_materials.full.json") | ||
| packaging_recycling = ("packaging_recycling", "packaging_recycling.full.json") | ||
| country = ("country", "countries.full.json") | ||
| store = ("store", "stores.full.json") | ||
| nova_group = ("nova_group", "nova_groups.full.json") | ||
| packaging = ("packaging", "packaging.full.json") | ||
| additive = ("additive", "additives.full.json") | ||
| vitamin = ("vitamin", "vitamins.full.json") | ||
| mineral = ("mineral", "minerals.full.json") | ||
| amino_acid = ("amino_acid", "amino_acids.full.json") | ||
| nucleotide = ("nucleotide", "nucleotides.full.json") | ||
| allergen = ("allergen", "allergens.full.json") | ||
| state = ("state", "states.full.json") | ||
| data_quality = ("data_quality", "data_quality.full.json") | ||
| origin = ("origin", "origins.full.json") | ||
| language = ("language", "languages.full.json") | ||
| other_nutritional_substance = ( | ||
| "other_nutritional_substance", | ||
| "other_nutritional_substances.full.json", | ||
| ) |
There was a problem hiding this comment.
Would it make sense/work to flip these two block around?
| def __new__(cls, value: str, dataset_filename: str): | |
| """ | |
| Override __new__ to allow storing the dataset filename | |
| associated with each taxonomy type. | |
| """ | |
| obj = str.__new__(cls, value) | |
| obj._value_ = value | |
| obj.dataset_filename = dataset_filename | |
| return obj | |
| category = ("category", "categories.full.json") | |
| ingredient = ("ingredient", "ingredients.full.json") | |
| label = ("label", "labels.full.json") | |
| brand = ("brand", "brands.full.json") | |
| packaging_shape = ("packaging_shape", "packaging_shapes.full.json") | |
| packaging_material = ("packaging_material", "packaging_materials.full.json") | |
| packaging_recycling = ("packaging_recycling", "packaging_recycling.full.json") | |
| country = ("country", "countries.full.json") | |
| store = ("store", "stores.full.json") | |
| nova_group = ("nova_group", "nova_groups.full.json") | |
| packaging = ("packaging", "packaging.full.json") | |
| additive = ("additive", "additives.full.json") | |
| vitamin = ("vitamin", "vitamins.full.json") | |
| mineral = ("mineral", "minerals.full.json") | |
| amino_acid = ("amino_acid", "amino_acids.full.json") | |
| nucleotide = ("nucleotide", "nucleotides.full.json") | |
| allergen = ("allergen", "allergens.full.json") | |
| state = ("state", "states.full.json") | |
| data_quality = ("data_quality", "data_quality.full.json") | |
| origin = ("origin", "origins.full.json") | |
| language = ("language", "languages.full.json") | |
| other_nutritional_substance = ( | |
| "other_nutritional_substance", | |
| "other_nutritional_substances.full.json", | |
| ) | |
| category = ("category", "categories.full.json") | |
| ingredient = ("ingredient", "ingredients.full.json") | |
| label = ("label", "labels.full.json") | |
| brand = ("brand", "brands.full.json") | |
| packaging_shape = ("packaging_shape", "packaging_shapes.full.json") | |
| packaging_material = ("packaging_material", "packaging_materials.full.json") | |
| packaging_recycling = ("packaging_recycling", "packaging_recycling.full.json") | |
| country = ("country", "countries.full.json") | |
| store = ("store", "stores.full.json") | |
| nova_group = ("nova_group", "nova_groups.full.json") | |
| packaging = ("packaging", "packaging.full.json") | |
| additive = ("additive", "additives.full.json") | |
| vitamin = ("vitamin", "vitamins.full.json") | |
| mineral = ("mineral", "minerals.full.json") | |
| amino_acid = ("amino_acid", "amino_acids.full.json") | |
| nucleotide = ("nucleotide", "nucleotides.full.json") | |
| allergen = ("allergen", "allergens.full.json") | |
| state = ("state", "states.full.json") | |
| data_quality = ("data_quality", "data_quality.full.json") | |
| origin = ("origin", "origins.full.json") | |
| language = ("language", "languages.full.json") | |
| other_nutritional_substance = ( | |
| "other_nutritional_substance", | |
| "other_nutritional_substances.full.json", | |
| ) | |
| def __new__(cls, value: str, dataset_filename: str): | |
| """ | |
| Override __new__ to allow storing the dataset filename | |
| associated with each taxonomy type. | |
| """ | |
| obj = str.__new__(cls, value) | |
| obj._value_ = value | |
| obj.dataset_filename = dataset_filename | |
| return obj |
| other_nutritional_substance = "other_nutritional_substance" | ||
| dataset_filename: str | ||
|
|
||
| def __new__(cls, value: str, dataset_filename: str): |
There was a problem hiding this comment.
logic taken from https://docs.python.org/3/howto/enum.html#when-to-use-new-vs-init
9a3d0ac to
44df653
Compare
|
| origin = "origin" | ||
| language = "language" | ||
| other_nutritional_substance = "other_nutritional_substance" | ||
| dataset_filename: str |
There was a problem hiding this comment.
@raphodn I'm really not a fan of storing additional data in an Enum. TaxonomyType is used by other projects such as Robotoff, which don't really care about the dataset_filename. Can we have either a dict mapping TaxonomyType to a dataset_filename?
In order to ensure every TaxonomyType has an associated dataset_filename, we could add a unit test that fails in case one is missing.
There was a problem hiding this comment.
Aaah i shouldn't have merged. Can i force push and drop the commit on the develop branch and reopen the PR ?
There was a problem hiding this comment.
It'd be better if every filename mapped perfectly to the enum, there's at least 1 plural exception 😅
- no plural: packaging_recycling, packaging, data_quality
- different plural: category, country



Description
Following #466
Solution
TaxonomyTypetype: add a newdataset_filenamekey &dataset_pathproperty_generate_file_pathand use both TAXONOMY_MAPPING & TaxonomyType.dataset_pathRelated issue(s)