Skip to content

Commit 5af00f1

Browse files
authored
refactor: avoid crashes when the new nutrition schema is deployed (#1837)
This is the first implementation step of #1836. With this first milestone, the backend won't crash if Product Opener uses the new schema version 1003.
1 parent 6d50d4e commit 5af00f1

6 files changed

Lines changed: 38 additions & 63 deletions

File tree

robotoff/insights/importer.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,12 +1501,6 @@ def generate_candidates(
15011501
predictions: list[Prediction],
15021502
product_id: ProductIdentifier,
15031503
) -> Iterator[ProductInsight]:
1504-
if product and product.nutrition_data_prepared:
1505-
# Don't generate candidates if the product has nutrition
1506-
# information per prepared product, as the model doesn't
1507-
# handle this case
1508-
return
1509-
15101504
image_orientation_prediction = next(
15111505
(p for p in predictions if p.type == PredictionType.image_orientation),
15121506
None,
@@ -1584,6 +1578,19 @@ def keep_prediction(product: Product | None, nutrients: JSONType) -> bool:
15841578
:param nutrients: the nutrient values extracted from the image
15851579
:return: True if the prediction should be kept, False otherwise
15861580
"""
1581+
if product and (
1582+
# Don't support nutrition extraction with the new `nutriments` schema
1583+
product.schema_version
1584+
> 1002
1585+
):
1586+
return False
1587+
1588+
if product and product.nutrition_data_prepared:
1589+
# Don't keep the prediction if the product has nutrition
1590+
# information per prepared product, as the model doesn't
1591+
# handle this case
1592+
return False
1593+
15871594
if product is None or not product.nutriments:
15881595
# We don't have access to MongoDB or the nutriment data is missing
15891596
# completely, so we generate an insight
@@ -1720,6 +1727,10 @@ def add_optional_fields(cls, insight: ProductInsight, product: Product | None):
17201727
# Stop here
17211728
return
17221729

1730+
# Don't support nutrition extraction with the new `nutriments` schema
1731+
if product.schema_version > 1002:
1732+
return
1733+
17231734
campaigns: list[str] = []
17241735
if set(product.nutriments.keys()):
17251736
# The product already has some nutrient values, so we add it to the

robotoff/metrics.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import datetime
22
import logging
3-
from typing import Iterable, Iterator
43
from urllib.parse import urlparse
54

65
from influxdb_client import InfluxDBClient
@@ -276,49 +275,3 @@ def generate_insight_metrics(target_datetime: datetime.datetime) -> list[dict]:
276275
}
277276
)
278277
return inserts
279-
280-
281-
def generate_recent_changes_metrics(items: Iterable[dict]) -> Iterator[dict]:
282-
for item in items:
283-
comment: str = item["comment"]
284-
diffs: dict = item["diffs"]
285-
uploaded_images = diffs.setdefault("uploaded_images", {})
286-
selected_images: dict = diffs.setdefault("selected_images", {})
287-
nutriments: dict = diffs.setdefault("nutriments", {})
288-
nutriments_add: dict = nutriments.get("add", {})
289-
nutriments_change: dict = nutriments.get("change", {})
290-
nutriments_delete: dict = nutriments.get("delete", {})
291-
fields: dict = diffs.setdefault("fields", {})
292-
fields_add: dict = fields.setdefault("add", {})
293-
fields_change: dict = fields.setdefault("change", {})
294-
packagings: dict = diffs.setdefault("packagings", {})
295-
yield {
296-
"measurement": "recent_changes",
297-
"tags": {
298-
"countries_tags": item["countries_tags"],
299-
"user_id": item["userid"],
300-
"is_smooth_app": int("Smoothie - OpenFoodFacts" in comment),
301-
"by_robotoff": int("[robotoff]" in comment),
302-
"has_image_upload": int(bool(uploaded_images.get("add", {}))),
303-
"has_image_delete": int(bool(uploaded_images.get("delete", {}))),
304-
"has_image_selection_change": int(
305-
bool(selected_images.get("change", {}))
306-
),
307-
"has_image_selection_add": int(bool(selected_images.get("add", {}))),
308-
"has_image_selection_delete": int(
309-
bool(selected_images.get("delete", {}))
310-
),
311-
"has_nutriment_change": int(bool(nutriments_change)),
312-
"has_nutriment_add": int(bool(nutriments_add)),
313-
"has_nutriment_delete": int(bool(nutriments_delete)),
314-
"has_nutriscore_added": int("nutrition-score-fr" in nutriments_add),
315-
"has_nutriscore_change": int("nutrition-score-fr" in nutriments_change),
316-
"has_nutriscore_delete": int("nutrition-score-fr" in nutriments_delete),
317-
"has_categories_add": int("categories" in fields_add),
318-
"has_categories_change": int("categories" in fields_change),
319-
"has_packagings_add": int(bool(packagings.get("add", {}))),
320-
"has_packagings_change": int(bool(packagings.get("change", {}))),
321-
},
322-
"time": item["t"],
323-
"fields": {"count": 1},
324-
}

robotoff/prediction/category/neural/keras_category_classifier_3_0/preprocessing.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def generate_inputs_dict(
5353
) -> JSONType:
5454
"""Generate inputs for v3 category predictor model.
5555
56-
:param product: the product dict, the `product_name` and `ingredients`
57-
fields are used, if provided
56+
:param product: the product dict, the `product_name`, `ingredients`
57+
and `nutriments` fields are used, if provided
5858
:param ocr_texts: a list of detected OCR texts, one per image
5959
:param image_embeddings: embeddings generated by CLIP model of up to
6060
the `MAX_IMAGE_EMBEDDING` most recent images.
@@ -75,12 +75,14 @@ def generate_inputs_dict(
7575
or [""],
7676
}
7777

78-
nutriments = product.get("nutriments", {})
79-
for nutriment_name in NUTRIMENT_NAMES:
80-
inputs[nutriment_name] = transform_nutrition_input(
81-
nutriments.get(f"{nutriment_name.replace('_', '-')}_100g"),
82-
nutriment_name=nutriment_name,
83-
)
78+
# Only support legacy nutrition schema for now
79+
if product.get("schema_version", 999) <= 1002:
80+
nutriments = product.get("nutriments", {})
81+
for nutriment_name in NUTRIMENT_NAMES:
82+
inputs[nutriment_name] = transform_nutrition_input(
83+
nutriments.get(f"{nutriment_name.replace('_', '-')}_100g"),
84+
nutriment_name=nutriment_name,
85+
)
8486

8587
if image_embeddings is None:
8688
# No image is available, so we provide zero-filled image embedding

robotoff/products.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ class Product:
435435
"nutrition_data_per",
436436
"nutrition_data_prepared",
437437
"serving_size",
438+
"schema_version",
438439
)
439440

440441
def __init__(self, product: JSONType):
@@ -474,6 +475,8 @@ def __init__(self, product: JSONType):
474475
product.get("nutrition_data_prepared") == "on"
475476
)
476477
self.serving_size: str | None = product.get("serving_size")
478+
# if `schema_version` is not present, we assume it's 999
479+
self.schema_version: int = product.get("schema_version", 999)
477480

478481
@staticmethod
479482
def get_fields(item: JSONType) -> set[str]:

robotoff/scheduler/latent.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,16 @@ def generate_fiber_quality_facet() -> None:
4949

5050
product_id = ProductIdentifier(barcode, server_type)
5151
product = product_store.get_product(
52-
product_id, ["nutriments", "data_quality_tags", "images"]
52+
product_id, ["nutriments", "data_quality_tags", "images", "schema_version"]
5353
)
5454

5555
if product is None:
5656
continue
5757

58+
# Don't support (yet) the new nutrition schema
59+
if product.get("schema_version", 999) > 1002:
60+
continue
61+
5862
nutriments = product.get("nutriments", {})
5963
data_quality_tags = product.get("data_quality_tags", {})
6064
images = product.get("images", {})

robotoff/workers/tasks/product_updated.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,9 @@ def should_rerun_category_predictor(diffs: JSONType | None) -> bool:
112112
fields = diffs.get("fields", {})
113113
updated_fields = fields.get("change", [])
114114
added_fields = fields.get("add", [])
115-
has_nutriments_change = "nutriments" in diffs
115+
# TODO(raphael): remove the `"nutriments" in diffs` check once the new
116+
# nutrition schema is fully rolled out.
117+
has_nutriments_change = "nutriments" in diffs or "nutrition" in diffs
116118
uploaded_images = diffs.get("uploaded_images", {})
117119
is_uploaded_image = "add" in uploaded_images
118120
is_deleted_image = "delete" in uploaded_images

0 commit comments

Comments
 (0)