Skip to content

Commit 16598f3

Browse files
committed
fix: always convert images field to legacy schema
See openfoodfacts/openfoodfacts-server#11818 for more details about the schema changes.
1 parent f91c59d commit 16598f3

1 file changed

Lines changed: 109 additions & 8 deletions

File tree

app/openfoodfacts.py

Lines changed: 109 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import copy
22
import os
33
import re
4-
from typing import Any
54

65
import requests
76

@@ -84,6 +83,106 @@ def generate_image_url(code: str, image_id: str) -> str:
8483
)
8584

8685

86+
# The two following functions are copied from openfoodfacts-python
87+
# SDK. We don't use the SDK directly as we don't want to add it as a
88+
# dependency.
89+
90+
91+
def _without_urls(sizes: JSONType) -> JSONType:
    """Return a copy of a `sizes` mapping with the `url` field dropped.

    The legacy schema does not store URLs, only the dimensions of each
    resized image.
    """
    return {
        size: {k: v for k, v in size_data.items() if k != "url"}
        for size, size_data in sizes.items()
    }


def convert_to_legacy_schema(images: JSONType) -> JSONType:
    """Convert the images dictionary to the legacy schema.

    We've improved the schema of the `images` field, but the new
    schema is not compatible with the legacy schema. This function
    converts the new schema to the legacy schema.

    It can be used while migrating the existing Python codebase to the
    new schema.

    The new `images` schema is the following:

    - the `images` field contains the uploaded images under the `uploaded`
      key and the selected images under the `selected` key
    - `uploaded` contains the images that are uploaded, and maps the
      image ID to the detail about the image:
        - `uploaded_t`: the upload timestamp
        - `uploader`: the username of the uploader
        - `sizes`: dictionary mapping image size (`100`, `200`, `400`, `full`)
          to the information about each resized image:
            - `h`: the height of the image
            - `w`: the width of the image
            - `url`: the URL of the image
    - `selected` contains the images that are selected, and maps the
      image key (`nutrition`, `ingredients`, `packaging`, or `front`) to
      a dictionary mapping the language to the selected image details.
      The selected image details are the following fields:
        - `imgid`: the image ID
        - `rev`: the revision ID
        - `sizes`: dictionary mapping image size (`100`, `200`, `400`, `full`)
          to the information about each resized image:
            - `h`: the height of the image
            - `w`: the width of the image
            - `url`: the URL of the image
        - `generation`: information about how to generate the selected image
          from the uploaded image:
            - `x1`, `y1`, `x2`, `y2`: the coordinates of the crop
            - `angle`: the rotation angle of the selected image
            - `coordinates_image_size`: 400 or "full", indicates if the
              geometry coordinates are relative to the full image, or to a
              resized version (max width and max height=400)
            - `normalize`: indicates if colors should be normalized
            - `white_magic`: indicates if the background is white and should
              be removed (e.g. photo on a white sheet of paper)

    See https://github.com/openfoodfacts/openfoodfacts-server/pull/11818
    for more details.

    :param images: the value of the product `images` field, in either
        schema (may be empty or None)
    :return: `images` itself, untouched, when it does not follow the new
        schema; otherwise a new flat dictionary where uploaded images are
        keyed by image ID and selected images by `{image key}_{lang}`,
        with the `url` field removed from every `sizes` entry and the
        `generation` fields merged at the top level of each selected image
    :raises KeyError: if an uploaded image lacks `sizes`, `uploaded_t` or
        `uploader`, or a selected image lacks `imgid`, `rev` or `sizes`
    """
    if not is_new_image_schema(images):
        return images

    images_with_legacy_schema = {}

    # `is_new_image_schema` only requires ONE of `uploaded` / `selected` to
    # be present, so each section must be treated as optional here.
    for image_id, image_data in (images.get("uploaded") or {}).items():
        images_with_legacy_schema[image_id] = {
            "sizes": _without_urls(image_data["sizes"]),
            "uploaded_t": image_data["uploaded_t"],
            "uploader": image_data["uploader"],
        }

    for selected_key, image_by_lang in (images.get("selected") or {}).items():
        for lang, image_data in image_by_lang.items():
            images_with_legacy_schema[f"{selected_key}_{lang}"] = {
                "imgid": image_data["imgid"],
                "rev": image_data["rev"],
                "sizes": _without_urls(image_data["sizes"]),
                # Generation fields live at the top level in the legacy
                # schema; a missing `generation` entry is treated as empty.
                **(image_data.get("generation") or {}),
            }

    return images_with_legacy_schema
171+
172+
173+
def is_new_image_schema(images_data: JSONType) -> bool:
    """Tell whether `images_data` uses the new Product Opener images schema.

    An empty or missing value never counts as the new schema. Otherwise,
    the presence of a top-level `selected` or `uploaded` key is what
    identifies it.

    See https://github.com/openfoodfacts/openfoodfacts-server/pull/11818 for
    more information about this new schema.
    """
    if images_data:
        return any(key in images_data for key in ("selected", "uploaded"))
    return False
184+
185+
87186
# This is not part of search-a-licious, so we don't use the settings object
88187
OFF_API_URL = os.environ.get("OFF_API_URL", "https://world.openfoodfacts.org")
89188

@@ -92,9 +191,10 @@ class TaxonomyPreprocessor(BaseTaxonomyPreprocessor):
92191
"""Preprocessor for Open Food Facts taxonomies."""
93192

94193
def preprocess(self, taxonomy: Taxonomy, node: TaxonomyNode) -> TaxonomyNodeResult:
95-
"""Preprocess a taxonomy node,
194+
"""Preprocess a taxonomy node.
96195
97-
We add the main language, and we also have specificities for some taxonomies
196+
We add the main language, and we also have specificities for some
197+
taxonomies.
98198
"""
99199
if taxonomy.name == "brands":
100200
# brands are english only, put them in "main lang"
@@ -165,7 +265,8 @@ def preprocess(self, document: JSONType) -> FetcherResult:
165265
return FetcherResult(status=FetcherStatus.FOUND, document=document)
166266

167267
def add_main_language(self, document: JSONType) -> None:
168-
"""We add a "main" language to translated fields (text_lang and taxonomies)
268+
"""We add a "main" language to translated fields (text_lang and
269+
taxonomies)
169270
170271
This enables searching in the main language of the product.
171272
This is important because most of the time,
@@ -208,14 +309,14 @@ def process_after(self, result: JSONType) -> JSONType:
208309
return result
209310

210311
@staticmethod
211-
def build_image_fields(product: JSONType):
312+
def build_image_fields(product: JSONType) -> JSONType:
212313
"""Images are stored in a weird way in Open Food Facts,
213314
We want to make it much simpler to use in results.
214315
"""
215316
# Python copy of the code from
216317
# https://github.com/openfoodfacts/openfoodfacts-server/blob/b297ed858d526332649562cdec5f1d36be184984/lib/ProductOpener/Display.pm#L10128
217318
code = product["code"]
218-
fields: dict[str, Any] = {}
319+
fields: JSONType = {}
219320

220321
for image_type in ["front", "ingredients", "nutrition", "packaging"]:
221322
display_ids = []
@@ -224,7 +325,7 @@ def build_image_fields(product: JSONType):
224325
display_ids.append(f"{image_type}_{lang}")
225326

226327
display_ids.append(image_type)
227-
images = product.get("images", {})
328+
images = convert_to_legacy_schema(product.get("images", {}))
228329

229330
for display_id in display_ids:
230331
if display_id in images and images[display_id].get("sizes"):
@@ -276,4 +377,4 @@ def build_image_fields(product: JSONType):
276377
}
277378
)
278379

279-
return fields
380+
return fields

0 commit comments

Comments
 (0)