11import copy
22import os
33import re
4- from typing import Any
54
65import requests
76
@@ -84,6 +83,106 @@ def generate_image_url(code: str, image_id: str) -> str:
8483 )
8584
8685
# The following two functions are copied from the openfoodfacts-python
# SDK. We don't use the SDK directly because we don't want to add it as
# a dependency.
89+
90+
def _sizes_without_url(sizes: JSONType) -> JSONType:
    """Return a copy of a `sizes` mapping with the `url` field removed
    from every per-size entry (the legacy schema has no `url` field)."""
    return {
        size: {k: v for k, v in size_data.items() if k != "url"}
        for size, size_data in sizes.items()
    }


def convert_to_legacy_schema(images: JSONType) -> JSONType:
    """Convert the images dictionary to the legacy schema.

    We've improved the schema of the `images` field, but the new
    schema is not compatible with the legacy schema. This function
    converts the new schema to the legacy schema.

    It can be used while migrating the existing Python codebase to the
    new schema.

    The new `images` schema is the following:

    - the `images` field contains the uploaded images under the `uploaded`
      key and the selected images under the `selected` key
    - `uploaded` contains the images that are uploaded, and maps the
      image ID to the detail about the image:
        - `uploaded_t`: the upload timestamp
        - `uploader`: the username of the uploader
        - `sizes`: dictionary mapping image size (`100`, `200`, `400`, `full`)
          to the information about each resized image:
            - `h`: the height of the image
            - `w`: the width of the image
            - `url`: the URL of the image
    - `selected` contains the images that are selected, and maps the
      image key (`nutrition`, `ingredients`, `packaging`, or `front`) to
      a dictionary mapping the language to the selected image details.
      The selected image details are the following fields:
        - `imgid`: the image ID
        - `rev`: the revision ID
        - `sizes`: dictionary mapping image size (`100`, `200`, `400`, `full`)
          to the information about each resized image:
            - `h`: the height of the image
            - `w`: the width of the image
            - `url`: the URL of the image
        - `generation`: information about how to generate the selected image
          from the uploaded image:
            - `x1`, `y1`, `x2`, `y2`: the coordinates of the crop
            - `angle`: the rotation angle of the selected image
            - `coordinates_image_size`: 400 or "full", indicates if the
              geometry coordinates are relative to the full image, or to a
              resized version (max width and max height=400)
            - `normalize`: indicates if colors should be normalized
            - `white_magic`: indicates if the background is white and should
              be removed (e.g. photo on a white sheet of paper)

    See https://github.com/openfoodfacts/openfoodfacts-server/pull/11818
    for more details.

    The input is returned unchanged (same object) when it does not follow
    the new schema. Otherwise a new flat dictionary is returned, where
    uploaded images are keyed by image ID and selected images are keyed by
    `{image key}_{lang}` (e.g. `front_en`), with the `generation` fields
    merged at the top level of each selected image.

    A missing `uploaded`, `selected` or `generation` entry is treated as
    empty instead of raising `KeyError`.
    """
    if not is_new_image_schema(images):
        return images

    images_with_legacy_schema: JSONType = {}

    # `is_new_image_schema` only requires one of the two top-level keys, so
    # either of them may legitimately be absent (or null) here.
    for image_id, image_data in (images.get("uploaded") or {}).items():
        images_with_legacy_schema[image_id] = {
            "sizes": _sizes_without_url(image_data["sizes"]),
            "uploaded_t": image_data["uploaded_t"],
            "uploader": image_data["uploader"],
        }

    for selected_key, image_by_lang in (images.get("selected") or {}).items():
        for lang, image_data in image_by_lang.items():
            # In the legacy schema the generation parameters sit next to
            # `imgid`/`rev`/`sizes` rather than in a nested dictionary.
            images_with_legacy_schema[f"{selected_key}_{lang}"] = {
                "imgid": image_data["imgid"],
                "rev": image_data["rev"],
                "sizes": _sizes_without_url(image_data["sizes"]),
                **(image_data.get("generation") or {}),
            }

    return images_with_legacy_schema
171+
172+
def is_new_image_schema(images_data: JSONType) -> bool:
    """Tell whether an `images` dictionary uses the new Product Opener
    images schema.

    The new schema is recognised by the presence of a top-level `selected`
    or `uploaded` key; an empty or missing dictionary is never considered
    to follow it.

    See https://github.com/openfoodfacts/openfoodfacts-server/pull/11818 for
    more information about this new schema.
    """
    has_data = bool(images_data)
    # Short-circuits on falsy input, so the membership tests only run on a
    # non-empty container.
    return has_data and any(
        marker in images_data for marker in ("selected", "uploaded")
    )
184+
185+
# Base URL of the Open Food Facts API, overridable through the
# `OFF_API_URL` environment variable. This is not part of search-a-licious,
# so we read the environment directly instead of using the settings object.
OFF_API_URL = os.getenv("OFF_API_URL", "https://world.openfoodfacts.org")
89188
@@ -92,9 +191,10 @@ class TaxonomyPreprocessor(BaseTaxonomyPreprocessor):
92191 """Preprocessor for Open Food Facts taxonomies."""
93192
94193 def preprocess (self , taxonomy : Taxonomy , node : TaxonomyNode ) -> TaxonomyNodeResult :
95- """Preprocess a taxonomy node,
194+ """Preprocess a taxonomy node.
96195
97- We add the main language, and we also have specificities for some taxonomies
196+ We add the main language, and we also have specificities for some
197+ taxonomies.
98198 """
99199 if taxonomy .name == "brands" :
100200 # brands are english only, put them in "main lang"
@@ -165,7 +265,8 @@ def preprocess(self, document: JSONType) -> FetcherResult:
165265 return FetcherResult (status = FetcherStatus .FOUND , document = document )
166266
167267 def add_main_language (self , document : JSONType ) -> None :
168- """We add a "main" language to translated fields (text_lang and taxonomies)
268+ """We add a "main" language to translated fields (text_lang and
269+ taxonomies)
169270
170271 This enables searching in the main language of the product.
171272 This is important because most of the time,
@@ -208,14 +309,14 @@ def process_after(self, result: JSONType) -> JSONType:
208309 return result
209310
210311 @staticmethod
211- def build_image_fields (product : JSONType ):
312+ def build_image_fields (product : JSONType ) -> JSONType :
212313 """Images are stored in a weird way in Open Food Facts,
213314 We want to make it far more simple to use in results.
214315 """
215316 # Python copy of the code from
216317 # https://github.com/openfoodfacts/openfoodfacts-server/blob/b297ed858d526332649562cdec5f1d36be184984/lib/ProductOpener/Display.pm#L10128
217318 code = product ["code" ]
218- fields : dict [ str , Any ] = {}
319+ fields : JSONType = {}
219320
220321 for image_type in ["front" , "ingredients" , "nutrition" , "packaging" ]:
221322 display_ids = []
@@ -224,7 +325,7 @@ def build_image_fields(product: JSONType):
224325 display_ids .append (f"{ image_type } _{ lang } " )
225326
226327 display_ids .append (image_type )
227- images = product .get ("images" , {})
328+ images = convert_to_legacy_schema ( product .get ("images" , {}) )
228329
229330 for display_id in display_ids :
230331 if display_id in images and images [display_id ].get ("sizes" ):
@@ -276,4 +377,4 @@ def build_image_fields(product: JSONType):
276377 }
277378 )
278379
279- return fields
380+ return fields
0 commit comments