2
2
Database helper functions for API
3
3
"""
4
4
5
+ import asyncio
5
6
import logging
6
7
import re
7
8
import shutil
34
35
)
35
36
from .models .node_models import EntryNodeCreate
36
37
from .models .project_models import ProjectCreate , ProjectEdit , ProjectStatus
38
+ from .settings import EXTERNAL_TAXONOMIES
37
39
38
40
log = logging .getLogger (__name__ )
39
41
@@ -50,9 +52,8 @@ def __init__(self, branch_name, taxonomy_name):
50
52
self .branch_name = branch_name
51
53
self .project_name = "p_" + taxonomy_name + "_" + branch_name
52
54
53
def taxonomy_path_in_repository(self, taxonomy_name: str):
    """
    Return the path of a taxonomy file inside the GitHub repository.

    The taxonomy is passed explicitly (instead of always using
    ``self.taxonomy_name``) so the same helper can be used for any
    taxonomy, not only the one this project edits.

    :param taxonomy_name: name of the taxonomy to locate
    :return: whatever ``utils.taxonomy_path_in_repository`` returns for
        that name
    """
    return utils.taxonomy_path_in_repository(taxonomy_name)
56
57
57
58
def get_label (self , id ):
58
59
"""
@@ -86,29 +87,34 @@ async def get_local_taxonomy_file(self, tmpdir: str, uploadfile: UploadFile):
86
87
await run_in_threadpool (shutil .copyfileobj , uploadfile .file , f )
87
88
return filepath
88
89
89
async def get_github_taxonomy_file(self, tmpdir: str, taxonomy_name: str):
    """
    Download a taxonomy file from the ``main`` branch on GitHub.

    The file is written to ``<tmpdir>/<taxonomy_name>.txt``. When the
    downloaded taxonomy is the one this project edits
    (``taxonomy_name == self.taxonomy_name``), the current commit sha of
    ``main`` and the file sha are also stored on the project.

    :param tmpdir: directory in which the downloaded file is stored
    :param taxonomy_name: name of the taxonomy to download
    :return: path of the downloaded file
    :raises TaxonomyImportError: if the download or the project update fails
    """
    async with TransactionCtx():
        filepath = f"{tmpdir}/{taxonomy_name}.txt"
        path_in_repository = self.taxonomy_path_in_repository(taxonomy_name)
        target_url = (
            f"https://raw.githubusercontent.com/{settings.repo_uri}"
            f"/main/{path_in_repository}"
        )
        try:
            # get taxonomy file (urlretrieve is blocking, hence the threadpool)
            await run_in_threadpool(urllib.request.urlretrieve, target_url, filepath)
            if taxonomy_name == self.taxonomy_name:
                # this is the taxonomy we want to edit
                # track the current commit to know where to start the PR from
                github_object = GithubOperations(self.taxonomy_name, self.branch_name)
                commit_sha = (await github_object.get_branch("main")).commit.sha
                file_sha = await github_object.get_file_sha()
                await edit_project(
                    self.project_name,
                    ProjectEdit(
                        github_checkout_commit_sha=commit_sha,
                        github_file_latest_sha=file_sha,
                    ),
                )
            return filepath
        except Exception as e:
            # any failure (network, GitHub API, project update) is reported
            # to callers as an import error, keeping the original cause
            raise TaxonomyImportError() from e
110
116
111
- def parse_taxonomy (self , filepath : str ):
117
+ def parse_taxonomy (self , main_filepath : str , other_filepaths : list [ str ] | None = None ):
112
118
"""
113
119
Helper function to call the Open Food Facts Python Taxonomy Parser
114
120
"""
@@ -117,7 +123,7 @@ def parse_taxonomy(self, filepath: str):
117
123
parser_object = parser .Parser (session )
118
124
try :
119
125
# Parse taxonomy with given file name and branch name
120
- parser_object (filepath , self .branch_name , self .taxonomy_name )
126
+ parser_object (main_filepath , other_filepaths , self .branch_name , self .taxonomy_name )
121
127
except Exception as e :
122
128
# outer exception handler will put project status to FAILED
123
129
raise TaxonomyParsingError () from e
@@ -126,11 +132,14 @@ async def get_and_parse_taxonomy(self, uploadfile: UploadFile | None = None):
126
132
try :
127
133
with tempfile .TemporaryDirectory (prefix = "taxonomy-" ) as tmpdir :
128
134
filepath = await (
129
- self .get_github_taxonomy_file (tmpdir )
135
+ self .get_github_taxonomy_file (tmpdir , self . taxonomy_name )
130
136
if uploadfile is None
131
137
else self .get_local_taxonomy_file (tmpdir , uploadfile )
132
138
)
133
- await run_in_threadpool (self .parse_taxonomy , filepath )
139
+ other_filepaths = None
140
+ if self .taxonomy_name in EXTERNAL_TAXONOMIES :
141
+ other_filepaths = await self .fetch_external_taxonomy_files (tmpdir )
142
+ await run_in_threadpool (self .parse_taxonomy , filepath , other_filepaths )
134
143
async with TransactionCtx ():
135
144
error_node = await get_error_node (self .project_name )
136
145
errors_count = len (error_node .errors ) if error_node else 0
@@ -149,6 +158,25 @@ async def get_and_parse_taxonomy(self, uploadfile: UploadFile | None = None):
149
158
log .exception (e )
150
159
raise e
151
160
161
async def fetch_external_taxonomy_files(self, tmpdir: str) -> list[str]:
    """
    Helper function to fetch external taxonomies concurrently from Github

    One download task is started per entry of
    ``EXTERNAL_TAXONOMIES[self.taxonomy_name]``.

    :param tmpdir: directory in which the downloaded files are stored
    :return: paths of the downloaded files, in the same order as the
        entries of ``EXTERNAL_TAXONOMIES[self.taxonomy_name]``
    :raises Exception: the first error raised by any download; the
        remaining downloads are cancelled and awaited before it propagates
    """
    tasks = [
        asyncio.create_task(self.get_github_taxonomy_file(tmpdir, external_taxonomy))
        for external_taxonomy in EXTERNAL_TAXONOMIES[self.taxonomy_name]
    ]
    try:
        # gather preserves the order of its arguments in the result
        return list(await asyncio.gather(*tasks))
    except Exception:
        # Do not leave sibling downloads running in the background once one
        # of them failed: cancel them and wait until they have all finished
        # so that no task outlives this call.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        raise
179
+
152
180
async def import_taxonomy (
153
181
self ,
154
182
description : str ,
@@ -689,9 +717,8 @@ async def full_text_search(self, text):
689
717
where score_ > 0
690
718
return node, score_ as score
691
719
}
692
- with node.id as node, score
693
- RETURN node, sum(score) as score
694
-
720
+ WITH node.id AS node_id, node.is_external AS is_external, score
721
+ RETURN {id: node_id, is_external: is_external} AS node, sum(score) AS score
695
722
ORDER BY score DESC
696
723
"""
697
724
_result = await get_current_transaction ().run (query , params )
0 commit comments