File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -131,7 +131,18 @@ def _search_openalex_by_doi(
131131 logger .warning ("Invalid DOI after normalization: %s" , doi )
132132 return []
133133
134- query = {"bool" : {"filter" : [{"wildcard" : {"doi.keyword" : f"*{ normalized_doi } *" }}]}}
134+ query = {
135+ "bool" : {
136+ "filter" : [
137+ {
138+ "bool" : {
139+ "should" : self ._doi_exact_or_prefix_queries (normalized_doi ),
140+ "minimum_should_match" : 1 ,
141+ }
142+ }
143+ ]
144+ }
145+ }
135146 if year is not None :
136147 try :
137148 year = int (year )
@@ -151,6 +162,23 @@ def _search_openalex_by_doi(
151162 logger .error ("Error searching OpenAlex by DOI: %s" , exc )
152163 return []
153164
def _doi_exact_or_prefix_queries(self, normalized_doi: str) -> list[dict[str, Any]]:
    """Build the ``should`` clauses that match a DOI exactly or by prefix.

    The DOI is tried both bare and behind the common resolver URL forms
    (``doi.org`` / ``dx.doi.org`` over http and https), against each
    keyword field that may hold it. For every field the ``term`` clauses
    come first, followed by the ``prefix`` clauses.

    Args:
        normalized_doi: The already-normalized DOI, without any resolver
            URL in front of it.

    Returns:
        A list of single-clause query dicts meant to be placed in a
        ``bool.should`` with ``minimum_should_match: 1``. The list is
        empty when ``normalized_doi`` is empty.
    """
    # An empty value would yield a ``prefix: ""`` clause, which matches
    # every document; produce no clauses at all instead.
    if not normalized_doi:
        return []

    # No whitespace may surround the interpolated DOI: keyword fields are
    # compared verbatim, so a stray space would make the clause unmatchable.
    doi_values = [
        normalized_doi,
        f"https://doi.org/{normalized_doi}",
        f"http://doi.org/{normalized_doi}",
        f"https://dx.doi.org/{normalized_doi}",
        f"http://dx.doi.org/{normalized_doi}",
    ]
    fields = ["doi.keyword", "ids.doi.keyword"]

    queries: list[dict[str, Any]] = []
    for field in fields:
        queries.extend({"term": {field: value}} for value in doi_values)
        queries.extend({"prefix": {field: value}} for value in doi_values)

    return queries
181+
154182 def _search_openalex_by_isbn (
155183 self ,
156184 isbns : List [str ],
Original file line number Diff line number Diff line change @@ -214,6 +214,30 @@ def test_openalex_doi_match_keeps_language_variants_with_same_normalized_doi(sel
214214 )
215215 self .assertTrue (all (match [1 ] == "doi" for match in matches ))
216216
def test_openalex_doi_search_uses_exact_or_prefix_queries(self):
    """The DOI search body filters with term/prefix clauses, not a wildcard."""
    matcher = make_matcher("article")
    matcher.input_openalex_index = "raw_openalex_works"
    matcher.client = Mock()
    search_mock = matcher.client.client.search
    search_mock.return_value = {"hits": {"hits": []}}

    matcher._search_openalex_by_doi(
        "10.1590/0034-7167.202578SUPL101",
        {"publication_year": 2025},
    )

    request_body = search_mock.call_args.kwargs["body"]
    self.assertNotIn("wildcard", str(request_body))

    doi_clause = request_body["query"]["bool"]["filter"][0]["bool"]
    self.assertEqual(doi_clause["minimum_should_match"], 1)

    # The expected values are lowercase although the DOI passed in above
    # has an uppercase suffix.
    expected_clauses = (
        {"prefix": {"doi.keyword": "https://doi.org/10.1590/0034-7167.202578supl101"}},
        {"term": {"doi.keyword": "10.1590/0034-7167.202578supl101"}},
    )
    for clause in expected_clauses:
        self.assertIn(clause, doi_clause["should"])
240+
217241 def _openalex_article (self , openalex_id , language , doi_suffix ):
218242 titles = {
219243 "en" : "Ethical dilemmas in nursing professionals' work" ,
Original file line number Diff line number Diff line change 751751 "group_order" : 0
752752 }
753753 },
754+ "collection" : {
755+ "kind" : " index" ,
756+ "index_field_name" : " oca_data.scielo.collection" ,
757+ "filter" : {
758+ "size" : 100 ,
759+ "order" : { "_key" : " asc" }
760+ },
761+ "settings" : {
762+ "label" : " SciELO Collection" ,
763+ "widget" : " select" ,
764+ "multiple_selection" : true ,
765+ "group" : " coverage" ,
766+ "group_order" : 0
767+ }
768+ },
754769
755770 "publication_year" : {
756771 "kind" : " index" ,
12481263 { "value" : " scope" , "label" : " Dataset Coverage" },
12491264 { "value" : " source_indexed_in" , "label" : " Indexed In (OpenAlex)" },
12501265 { "value" : " source_scielo_indexed_in" , "label" : " Indexed In (SciELO)" },
1266+ { "value" : " collection" , "label" : " SciELO Collection" },
12511267 { "value" : " document_type" , "label" : " Document Type" },
12521268 { "value" : " document_language" , "label" : " Document Language" },
12531269 { "value" : " open_access" , "label" : " Open Access" },
12811297 " scope" ,
12821298 " source_indexed_in" ,
12831299 " source_scielo_indexed_in" ,
1300+ " collection" ,
12841301
12851302 " publication_year" ,
12861303 " document_type" ,
13661383 " scope" ,
13671384 " source_indexed_in" ,
13681385 " source_scielo_indexed_in" ,
1386+ " collection" ,
13691387
13701388 " publication_year" ,
13711389 " document_type" ,
You can’t perform that action at this time.
0 commit comments