Skip to content

Commit ac9556f

Browse files
committed
Use a separate request handler for similarity searches
Register a /select_similarity request handler on the freesound collection and use it from search_sounds when similar_to is not None This allows us to check metrics for normal search or similarity search in grafana
1 parent 497090e commit ac9556f

6 files changed

Lines changed: 139 additions & 4 deletions

File tree

search/solrapi.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@ def create_collection_and_schema(self, delete_default_fields_definition, schema_
200200
if copyfields:
201201
self.create_copyfields(copyfields)
202202

203+
for handler in schema_definition.get("requestHandlers", []):
204+
self.upsert_request_handler(handler["name"], handler["class"], defaults=handler.get("defaults"))
205+
203206
def collection_exists(self):
204207
if self.logging_verbose >= 1:
205208
logger.info(f"Checking if collection {self.collection} exists")
@@ -237,6 +240,36 @@ def disable_auto_data_driven_schema(self):
237240
if self.logging_verbose >= 1:
238241
logger.info(f"Disabled auto data driven schema for collection {self.collection}")
239242

243+
def upsert_request_handler(self, name, class_name, defaults=None):
244+
"""Idempotently add or update a Solr request handler.
245+
246+
Tries add-requesthandler first. Solr's Config API returns HTTP 200 with
247+
an errorMessages payload (rather than 4xx) when a command is rejected —
248+
e.g. when the handler already exists — so we detect that and retry with
249+
update-requesthandler. No separate GET, so no TOCTOU window between the
250+
existence check and the write.
251+
"""
252+
handler_def = {"name": name, "class": class_name}
253+
if defaults is not None:
254+
handler_def["defaults"] = defaults
255+
256+
url = urljoin(self.base_url, f"api/collections/{self.collection}/config")
257+
body = self._make_post(url, {"add-requesthandler": handler_def})
258+
if not body.get("errorMessages"):
259+
if self.logging_verbose >= 1:
260+
logger.info(f"add-requesthandler {name} on collection {self.collection}")
261+
return
262+
263+
# Add was rejected — most likely because the handler already exists.
264+
# Retry as update; if that also reports errorMessages, surface the failure.
265+
if self.logging_verbose >= 2:
266+
logger.info(f"add-requesthandler {name} rejected by Solr ({body['errorMessages']}); retrying as update")
267+
body = self._make_post(url, {"update-requesthandler": handler_def})
268+
if body.get("errorMessages"):
269+
raise SolrAPIError(f"Failed to upsert request handler {name}: {body['errorMessages']}")
270+
if self.logging_verbose >= 1:
271+
logger.info(f"update-requesthandler {name} on collection {self.collection}")
272+
240273
def delete_collection(self):
241274
"""Delete a Solr collection."""
242275
url = urljoin(self.base_url, f"api/collections/{self.collection}")

search/test_solrapi.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
from unittest import mock
2+
3+
import pytest
4+
5+
from search.solrapi import SolrAPIError, SolrManagementAPI
6+
7+
8+
def _mock_response(json_payload):
9+
resp = mock.Mock()
10+
resp.json.return_value = json_payload
11+
resp.raise_for_status = mock.Mock()
12+
return resp
13+
14+
15+
@mock.patch("search.solrapi.requests")
16+
def test_upsert_request_handler_adds_when_absent(mock_requests):
17+
# Successful add → 200 with no errorMessages.
18+
mock_requests.post.return_value = _mock_response({})
19+
20+
api = SolrManagementAPI("http://search:8983", "freesound1234")
21+
api.upsert_request_handler("/select_similarity", "solr.SearchHandler")
22+
23+
assert mock_requests.post.call_count == 1
24+
_, kwargs = mock_requests.post.call_args
25+
assert kwargs["json"] == {"add-requesthandler": {"name": "/select_similarity", "class": "solr.SearchHandler"}}
26+
27+
28+
@mock.patch("search.solrapi.requests")
29+
def test_upsert_request_handler_falls_back_to_update_on_duplicate(mock_requests):
30+
# Solr returns 200 with errorMessages when the handler already exists; we
31+
# then retry with update, which succeeds.
32+
mock_requests.post.side_effect = [
33+
_mock_response({"errorMessages": [{"add-requesthandler": "'/select_similarity' already exists"}]}),
34+
_mock_response({}),
35+
]
36+
37+
api = SolrManagementAPI("http://search:8983", "freesound1234")
38+
api.upsert_request_handler("/select_similarity", "solr.SearchHandler")
39+
40+
assert mock_requests.post.call_count == 2
41+
second_call_json = mock_requests.post.call_args_list[1].kwargs["json"]
42+
assert second_call_json == {"update-requesthandler": {"name": "/select_similarity", "class": "solr.SearchHandler"}}
43+
44+
45+
@mock.patch("search.solrapi.requests")
46+
def test_upsert_request_handler_raises_when_update_also_fails(mock_requests):
47+
# Both add and update return errorMessages → surface as SolrAPIError.
48+
mock_requests.post.side_effect = [
49+
_mock_response({"errorMessages": [{"add-requesthandler": "boom"}]}),
50+
_mock_response({"errorMessages": [{"update-requesthandler": "boom too"}]}),
51+
]
52+
53+
api = SolrManagementAPI("http://search:8983", "freesound1234")
54+
with pytest.raises(SolrAPIError):
55+
api.upsert_request_handler("/select_similarity", "solr.SearchHandler")
56+
57+
58+
@mock.patch("search.solrapi.requests")
59+
def test_upsert_request_handler_includes_defaults(mock_requests):
60+
mock_requests.post.return_value = _mock_response({})
61+
62+
api = SolrManagementAPI("http://search:8983", "freesound1234")
63+
api.upsert_request_handler("/select_similarity", "solr.SearchHandler", defaults={"defType": "dismax"})
64+
65+
_, kwargs = mock_requests.post.call_args
66+
assert kwargs["json"]["add-requesthandler"]["defaults"] == {"defType": "dismax"}

utils/search/backends/solr555pysolr.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,8 @@ def search_sounds(
687687
if similar_to is not None
688688
else settings.SEARCH_SOLR_TIMEOUT_SECONDS
689689
)
690+
# Use a separate request handler for similarity to have separate metrics
691+
search_handler = "select_similarity" if similar_to is not None else None
690692

691693
if field_list is None:
692694
# We generally only want the sound IDs of the results as we load data from DB
@@ -890,7 +892,9 @@ def search_sounds(
890892
if not group_counts_as_one_in_facets and "json.facet" in query_as_kwargs:
891893
facets_kwarg = query_as_kwargs["json.facet"]
892894
query_as_kwargs["json.facet"] = {}
893-
results = self.get_sounds_index(timeout=timeout).search(**query_as_kwargs)
895+
results = self.get_sounds_index(timeout=timeout, search_handler=search_handler).search(
896+
**query_as_kwargs
897+
)
894898

895899
# Now make the second query in which we get the facets and the total number of results, and remove the "collapse" filter.
896900
fq = [fq_element for fq_element in query_as_kwargs["fq"] if "collapse field" not in fq_element]
@@ -899,13 +903,17 @@ def search_sounds(
899903
query_as_kwargs["expand"] = False
900904
if not group_counts_as_one_in_facets and facets_kwarg is not None:
901905
query_as_kwargs["json.facet"] = facets_kwarg
902-
results_extra_query = self.get_sounds_index(timeout=timeout).search(**query_as_kwargs)
906+
results_extra_query = self.get_sounds_index(timeout=timeout, search_handler=search_handler).search(
907+
**query_as_kwargs
908+
)
903909
if not group_counts_as_one_in_facets:
904910
results.facets = results_extra_query.facets
905911
results.non_grouped_number_of_results = results_extra_query.num_found
906912
else:
907913
# If we are not using collapse and expand query parser (and/or not grouping by pack), just run the query.
908-
results = self.get_sounds_index(timeout=timeout).search(**query_as_kwargs)
914+
results = self.get_sounds_index(timeout=timeout, search_handler=search_handler).search(
915+
**query_as_kwargs
916+
)
909917

910918
# Facets returned in results use the corresponding solr fieldnames as keys. We want to convert them to the
911919
# original fieldnames so that the rest of the code can use them without knowing about the solr fieldnames.

utils/search/backends/solr9pysolr.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,16 @@ def __init__(self, sounds_index_url=None, forum_index_url=None):
4141
self.forum_index_url = forum_index_url
4242
self.solr_base_url = settings.SOLR9_BASE_URL
4343

44-
def get_sounds_index(self, timeout=settings.SEARCH_SOLR_TIMEOUT_SECONDS):
44+
def get_sounds_index(self, timeout=settings.SEARCH_SOLR_TIMEOUT_SECONDS, search_handler=None):
45+
# Only forward search_handler when set
46+
extra = {"search_handler": search_handler} if search_handler is not None else {}
4547
return pysolr.Solr(
4648
self.sounds_index_url,
4749
encoder=solr555pysolr.FreesoundSoundJsonEncoder(),
4850
results_cls=solr555pysolr.SolrResponseInterpreter,
4951
always_commit=True,
5052
timeout=timeout,
53+
**extra,
5154
)
5255

5356
def get_forum_index(self, timeout=settings.SEARCH_SOLR_TIMEOUT_SECONDS):

utils/search/schema/freesound.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,5 +531,11 @@
531531
"type_facet"
532532
]
533533
}
534+
],
535+
"requestHandlers": [
536+
{
537+
"name": "/select_similarity",
538+
"class": "solr.SearchHandler"
539+
}
534540
]
535541
}

utils/tests/test_solr_query.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
from unittest import mock
2+
13
import pytest
24
from django.conf import settings
35

46
from utils.search import SearchEngineException, SearchEngineTimeoutException
7+
from utils.search.backends.solr9pysolr import Solr9PySolrSearchEngine
58
from utils.search.backends.solr555pysolr import Solr555PySolrSearchEngine
69
from utils.search.backends.solr_common import SolrQuery, SolrResponseInterpreter
710

@@ -108,3 +111,19 @@ def test_solr_response_partial_results_grouped_missing_ngroups():
108111
}
109112
with pytest.raises(SearchEngineTimeoutException):
110113
SolrResponseInterpreter(response)
114+
115+
116+
@mock.patch("utils.search.backends.solr9pysolr.pysolr.Solr")
117+
def test_get_sounds_index_default_omits_search_handler(mock_solr):
118+
# When no search_handler is provided, we don't pass it to pysolr at all so
119+
# pysolr's own default kicks in (the falsy-fallback to "select").
120+
Solr9PySolrSearchEngine().get_sounds_index()
121+
_, kwargs = mock_solr.call_args
122+
assert "search_handler" not in kwargs
123+
124+
125+
@mock.patch("utils.search.backends.solr9pysolr.pysolr.Solr")
126+
def test_get_sounds_index_passes_explicit_search_handler(mock_solr):
127+
Solr9PySolrSearchEngine().get_sounds_index(search_handler="select_similarity")
128+
_, kwargs = mock_solr.call_args
129+
assert kwargs["search_handler"] == "select_similarity"

0 commit comments

Comments
 (0)