Skip to content

Commit 4aac2af

Browse files
fix(qdrant): inherit _similarity_search_with_relevance_scores to enable normalization
1 parent e495651 commit 4aac2af

3 files changed

Lines changed: 54 additions & 49 deletions

File tree

libs/partners/qdrant/langchain_qdrant/vectorstores.py

Lines changed: 0 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -1967,55 +1967,6 @@ def _select_relevance_score_fn(self) -> Callable[[float], float]:
1967 1967
)
1968 1968
raise ValueError(msg)
1969 1969

1970-
def _similarity_search_with_relevance_scores(
1971-
self,
1972-
query: str,
1973-
k: int = 4,
1974-
**kwargs: Any,
1975-
) -> list[tuple[Document, float]]:
1976-
"""Return docs and relevance scores in the range `[0, 1]`.
1977-
1978-
`0` is dissimilar, `1` is most similar.
1979-
1980-
Args:
1981-
query: input text
1982-
k: Number of Documents to return.
1983-
**kwargs: Kwargs to be passed to similarity search.
1984-
1985-
Should include `score_threshold`, an optional floating point value
1986-
between `0` to `1` to filter the resulting set of retrieved docs.
1987-
1988-
Returns:
1989-
List of tuples of `(doc, similarity_score)`
1990-
1991-
"""
1992-
return self.similarity_search_with_score(query, k, **kwargs)
1993-
1994-
@sync_call_fallback
1995-
async def _asimilarity_search_with_relevance_scores(
1996-
self,
1997-
query: str,
1998-
k: int = 4,
1999-
**kwargs: Any,
2000-
) -> list[tuple[Document, float]]:
2001-
"""Return docs and relevance scores in the range `[0, 1]`.
2002-
2003-
`0` is dissimilar, `1` is most similar.
2004-
2005-
Args:
2006-
query: input text
2007-
k: Number of Documents to return.
2008-
**kwargs: Kwargs to be passed to similarity search.
2009-
2010-
Should include `score_threshold`, an optional floating point value
2011-
between `0` to `1` to filter the resulting set of retrieved docs.
2012-
2013-
Returns:
2014-
List of tuples of `(doc, similarity_score)`
2015-
2016-
"""
2017-
return await self.asimilarity_search_with_score(query, k, **kwargs)
2018-
2019 1970
@classmethod
2020 1971
def _build_payloads(
2021 1972
cls,

libs/partners/qdrant/tests/integration_tests/async_api/test_similarity_search.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -303,3 +303,31 @@ async def test_qdrant_similarity_search_with_relevance_scores(
303 303
assert all(
304 304
(score <= 1 or np.isclose(score, 1)) and score >= 0 for _, score in output
305 305
)
306+
307+
308+
async def test_qdrant_asimilarity_search_with_relevance_scores_normalization() -> None:
    """Check that async relevance scores are the normalized form of raw scores.

    A store is built with ``distance_func="Euclid"`` and queried twice with the
    same text: once through ``asimilarity_search_with_score`` and once through
    ``asimilarity_search_with_relevance_scores``. For every hit the relevance
    score is expected to equal ``1 - raw_score / sqrt(2)``.
    """
    import math

    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        distance_func="Euclid",
    )

    # Raw scores as reported by the plain scored search.
    raw_hits = await store.asimilarity_search_with_score("foo", k=3)

    # Same query through the relevance-score API.
    relevance_hits = await store.asimilarity_search_with_relevance_scores(
        "foo", k=3
    )

    assert len(raw_hits) == len(relevance_hits)

    sqrt_two = math.sqrt(2)
    for raw_hit, relevance_hit in zip(raw_hits, relevance_hits, strict=False):
        raw_doc, distance = raw_hit
        relevance_doc, relevance = relevance_hit
        # Both calls must yield the same documents in the same order.
        assert raw_doc.page_content == relevance_doc.page_content
        assert np.isclose(relevance, 1.0 - distance / sqrt_two)

libs/partners/qdrant/tests/integration_tests/test_similarity_search.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -281,3 +281,29 @@ def test_qdrant_similarity_search_with_relevance_scores(
281 281
assert all(
282 282
(score <= 1 or np.isclose(score, 1)) and score >= 0 for _, score in output
283 283
)
284+
285+
286+
def test_qdrant_similarity_search_with_relevance_scores_normalization() -> None:
    """Check that relevance scores are the normalized form of raw scores.

    A store is built with ``distance_func="Euclid"`` and queried twice with the
    same text: once through ``similarity_search_with_score`` and once through
    ``similarity_search_with_relevance_scores``. For every hit the relevance
    score is expected to equal ``1 - raw_score / sqrt(2)``.
    """
    import math

    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        distance_func="Euclid",
    )

    # Raw scores as reported by the plain scored search.
    raw_hits = store.similarity_search_with_score("foo", k=3)

    # Same query through the relevance-score API.
    relevance_hits = store.similarity_search_with_relevance_scores("foo", k=3)

    assert len(raw_hits) == len(relevance_hits)

    sqrt_two = math.sqrt(2)
    for raw_hit, relevance_hit in zip(raw_hits, relevance_hits, strict=False):
        raw_doc, distance = raw_hit
        relevance_doc, relevance = relevance_hit
        # Both calls must yield the same documents in the same order.
        assert raw_doc.page_content == relevance_doc.page_content
        assert np.isclose(relevance, 1.0 - distance / sqrt_two)

0 commit comments

Comments
 (0)