Skip to content

Commit 414d062

Browse files
chore: Making embedding optional in retrieve_online_documents (feast-dev#5104)
1 parent 135342b commit 414d062

File tree

8 files changed

+24
-22
lines changed

8 files changed

+24
-22
lines changed

sdk/python/feast/feature_store.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -1863,11 +1863,11 @@ def retrieve_online_documents(
18631863

18641864
def retrieve_online_documents_v2(
18651865
self,
1866-
query: Union[str, List[float]],
1867-
top_k: int,
18681866
features: List[str],
1869-
distance_metric: Optional[str] = "L2",
1867+
top_k: int,
1868+
query: Optional[List[float]] = None,
18701869
query_string: Optional[str] = None,
1870+
distance_metric: Optional[str] = "L2",
18711871
) -> OnlineResponse:
18721872
"""
18731873
Retrieves the top k closest document features. Note, embeddings are a subset of features.
@@ -1876,15 +1876,14 @@ def retrieve_online_documents_v2(
18761876
features: The list of features that should be retrieved from the online document store. These features can be
18771877
specified either as a list of string document feature references or as a feature service. String feature
18781878
references must have format "feature_view:feature", e.g., "document_fv:document_embeddings".
1879-
query: The query to retrieve the closest document features for.
1879+
query: The embedded query to retrieve the closest document features for (optional)
18801880
top_k: The number of closest document features to retrieve.
18811881
distance_metric: The distance metric to use for retrieval.
18821882
query_string: The query string to retrieve the closest document features using keyword search (bm25).
18831883
"""
1884-
if isinstance(query, str):
1885-
raise ValueError(
1886-
"Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents."
1887-
)
1884+
assert query is not None or query_string is not None, (
1885+
"Either query or query_string must be provided."
1886+
)
18881887

18891888
(
18901889
available_feature_views,
@@ -1988,7 +1987,7 @@ def _retrieve_from_online_store_v2(
19881987
provider: Provider,
19891988
table: FeatureView,
19901989
requested_features: List[str],
1991-
query: List[float],
1990+
query: Optional[List[float]],
19921991
top_k: int,
19931992
distance_metric: Optional[str],
19941993
query_string: Optional[str],

sdk/python/feast/infra/online_stores/milvus_online_store/milvus.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ def retrieve_online_documents_v2(
460460
config: RepoConfig,
461461
table: FeatureView,
462462
requested_features: List[str],
463-
embedding: List[float],
463+
embedding: Optional[List[float]],
464464
top_k: int,
465465
distance_metric: Optional[str] = None,
466466
query_string: Optional[str] = None,
@@ -471,6 +471,7 @@ def retrieve_online_documents_v2(
471471
Optional[Dict[str, ValueProto]],
472472
]
473473
]:
474+
assert embedding is not None, "Key Word Search not yet implemented for Milvus"
474475
entity_name_feast_primitive_type_map = {
475476
k.name: k.dtype for k in table.entity_columns
476477
}

sdk/python/feast/infra/online_stores/online_store.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ def retrieve_online_documents_v2(
436436
config: RepoConfig,
437437
table: FeatureView,
438438
requested_features: List[str],
439-
embedding: List[float],
439+
embedding: Optional[List[float]],
440440
top_k: int,
441441
distance_metric: Optional[str] = None,
442442
query_string: Optional[str] = None,
@@ -455,7 +455,7 @@ def retrieve_online_documents_v2(
455455
config: The config for the current feature store.
456456
table: The feature view whose feature values should be read.
457457
requested_features: The list of features whose embeddings should be used for retrieval.
458-
embedding: The embeddings to use for retrieval.
458+
embedding: The embeddings to use for retrieval (optional)
459459
top_k: The number of documents to retrieve.
460460
query_string: The query string to search for using keyword search (bm25) (optional)
461461
@@ -464,6 +464,9 @@ def retrieve_online_documents_v2(
464464
where the first item is the event timestamp for the row, and the second item is a dict of feature
465465
name to embeddings.
466466
"""
467+
assert embedding is not None or query_string is not None, (
468+
"Either embedding or query_string must be specified"
469+
)
467470
raise NotImplementedError(
468471
f"Online store {self.__class__.__name__} does not support online retrieval"
469472
)

sdk/python/feast/infra/online_stores/sqlite.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def retrieve_online_documents_v2(
439439
config: RepoConfig,
440440
table: FeatureView,
441441
requested_features: List[str],
442-
query: List[float],
442+
query: Optional[List[float]],
443443
top_k: int,
444444
distance_metric: Optional[str] = None,
445445
query_string: Optional[str] = None,
@@ -456,7 +456,7 @@ def retrieve_online_documents_v2(
456456
config: Feast configuration object
457457
table: FeatureView object as the table to search
458458
requested_features: List of requested features to retrieve
459-
query: Query embedding to search for
459+
query: Query embedding to search for (optional)
460460
top_k: Number of items to return
461461
distance_metric: Distance metric to use (optional)
462462
query_string: The query string to search for using keyword search (bm25) (optional)

sdk/python/feast/infra/passthrough_provider.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ def retrieve_online_documents_v2(
318318
config: RepoConfig,
319319
table: FeatureView,
320320
requested_features: Optional[List[str]],
321-
query: List[float],
321+
query: Optional[List[float]],
322322
top_k: int,
323323
distance_metric: Optional[str] = None,
324324
query_string: Optional[str] = None,

sdk/python/feast/infra/provider.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def retrieve_online_documents_v2(
456456
config: RepoConfig,
457457
table: FeatureView,
458458
requested_features: List[str],
459-
query: List[float],
459+
query: Optional[List[float]],
460460
top_k: int,
461461
distance_metric: Optional[str] = None,
462462
query_string: Optional[str] = None,
@@ -475,7 +475,7 @@ def retrieve_online_documents_v2(
475475
config: The config for the current feature store.
476476
table: The feature view whose embeddings should be searched.
477477
requested_features: the requested document feature names.
478-
query: The query embedding to search for.
478+
query: The query embedding to search for (optional).
479479
top_k: The number of documents to return.
480480
query_string: The query string to search for using keyword search (bm25) (optional)
481481

sdk/python/tests/foo_provider.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def retrieve_online_documents_v2(
169169
config: RepoConfig,
170170
table: FeatureView,
171171
requested_features: List[str],
172-
query: List[float],
172+
query: Optional[List[float]],
173173
top_k: int,
174174
distance_metric: Optional[str] = None,
175175
query_string: Optional[str] = None,

sdk/python/tests/unit/online_store/test_online_retrieval.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -979,14 +979,13 @@ def test_sqlite_get_online_documents_v2_search() -> None:
979979
)
980980

981981
# Test vector similarity search
982-
query_embedding = [float(x) for x in np.random.random(vector_length)]
982+
# query_embedding = [float(x) for x in np.random.random(vector_length)]
983983
result = store.retrieve_online_documents_v2(
984984
features=[
985985
"document_embeddings:Embeddings",
986986
"document_embeddings:content",
987987
"document_embeddings:title",
988988
],
989-
query=query_embedding,
990989
query_string="(content: 5) OR (title: 1) OR (title: 3)",
991990
top_k=3,
992991
).to_dict()
@@ -1279,12 +1278,12 @@ def test_milvus_native_from_feast_data() -> None:
12791278
search_res = client.search(
12801279
collection_name=COLLECTION_NAME,
12811280
data=[query_embedding],
1282-
limit=3, # Top 3 results
1281+
limit=5, # Top 5 results
12831282
output_fields=["item_id", "author_id", "sentence_chunks"],
12841283
)
12851284

12861285
# Validate the search results
1287-
assert len(search_res[0]) == 3
1286+
assert len(search_res[0]) == 5
12881287
print("Search Results:", search_res[0])
12891288

12901289
# Clean up the collection

0 commit comments

Comments
 (0)