-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSolrRetriever.py
More file actions
47 lines (40 loc) · 1.49 KB
/
SolrRetriever.py
File metadata and controls
47 lines (40 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from typing import Any, List
from langchain.callbacks.manager import (
AsyncCallbackManagerForRetrieverRun,
CallbackManagerForRetrieverRun,
)
from langchain.schema import BaseRetriever, Document
from globals import VECTOR_FIELD_BODY
from neural.VectorEncoder import VectorEncoder
from Solr import Solr
class SolrRetriever(BaseRetriever):
    """LangChain retriever that answers queries with a Solr KNN vector search.

    The query text is encoded with ``vector_encoder`` and the resulting
    vector is sent to ``solr.knn_query`` against ``VECTOR_FIELD_BODY``. Each
    returned hit is converted into a ``Document`` whose content is the first
    ``bodyChunk`` value and whose metadata carries the hit's ``id``.

    Both the synchronous and the asynchronous retrieval hooks return the same
    results.
    """

    # Field list forwarded to Solr as ``fl``. The Document conversion below
    # reads "bodyChunk" and "id" from every hit, so both must stay listed.
    fields: str = "bodyChunk, id"
    # Number of hits requested from Solr per query.
    rows: int = 10
    # Encoder used to turn the query text into a vector.
    vector_encoder: VectorEncoder
    # Client object exposing ``knn_query``.
    solr: Solr

    def set_solr(self, solr: Solr) -> None:
        """Replace the Solr client used for searches."""
        self.solr = solr

    def set_vector_encoder(self, vector_encoder: VectorEncoder) -> None:
        """Replace the encoder used to vectorise queries."""
        self.vector_encoder = vector_encoder

    def knn_query(self, query: str, vector_field: str, fields: str, rows: int):
        """Encode ``query`` and run a KNN search against ``vector_field``.

        Args:
            query: Text to encode and search for.
            vector_field: Name of the Solr vector field to search.
            fields: Field list passed to Solr as ``fl``.
            rows: Number of hits to request, starting at offset 0.

        Returns:
            Whatever ``self.solr.knn_query`` returns (iterated as a sequence
            of dict-like hits by the retrieval hooks).
        """
        # The encoder result must expose ``tolist()`` (presumably a NumPy
        # array or tensor -- TODO confirm) so it can be sent as a plain list.
        vector = self.vector_encoder.encode(query)
        documents = self.solr.knn_query(
            vector=vector.tolist(),
            vector_field=vector_field,
            fl=fields,
            start=0,
            rows=rows,
        )
        return documents

    def _search_documents(self, query: str) -> List[Document]:
        """Run the KNN search for ``query`` and wrap each hit in a Document."""
        hits = self.knn_query(query, VECTOR_FIELD_BODY, self.fields, self.rows)
        return [
            Document(
                # "bodyChunk" is indexed with [0], i.e. it is expected to be a
                # sequence (presumably a multi-valued Solr field).
                page_content=hit["bodyChunk"][0],
                metadata={"id": hit["id"]},
            )
            for hit in hits
        ]

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Synchronously return the Solr hits for ``query`` as Documents.

        Previously this hook was a stub that always returned an empty list,
        so synchronous callers never received any results.
        """
        return self._search_documents(query)

    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> List[Document]:
        """Asynchronously return the Solr hits for ``query`` as Documents.

        NOTE(review): the encoding and the Solr request are called directly
        (not awaited), exactly as before, so they run on the event loop
        thread. Offload to an executor if they turn out to be slow.
        """
        return self._search_documents(query)