Skip to content

Commit 1c6fad6

Browse files
committed
Adjust search index
1 parent fefe6f4 commit 1c6fad6

File tree

5 files changed: 28 additions and 16 deletions.

python/ai_search.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@
77

88

99
def add(searchDocs: List[dict]):
10+
if len(searchDocs) == 0:
11+
print("No documents to add.")
12+
return
13+
1014
data = {"value": [searchDoc for searchDoc in searchDocs]}
1115

1216
res = requests.post(
@@ -27,8 +31,8 @@ def search(query: str):
2731
data = json.dumps(
2832
{
2933
"search": query,
30-
"select": "title, content, url, author, language, date, excerpt, baseUrl",
31-
"facets": ["date", "language", "author", "baseUrl"],
34+
"top": 10,
35+
"select": "title, content, url, author, language, date, baseUrl",
3236
"vectorQueries": [
3337
{
3438
"kind": "vector",

python/compose.yml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
11
services:
22
service:
3-
build: .
3+
build:
4+
context: .
5+
target: search-mcp-server
46
env_file:
57
- ../.env
68
ports:
79
- "8000:8000"
10+

python/index.py

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import config
77
from ai_search import add
8+
import base64
89

910

1011
def prepare(searchDoc: Union[SearchDoc, List[SearchDoc]]):
@@ -13,24 +14,29 @@ def prepare(searchDoc: Union[SearchDoc, List[SearchDoc]]):
1314
else:
1415
documents = searchDoc
1516

16-
embeddings_list = list(config.model.embed([doc.content for doc in documents]))
17+
embeddings_list = list(config.model.embed([f"{doc.title} {doc.excerpt} {doc.content}" for doc in documents]))
1718

18-
jsonDocs = [
19-
{
19+
jsonDocs = []
20+
21+
for idx, searchDoc in enumerate(documents):
22+
if not searchDoc.url:
23+
continue
24+
25+
jsonDocs.append(
26+
{
2027
"@search.action": "mergeOrUpload",
21-
"id": searchDoc.id,
28+
"id": base64.b64encode(searchDoc.url.encode()).decode(),
2229
"title": searchDoc.title,
2330
"excerpt": searchDoc.excerpt,
2431
"author": searchDoc.author,
2532
"language": searchDoc.language,
26-
"url": searchDoc.url or "",
33+
"url": searchDoc.url,
2734
"baseUrl": "/".join(searchDoc.url.split("/")[:3]) if searchDoc.url else "",
2835
"content": searchDoc.content,
29-
"vector": embeddings_list[documents.index(searchDoc)],
36+
"vector": embeddings_list[idx],
3037
"date": searchDoc.date or str(datetime.now().astimezone().isoformat()),
31-
}
32-
for searchDoc in documents
33-
]
38+
}
39+
)
3440

3541
return jsonDocs
3642

@@ -43,13 +49,15 @@ def prepare(searchDoc: Union[SearchDoc, List[SearchDoc]]):
4349
author="John Doe",
4450
language="en",
4551
content="Gday this is an important Australian message.",
52+
url="https://example.com/doc1",
4653
),
4754
SearchDoc(
4855
title="Example Document",
4956
excerpt="This is an example excerpt.",
5057
author="John Doe",
5158
language="de",
5259
content="Guten Tag das ist eine wichtige deutsche Nachricht.",
60+
url="https://example.com/doc2",
5361
),
5462
]
5563

python/model.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,9 @@
33
from typing import List, Union
44
from numpyencoder import NumpyEncoder
55
from pydantic import BaseModel
6-
from uuid import uuid4
7-
from pydantic import Field
86

97

108
class SearchDoc(BaseModel):
11-
id: str = Field(default_factory=lambda: str(uuid4()))
129
title: str = ""
1310
excerpt: str = ""
1411
author: str = ""

python/search.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,5 +4,5 @@
44

55

66
if __name__ == "__main__":
7-
query = "space"
7+
query = "deutsch"
88
print(json.dumps(search(query), indent=2))

0 commit comments

Comments
 (0)