55
66import config
77from ai_search import add
8+ import base64
89
910
1011def prepare (searchDoc : Union [SearchDoc , List [SearchDoc ]]):
@@ -13,24 +14,29 @@ def prepare(searchDoc: Union[SearchDoc, List[SearchDoc]]):
1314 else :
1415 documents = searchDoc
1516
16- embeddings_list = list (config .model .embed ([doc .content for doc in documents ]))
17+ embeddings_list = list (config .model .embed ([f" { doc .title } { doc . excerpt } { doc . content } " for doc in documents ]))
1718
18- jsonDocs = [
19- {
19+ jsonDocs = []
20+
21+ for idx , searchDoc in enumerate (documents ):
22+ if not searchDoc .url :
23+ continue
24+
25+ jsonDocs .append (
26+ {
2027 "@search.action" : "mergeOrUpload" ,
21- "id" : searchDoc .id ,
28+ "id" : base64 . b64encode ( searchDoc .url . encode ()). decode () ,
2229 "title" : searchDoc .title ,
2330 "excerpt" : searchDoc .excerpt ,
2431 "author" : searchDoc .author ,
2532 "language" : searchDoc .language ,
26- "url" : searchDoc .url or "" ,
33+ "url" : searchDoc .url ,
2734 "baseUrl" : "/" .join (searchDoc .url .split ("/" )[:3 ]) if searchDoc .url else "" ,
2835 "content" : searchDoc .content ,
29- "vector" : embeddings_list [documents . index ( searchDoc ) ],
36+ "vector" : embeddings_list [idx ],
3037 "date" : searchDoc .date or str (datetime .now ().astimezone ().isoformat ()),
31- }
32- for searchDoc in documents
33- ]
38+ }
39+ )
3440
3541 return jsonDocs
3642
@@ -43,13 +49,15 @@ def prepare(searchDoc: Union[SearchDoc, List[SearchDoc]]):
4349 author = "John Doe" ,
4450 language = "en" ,
4551 content = "Gday this is an important Australian message." ,
52+ url = "https://example.com/doc1" ,
4653 ),
4754 SearchDoc (
4855 title = "Example Document" ,
4956 excerpt = "This is an example excerpt." ,
5057 author = "John Doe" ,
5158 language = "de" ,
5259 content = "Guten Tag das ist eine wichtige deutsche Nachricht." ,
60+ url = "https://example.com/doc2" ,
5361 ),
5462 ]
5563
0 commit comments