-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvector_search.py
56 lines (39 loc) · 1.57 KB
/
vector_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import ast
import json
import os

import pandas as pd
import scipy.spatial
from openai import AzureOpenAI
# Azure OpenAI client used for both embeddings and chat completions.
# The original hard-coded placeholder credentials with unbalanced quotes
# (a syntax error) — read the endpoint and key from the environment instead.
embedding_client = AzureOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-02-01",
)
def generate_embedding(text):
    """Return the embedding vector (list of floats) for *text*.

    Uses the ``text-embedding-3-small`` deployment on the module-level
    Azure OpenAI client.
    """
    emb = embedding_client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    # Read the typed response attribute directly instead of the original
    # model_dump_json() -> json.loads() round-trip, which serialized and
    # re-parsed the whole response just to reach one field.
    return emb.data[0].embedding
def relatedness_fn(x, y):
    """Cosine similarity between two vectors (1 = identical direction)."""
    # scipy returns cosine *distance*; subtract from 1 to get similarity.
    return 1 - scipy.spatial.distance.cosine(x, y)


# Load pre-computed document embeddings. The CSV stores each vector as a
# Python-literal string (e.g. "[0.1, 0.2, ...]"), so parse it back safely.
df = pd.read_csv("vector.csv", index_col=False)
df['embeddings'] = df['embeddings'].apply(ast.literal_eval)

# Query text to answer. NOTE(review): fixed the original misspelling
# "attenndence" — a misspelled query skews the embedding and hurts retrieval.
query = "what is attendance leave policy? "
top = 2  # number of most-relevant chunks to include in the prompt

input_embedding = generate_embedding(text=query)

# Score every stored chunk against the query embedding.
strings_and_relatednesses = [
    (row['text'], relatedness_fn(input_embedding, row['embeddings']))
    for _, row in df.iterrows()
]
strings_and_relatednesses.sort(key=lambda pair: pair[1], reverse=True)

# Take the top-k chunks. Built with str.join instead of the original
# quadratic `content += text`; also avoids zip(*...) which raised on an
# empty DataFrame. Each chunk keeps a trailing newline, as before.
content = "".join(
    text + "\n" for text, _ in strings_and_relatednesses[:top]
)

# Ground the assistant with the retrieved context.
system_prompt = """
context:
---
{content}
--- """.format(content=content)

message_text = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": query},
]
completion = embedding_client.chat.completions.create(
    model="gpt-35-turbo-16k",
    messages=message_text,
    temperature=0,  # deterministic answers for policy Q&A
)
print(completion.choices[0].message.content)