-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlocations_assets_ingestion.py
39 lines (30 loc) · 1.1 KB
/
locations_assets_ingestion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from es_client import create_es_client
from langchain_experimental.text_splitter import SemanticChunker
from langchain_chroma import Chroma
# Ingestion script: queries the "assets" Elasticsearch index and, once the
# disabled steps below are re-enabled, chunks each hit's text and stores the
# chunks in a persistent Chroma vector store.

# Load variables from a local .env file (if present) into the environment.
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')

# Elasticsearch endpoint handed to create_es_client. The script previously used
# HOST without ever defining it, so it died with NameError on the search call.
# NOTE(review): the ES_HOST variable name is an assumption -- confirm it
# against the project's .env file and what es_client.create_es_client expects.
HOST = os.getenv('ES_HOST')
if not HOST:
    raise RuntimeError(
        "ES_HOST is not set; add it to the environment or the .env file."
    )

# --- Disabled: embedding model and semantic splitter used by the steps below.
# embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-large")
# text_splitter = SemanticChunker(
#     embeddings,
#     breakpoint_threshold_type="gradient",
# )

# Query the "assets" index with a match_all query and dump the raw response.
# NOTE(review): no "size" is given; if create_es_client returns a standard
# Elasticsearch client, only the first page of hits (10 by default) comes back.
# Confirm, and paginate or set "size" before relying on this for full ingestion.
response = create_es_client(HOST).search(index="assets", body={"query": {"match_all": {}}})
print(response)

# --- Disabled: turn each hit into chunked documents and persist them in Chroma.
# NOTE(review): the nesting below was reconstructed from flattened comments;
# verify it before re-enabling. Also note that every chunk of a hit would share
# the same metadata dict object.
# documents = []
# for hit in response['hits']['hits']:
#     source = hit['_source']
#     page_content = source.get('text', '')
#     metadata = source.get('metadata', {})
#     docs = text_splitter.create_documents([page_content])
#     for doc in docs:
#         doc.metadata = metadata
#         documents.append(doc)
# vectorstore = Chroma.from_documents(
#     documents,
#     embeddings,
#     persist_directory="./vectorstore",
# )