-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathload.py
More file actions
19 lines (17 loc) · 739 Bytes
/
load.py
File metadata and controls
19 lines (17 loc) · 739 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from dotenv import load_dotenv
# Build a Chroma vector index from the documents under LOAD_DIR, unless the
# index files are already present in the persistence directory.

# Read a local .env file (if any) into the environment before LOAD_DIR is
# looked up below (presumably the OpenAI credentials used by
# OpenAIEmbeddings are supplied the same way -- confirm).
load_dotenv()

# Directory the index is persisted to. The existence check below looks in the
# same place, so the check and the write location cannot drift apart.
PERSIST_DIR = '.'

# Files whose presence is taken to mean the index was already built
# (presumably what Chroma's persist() writes -- confirm for the installed
# Chroma version).
INDEX_FILES = ('chroma-collections.parquet', 'chroma-embeddings.parquet')

index_exists = all(
    os.path.exists(os.path.join(PERSIST_DIR, name)) for name in INDEX_FILES
)

if not index_exists:
    # LOAD_DIR is only required when the index actually has to be built, so
    # it is read (and validated) inside this branch.
    try:
        load_dir = os.environ['LOAD_DIR']
    except KeyError:
        # Same exception type as a plain lookup, but with an actionable message.
        raise KeyError(
            'LOAD_DIR is not set; define it in the environment or in a .env file'
        ) from None

    # Load every document the loader finds under LOAD_DIR.
    loader = DirectoryLoader(load_dir)
    documents = loader.load()

    # Split the documents into chunks before embedding (chunk_size=1000,
    # no overlap between consecutive chunks).
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)

    # Embed the chunks with OpenAI embeddings and store them in Chroma.
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=OpenAIEmbeddings(),
        persist_directory=PERSIST_DIR,
    )

    # Persist the freshly built index to PERSIST_DIR.
    vectordb.persist()