Skip to content

Commit 3690df0

Browse files
committed
[GROWENG-124][GROWENG-126] Adding YugabyteDB as a connector in unstructured.io
Summary:
* Added tests for the YugabyteDB connector
* Added setup for YugabyteDB as a source and destination connector
* Added expected results for YugabyteDB as source, destination and stager
1 parent 4187a80 commit 3690df0

30 files changed

+9546
-0
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Compose definition for a YugabyteDB container named yugabytedb_dest
# (presumably the destination side of the connector tests — confirm).
services:
  yugabytedb:
    container_name: yugabytedb_dest
    image: yugabytedb/yugabyte:latest
    restart: always
    # Start yugabyted with --daemon=false and hand it the directory that
    # holds the mounted init script.
    command: >-
      bin/yugabyted start
      --daemon=false
      --initial_scripts_dir=/home/yugabyte/init_scripts
    ports:
      - "5433:5433"
    volumes:
      # The schema file lands inside the directory given to
      # --initial_scripts_dir above.
      - ./yugabytedb-schema.sql:/home/yugabyte/init_scripts/init.sql
    healthcheck:
      # Probe: run a trivial query through ysqlsh as user yugabyte.
      test:
        - CMD-SHELL
        - bin/ysqlsh -h localhost -U yugabyte -d yugabyte -c 'SELECT 1'
      interval: 10s
      timeout: 60s
      retries: 10
      start_period: 20s
17+
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
-- Make the pgvector extension available (YugabyteDB has native support).
-- IF NOT EXISTS keeps this statement safe to re-run.
CREATE EXTENSION IF NOT EXISTS vector;

-- Define the elements table, including a vector column, in the yugabyte
-- database. Column order is significant for positional inserts; keep it.
CREATE TABLE elements (
    id                        UUID PRIMARY KEY,
    record_id                 VARCHAR,
    element_id                VARCHAR,
    text                      TEXT,
    embeddings                vector(384),   -- fixed at 384 dimensions
    type                      VARCHAR,
    system                    VARCHAR,
    layout_width              DECIMAL,
    layout_height             DECIMAL,
    points                    TEXT,
    url                       TEXT,
    version                   VARCHAR,
    date_created              TIMESTAMPTZ,
    date_modified             TIMESTAMPTZ,
    date_processed            TIMESTAMPTZ,
    permissions_data          TEXT,
    record_locator            TEXT,
    category_depth            INTEGER,
    parent_id                 VARCHAR,
    attached_filename         VARCHAR,
    filetype                  VARCHAR,
    last_modified             TIMESTAMPTZ,
    file_directory            VARCHAR,
    filename                  VARCHAR,
    languages                 VARCHAR[],
    page_number               VARCHAR,       -- stored as text, not an integer
    links                     TEXT,
    page_name                 VARCHAR,
    link_urls                 VARCHAR[],
    link_texts                VARCHAR[],
    sent_from                 VARCHAR[],
    sent_to                   VARCHAR[],
    subject                   VARCHAR,
    section                   VARCHAR,
    header_footer_type        VARCHAR,
    emphasized_text_contents  VARCHAR[],
    emphasized_text_tags      VARCHAR[],
    text_as_html              TEXT,
    regex_metadata            TEXT,
    detection_class_prob      DECIMAL
);
47+
48+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Compose definition for a YugabyteDB container named yugabytedb_src
# (presumably the source side of the connector tests — confirm).
services:
  yugabytedb:
    container_name: yugabytedb_src
    image: yugabytedb/yugabyte:latest
    restart: always
    # Start yugabyted with --daemon=false and hand it the directory that
    # holds the mounted init script.
    command: >-
      bin/yugabyted start
      --daemon=false
      --initial_scripts_dir=/home/yugabyte/init_scripts
    ports:
      - "5433:5433"
    volumes:
      # The schema file lands inside the directory given to
      # --initial_scripts_dir above.
      - ./yugabytedb-schema.sql:/home/yugabyte/init_scripts/init.sql
    healthcheck:
      # Probe: run a trivial query through ysqlsh as user yugabyte.
      test:
        - CMD-SHELL
        - bin/ysqlsh -h localhost -U yugabyte -d yugabyte -c 'SELECT 1'
      interval: 10s
      timeout: 60s
      retries: 10
      start_period: 20s
17+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- Define the cars table in the yugabyte database.
CREATE TABLE cars (
    car_id  SERIAL   PRIMARY KEY,  -- SERIAL: values are assigned from a sequence
    brand   TEXT     NOT NULL,
    price   INTEGER  NOT NULL
);
7+
8+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"directory_structure": [
3+
"cars-1-5fb93ce5.csv",
4+
"cars-10-5fb93ce5.csv",
5+
"cars-2-5fb93ce5.csv",
6+
"cars-3-5fb93ce5.csv",
7+
"cars-4-5fb93ce5.csv",
8+
"cars-5-5fb93ce5.csv",
9+
"cars-6-5fb93ce5.csv",
10+
"cars-7-5fb93ce5.csv",
11+
"cars-8-5fb93ce5.csv",
12+
"cars-9-5fb93ce5.csv"
13+
]
14+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
car_id,brand
2+
1,brand_0
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
car_id,brand
2+
10,brand_9
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
car_id,brand
2+
2,brand_1
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
car_id,brand
2+
3,brand_2
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
car_id,brand
2+
4,brand_3

0 commit comments

Comments
 (0)