
Commit 2930189

docs: update docs for name and destination changes
1 parent d8bb4f3 commit 2930189

File tree

11 files changed: +281 -66 lines


docs/vectorizer-quick-start.md

+2 -2

@@ -90,8 +90,8 @@ Now we can create and run a vectorizer. A vectorizer is a pgai concept, it proce
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
-    embedding => ai.embedding_ollama('nomic-embed-text', 768),
+    destination => ai.destination_table('blog_contents_embeddings'),
+    embedding => ai.embedding_ollama('nomic-embed-text', 768)
 );
 ```

docs/vectorizer/api-reference.md

+199 -42
Large diffs are not rendered by default.

docs/vectorizer/overview.md

+55 -4

@@ -117,8 +117,9 @@ query like this:
 ```sql
 SELECT ai.create_vectorizer(
     'blog'::regclass,
+    name => 'blog_embeddings', -- Optional custom name for easier reference
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768)
 );
 ```
@@ -150,7 +151,7 @@ into each chunk:
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     formatting => ai.formatting_python_template('$title: $chunk')
 );
@@ -284,7 +285,7 @@ accordingly:
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     formatting => ai.formatting_python_template('$title - by $author - $chunk')
 );
@@ -304,7 +305,7 @@ example uses a HNSW index:
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     formatting => ai.formatting_python_template('$title - by $author - $chunk'),
     indexing => ai.indexing_hnsw(min_rows => 100000, opclass => 'vector_l2_ops')
@@ -344,6 +345,56 @@ CREATE TABLE blog_contents_embeddings_store(
 );
 ```

+## Destination Options for Embeddings
+
+The vectorizer supports two ways to store your embeddings:
+
+### 1. Table Destination (Default)
+
+The default approach creates a separate table to store the embeddings, plus a view that joins them with the source table:
+
+```sql
+SELECT ai.create_vectorizer(
+    'blog'::regclass,
+    name => 'blog_vectorizer', -- Optional custom name for easier reference
+    loading => ai.loading_column('contents'),
+    destination => ai.destination_table(
+        target_schema => 'public',
+        target_table => 'blog_embeddings_store',
+        view_name => 'blog_embeddings'
+    ),
+    embedding => ai.embedding_ollama('nomic-embed-text', 768)
+);
+```
+
+**When to use a table destination:**
+- You need multiple embeddings per row (chunking)
+- You have large text fields that need to be split
+- You are vectorizing documents (which typically require chunking)
+
+### 2. Column Destination
+
+For simpler cases, you can add an embedding column directly to the source table:
+
+```sql
+SELECT ai.create_vectorizer(
+    'product_descriptions'::regclass,
+    name => 'product_descriptions_vectorizer',
+    loading => ai.loading_column('description'),
+    destination => ai.destination_column('description_embedding'),
+    embedding => ai.embedding_openai('text-embedding-3-small', 768),
+    chunking => ai.chunking_none() -- Required for column destination
+);
+```
+
+**When to use a column destination:**
+- You need exactly one embedding per row
+- The text is short and doesn't require chunking
+- Your application already chunks the text before inserting it into the database
+- You want to avoid creating additional database objects
+
+**Note:** A column destination requires chunking to be set to `ai.chunking_none()`, since it can only store one embedding per row.
+
 ## Monitor a vectorizer

 Since embeddings are created asynchronously, a delay may occur before they
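
For context on the two destination options documented above, here is a minimal query sketch. It assumes the objects created by those examples (a `blog_embeddings` view exposing `chunk` and `embedding` columns alongside the source `blog` columns, and a `description_embedding` column on `product_descriptions`), pgvector's `<=>` distance operator, and pgai's `ai.ollama_embed` helper; column names such as `title` and `id` are illustrative:

```sql
-- Table destination: query the view generated by the vectorizer, which joins
-- each chunk's embedding with the columns of its source row.
SELECT title, chunk
FROM blog_embeddings
ORDER BY embedding <=> ai.ollama_embed('nomic-embed-text', 'postgres performance tips')
LIMIT 5;

-- Column destination: the embedding lives directly on the source table, so you
-- order by the new column ($1 is a query vector supplied by the application).
SELECT id, description
FROM product_descriptions
ORDER BY description_embedding <=> $1
LIMIT 5;
```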

docs/vectorizer/python-integration.md

+12 -5

@@ -13,11 +13,14 @@ Then you can create a vectorizer from python:

 ```python
 from pgai.vectorizer import CreateVectorizer
-from pgai.vectorizer.configuration import EmbeddingOpenaiConfig, ChunkingCharacterTextSplitterConfig, FormattingPythonTemplateConfig, LoadingColumnConfig
+from pgai.vectorizer.configuration import EmbeddingOpenaiConfig, ChunkingCharacterTextSplitterConfig, FormattingPythonTemplateConfig, LoadingColumnConfig, DestinationTableConfig

 vectorizer_statement = CreateVectorizer(
     source="blog",
-    target_table='blog_embeddings',
+    name="blog_content_embedder", # Optional custom name for easier reference
+    destination=DestinationTableConfig(
+        destination='blog_embeddings'
+    ),
     loading=LoadingColumnConfig(column_name='content'),
     embedding=EmbeddingOpenaiConfig(
         model='text-embedding-3-small',
@@ -237,14 +240,18 @@ from pgai.vectorizer.configuration import (
     EmbeddingOpenaiConfig,
     ChunkingCharacterTextSplitterConfig,
     FormattingPythonTemplateConfig,
-    LoadingColumnConfig
+    LoadingColumnConfig,
+    DestinationTableConfig
 )


 def upgrade() -> None:
     op.create_vectorizer(
         source="blog",
-        target_table='blog_embeddings',
+        name="blog_content_embedder", # Optional custom name for easier reference
+        destination=DestinationTableConfig(
+            destination='blog_embeddings'
+        ),
         loading=LoadingColumnConfig(column_name='content'),
         embedding=EmbeddingOpenaiConfig(
             model='text-embedding-3-small',
@@ -261,7 +268,7 @@ def upgrade() -> None:


 def downgrade() -> None:
-    op.drop_vectorizer(target_table="blog_embeddings", drop_all=True)
+    op.drop_vectorizer(name="blog_content_embedder", drop_all=True)
 ```

 The `create_vectorizer` operation supports all configuration options available in the [SQL API](/docs/vectorizer/api-reference.md).
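
For readers comparing the Python and SQL interfaces, the `CreateVectorizer` configuration above corresponds roughly to a SQL call of this shape. This is a sketch assembled from the SQL examples elsewhere in this commit; the 768-dimension value and the omission of the chunking and formatting arguments are assumptions:

```sql
SELECT ai.create_vectorizer(
    'blog'::regclass,
    name => 'blog_content_embedder',
    loading => ai.loading_column('content'),
    destination => ai.destination_table('blog_embeddings'),
    embedding => ai.embedding_openai('text-embedding-3-small', 768)
);
```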

docs/vectorizer/quick-start-openai.md

+1 -1

@@ -92,7 +92,7 @@ To create and run a vectorizer, then query the auto-generated embeddings created
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-small', 768)
 );
 ```

docs/vectorizer/quick-start-voyage.md

+1 -1

@@ -88,7 +88,7 @@ Now you can create and run a vectorizer. A vectorizer is a pgai concept, it proc
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_voyageai(
         'voyage-3-lite',
         512

docs/vectorizer/quick-start.md

+1 -1

@@ -90,7 +90,7 @@ Now we can create and run a vectorizer. A vectorizer is a pgai concept, it proce
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768)
 );
 ```

examples/embeddings_from_documents/documents/pgai.md

+1 -1

@@ -120,7 +120,7 @@ Please note that using Ollama requires a large (>4GB) download of the docker ima
 ```sql
 SELECT ai.create_vectorizer(
     'wiki'::regclass,
-    destination => 'wiki_embeddings',
+    destination => ai.destination_table('wiki_embeddings'),
     embedding => ai.embedding_ollama('all-minilm', 384),
     chunking => ai.chunking_recursive_character_text_splitter('text')
 );

examples/evaluations/litellm_vectorizer/README.md

+3 -3

@@ -54,7 +54,7 @@ The evaluation generates diverse question types (short, long, direct, implied, a

 SELECT ai.create_vectorizer(
     'paul_graham_essays'::regclass,
-    destination => 'essays_cohere_embeddings',
+    destination => ai.destination_table('essays_cohere_embeddings'),
     embedding => ai.embedding_litellm(
         'cohere/embed-english-v3.0',
         1024,
@@ -65,7 +65,7 @@ The evaluation generates diverse question types (short, long, direct, implied, a

 SELECT ai.create_vectorizer(
     'paul_graham_essays'::regclass,
-    destination => 'essays_mistral_embeddings',
+    destination => ai.destination_table('essays_mistral_embeddings'),
     embedding => ai.embedding_litellm(
         'mistral/mistral-embed',
         1024,
@@ -76,7 +76,7 @@ The evaluation generates diverse question types (short, long, direct, implied, a

 SELECT ai.create_vectorizer(
     'paul_graham_essays'::regclass,
-    destination => 'essays_openai_small_embeddings',
+    destination => ai.destination_table('essays_openai_small_embeddings'),
     embedding => ai.embedding_openai(
         'text-embedding-3-small',
         1024,

examples/evaluations/ollama_vectorizer/README.md

+4 -4

@@ -61,7 +61,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_nomic_embeddings',
+    destination => ai.destination_table('essays_nomic_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -70,7 +70,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_openai_small_embeddings',
+    destination => ai.destination_table('essays_openai_small_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-small', 768),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -79,7 +79,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_bge_large_embeddings',
+    destination => ai.destination_table('essays_bge_large_embeddings'),
     embedding => ai.embedding_ollama('bge-large', 1024),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -88,7 +88,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_openai_large_embeddings',
+    destination => ai.destination_table('essays_openai_large_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-large', 1536),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );

examples/evaluations/voyage_vectorizer/README.md

+2 -2

@@ -69,7 +69,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'sec_filings'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'sec_filings_openai_embeddings',
+    destination => ai.destination_table('sec_filings_openai_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-small', 768),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -78,7 +78,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'sec_filings'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'sec_filings_voyage_embeddings',
+    destination => ai.destination_table('sec_filings_voyage_embeddings'),
     embedding => ai.embedding_voyageai('voyage-finance-2', 1024),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
