
Commit 2930189

docs: update docs for name and destination changes
1 parent d8bb4f3 commit 2930189

File tree

11 files changed: +281 -66 lines


docs/vectorizer-quick-start.md

+2 -2

@@ -90,8 +90,8 @@ Now we can create and run a vectorizer. A vectorizer is a pgai concept, it proce
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
-    embedding => ai.embedding_ollama('nomic-embed-text', 768),
+    destination => ai.destination_table('blog_contents_embeddings'),
+    embedding => ai.embedding_ollama('nomic-embed-text', 768)
 );
 ```

docs/vectorizer/api-reference.md

+199 -42
Large diffs are not rendered by default.

docs/vectorizer/overview.md

+55 -4

@@ -117,8 +117,9 @@ query like this:
 ```sql
 SELECT ai.create_vectorizer(
     'blog'::regclass,
+    name => 'blog_embeddings', -- Optional custom name for easier reference
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768)
 );
 ```
@@ -150,7 +151,7 @@ into each chunk:
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     formatting => ai.formatting_python_template('$title: $chunk')
 );
@@ -284,7 +285,7 @@ accordingly:
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     formatting => ai.formatting_python_template('$title - by $author - $chunk')
 );
@@ -304,7 +305,7 @@ example uses a HNSW index:
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     formatting => ai.formatting_python_template('$title - by $author - $chunk'),
     indexing => ai.indexing_hnsw(min_rows => 100000, opclass => 'vector_l2_ops')
@@ -344,6 +345,56 @@ CREATE TABLE blog_contents_embeddings_store(
 );
 ```

+## Destination Options for Embeddings
+
+The vectorizer supports two ways to store your embeddings:
+
+### 1. Table Destination (Default)
+
+The default approach creates a separate table to store the embeddings, plus a view that joins them with the source table:
+
+```sql
+SELECT ai.create_vectorizer(
+    'blog'::regclass,
+    name => 'blog_vectorizer', -- Optional custom name for easier reference
+    loading => ai.loading_column('contents'),
+    destination => ai.destination_table(
+        target_schema => 'public',
+        target_table => 'blog_embeddings_store',
+        view_name => 'blog_embeddings'
+    ),
+    embedding => ai.embedding_ollama('nomic-embed-text', 768)
+);
+```
+
+**When to use a table destination:**
+- You need multiple embeddings per row (chunking)
+- You have large text fields that need to be split
+- You are vectorizing documents (which typically require chunking)
+
+### 2. Column Destination
+
+For simpler cases, you can add an embedding column directly to the source table:
+
+```sql
+SELECT ai.create_vectorizer(
+    'product_descriptions'::regclass,
+    name => 'product_descriptions_vectorizer',
+    loading => ai.loading_column('description'),
+    destination => ai.destination_column('description_embedding'),
+    embedding => ai.embedding_openai('text-embedding-3-small', 768),
+    chunking => ai.chunking_none() -- Required for column destination
+);
+```
+
+**When to use a column destination:**
+- You need exactly one embedding per row
+- The text is short and doesn't require chunking
+- Your application already chunks the text before inserting it into the database
+- You want to avoid creating additional database objects
+
+**Note:** A column destination requires chunking to be set to `ai.chunking_none()`, since it can only store one embedding per row.
+
 ## Monitor a vectorizer

 Since embeddings are created asynchronously, a delay may occur before they
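
For context on the two destination options documented above, here is a minimal query sketch. It assumes the objects created by those examples (a `blog_embeddings` view exposing `chunk` and `embedding` columns alongside the source `blog` columns, and a `description_embedding` column on `product_descriptions`), pgvector's `<=>` distance operator, and pgai's `ai.ollama_embed` helper; column names such as `title` and `id` are illustrative:

```sql
-- Table destination: query the view generated by the vectorizer, which joins
-- each chunk's embedding with the columns of its source row.
SELECT title, chunk
FROM blog_embeddings
ORDER BY embedding <=> ai.ollama_embed('nomic-embed-text', 'postgres performance tips')
LIMIT 5;

-- Column destination: the embedding lives directly on the source table, so you
-- order by the new column ($1 is a query vector supplied by the application).
SELECT id, description
FROM product_descriptions
ORDER BY description_embedding <=> $1
LIMIT 5;
```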

docs/vectorizer/python-integration.md

+12 -5

@@ -13,11 +13,14 @@ Then you can create a vectorizer from python:

 ```python
 from pgai.vectorizer import CreateVectorizer
-from pgai.vectorizer.configuration import EmbeddingOpenaiConfig, ChunkingCharacterTextSplitterConfig, FormattingPythonTemplateConfig, LoadingColumnConfig
+from pgai.vectorizer.configuration import EmbeddingOpenaiConfig, ChunkingCharacterTextSplitterConfig, FormattingPythonTemplateConfig, LoadingColumnConfig, DestinationTableConfig

 vectorizer_statement = CreateVectorizer(
     source="blog",
-    target_table='blog_embeddings',
+    name="blog_content_embedder", # Optional custom name for easier reference
+    destination=DestinationTableConfig(
+        destination='blog_embeddings'
+    ),
     loading=LoadingColumnConfig(column_name='content'),
     embedding=EmbeddingOpenaiConfig(
         model='text-embedding-3-small',
@@ -237,14 +240,18 @@ from pgai.vectorizer.configuration import (
     EmbeddingOpenaiConfig,
     ChunkingCharacterTextSplitterConfig,
     FormattingPythonTemplateConfig,
-    LoadingColumnConfig
+    LoadingColumnConfig,
+    DestinationTableConfig
 )


 def upgrade() -> None:
     op.create_vectorizer(
         source="blog",
-        target_table='blog_embeddings',
+        name="blog_content_embedder", # Optional custom name for easier reference
+        destination=DestinationTableConfig(
+            destination='blog_embeddings'
+        ),
         loading=LoadingColumnConfig(column_name='content'),
         embedding=EmbeddingOpenaiConfig(
             model='text-embedding-3-small',
@@ -261,7 +268,7 @@ def upgrade() -> None:


 def downgrade() -> None:
-    op.drop_vectorizer(target_table="blog_embeddings", drop_all=True)
+    op.drop_vectorizer(name="blog_content_embedder", drop_all=True)
 ```

 The `create_vectorizer` operation supports all configuration options available in the [SQL API](/docs/vectorizer/api-reference.md).
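
For readers comparing the Python and SQL interfaces, the `CreateVectorizer` configuration above corresponds roughly to a SQL call of this shape. This is a sketch assembled from the SQL examples elsewhere in this commit; the 768-dimension value and the omission of the chunking and formatting arguments are assumptions:

```sql
SELECT ai.create_vectorizer(
    'blog'::regclass,
    name => 'blog_content_embedder',
    loading => ai.loading_column('content'),
    destination => ai.destination_table('blog_embeddings'),
    embedding => ai.embedding_openai('text-embedding-3-small', 768)
);
```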

docs/vectorizer/quick-start-openai.md

+1 -1

@@ -92,7 +92,7 @@ To create and run a vectorizer, then query the auto-generated embeddings created
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-small', 768)
 );
 ```

docs/vectorizer/quick-start-voyage.md

+1 -1

@@ -88,7 +88,7 @@ Now you can create and run a vectorizer. A vectorizer is a pgai concept, it proc
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_voyageai(
         'voyage-3-lite',
         512

docs/vectorizer/quick-start.md

+1 -1

@@ -90,7 +90,7 @@ Now we can create and run a vectorizer. A vectorizer is a pgai concept, it proce
 SELECT ai.create_vectorizer(
     'blog'::regclass,
     loading => ai.loading_column('contents'),
-    destination => 'blog_contents_embeddings',
+    destination => ai.destination_table('blog_contents_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768)
 );
 ```

examples/embeddings_from_documents/documents/pgai.md

+1 -1

@@ -120,7 +120,7 @@ Please note that using Ollama requires a large (>4GB) download of the docker ima
 ```sql
 SELECT ai.create_vectorizer(
     'wiki'::regclass,
-    destination => 'wiki_embeddings',
+    destination => ai.destination_table('wiki_embeddings'),
     embedding => ai.embedding_ollama('all-minilm', 384),
     chunking => ai.chunking_recursive_character_text_splitter('text')
 );

examples/evaluations/litellm_vectorizer/README.md

+3 -3

@@ -54,7 +54,7 @@ The evaluation generates diverse question types (short, long, direct, implied, a

 SELECT ai.create_vectorizer(
     'paul_graham_essays'::regclass,
-    destination => 'essays_cohere_embeddings',
+    destination => ai.destination_table('essays_cohere_embeddings'),
     embedding => ai.embedding_litellm(
         'cohere/embed-english-v3.0',
         1024,
@@ -65,7 +65,7 @@ The evaluation generates diverse question types (short, long, direct, implied, a

 SELECT ai.create_vectorizer(
     'paul_graham_essays'::regclass,
-    destination => 'essays_mistral_embeddings',
+    destination => ai.destination_table('essays_mistral_embeddings'),
     embedding => ai.embedding_litellm(
         'mistral/mistral-embed',
         1024,
@@ -76,7 +76,7 @@ The evaluation generates diverse question types (short, long, direct, implied, a

 SELECT ai.create_vectorizer(
     'paul_graham_essays'::regclass,
-    destination => 'essays_openai_small_embeddings',
+    destination => ai.destination_table('essays_openai_small_embeddings'),
     embedding => ai.embedding_openai(
         'text-embedding-3-small',
         1024,

examples/evaluations/ollama_vectorizer/README.md

+4 -4

@@ -61,7 +61,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_nomic_embeddings',
+    destination => ai.destination_table('essays_nomic_embeddings'),
     embedding => ai.embedding_ollama('nomic-embed-text', 768),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -70,7 +70,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_openai_small_embeddings',
+    destination => ai.destination_table('essays_openai_small_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-small', 768),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -79,7 +79,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_bge_large_embeddings',
+    destination => ai.destination_table('essays_bge_large_embeddings'),
     embedding => ai.embedding_ollama('bge-large', 1024),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -88,7 +88,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'pg_essays'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'essays_openai_large_embeddings',
+    destination => ai.destination_table('essays_openai_large_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-large', 1536),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );

examples/evaluations/voyage_vectorizer/README.md

+2 -2

@@ -69,7 +69,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'sec_filings'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'sec_filings_openai_embeddings',
+    destination => ai.destination_table('sec_filings_openai_embeddings'),
     embedding => ai.embedding_openai('text-embedding-3-small', 768),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
@@ -78,7 +78,7 @@ Dataset Setup:
 SELECT ai.create_vectorizer(
     'sec_filings'::regclass,
     loading => ai.loading_column('text'),
-    destination => 'sec_filings_voyage_embeddings',
+    destination => ai.destination_table('sec_filings_voyage_embeddings'),
     embedding => ai.embedding_voyageai('voyage-finance-2', 1024),
     chunking => ai.chunking_recursive_character_text_splitter(512, 50)
 );
