From fe2a29f1cad62ae25b337accbc7f588e68bc1a21 Mon Sep 17 00:00:00 2001
From: kay-kim <kay.kim@materialize.com>
Date: Mon, 12 May 2025 19:58:06 -0400
Subject: [PATCH 1/3] docs: create table update syntax (part 1)

---
 doc/user/assets/sass/_content.scss            |  11 +-
 doc/user/content/sql/create-table.md          | 204 +++++++++++++++---
 .../transform-data/patterns/partition-by.md   |   2 +-
 ...eate_table_options_source_populated_db.yml |  45 ++++
 .../create_table_options_user_populated.yml   |  42 ++++
 doc/user/layouts/partials/head.html           |  67 +++---
 doc/user/sql-grammar/sql-grammar.bnf          |   4 -
 7 files changed, 310 insertions(+), 65 deletions(-)
 create mode 100644 doc/user/data/syntax_options/create_table_options_source_populated_db.yml
 create mode 100644 doc/user/data/syntax_options/create_table_options_user_populated.yml

diff --git a/doc/user/assets/sass/_content.scss b/doc/user/assets/sass/_content.scss
index c7178c470a378..fbe6d28a7be78 100644
--- a/doc/user/assets/sass/_content.scss
+++ b/doc/user/assets/sass/_content.scss
@@ -749,9 +749,7 @@ p+p {
         padding: 0;
         border-bottom: 1px solid #9c86e0;
         display: flex;
-        overflow-x: scroll;
-
-        padding-bottom: var(--xx-small);
+        overflow-x: auto;
 
         @media(max-width: 850px) {}
 
@@ -760,13 +758,15 @@ p+p {
             margin: 0 rem(0.1);
             padding: 0;
             position: relative;
-            bottom: -1px;
+
             background: var(--gray-lightest);
+            border-radius: 8px 8px 0 0;
+
 
             a {
                 color: var(--body);
                 display: block;
-                padding: rem(0.8) rem(1.6);
+                padding: rem(0.8) rem(1.5);
                 font-size: rem(1.4);
                 text-decoration: none;
                 font-weight: 500;
@@ -787,7 +787,6 @@ p+p {
 
             &.active {
                 background: var(--bg);
-                border-radius: 2px 2px 0 0;
                 border: 1px solid #9c86e0;
                 border-bottom-color: var(--bg);
 
diff --git a/doc/user/content/sql/create-table.md b/doc/user/content/sql/create-table.md
index d8cb31ff536ed..9a22cdb576eb4 100644
--- a/doc/user/content/sql/create-table.md
+++ b/doc/user/content/sql/create-table.md
@@ -9,50 +9,193 @@ menu:
     parent: 'commands'
 ---
 
-`CREATE TABLE` defines a table that is persisted in durable storage and can be
-written to, updated and seamlessly joined with other tables, views or sources.
+`CREATE TABLE` defines a table that is persisted in durable storage. In
+Materialize, you can create:
 
-Tables in Materialize are similar to tables in standard relational databases:
-they consist of rows and columns where the columns are fixed when the table is
-created but rows can be added to at will via [`INSERT`](../insert) statements.
+- User-populated tables. User-populated tables can be written to (i.e.,
+  [`INSERT`]/[`UPDATE`]/[`DELETE`]) by the user.
 
-{{< warning >}}
-At the moment, tables have many [known limitations](#known-limitations). In most
-situations, you should use [sources](/sql/create-source) instead.
-{{< /warning >}}
+- [Source-populated](/concepts/sources/) tables. Source-populated tables cannot
+  be written to by the user; they are populated through data ingestion from a
+  source.
 
-[//]: # "TODO(morsapaes) Bring back When to use a table? once there's more
-clarity around best practices."
+Tables can be joined with other tables, materialized views, and views. Tables in
+Materialize are similar to tables in standard relational databases: they consist
+of rows and columns where the columns are fixed when the table is created.
 
 ## Syntax
 
-{{< diagram "create-table.svg" >}}
+{{< tabs >}}
 
-### `col_option`
+{{< tab "User-populated tables" >}}
 
-{{< diagram "col-option.svg" >}}
+To create a table that users can write to (i.e., perform
+[`INSERT`](/sql/insert/)/[`UPDATE`](/sql/update/)/[`DELETE`](/sql/delete/)
+operations):
 
-Field | Use
-------|-----
-**TEMP** / **TEMPORARY** | Mark the table as [temporary](#temporary-tables).
-_table&lowbar;name_ | A name for the table.
-_col&lowbar;name_ | The name of the column to be created in the table.
-_col&lowbar;type_ | The data type of the column indicated by _col&lowbar;name_.
-**NOT NULL** | Do not allow the column to contain _NULL_ values. Columns without this constraint can contain _NULL_ values.
-*default_expr* | A default value to use for the column in an [`INSERT`](/sql/insert) statement if an explicit value is not provided. If not specified, `NULL` is assumed.
+```mzsql
+CREATE [TEMP|TEMPORARY] TABLE <table_name> (
+  <column_name> <column_type> [NOT NULL][DEFAULT <default_expr>]
+  [, ...]
+)
+[WITH (
+  PARTITION BY (<column_name> [, ...]) |
+  RETAIN HISTORY [=] FOR <duration>
+)]
+;
+```
+
+{{% yaml-table data="syntax_options/create_table_options_user_populated" %}}
+
+{{</ tab >}}
+
+{{< tab "Source-populated tables (DB source)" >}}
+
+To create a table from a [source](/sql/create-source/), where the source maps to
+an external database system:
 
-### `with_options`
+{{< note >}}
 
-{{< diagram "with-options.svg" >}}
+Users cannot write to source-populated tables; i.e., users cannot perform
+[`INSERT`](/sql/insert/)/[`UPDATE`](/sql/update/)/[`DELETE`](/sql/delete/)
+operations on source-populated tables.
 
-| Field                                    | Value               | Description                                                                                                                                                       |
-|------------------------------------------|---------------------| ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **PARTITION BY** _columns_               | `(ident [, ident]*)` | The key by which Materialize should internally partition this durable collection. See the [partitioning guide](/transform-data/patterns/partition-by/) for restrictions on valid values and other details.
-| **RETAIN HISTORY FOR** _retention_period_ | `interval`          | ***Private preview.** This option has known performance or stability issues and is under active development.* Duration for which Materialize retains historical data, which is useful to implement [durable subscriptions](/transform-data/patterns/durable-subscriptions/#history-retention-period). Accepts positive [interval](/sql/types/interval/) values (e.g. `'1hr'`). Default: `1s`.
+{{</ note >}}
 
+```mzsql
+CREATE TABLE <table_name> FROM SOURCE <source_name> (REFERENCE <ref_object>)
+[WITH (
+    TEXT COLUMNS (<fq_column_name> [, ...])
+  | EXCLUDE COLUMNS (<fq_column_name> [, ...])
+  | PARTITION BY (<column_name> [, ...])
+  [, ...]
+)]
+;
+```
+
+{{% yaml-table data="syntax_options/create_table_options_source_populated_db" %}}
+
+<a name="supported-db-source-types" ></a>
+
+{{< tabs >}}
+{{< tab "Supported MySQL types">}}
+
+Materialize natively supports the following MySQL types:
+
+<ul style="column-count: 3">
+<li><code>bigint</code></li>
+<li><code>binary</code></li>
+<li><code>bit</code></li>
+<li><code>blob</code></li>
+<li><code>boolean</code></li>
+<li><code>char</code></li>
+<li><code>date</code></li>
+<li><code>datetime</code></li>
+<li><code>decimal</code></li>
+<li><code>double</code></li>
+<li><code>float</code></li>
+<li><code>int</code></li>
+<li><code>json</code></li>
+<li><code>longblob</code></li>
+<li><code>longtext</code></li>
+<li><code>mediumblob</code></li>
+<li><code>mediumint</code></li>
+<li><code>mediumtext</code></li>
+<li><code>numeric</code></li>
+<li><code>real</code></li>
+<li><code>smallint</code></li>
+<li><code>text</code></li>
+<li><code>time</code></li>
+<li><code>timestamp</code></li>
+<li><code>tinyblob</code></li>
+<li><code>tinyint</code></li>
+<li><code>tinytext</code></li>
+<li><code>varbinary</code></li>
+<li><code>varchar</code></li>
+</ul>
+
+Replicating tables that contain **unsupported data types** is
+possible via the [`TEXT COLUMNS` option](#text-columns) for the
+following types:
+
+<ul style="column-count: 1">
+<li><code>enum</code></li>
+<li><code>year</code></li>
+</ul>
+
+The specified columns will be treated as `text`, and will thus not offer the
+expected MySQL type features. For any unsupported data types not listed above,
+use the [`EXCLUDE COLUMNS`](#exclude-columns) option.
+
+{{</ tab >}}
+
+{{< tab "Supported PostgreSQL types">}}
+Materialize natively supports the following PostgreSQL types (including the
+array type for each of the types):
+
+<ul style="column-count: 3">
+<li><code>bool</code></li>
+<li><code>bpchar</code></li>
+<li><code>bytea</code></li>
+<li><code>char</code></li>
+<li><code>date</code></li>
+<li><code>daterange</code></li>
+<li><code>float4</code></li>
+<li><code>float8</code></li>
+<li><code>int2</code></li>
+<li><code>int2vector</code></li>
+<li><code>int4</code></li>
+<li><code>int4range</code></li>
+<li><code>int8</code></li>
+<li><code>int8range</code></li>
+<li><code>interval</code></li>
+<li><code>json</code></li>
+<li><code>jsonb</code></li>
+<li><code>numeric</code></li>
+<li><code>numrange</code></li>
+<li><code>oid</code></li>
+<li><code>text</code></li>
+<li><code>time</code></li>
+<li><code>timestamp</code></li>
+<li><code>timestamptz</code></li>
+<li><code>tsrange</code></li>
+<li><code>tstzrange</code></li>
+<li><code>uuid</code></li>
+<li><code>varchar</code></li>
+</ul>
+
+Replicating tables that contain **unsupported data types** is possible via the
+[`TEXT COLUMNS` option](#text-columns). When decoded as `text`, the specified
+columns will not have the expected PostgreSQL type features. For example:
+
+* [`enum`]: When decoded as `text`, the resulting `text` values will
+  not observe the implicit ordering of the original PostgreSQL `enum`; instead,
+  Materialize will sort the values as `text`.
+
+* [`money`]: When decoded as `text`, the resulting `text` value
+  cannot be cast back to `numeric` since PostgreSQL adds typical currency
+  formatting to the output.
+
+[`enum`]: https://www.postgresql.org/docs/current/datatype-enum.html
+[`money`]: https://www.postgresql.org/docs/current/datatype-money.html
+
+{{</ tab >}}
+{{</ tabs >}}
+
+See also [Materialize SQL data types](/sql/types/).
+
+{{</ tab >}}
+
+
+{{</ tabs >}}
 
 ## Details
 
+### Table names and column names
+
+Names for tables and column(s) must follow the [naming
+guidelines](/sql/identifiers/#naming-restrictions).
+
 ### Known limitations
 
 Tables do not currently support:
@@ -109,4 +252,9 @@ The privileges required to execute this statement are:
 ## Related pages
 
 - [`INSERT`](../insert)
+- [`CREATE SOURCE`](/sql/create-source/)
 - [`DROP TABLE`](../drop-table)
+
+[`INSERT`]: /sql/insert/
+[`UPDATE`]: /sql/update/
+[`DELETE`]: /sql/delete/
diff --git a/doc/user/content/transform-data/patterns/partition-by.md b/doc/user/content/transform-data/patterns/partition-by.md
index ddca7aa212e64..db5ff9569836c 100644
--- a/doc/user/content/transform-data/patterns/partition-by.md
+++ b/doc/user/content/transform-data/patterns/partition-by.md
@@ -26,7 +26,7 @@ If you want to return results in a specific order, use an `ORDER BY` clause on y
 
 ## Syntax
 
-The option `PARTITION BY <column list>` declares that a [materialized view](/sql/create-materialized-view/#with_options) or [table](/sql/create-table/#with_options) should be partitioned by the listed columns.
+The option `PARTITION BY <column list>` declares that a [materialized view](/sql/create-materialized-view/#with_options) or [table](/sql/create-table/#partition-by) should be partitioned by the listed columns.
 For example, a table that stores an append-only collection of events may want to partition the data by time:
 
 ```mzsql
diff --git a/doc/user/data/syntax_options/create_table_options_source_populated_db.yml b/doc/user/data/syntax_options/create_table_options_source_populated_db.yml
new file mode 100644
index 0000000000000..ea907574d8aea
--- /dev/null
+++ b/doc/user/data/syntax_options/create_table_options_source_populated_db.yml
@@ -0,0 +1,45 @@
+columns:
+  - column: "Parameter"
+  - column: "Description"
+rows:
+  - "Parameter": "`<table_name>`"
+    "Description": |
+
+      The name of the table to create. Names for tables must follow the [naming
+      guidelines](/sql/identifiers/#naming-restrictions).
+
+  - "Parameter": "`<source_name>`"
+    "Description": |
+
+      The name of the [source](/sql/create-source/) associated with the
+      reference object from which to create the table.
+
+  - "Parameter": "**(REFERENCE <ref_object>)**"
+    "Description": |
+
+      The name of the reference object from which to create the table. Reference
+      objects are the names of the tables in the upstream database. You can
+      create multiple tables from the same reference object.
+
+      To find the reference objects available in your
+      [source](/sql/create-source/), you can use the following query,
+      substituting your source name for `<source_name>`:
+
+      <br>
+
+      ```mzsql
+      SELECT refs.*
+      FROM mz_internal.mz_source_references refs, mz_sources s
+      WHERE s.name = '<source_name>' -- substitute with your source name
+      AND refs.source_id = s.id;
+      ```
+
+  - "Parameter": "**WITH (<with_option>[,...])**"
+    "Description": |
+      The following `<with_option>`s are supported:
+
+      | Option | Description |
+      |--------|-------------|
+      | <a name="text-columns"></a>`TEXT COLUMNS (<fq_column_name> [, ...])` | *Optional.* If specified, decode data as `text` for the listed column(s), such as for unsupported data types. Use fully qualified column names. See also [supported types](#supported-db-source-types). |
+      | <a name="exclude-columns"></a>`EXCLUDE COLUMNS (<fq_column_name> [, ...])` | *Optional.* If specified, exclude the listed column(s) from the table. Use fully qualified column names. |
+      | <a name="partition-by"></a>`PARTITION BY (<column> [, ...])` | *Optional.* The key by which Materialize should internally partition the table. See the [partitioning guide](/transform-data/patterns/partition-by/) for restrictions on valid values and other details. |
diff --git a/doc/user/data/syntax_options/create_table_options_user_populated.yml b/doc/user/data/syntax_options/create_table_options_user_populated.yml
new file mode 100644
index 0000000000000..08dd7dbef4b93
--- /dev/null
+++ b/doc/user/data/syntax_options/create_table_options_user_populated.yml
@@ -0,0 +1,42 @@
+columns:
+  - column: "Parameter"
+  - column: "Description"
+rows:
+  - "Parameter": "**TEMP** / **TEMPORARY**"
+    "Description": |
+      *Optional.* If specified, mark the table as [temporary](#temporary-tables). Temporary
+      tables are automatically dropped at the end of the SQL session and are not
+      visible to other connections. See [temporary tables](#temporary-tables)
+      for more details.
+  - "Parameter": "`<table_name>`"
+    "Description": |
+
+      The name of the table to create. Names for tables must follow the [naming
+      guidelines](/sql/identifiers/#naming-restrictions).
+
+  - "Parameter": "`<column_name>`"
+    "Description": |
+
+      The name of a column to be created in the new table. Names for columns
+      must follow the [naming guidelines](/sql/identifiers/#naming-restrictions).
+
+  - "Parameter": "`<column_type>`"
+    "Description": |
+
+      The type of the column. For supported types, see [SQL data types](/sql/types/).
+
+  - "Parameter": "**NOT NULL**"
+    "Description": |
+      *Optional.* If specified, disallow  _NULL_ values for the column. Columns without this constraint can contain _NULL_ values.
+  - "Parameter": "**DEFAULT <default_expr>**"
+    "Description": |
+      *Optional.* If specified, use the `<default_expr>` as the default value for the column. If not specified, `NULL` is used as the default value.
+  - "Parameter": "**WITH (<with_option>[,...])**"
+    "Description": |
+
+      The following `<with_option>`s are supported:
+
+      | Option | Description |
+      |--------|-------------|
+      | `PARTITION BY (<column> [, ...])` | <a name="partition-by"></a> *Optional.* The key by which Materialize should internally partition the table. See the [partitioning guide](/transform-data/patterns/partition-by/) for restrictions on valid values and other details. |
+      | `RETAIN HISTORY [=] FOR <duration>` | *Optional.* ***Private preview.** This option has known performance or stability issues and is under active development.* <br>If specified, Materialize retains historical data for the specified duration, which is useful to implement [durable subscriptions](/transform-data/patterns/durable-subscriptions/#history-retention-period).<br>Accepts positive [interval](/sql/types/interval/) values (e.g., `'1hr'`).|
diff --git a/doc/user/layouts/partials/head.html b/doc/user/layouts/partials/head.html
index 1ee1d80d621f0..c733814934e8b 100644
--- a/doc/user/layouts/partials/head.html
+++ b/doc/user/layouts/partials/head.html
@@ -115,35 +115,50 @@
 
 {{/* Tabs */}}
 <script>
-  $(document).ready(function () {
-    // make nav-tab lists from tab-panes
-    $(".tab-content").each(function (idx, tab) {
-      $(tab)
-        .find(".tab-pane")
-        .each(function (item) {
-          var navTabs = $(this).closest(".code-tabs").find(".nav-tabs"),
-            title = $(this).attr("title"),
-            id = title
-              .toLowerCase()
-              .replace(/ /g, "-")
-              .replace(/[^\w-]+/g, "");
-          navTabs.append(`<li><a href="#${id}-t${idx}">${title}</a></li>`);
-        });
-    });
+document.addEventListener("DOMContentLoaded", function () {
+  document.querySelectorAll(".code-tabs").forEach(function (tabGroup, groupIndex) {
+    const navTabs = tabGroup.querySelector(".nav-tabs");
+    const tabContent = tabGroup.querySelector(".tab-content");
+    const tabPanes = Array.from(tabContent.children).filter(child =>
+      child.classList.contains("tab-pane")
+    );
+
+    // Create tab headers from panes
+    tabPanes.forEach(function (pane, tabIndex) {
+      const title = pane.getAttribute("title") || `Tab ${tabIndex + 1}`;
+      const id = `tab-${groupIndex}-${tabIndex}`;
+      pane.setAttribute("id", id);
 
-    // handle click events
-    $(".nav-tabs a").click(function (e) {
-      var tab = $(this).parent(),
-        tabIndex = tab.index(),
-        tabPanel = $(this).closest(".code-tabs"),
-        tabPane = tabPanel.find(".tab-pane").eq(tabIndex);
-      tabPanel.find(".active").removeClass("active");
-      tab.addClass("active");
-      tabPane.addClass("active");
+      const tabItem = document.createElement("li");
+      const link = document.createElement("a");
+      link.setAttribute("href", `#${id}`);
+      link.textContent = title;
+      tabItem.appendChild(link);
+      navTabs.appendChild(tabItem);
     });
 
-    // activate first tab
-    $(".nav-tabs li:first-child a").click();
+    // Handle tab click events
+    navTabs.querySelectorAll("a").forEach(function (link, index) {
+      link.addEventListener("click", function (e) {
+        e.preventDefault();
+
+        // Deactivate all tabs and panes
+        navTabs.querySelectorAll("li").forEach(li => li.classList.remove("active"));
+        tabPanes.forEach(pane => pane.classList.remove("active"));
 
+        // Activate clicked tab and corresponding pane
+        link.parentElement.classList.add("active");
+        const targetPane = tabContent.querySelector(link.getAttribute("href"));
+        if (targetPane) {
+          targetPane.classList.add("active");
+        }
+      });
+    });
+
+    // Activate the first tab by default
+    const firstLink = navTabs.querySelector("a");
+    if (firstLink) firstLink.click();
   });
+});
+
 </script>
diff --git a/doc/user/sql-grammar/sql-grammar.bnf b/doc/user/sql-grammar/sql-grammar.bnf
index 3463f74bb99ed..fcddd9fa077b0 100644
--- a/doc/user/sql-grammar/sql-grammar.bnf
+++ b/doc/user/sql-grammar/sql-grammar.bnf
@@ -256,10 +256,6 @@ create_view ::=
   'CREATE' ('TEMP' | 'TEMPORARY')? 'VIEW' view_name ( '(' col_ident ( ',' col_ident )* ')' )? 'AS' select_stmt |
   'CREATE' ('TEMP' | 'TEMPORARY')? 'VIEW' 'IF NOT EXISTS' view_name ( '(' col_ident ( ',' col_ident )* ')' )? 'AS' select_stmt |
   'CREATE' 'OR REPLACE' 'VIEW' view_name ( '(' col_ident ( ',' col_ident )* ')' )? 'AS' select_stmt
-create_table ::=
-  'CREATE' ('TEMP' | 'TEMPORARY')? 'TABLE' table_name
-  '(' ((col_name col_type col_option*) (',' col_name col_type col_option*)*)? ')'
-  with_options?
 deallocate ::=
   'DEALLOCATE' ('PREPARE')?  (name | 'ALL')?
 declare ::=

From d26c6e73b301731a1bd76cc297db4b730d9c7afc Mon Sep 17 00:00:00 2001
From: kay-kim <kay.kim@materialize.com>
Date: Mon, 12 May 2025 22:52:31 -0400
Subject: [PATCH 2/3] docs: create table update syntax (part 2)

---
 doc/user/config.toml                          |  5 ++
 doc/user/content/sql/create-table.md          | 48 +++++++++-
 ...e_table_options_source_populated_kafka.yml | 87 +++++++++++++++++++
 .../kafka-format-envelope-compat-table.md     |  8 ++
 4 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml
 create mode 100644 doc/user/shared-content/kafka-format-envelope-compat-table.md

diff --git a/doc/user/config.toml b/doc/user/config.toml
index f5c7cea7a288a..09b045ad0935f 100644
--- a/doc/user/config.toml
+++ b/doc/user/config.toml
@@ -198,6 +198,11 @@ weight = 30
 # allow <a name="link-target">, the old syntax no longer works
 unsafe = true
 
+[markup]
+  [markup.highlight]
+    noClasses = false
+    style = "monokai"
+
 [[deployment.targets]]
 name = "production"
 url = "s3://materialize-website?region=us-east-1"
diff --git a/doc/user/content/sql/create-table.md b/doc/user/content/sql/create-table.md
index 9a22cdb576eb4..dbdc119756f39 100644
--- a/doc/user/content/sql/create-table.md
+++ b/doc/user/content/sql/create-table.md
@@ -1,6 +1,6 @@
 ---
 title: "CREATE TABLE"
-description: "`CREATE TABLE` creates a table that is persisted in durable storage."
+description: "Reference page for `CREATE TABLE`. `CREATE TABLE` creates a table that is persisted in durable storage."
 pagerank: 40
 menu:
   # This should also have a "non-content entry" under Reference, which is
@@ -186,6 +186,52 @@ See also [Materialize SQL data types](/sql/types/).
 
 {{</ tab >}}
 
+{{< tab "Source-populated tables (Kafka/Redpanda source)" >}}
+
+To create a table from a source, where the source maps to an external
+Kafka/Redpanda system:
+
+{{< note >}}
+
+Users cannot write to source-populated tables; i.e., users cannot perform
+[`INSERT`](/sql/insert/)/[`UPDATE`](/sql/update/)/[`DELETE`](/sql/delete/)
+operations on source-populated tables.
+
+{{</  note >}}
+
+```mzsql
+CREATE TABLE <table_name> FROM SOURCE <source_name> [(REFERENCE <ref_object>)]
+[FORMAT <format> | KEY FORMAT <format> VALUE FORMAT <format>
+   -- <format> can be:
+   -- AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION <conn_name>
+   --     [KEY STRATEGY
+   --       INLINE <schema> | ID <schema_registry_id> | LATEST ]
+   --     [VALUE STRATEGY
+   --       INLINE <schema> | ID <schema_registry_id> | LATEST ]
+  -- | PROTOBUF USING CONFLUENT SCHEMA REGISTRY CONNECTION <conn_name>
+  -- | PROTOBUF MESSAGE <msg_name> USING SCHEMA <encoded_schema>
+  -- | CSV WITH HEADER ( <col_name>[, ...]) [DELIMITED BY <char>]
+  -- | CSV WITH <num> COLUMNS DELIMITED BY <char>
+  -- | JSON | TEXT | BYTES
+]
+[INCLUDE
+    KEY [AS <name>] | PARTITION [AS <name>] | OFFSET [AS <name>]
+  | TIMESTAMP [AS <name>] | HEADERS [AS <name>] | HEADER <key_name> AS <name> [BYTES]
+  [, ...]
+]
+[ENVELOPE
+    NONE  --  Default.  Uses the append-only envelope.
+  | DEBEZIUM
+  | UPSERT [(VALUE DECODING ERRORS = INLINE [AS <name>])]
+]
+;
+```
+
+{{% yaml-table data="syntax_options/create_table_options_source_populated_kafka"
+%}}
+
+
+{{</ tab >}}
 
 {{</ tabs >}}
 
diff --git a/doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml b/doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml
new file mode 100644
index 0000000000000..ef49b823f0cc3
--- /dev/null
+++ b/doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml
@@ -0,0 +1,87 @@
+columns:
+  - column: "Parameter"
+  - column: "Description"
+rows:
+  - "Parameter": "`<table_name>`"
+    "Description": |
+
+      The name of the table to create. Names for tables must follow the [naming
+      guidelines](/sql/identifiers/#naming-restrictions).
+
+  - "Parameter": "`<source_name>`"
+    "Description": |
+
+      The name of the [source](/sql/create-source/kafka/) created for the Kafka topic.
+
+  - "Parameter": "**(REFERENCE <ref_object>)**"
+    "Description": |
+
+      *Optional.* If specified, the topic (which should match the topic
+      specified in the source) from which to create the table. You can create
+      multiple tables from the same reference object.
+
+      To find the reference objects available in your
+      [source](/sql/create-source/), you can use the following query,
+      substituting your source name for `<source_name>`:
+
+      <br>
+
+      ```mzsql
+      SELECT refs.*
+      FROM mz_internal.mz_source_references refs, mz_sources s
+      WHERE s.name = '<source_name>' -- substitute with your source name
+      AND refs.source_id = s.id;
+      ```
+
+  - "Parameter": |
+      **FORMAT \<format\> |
+      KEY FORMAT \<format\> VALUE FORMAT \<format\>**
+    "Description": |
+
+      *Optional.* If specified, use the specified format to decode the data. The following `<format>`s are supported:
+
+      | Format | Description |
+      |--------|-------------|
+      | `AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION <csr_connection> [KEY STRATEGY <strategy> VALUE STRATEGY <strategy>]` | Decode the data as Avro, specifying the [Confluent Schema Registry connection](/sql/create-connection/#confluent-schema-registry) to use. You can also specify the `KEY STRATEGY` and `VALUE STRATEGY` to use: <table> <thead> <tr> <th>Strategy</th> <th>Description</th> </tr> </thead> <tbody> <tr> <td><code>LATEST</code></td> <td>(Default) Use the latest writer schema from the schema registry as the reader schema.</td> </tr> <tr> <td><code>ID</code></td> <td>Use a specific schema from the registry.</td> </tr> <tr> <td><code>INLINE</code></td> <td>Use the inline schema.</td> </tr> </tbody> </table>|
+      | `PROTOBUF USING CONFLUENT SCHEMA REGISTRY CONNECTION <csr_connection>` | Decode the data as Protocol Buffers, specifying the [Confluent Schema Registry connection](/sql/create-connection/#confluent-schema-registry) to use. |
+      | `PROTOBUF MESSAGE <msg_name> USING SCHEMA <encoded_schema>` | Decode the data as Protocol Buffers, specifying the `<msg_name>` and the inline `<encoded_schema>` descriptor to use. |
+      | `JSON` | Decode the data as JSON. |
+      | `TEXT` | Decode the data as TEXT. |
+      | `BYTES` | Decode the data as BYTES. |
+      | `CSV WITH HEADER ( <col_name>[, ...]) [DELIMITED BY <char>]` | Parse the data as CSV with a header row. Materialize uses this header to infer both the number of columns and their names. The header is **not** ingested as data. The optional `DELIMITED BY <char>` clause specifies the delimiter character. <br><br>The data is decoded as [`text`](/sql/types/text). You can convert the data to other types using explicit [casts](/sql/functions/cast/) when creating views.|
+      | `CSV WITH <num> COLUMNS DELIMITED BY <char>` | Parse the data as CSV with a specified number of columns and a specified delimiter. The columns are named `column1`, `column2`...`columnN`. <br><br> The data is decoded as [`text`](/sql/types/text). You can convert the data to other types using explicit [casts](/sql/functions/cast/) when creating views.|
+
+      {{< include-md file="shared-content/kafka-format-envelope-compat-table.md"
+      >}}
+
+      For more information, see [Creating a source](/sql/create-source/kafka/#creating-a-source).
+
+  - "Parameter": |
+      **INCLUDE \<include_option\>**
+    "Description": |
+
+      *Optional.* If specified, include the additional information as column(s) in the table. The following `<include_option>`s are supported:
+
+      | Option | Description |
+      |--------|-------------|
+      | **KEY [AS \<name\>]** | Include a column containing the Kafka message key. If the key is encoded using a format that includes schemas, the column will take its name from the schema. For unnamed formats (e.g. `TEXT`), the column will be named `key`. The column can be renamed with the optional **AS** *name* clause.
+      | **PARTITION [AS \<name\>]** | Include a `partition` column containing the Kafka message partition. The column can be renamed with the optional **AS** *name* clause.
+      | **OFFSET [AS \<name\>]** | Include an `offset` column containing the Kafka message offset. The column can be renamed with the optional **AS** *name* clause.
+      | **TIMESTAMP [AS \<name\>]** | Include a `timestamp` column containing the Kafka message timestamp. The column can be renamed with the optional **AS** *name* clause. <br><br>Note that the timestamp of a Kafka message depends on how the topic and its producers are configured. See the [Confluent documentation](https://docs.confluent.io/3.0.0/streams/concepts.html?#time) for details.
+      | **HEADERS [AS \<name\>]** | Include a `headers` column containing the Kafka message headers as a list of records of type `(key text, value bytea)`. The column can be renamed with the optional **AS** *name* clause.
+      | **HEADER \<key_name\> AS \<name\> [BYTES]** | Include a *name* column containing the Kafka message header *key_name* parsed as a UTF-8 string. To expose the header value as `bytea`, use the `BYTES` option.
+
+  - "Parameter": |
+      **ENVELOPE \<envelope\>**
+    "Description": |
+
+      *Optional.* If specified, use the specified envelope. The following `<envelope>`s are supported:
+
+      | Envelope | Description |
+      |----------|-------------|
+      | **ENVELOPE NONE** | *Default*. Use an append-only envelope. This means that records will only be appended and cannot be updated or deleted.
+      | **ENVELOPE DEBEZIUM** | Use the Debezium envelope, which uses a diff  envelope to handle CRUD operations. This envelope can lead to **high memory utilization** in the cluster maintaining the source. Materialize can automatically offload processing to disk as needed. See [spilling to disk](/sql/create-source/kafka/#spilling-to-disk) for details. For more information, see [Using Debezium](/sql/create-source/kafka/#using-debezium).
+      | **ENVELOPE UPSERT** [**(VALUE DECODING ERRORS = INLINE)**] | Use the upsert envelope, which uses message keys to handle CRUD operations. To handle value decoding errors, use the `(VALUE DECODING ERRORS = INLINE)` option. For more information, see [Handling upserts](/sql/create-source/kafka/#handling-upserts) and [Value decoding errors](/sql/create-source/kafka/#value-decoding-errors).
+
+      {{< include-md file="shared-content/kafka-format-envelope-compat-table.md" >}}
+
diff --git a/doc/user/shared-content/kafka-format-envelope-compat-table.md b/doc/user/shared-content/kafka-format-envelope-compat-table.md
new file mode 100644
index 0000000000000..0fa76a1794344
--- /dev/null
+++ b/doc/user/shared-content/kafka-format-envelope-compat-table.md
@@ -0,0 +1,8 @@
+The following table specifies the format and envelope compatibility:
+
+| Format | Append-only envelope | Upsert envelope | Debezium envelope |
+|--------|:--------------------:|:---------------:|:-----------------:|
+| Avro              | ✓         | ✓               | ✓                 |
+| Protobuf          | ✓         | ✓               |                   |
+| JSON/Text/Bytes   | ✓         | ✓               |                   |
+| CSV               | ✓         |                 |

From d7e3def2b2a1bc0357cd247adc43089e7f3d8bc3 Mon Sep 17 00:00:00 2001
From: kay-kim <kay.kim@materialize.com>
Date: Wed, 14 May 2025 17:41:18 -0400
Subject: [PATCH 3/3] fix extra whitespace

---
 .../create_table_options_source_populated_kafka.yml              | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml b/doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml
index ef49b823f0cc3..779a6037ea8c6 100644
--- a/doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml
+++ b/doc/user/data/syntax_options/create_table_options_source_populated_kafka.yml
@@ -84,4 +84,3 @@ rows:
       | **ENVELOPE UPSERT** [**(VALUE DECODING ERRORS = INLINE)**] | Use the upsert envelope, which uses message keys to handle CRUD operations. To handle value decoding errors, use the `(VALUE DECODING ERRORS = INLINE)` option. For more information, see [Handling upserts](/sql/create-source/kafka/#handling-upserts) and [Value decoding errors](/sql/create-source/kafka/#value-decoding-errors).
 
       {{< include-md file="shared-content/kafka-format-envelope-compat-table.md" >}}
-