Skip to content

Commit 7e70c30

Browse files
docs: document table branches
1 parent 1afe3ba commit 7e70c30

3 files changed

Lines changed: 104 additions & 0 deletions

File tree

docs/docs.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
"tables/schema",
7676
"tables/update",
7777
"tables/versioning",
78+
"tables/branches",
7879
"tables/consistency"
7980
]
8081
},

docs/snippets/tables.mdx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ export const PyAlterVectorColumn = "vector_dim = 768 # Your embedding dimension
2828

2929
export const PyBatchDataInsertion = "import pyarrow as pa\n\ndef make_batches():\n for i in range(5): # Create 5 batches\n yield pa.RecordBatch.from_arrays(\n [\n pa.array([[3.1, 4.1], [5.9, 26.5]], pa.list_(pa.float32(), 2)),\n pa.array([f\"item{i * 2 + 1}\", f\"item{i * 2 + 2}\"]),\n pa.array([float((i * 2 + 1) * 10), float((i * 2 + 2) * 10)]),\n ],\n [\"vector\", \"item\", \"price\"],\n )\n\nschema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n pa.field(\"item\", pa.utf8()),\n pa.field(\"price\", pa.float32()),\n ]\n)\n# Create table with batches\ntable_name = \"batch_ingestion_example\"\ntable = db.create_table(table_name, make_batches(), schema=schema, mode=\"overwrite\")\n";
3030

31+
export const PyBranches = [
  // Python walkthrough shown on the "Table branches" page: fork a branch,
  // write to it, list branches, reopen it (checkout or via the connection),
  // then delete it. One array entry per snippet line; "" is a blank line.
  "# Fork an isolated, writable branch from main.",
  "# The returned handle is scoped to the branch; writes on it do not",
  "# affect main.",
  "branch = table.branches.create(\"exp\")",
  "branch.add([{\"vector\": [10.0, 11.0], \"item\": \"baz\", \"price\": 30.0}])",
  "print(branch.count_rows()) # 3 rows on the branch",
  "print(table.count_rows()) # main is still untouched",
  "",
  "# List all branches on the table.",
  "print(table.branches.list())",
  "",
  "# Reopen the branch later by name.",
  // snake_case to follow Python naming and match `branch_handle` below
  // (previously the camelCase `checkedOut`).
  "checked_out = table.branches.checkout(\"exp\")",
  "",
  "# Or open a branch directly from the database connection.",
  "branch_handle = db.open_table(\"quotes_versioning_example\", branch=\"exp\")",
  "",
  "# Delete a branch when you're done with it.",
  "table.branches.delete(\"exp\")",
  "", // trailing entry keeps the snippet's final newline
].join("\n");
32+
3133
export const PyConsistencyCheckoutLatest = "uri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri)\nwriter_table = writer_db.create_table(\n \"consistency_checkout_latest_table\", [{\"id\": 1}], mode=\"overwrite\"\n)\nreader_table = reader_db.open_table(\"consistency_checkout_latest_table\")\n\nwriter_table.add([{\"id\": 2}])\nrows_before_refresh = reader_table.count_rows()\nprint(f\"Rows before checkout_latest: {rows_before_refresh}\")\n\nreader_table.checkout_latest()\nrows_after_refresh = reader_table.count_rows()\nprint(f\"Rows after checkout_latest: {rows_after_refresh}\")\n";
3234

3335
export const PyConsistencyEventual = "from datetime import timedelta\n\nuri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri, read_consistency_interval=timedelta(seconds=3600))\nwriter_table = writer_db.create_table(\n \"consistency_eventual_table\", [{\"id\": 1}], mode=\"overwrite\"\n)\nreader_table = reader_db.open_table(\"consistency_eventual_table\")\nwriter_table.add([{\"id\": 2}])\nrows_after_write = reader_table.count_rows()\nprint(f\"Rows visible before eventual refresh interval: {rows_after_write}\")\n";
@@ -140,6 +142,8 @@ export const TsAlterColumnsWithExpression = "// For custom transforms, create a
140142

141143
export const TsAlterVectorColumn = "const oldDim = 384;\nconst newDim = 1024;\nconst vectorSchema = new arrow.Schema([\n new arrow.Field(\"id\", new arrow.Int64()),\n new arrow.Field(\n \"embedding\",\n new arrow.FixedSizeList(\n oldDim,\n new arrow.Field(\"item\", new arrow.Float16(), true),\n ),\n true,\n ),\n]);\nconst vectorData = lancedb.makeArrowTable(\n [{ id: 1, embedding: Array.from({ length: oldDim }, () => Math.random()) }],\n { schema: vectorSchema },\n);\nconst vectorTable = await db.createTable(\"vector_alter_example\", vectorData, {\n mode: \"overwrite\",\n});\n\n// Changing FixedSizeList dimensions (384 -> 1024) is not supported via alterColumns.\n// Use addColumns + dropColumns + alterColumns(rename) to replace the column.\nawait vectorTable.addColumns([\n {\n name: \"embedding_v2\",\n valueSql: `arrow_cast(NULL, 'FixedSizeList(${newDim}, Float16)')`,\n },\n]);\nawait vectorTable.dropColumns([\"embedding\"]);\nawait vectorTable.alterColumns([{ path: \"embedding_v2\", rename: \"embedding\" }]);\n";
142144

145+
export const TsBranches = [
  // TypeScript walkthrough shown on the "Table branches" page: obtain the
  // branches handle, fork and write to a branch, list, reopen, then delete.
  // One array entry per snippet line; "" is a blank line.
  "const branches = await table.branches();",
  "",
  "// Fork an isolated, writable branch from main.",
  "// The returned handle is scoped to the branch; writes on it do not",
  "// affect main.",
  "const branch = await branches.create(\"exp\");",
  "await branch.add([{ id: 2, author: \"Morty\", quote: \"Aww geez, Rick!\" }]);",
  "",
  "// List all branches on the table.",
  "console.log(await branches.list());",
  "",
  "// Reopen the branch later by name.",
  "const checkedOut = await branches.checkout(\"exp\");",
  "",
  "// Or open a branch directly from the database connection.",
  "const branchHandle = await db.openTable(",
  " \"quotes_versioning_example\",",
  " undefined,",
  " { branch: \"exp\" },",
  ");",
  "",
  "// Delete a branch when you're done with it.",
  "await branches.delete(\"exp\");",
  "", // trailing entry keeps the snippet's final newline
].join("\n");
146+
143147
export const TsConsistencyCheckoutLatest = "const checkoutWriterDb = await lancedb.connect(databaseDir);\nconst checkoutReaderDb = await lancedb.connect(databaseDir);\nconst checkoutWriterTable = await checkoutWriterDb.createTable(\n \"consistency_checkout_latest_table\",\n [{ id: 1 }],\n { mode: \"overwrite\" },\n);\nconst checkoutReaderTable = await checkoutReaderDb.openTable(\n \"consistency_checkout_latest_table\",\n);\nawait checkoutWriterTable.add([{ id: 2 }]);\nconst rowsBeforeRefresh = await checkoutReaderTable.countRows();\nconsole.log(`Rows before checkoutLatest: ${rowsBeforeRefresh}`);\nawait checkoutReaderTable.checkoutLatest();\nconst rowsAfterRefresh = await checkoutReaderTable.countRows();\nconsole.log(`Rows after checkoutLatest: ${rowsAfterRefresh}`);\n";
144148

145149
export const TsConsistencyEventual = "const eventualWriterDb = await lancedb.connect(databaseDir);\nconst eventualReaderDb = await lancedb.connect(databaseDir, {\n readConsistencyInterval: 3600,\n});\nconst eventualWriterTable = await eventualWriterDb.createTable(\n \"consistency_eventual_table\",\n [{ id: 1 }],\n { mode: \"overwrite\" },\n);\nconst eventualReaderTable = await eventualReaderDb.openTable(\n \"consistency_eventual_table\",\n);\nawait eventualWriterTable.add([{ id: 2 }]);\nconst eventualRowsAfterWrite = await eventualReaderTable.countRows();\nconsole.log(\n `Rows visible before eventual refresh interval: ${eventualRowsAfterWrite}`,\n);\n";
@@ -234,6 +238,8 @@ export const RsAlterColumnsWithExpression = "// For custom transforms, create a
234238

235239
export const RsAlterVectorColumn = "let old_dim = 384;\nlet new_dim = 1024;\nlet vector_schema = Arc::new(Schema::new(vec![\n Field::new(\"id\", DataType::Int64, false),\n Field::new(\n \"embedding\",\n DataType::FixedSizeList(\n Arc::new(Field::new(\"item\", DataType::Float32, true)),\n old_dim,\n ),\n true,\n ),\n]));\nlet vector_batch = RecordBatch::try_new(\n vector_schema.clone(),\n vec![\n Arc::new(Int64Array::from(vec![1])),\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![Some(vec![Some(0.1_f32); old_dim as usize])],\n old_dim,\n ),\n ),\n ],\n)\n.unwrap();\nlet vector_reader: Box<dyn RecordBatchReader + Send> =\n Box::new(RecordBatchIterator::new(vec![Ok(vector_batch)].into_iter(), vector_schema.clone()));\nlet vector_table = db\n .create_table(\"vector_alter_example\", vector_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n\n// Changing FixedSizeList dimensions (384 -> 1024) is not supported via alter_columns.\n// Use add_columns + drop_columns + alter_columns(rename) to replace the column.\nvector_table\n .add_columns(\n NewColumnTransform::SqlExpressions(vec![(\n \"embedding_v2\".to_string(),\n format!(\"arrow_cast(NULL, 'FixedSizeList({}, Float32)')\", new_dim),\n )]),\n None,\n )\n .await\n .unwrap();\nvector_table.drop_columns(&[\"embedding\"]).await.unwrap();\nvector_table\n .alter_columns(&[ColumnAlteration::new(\"embedding_v2\".to_string())\n .rename(\"embedding\".to_string())])\n .await\n .unwrap();\n";
236240

241+
export const RsBranches = [
  // Rust walkthrough shown on the "Table branches" page: fork a branch from
  // main, write to it, list branches, reopen it two ways, then delete it.
  // One array entry per snippet line; "" is a blank line.
  "use lance::dataset::refs::Ref;",
  "",
  "// Fork an isolated, writable branch from main's latest version.",
  "// The returned handle is scoped to the branch; writes on it do not",
  "// affect main.",
  "let branch = table",
  " .create_branch(\"exp\", Ref::Version(None, None))",
  " .await",
  " .unwrap();",
  "branch.add(make_quotes_reader(vec![(4, \"Morty\", \"Aww geez, Rick!\")]))",
  " .execute()",
  " .await",
  " .unwrap();",
  "",
  "// List all branches on the table.",
  "let branches = table.list_branches().await.unwrap();",
  "println!(\"Branches: {:?}\", branches);",
  "",
  "// Reopen the branch later by name, or open it directly via the builder.",
  "let _checked_out = table.checkout_branch(\"exp\").await.unwrap();",
  "let _opened = db",
  " .open_table(\"quotes_versioning_example\")",
  " .branch(\"exp\")",
  " .execute()",
  " .await",
  " .unwrap();",
  "",
  "// Delete a branch when you're done with it.",
  "table.delete_branch(\"exp\").await.unwrap();",
  "", // trailing entry keeps the snippet's final newline
].join("\n");
242+
237243
export const RsConsistencyCheckoutLatest = "let checkout_writer_db = connect(&db_uri).execute().await.unwrap();\nlet checkout_reader_db = connect(&db_uri).execute().await.unwrap();\nlet checkout_writer_table = checkout_writer_db\n .create_table(\n \"consistency_checkout_latest_table\",\n make_users_reader(vec![1], vec![\"Alice\"], None),\n )\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\nlet checkout_reader_table = checkout_reader_db\n .open_table(\"consistency_checkout_latest_table\")\n .execute()\n .await\n .unwrap();\ncheckout_writer_table\n .add(make_users_reader(vec![2], vec![\"Bob\"], None))\n .execute()\n .await\n .unwrap();\nlet rows_before_refresh = checkout_reader_table.count_rows(None).await.unwrap();\nprintln!(\"Rows before checkout_latest: {}\", rows_before_refresh);\ncheckout_reader_table.checkout_latest().await.unwrap();\nlet rows_after_refresh = checkout_reader_table.count_rows(None).await.unwrap();\nprintln!(\"Rows after checkout_latest: {}\", rows_after_refresh);\n";
238244

239245
export const RsConsistencyEventual = "let eventual_writer_db = connect(&db_uri).execute().await.unwrap();\nlet eventual_reader_db = connect(&db_uri)\n .read_consistency_interval(StdDuration::from_secs(3600))\n .execute()\n .await\n .unwrap();\nlet eventual_writer_table = eventual_writer_db\n .create_table(\n \"consistency_eventual_table\",\n make_users_reader(vec![1], vec![\"Alice\"], None),\n )\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\nlet eventual_reader_table = eventual_reader_db\n .open_table(\"consistency_eventual_table\")\n .execute()\n .await\n .unwrap();\neventual_writer_table\n .add(make_users_reader(vec![2], vec![\"Bob\"], None))\n .execute()\n .await\n .unwrap();\nlet eventual_rows_after_write = eventual_reader_table.count_rows(None).await.unwrap();\nprintln!(\n \"Rows visible before eventual refresh interval: {}\",\n eventual_rows_after_write\n);\n";

docs/tables/branches.mdx

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
---
2+
title: "Table branches"
3+
sidebarTitle: "Branches"
4+
description: "Fork isolated, writable copies of a LanceDB table to test changes without touching main."
5+
icon: "code-branch"
6+
keywords: ["branches", "branch", "versioning", "fork", "isolation"]
7+
---
8+
import {
9+
PyBranches as Branches,
10+
TsBranches as TsBranches,
11+
RsBranches as RsBranches,
12+
} from '/snippets/tables.mdx';
13+
14+
Branches are isolated, writable lines of history forked from another branch
15+
(or a specific version). Writes on a branch do not affect `main`, and you can
16+
list, check out, or delete branches at any time. Branches are similar in spirit
17+
to Git branches but operate at the table level.
18+
19+
Use branches when you want to:
20+
21+
- Try a destructive operation (re-embed, schema change, bulk update) without
22+
risking the production table.
23+
- Stage data for evaluation, then promote or discard it.
24+
- Run experiments in parallel against the same base data.
25+
26+
<Note>
27+
Branches are supported on local and embedded LanceDB tables. Remote tables
28+
served by LanceDB Enterprise do not yet support branches.
29+
</Note>
30+
31+
## Create, write, and check out a branch
32+
33+
Forking a branch returns a new table handle scoped to that branch. All reads
34+
and writes through the handle stay on the branch — `main` is untouched until
35+
you explicitly promote the data yourself (for example, by copying rows back).
36+
37+
`branches.create(name)` forks from `main`'s latest version by default. To fork
38+
from a different source, pass `from_ref` (a branch name) and/or `from_version`
39+
(a specific version on that ref). In Rust, pass the source as a `Ref` to `create_branch` instead.
40+
41+
<CodeGroup>
42+
<CodeBlock filename="Python" language="Python" icon="python">
43+
{Branches}
44+
</CodeBlock>
45+
46+
<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
47+
{TsBranches}
48+
</CodeBlock>
49+
50+
<CodeBlock filename="Rust" language="Rust" icon="rust">
51+
{RsBranches}
52+
</CodeBlock>
53+
</CodeGroup>
54+
55+
`branches.list()` returns a map of branch name to metadata, including the
56+
parent branch (`None`/`null` for branches forked from `main`).
57+
58+
## Open a branch directly
59+
60+
If you already know the branch you want, you can skip the explicit checkout
61+
and pass the branch name to `open_table` / `openTable`. The returned table
62+
handle behaves the same as one returned by `branches.checkout()`.
63+
64+
<CodeGroup>
65+
```python Python icon="python"
66+
# Reads and writes operate in the branch's context
67+
branch = db.open_table("items", branch="exp")
68+
```
69+
70+
```typescript TypeScript icon="square-js"
71+
// Reads and writes operate in the branch's context
72+
const branch = await db.openTable("items", undefined, { branch: "exp" });
73+
```
74+
75+
```rust Rust icon="rust"
76+
// Reads and writes operate in the branch's context
77+
let branch = db
78+
.open_table("items")
79+
.branch("exp")
80+
.execute()
81+
.await?;
82+
```
83+
</CodeGroup>
84+
85+
## Branches versus tags
86+
87+
Branches and [tags](/tables/versioning#tag-based-versioning) both attach
88+
human-readable names to a table's history, but they serve different goals:
89+
90+
| | Branches | Tags |
91+
|---|---|---|
92+
| Writable | Yes — each branch has its own history | No — a label on an immutable version |
93+
| Affects `main` on write | No | N/A (tags can't be written to) |
94+
| Typical use | Experiments, staging, isolation | Pinning a known-good version (`"prod"`, `"baseline"`) |
95+
96+
Reach for a branch when you need to write data; reach for a tag when you just
97+
need a stable label on an existing version.

0 commit comments

Comments
 (0)