Skip to content

Commit 1ea0a97

Browse files
authored
Update table operations docs with identified gaps (#226)
1 parent 5dd9ef0 commit 1ea0a97

7 files changed

Lines changed: 350 additions & 2 deletions

File tree

docs/snippets/tables.mdx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ export const PyCreateEmptyTable = "import pyarrow as pa\n\nschema = pa.schema(\n
3838

3939
export const PyCreateEmptyTablePydantic = "from lancedb.pydantic import LanceModel, Vector\n\nclass Item(LanceModel):\n vector: Vector(2)\n item: str\n price: float\n\ndb = tmp_db\ntbl = db.create_table(\n \"test_empty_table_new\", schema=Item.to_arrow_schema(), mode=\"overwrite\"\n)\n";
4040

41+
export const PyCreateTableConflictHandling = "# Idempotent open: reuse the existing table if it exists.\n# The provided data is ignored; the schema is validated against the\n# existing table and a mismatch raises an error.\ntbl = db.create_table(\"conflict_table\", data, exist_ok=True)\n\n# Overwrite: drop the existing table and create a new one with the\n# provided data. This permanently discards the old table's data.\ntbl = db.create_table(\"conflict_table\", data, mode=\"overwrite\")\n";
42+
4143
export const PyCreateTableCustomSchema = "import pyarrow as pa\n\ncustom_schema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n pa.field(\"lat\", pa.float32()),\n pa.field(\"long\", pa.float32()),\n ]\n)\n\ndata = [\n {\"vector\": [1.1, 1.2, 1.3, 1.4], \"lat\": 45.5, \"long\": -122.7},\n {\"vector\": [0.2, 1.8, 0.4, 3.6], \"lat\": 40.1, \"long\": -74.1},\n]\ndb = tmp_db\ntbl = db.create_table(\n \"my_table_custom_schema\", data, schema=custom_schema, mode=\"overwrite\"\n)\n";
4244

4345
export const PyCreateTableFromArrow = "import numpy as np\nimport pyarrow as pa\n\ndim = 16\ntotal = 2\nschema = pa.schema(\n [pa.field(\"vector\", pa.list_(pa.float16(), dim)), pa.field(\"text\", pa.string())]\n)\ndata = pa.Table.from_arrays(\n [\n pa.array(\n [np.random.randn(dim).astype(np.float16) for _ in range(total)],\n pa.list_(pa.float16(), dim),\n ),\n pa.array([\"foo\", \"bar\"]),\n ],\n [\"vector\", \"text\"],\n)\ndb = tmp_db\ntbl = db.create_table(\"f16_tbl\", data, schema=schema, mode=\"overwrite\")\n";
@@ -116,6 +118,8 @@ export const PyVersioningListAllVersions = "versions = table.list_versions()\nfo
116118

117119
export const PyVersioningRollback = "table.restore(version_after_mod)\nversions = table.list_versions()\nversion_count_after_rollback = len(versions)\nprint(f\"Total number of versions after rollback: {version_count_after_rollback}\")\n";
118120

121+
export const PyVersioningTags = "# Create a tag pointing at a specific version\ntable.tags.create(\"baseline\", 1)\ntable.tags.create(\"with-edits\", table.version)\n\n# List all tags on this table\nprint(table.tags.list())\n\n# Look up the version a tag points at\nprint(table.tags.get_version(\"baseline\"))\n\n# Move an existing tag to a different version\ntable.tags.update(\"baseline\", 2)\n\n# Check out a version by tag name\ntable.checkout(\"baseline\")\nprint(table.version)\n\n# Delete a tag (does not delete the underlying version)\ntable.tags.delete(\"with-edits\")\n\n# Return to the latest version\ntable.checkout_latest()\n";
122+
119123
export const PyVersioningUpdateData = "table.update(where=\"author='Richard'\", values={\"author\": \"Richard Daniel Sanchez\"})\nrows_after_update = table.count_rows(\"author = 'Richard Daniel Sanchez'\")\nprint(f\"Rows updated to Richard Daniel Sanchez: {rows_after_update}\")\n";
120124

121125
export const TsAddColumnsCalculated = "// Add a discounted price column (10% discount)\nawait schemaAddTable.addColumns([\n {\n name: \"discounted_price\",\n valueSql: \"cast((price * 0.9) as float)\",\n },\n]);\n";
@@ -144,6 +148,8 @@ export const TsConsistencyStrong = "const strongWriterDb = await lancedb.connect
144148

145149
export const TsCreateEmptyTable = "const emptySchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 2,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"item\", new arrow.Utf8()),\n new arrow.Field(\"price\", new arrow.Float32()),\n]);\nconst emptyTable = await db.createEmptyTable(\n \"test_empty_table\",\n emptySchema,\n {\n mode: \"overwrite\",\n },\n);\n";
146150

151+
export const TsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet conflictTable = await db.createTable(\"conflict_table\", data, {\n existOk: true,\n});\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nconflictTable = await db.createTable(\"conflict_table\", data, {\n mode: \"overwrite\",\n});\n";
152+
147153
export const TsCreateTableCustomSchema = "const customSchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 4,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"lat\", new arrow.Float32()),\n new arrow.Field(\"long\", new arrow.Float32()),\n]);\n\nconst customSchemaData = lancedb.makeArrowTable(\n [\n { vector: [1.1, 1.2, 1.3, 1.4], lat: 45.5, long: -122.7 },\n { vector: [0.2, 1.8, 0.4, 3.6], lat: 40.1, long: -74.1 },\n ],\n { schema: customSchema },\n);\nconst customSchemaTable = await db.createTable(\n \"my_table_custom_schema\",\n customSchemaData,\n { mode: \"overwrite\" },\n);\n";
148154

149155
export const TsCreateTableFromArrow = "const arrowSchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 16,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"text\", new arrow.Utf8()),\n]);\nconst arrowData = lancedb.makeArrowTable(\n [\n { vector: Array(16).fill(0.1), text: \"foo\" },\n { vector: Array(16).fill(0.2), text: \"bar\" },\n ],\n { schema: arrowSchema },\n);\nconst arrowTable = await db.createTable(\"f32_tbl\", arrowData, {\n mode: \"overwrite\",\n});\n";
@@ -206,6 +212,8 @@ export const TsVersioningListAllVersions = "const allVersions = await table.list
206212

207213
export const TsVersioningRollback = "await table.checkout(versionAfterMod);\nawait table.restore();\nconst versionsAfterRollback = await table.listVersions();\nconst versionCountAfterRollback = versionsAfterRollback.length;\nconsole.log(\n `Total number of versions after rollback: ${versionCountAfterRollback}`,\n);\n";
208214

215+
export const TsVersioningTags = "const tags = await tagsTable.tags();\n\n// Create a tag pointing at a specific version\nawait tags.create(\"baseline\", 1);\nawait tags.create(\"with-edits\", await tagsTable.version());\n\n// List all tags on this table\nconsole.log(await tags.list());\n\n// Look up the version a tag points at\nconsole.log(await tags.getVersion(\"baseline\"));\n\n// Move an existing tag to a different version\nawait tags.update(\"baseline\", 2);\n\n// Check out a version by tag name\nawait tagsTable.checkout(\"baseline\");\nconsole.log(await tagsTable.version());\n\n// Delete a tag (does not delete the underlying version)\nawait tags.delete(\"with-edits\");\n\n// Return to the latest version\nawait tagsTable.checkoutLatest();\n";
216+
209217
export const TsVersioningUpdateData = "await table.update({\n where: \"author = 'Richard'\",\n values: { author: \"Richard Daniel Sanchez\" },\n});\nconst rowsAfterUpdate = await table.countRows(\n \"author = 'Richard Daniel Sanchez'\",\n);\nconsole.log(`Rows updated to Richard Daniel Sanchez: ${rowsAfterUpdate}`);\n";
210218

211219
export const RsAddColumnsCalculated = "// Add a discounted price column (10% discount)\nschema_add_table\n .add_columns(\n NewColumnTransform::SqlExpressions(vec![(\n \"discounted_price\".to_string(),\n \"cast((price * 0.9) as float)\".to_string(),\n )]),\n None,\n )\n .await\n .unwrap();\n";
@@ -234,6 +242,8 @@ export const RsConsistencyStrong = "let strong_writer_db = connect(&db_uri).exec
234242

235243
export const RsCreateEmptyTable = "let empty_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n false,\n ),\n Field::new(\"item\", DataType::Utf8, false),\n Field::new(\"price\", DataType::Float32, false),\n]));\nlet empty_table = db\n .create_empty_table(\"test_empty_table\", empty_schema)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";
236244

245+
export const RsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet _conflict_table = db\n .create_table(\"conflict_table\", exist_ok_reader)\n .mode(CreateTableMode::exist_ok(|req| req))\n .execute()\n .await\n .unwrap();\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nlet conflict_table = db\n .create_table(\"conflict_table\", overwrite_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";
246+
237247
export const RsCreateTableCustomSchema = "let custom_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 4),\n false,\n ),\n Field::new(\"lat\", DataType::Float32, false),\n Field::new(\"long\", DataType::Float32, false),\n]));\n\nlet custom_batch = RecordBatch::try_new(\n custom_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![\n Some(vec![Some(1.1), Some(1.2), Some(1.3), Some(1.4)]),\n Some(vec![Some(0.2), Some(1.8), Some(0.4), Some(3.6)]),\n ],\n 4,\n ),\n ),\n Arc::new(Float32Array::from(vec![45.5, 40.1])),\n Arc::new(Float32Array::from(vec![-122.7, -74.1])),\n ],\n)\n.unwrap();\nlet custom_reader =\n RecordBatchIterator::new(vec![Ok(custom_batch)].into_iter(), custom_schema.clone());\nlet custom_table = db\n .create_table(\"my_table_custom_schema\", custom_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";
238248

239249
export const RsCreateTableFromArrow = "let arrow_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 16),\n false,\n ),\n Field::new(\"text\", DataType::Utf8, false),\n]));\n\nlet arrow_batch = RecordBatch::try_new(\n arrow_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![Some(vec![Some(0.1); 16]), Some(vec![Some(0.2); 16])],\n 16,\n ),\n ),\n Arc::new(StringArray::from(vec![\"foo\", \"bar\"])),\n ],\n)\n.unwrap();\nlet arrow_reader =\n RecordBatchIterator::new(vec![Ok(arrow_batch)].into_iter(), arrow_schema.clone());\nlet arrow_table = db\n .create_table(\"arrow_table_example\", arrow_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";
@@ -300,5 +310,7 @@ export const RsVersioningMakeQuotesReader = "fn make_quotes_reader(rows: Vec<(i6
300310

301311
export const RsVersioningRollback = "table.checkout(version_after_mod).await.unwrap();\ntable.restore().await.unwrap();\nlet versions_after_rollback = table.list_versions().await.unwrap();\nlet version_count_after_rollback = versions_after_rollback.len();\nprintln!(\n \"Total number of versions after rollback: {}\",\n version_count_after_rollback\n);\n";
302312

313+
export const RsVersioningTags = "let mut tags = tags_table.tags().await.unwrap();\n\n// Create a tag pointing at a specific version\ntags.create(\"baseline\", 1).await.unwrap();\nlet current_version = tags_table.version().await.unwrap();\ntags.create(\"with-edits\", current_version).await.unwrap();\n\n// List all tags on this table\nlet all_tags = tags.list().await.unwrap();\nprintln!(\"Tags: {:?}\", all_tags);\n\n// Look up the version a tag points at\nlet baseline_version = tags.get_version(\"baseline\").await.unwrap();\nprintln!(\"baseline -> v{}\", baseline_version);\n\n// Move an existing tag to a different version\ntags.update(\"baseline\", 2).await.unwrap();\n\n// Check out a version by tag name (separate method in Rust)\ntags_table.checkout_tag(\"baseline\").await.unwrap();\nprintln!(\"Current version: {}\", tags_table.version().await.unwrap());\n\n// Delete a tag (does not delete the underlying version)\ntags.delete(\"with-edits\").await.unwrap();\n\n// Return to the latest version\ntags_table.checkout_latest().await.unwrap();\n";
314+
303315
export const RsVersioningUpdateData = "table\n .update()\n .only_if(\"author = 'Richard'\")\n .column(\"author\", \"'Richard Daniel Sanchez'\")\n .execute()\n .await\n .unwrap();\nlet rows_after_update = table\n .count_rows(Some(\"author = 'Richard Daniel Sanchez'\".to_string()))\n .await\n .unwrap();\nprintln!(\n \"Rows updated to Richard Daniel Sanchez: {}\",\n rows_after_update\n);\n";
304316

docs/tables/create.mdx

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ import {
99
PyCreateTableFromDicts as CreateTableFromDicts,
1010
TsCreateTableFromDicts as TsCreateTableFromDicts,
1111
RsCreateTableFromDicts as RsCreateTableFromDicts,
12+
PyCreateTableConflictHandling as CreateTableConflictHandling,
13+
TsCreateTableConflictHandling as TsCreateTableConflictHandling,
14+
RsCreateTableConflictHandling as RsCreateTableConflictHandling,
1215
PyCreateTableFromPandas as CreateTableFromPandas,
1316
PyCreateTableCustomSchema as CreateTableCustomSchema,
1417
TsCreateTableCustomSchema as TsCreateTableCustomSchema,
@@ -82,6 +85,39 @@ support lists/arrays of dictionaries, while the Rust SDK supports lists of struc
8285
</CodeBlock>
8386
</CodeGroup>
8487

88+
### Handle existing tables
89+
90+
By default, `create_table` raises an error if a table with the same name already exists.
91+
You can change this behavior with one of two options that resolve the conflict in different ways:
92+
93+
- **Idempotent open**: return the existing table without modifying it. Use when your
94+
code may run more than once (notebooks, retries, init scripts) and you want to reuse
95+
the table on subsequent runs. The provided data is ignored, but the schema is
96+
validated against the existing table and a mismatch raises an error.
97+
- **Overwrite**: drop the existing table and create a new one with the provided data.
98+
Use this for test fixtures or when you intentionally want to replace prior contents.
99+
This permanently discards the old table's data.
100+
101+
<CodeGroup>
102+
<CodeBlock filename="Python" language="Python" icon="python">
103+
{CreateTableConflictHandling}
104+
</CodeBlock>
105+
106+
<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
107+
{TsCreateTableConflictHandling}
108+
</CodeBlock>
109+
110+
<CodeBlock filename="Rust" language="Rust" icon="rust">
111+
{RsCreateTableConflictHandling}
112+
</CodeBlock>
113+
</CodeGroup>
114+
115+
<Note>
116+
`exist_ok` / `existOk` does not append the provided data to an existing table. Use
117+
[`table.add()`](/tables/update) for that. If you need to ensure a table exists *and*
118+
contains specific rows, prefer the [empty-table-then-add pattern](#create-empty-table).
119+
</Note>
120+
85121
### From a custom schema
86122

87123
You can define a custom Arrow schema for the table. This is useful when you want to have more control over the column types and metadata.

docs/tables/versioning.mdx

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ import {
3232
PyVersioningDeleteData as VersioningDeleteData,
3333
TsVersioningDeleteData as TsVersioningDeleteData,
3434
RsVersioningDeleteData as RsVersioningDeleteData,
35+
PyVersioningTags as VersioningTags,
36+
TsVersioningTags as TsVersioningTags,
37+
RsVersioningTags as RsVersioningTags,
3538
RsVersioningMakeQuotesReader as RsVersioningMakeQuotesReader,
3639
} from '/snippets/tables.mdx';
3740

@@ -184,6 +187,37 @@ Now let's restore a captured version snapshot:
184187
</CodeBlock>
185188
</CodeGroup>
186189

190+
## Tag-Based Versioning
191+
192+
Numeric table versions like `v3` or `v17` are precise but hard to remember. Tags
193+
let you attach human-readable labels (e.g., `"prod"`, `"baseline"`,
194+
`"q3-evaluation"`) to specific versions and check those out by name. They are
195+
conceptually similar to Git tags, and unlike untagged versions, **tagged versions
196+
are preserved when old versions are pruned** (see the cleanup note at the bottom
197+
of this page).
198+
199+
The tags API supports the standard CRUD operations — create, list, update, delete —
200+
plus checking out by tag name.
201+
202+
<CodeGroup>
203+
<CodeBlock filename="Python" language="Python" icon="python">
204+
{VersioningTags}
205+
</CodeBlock>
206+
207+
<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
208+
{TsVersioningTags}
209+
</CodeBlock>
210+
211+
<CodeBlock filename="Rust" language="Rust" icon="rust">
212+
{RsVersioningTags}
213+
</CodeBlock>
214+
</CodeGroup>
215+
216+
<Note>
217+
Deleting a tag only removes the label, not the version it points to. After
218+
deletion, the underlying table version becomes eligible for cleanup again (unless another tag still points to it).
219+
</Note>
220+
187221
## Delete Data From the Table
188222

189223
Let's demonstrate how deletions also create new versions:
@@ -242,4 +276,8 @@ Read-only and checkout operations shown here (`list_versions`/`listVersions`, `v
242276
System operations like `optimize()`, index updates, and table compaction also increment table version numbers.
243277
In LanceDB OSS and Enterprise, `optimize()` can prune older versions based on its retention setting (`cleanup_older_than`, 7 days by default),
244278
which is when old-version files are removed and disk space is reclaimed.
279+
280+
**Tagged versions are exempt from cleanup.** A version with a tag pointing at it is
281+
retained regardless of age, and its files are not removed by `optimize()`. To make
282+
a tagged version eligible for pruning, [delete the tag](#tag-based-versioning) first.
245283
</Note>

0 commit comments

Comments
 (0)