ziyanTOP
diff --git a/‎.github/workflows/publish_snapshot-jdk17.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/publish_snapshot-jdk17.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/stale-pr.yml‎
Lines changed: 52 additions & 0 deletions b/‎.github/workflows/stale-pr.yml‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎.github/workflows/utitcase-rust-native.yml‎
Lines changed: 7 additions & 3 deletions b/‎.github/workflows/utitcase-rust-native.yml‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎.github/workflows/utitcase-spark-4.x.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/utitcase-spark-4.x.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/content/append-table/blob.md‎
Lines changed: 76 additions & 14 deletions b/‎docs/content/append-table/blob.md‎
Lines changed: 76 additions & 14 deletions
diff --git a/‎docs/content/append-table/global-index.md‎
Lines changed: 5 additions & 1 deletion b/‎docs/content/append-table/global-index.md‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎docs/content/concepts/system-tables.md‎
Lines changed: 5 additions & 5 deletions b/‎docs/content/concepts/system-tables.md‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎docs/content/learn-paimon/scenario-guide.md‎
Lines changed: 3 additions & 1 deletion b/‎docs/content/learn-paimon/scenario-guide.md‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/content/primary-key-table/pk-clustering-override.md‎
Lines changed: 18 additions & 1 deletion b/‎docs/content/primary-key-table/pk-clustering-override.md‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎docs/content/project/download.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/content/project/download.md‎
Lines changed: 2 additions & 0 deletions
@@ -63,8 +63,8 @@ jobs:
           echo "<password>$ASF_PASSWORD</password>" >> $tmp_settings
           echo "</server></servers></settings>" >> $tmp_settings
         
-          mvn --settings $tmp_settings -ntp clean install -Dgpg.skip -Drat.skip -DskipTests -Papache-release,spark4,flink1 -pl org.apache.paimon:paimon-spark-4.0_2.13 -am
+          mvn --settings $tmp_settings -ntp clean install -Dgpg.skip -Drat.skip -DskipTests -Papache-release,spark4,flink1 -pl org.apache.paimon:paimon-spark-4.0_2.13,org.apache.paimon:paimon-spark-4.1_2.13 -am
           # skip deploy paimon-spark-common_2.13 since they are already deployed in publish-snapshot.yml
-          mvn --settings $tmp_settings -ntp clean deploy -Dgpg.skip -Drat.skip -DskipTests -Papache-release,spark4,flink1 -pl org.apache.paimon:paimon-spark4-common_2.13,org.apache.paimon:paimon-spark-ut_2.13,org.apache.paimon:paimon-spark-4.0_2.13
+          mvn --settings $tmp_settings -ntp clean deploy -Dgpg.skip -Drat.skip -DskipTests -Papache-release,spark4,flink1 -pl org.apache.paimon:paimon-spark4-common_2.13,org.apache.paimon:paimon-spark-ut_2.13,org.apache.paimon:paimon-spark-4.0_2.13,org.apache.paimon:paimon-spark-4.1_2.13
 
           rm $tmp_settings
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Posts a single reminder comment on pull requests that have seen no
+# activity for 90 days. No auto-close; a maintainer decides whether to
+# close, ping again, or leave the PR open. Issues are not in scope.
+#
+# See dev@paimon.apache.org "Stale PR cleanup for Paimon" thread.
+
+name: Stale PR reminder
+
+on:
+  schedule:
+    - cron: '0 0 * * *'  # once a day at 00:00 (GitHub evaluates schedules in UTC)
+  workflow_dispatch:  # also allow manual runs
+
+permissions:
+  pull-requests: write  # needed to label and comment on pull requests
+
+jobs:
+  stale-pr:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v9
+        with:
+          # PRs: nudge once at 90 days of inactivity, never auto-close.
+          days-before-pr-stale: 90
+          days-before-pr-close: -1  # -1 disables closing
+          stale-pr-label: stale
+          stale-pr-message: >
+            This pull request has had no activity for 90 days. If you'd
+            like to keep it open, please push a new commit or leave a
+            comment. Thanks for the contribution.
+          remove-stale-when-updated: true
+
+          # Issues are not in scope for this workflow.
+          days-before-issue-stale: -1
+          days-before-issue-close: -1
+
+          operations-per-run: 100
@@ -51,11 +51,13 @@ jobs:
           distribution: 'temurin'
 
       - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
+        run: |
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
+          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
 
       - name: Clone and build Vortex native library
         run: |
-          git clone --depth 1 https://github.com/spiraldb/vortex.git ${RUNNER_TEMP}/vortex
+          git clone --depth 1  -b 0.69.0 https://github.com/spiraldb/vortex.git ${RUNNER_TEMP}/vortex
           cd ${RUNNER_TEMP}/vortex
           cargo build --package vortex-jni --release
 
@@ -87,7 +89,9 @@ jobs:
           distribution: 'temurin'
 
       - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
+        run: |
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
+          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
 
       - name: Build Tantivy native library
         run: |
 
@@ -61,7 +61,7 @@ jobs:
           jvm_timezone=$(random_timezone)
           echo "JVM timezone is set to $jvm_timezone"
           test_modules=""
-          for suffix in ut 4.0; do
+          for suffix in ut 4.0 4.1; do
           test_modules+="org.apache.paimon:paimon-spark-${suffix}_2.13,"
           done
           test_modules="${test_modules%,}"
 
@@ -71,7 +71,7 @@ For details about the blob file format structure, see [File Format - BLOB]({{< r
 
 ## Storage Modes
 
-Paimon supports three storage modes for BLOB fields:
+Paimon supports four storage modes for BLOB fields:
 
 1. **Default blob storage**
    Blob bytes are written to Paimon-managed `.blob` files under the table path.
@@ -82,7 +82,10 @@ Paimon supports three storage modes for BLOB fields:
 3. **External-storage descriptor mode**
    Fields configured in `blob-external-storage-field` are a subset of `blob-descriptor-field`. At write time, Paimon writes the raw blob data to the configured `blob-external-storage-path` and stores only serialized `BlobDescriptor` bytes inline in data files.
 
-This allows one table to mix raw-data BLOB fields, descriptor-only BLOB fields, and descriptor-based BLOB fields backed by external storage.
+4. **Blob view storage**
+   Fields configured in `blob-view-field` store serialized `BlobViewStruct` bytes inline in data files. The struct points to a BLOB value in an upstream table by table identifier, BLOB field, and row id. The actual blob bytes are resolved from the upstream table at read time.
+
+This allows one table to mix raw-data BLOB fields, descriptor-only BLOB fields, descriptor-based BLOB fields backed by external storage, and view fields that reference upstream BLOB values.
 
 ## Table Options
 
@@ -123,6 +126,17 @@ This allows one table to mix raw-data BLOB fields, descriptor-only BLOB fields,
         some BLOB fields in <code>.blob</code> files and some as descriptor references.
       </td>
     </tr>
+    <tr>
+      <td><h5>blob-view-field</h5></td>
+      <td>No</td>
+      <td style="word-wrap: break-word;">(none)</td>
+      <td>String</td>
+      <td>
+        Comma-separated BLOB field names stored as serialized <code>BlobViewStruct</code> bytes inline in normal data files.
+        The field values reference BLOB values in upstream tables and are resolved at read time.
+        The listed fields must be a subset of <code>blob-field</code> and must not overlap with <code>blob-descriptor-field</code>.
+      </td>
+    </tr>
     <tr>
       <td><h5>blob-external-storage-field</h5></td>
       <td>No</td>
@@ -279,30 +293,75 @@ ALTER TABLE blob_table SET ('blob-as-descriptor' = 'false');
 SELECT image FROM blob_table;
 ```
 
-### External-Storage Descriptor Fields
+### Blob View
+
+Blob view is useful when a downstream table should reference BLOB values already stored in an upstream table, without copying the bytes or creating new `.blob` files. A blob view field stores only a small `BlobViewStruct` inline. When the field is read, Paimon resolves the referenced BLOB from the upstream table.
+
+Blob view requires:
 
-If you want Paimon to accept raw BLOB input, write the data to an external location, and store only descriptor bytes inline, configure the target field(s) like this:
+- the upstream table to have row tracking enabled, so each row has a stable `_ROW_ID`
+- the downstream field to be listed in both `blob-field` and `blob-view-field`
+- writes to provide a serialized `BlobViewStruct`; in Flink SQL, use the built-in `sys.blob_view` function
+
+The Flink SQL function signature is:
 
 ```sql
-'blob-descriptor-field' = 'image',
-'blob-external-storage-field' = 'image',
-'blob-external-storage-path' = 's3://my-bucket/paimon-external-blobs/'
+sys.blob_view(table_name, field_name, row_id)
 ```
 
-For these configured fields:
+Arguments:
+
+- `table_name`: the upstream table name. It must be fully qualified as `database.table` or `catalog.database.table`. Unqualified table names are rejected.
+- `field_name`: the upstream BLOB field name.
+- `row_id`: the `_ROW_ID` value from the upstream row-tracking table.
+
+The following example writes a downstream table whose `image_ref` field references the `image` field of `image_table`:
+
+```sql
+CREATE TABLE image_table (
+    id INT,
+    name STRING,
+    image BYTES
+) WITH (
+    'row-tracking.enabled' = 'true',
+    'data-evolution.enabled' = 'true',
+    'blob-field' = 'image'
+);
+
+CREATE TABLE image_view_table (
+    id INT,
+    label STRING,
+    image_ref BYTES
+) WITH (
+    'row-tracking.enabled' = 'true',
+    'data-evolution.enabled' = 'true',
+    'blob-field' = 'image_ref',
+    'blob-view-field' = 'image_ref'
+);
+
+INSERT INTO image_view_table
+SELECT
+    id,
+    name AS label,
+    sys.blob_view('default.image_table', 'image', _ROW_ID)
+FROM `image_table$row_tracking`;
+```
 
-- Paimon writes the raw blob data to `blob-external-storage-path`
-- Paimon stores serialized `BlobDescriptor` bytes inline in normal data files
-- the field remains descriptor-based when reading and updating
-- orphan file cleanup is not applied to the external storage path
+The table name may also be prefixed with the current Paimon catalog name, in the form `catalog.database.table`:
+
+```sql
+SELECT sys.blob_view('my_catalog.default.image_table', 'image', _ROW_ID)
+FROM `image_table$row_tracking`;
+```
+
+Reads from `image_view_table.image_ref` return the referenced BLOB bytes in the same way as normal blob fields. The referenced upstream table and row must remain available for the view to be resolved.
 
 ### MERGE INTO Support
 
 For Data Evolution writes in Flink and Spark:
 
 - raw-data BLOB columns are still rejected in partial-column `MERGE INTO` updates
 - descriptor-based BLOB columns are allowed
-- fields configured in `blob-external-storage-field` are also allowed because they are descriptor-based fields
 
 ## Java API Usage
 
@@ -661,6 +720,7 @@ For these configured fields:
 3. **No Statistics**: Statistics collection is not supported for blob columns.
 4. **Required Options**: `row-tracking.enabled` and `data-evolution.enabled` must be set to `true`.
 5. **External Storage Cleanup**: Files written through `blob-external-storage-path` are outside Paimon's orphan file cleanup scope.
+6. **Blob View Dependency**: Blob view fields depend on the referenced upstream table and row. If the upstream data is removed or no longer readable, the view cannot be resolved.
 
 ## Best Practices
 
@@ -674,4 +734,6 @@ For these configured fields:
 
 5. **Manage External Storage Lifecycle Separately**: Files written to `blob-external-storage-path` are not cleaned up by Paimon, so retention and deletion should be managed externally.
 
-6. **Use Partitioning**: Partition your blob tables by date or other dimensions to improve query performance and data management.
+6. **Use Blob View to Avoid Copying BLOB Data**: Configure `blob-view-field` when a downstream table only needs to reference BLOB values from an upstream table.
+
+7. **Use Partitioning**: Partition your blob tables by date or other dimensions to improve query performance and data management.
@@ -41,6 +41,8 @@ Global indexes work on top of Data Evolution tables. To use global indexes, your
 - `'row-tracking.enabled' = 'true'`
 - `'data-evolution.enabled' = 'true'`
 
+> Global index queries may return incomplete results when the index covers only part of the table data. If a query predicate matches the index, Paimon returns only the results from the indexed portion; matching records in data that has not been indexed yet are not returned.
+
 ## Prerequisites
 
 Create a table with the required properties:
@@ -95,11 +97,13 @@ Generation) applications.
 CALL sys.create_global_index(
     table => 'db.my_table',
     index_column => 'embedding',
-    index_type => 'lumina-vector-ann',
+    index_type => 'lumina',
     options => 'lumina.index.dimension=128'
 );
 ```
 
+The legacy index type `lumina-vector-ann` is still accepted for existing tables and SQL compatibility.
+
 **Vector Search**
 
 {{< tabs "vector-search" >}}
 
@@ -432,11 +432,11 @@ You can query the partition files of the table.
 SELECT * FROM my_table$partitions;
 
 /*
-+-----------+--------------+-------------------+------------+---------------------+---------------------+------------+------------+---------+
-| partition | record_count | file_size_in_bytes| file_count | last_update_time    | created_at          | created_by | updated_by | options |
-+-----------+--------------+-------------------+------------+---------------------+---------------------+------------+------------+---------+
-| {1}       |            1 |               645 |          1 | 2024-06-24 10:25:57 | 2024-06-24 10:20:00 | admin      | test_user  | {}      |
-+-----------+--------------+-------------------+------------+---------------------+---------------------+------------+------------+---------+
++-----------+--------------+-------------------+------------+---------------------+---------------------+------------+------------+---------+---------------+-------+
+| partition | record_count | file_size_in_bytes| file_count | last_update_time    | created_at          | created_by | updated_by | options | total_buckets | done  |
++-----------+--------------+-------------------+------------+---------------------+---------------------+------------+------------+---------+---------------+-------+
+| {1}       |            1 |               645 |          1 | 2024-06-24 10:25:57 | 2024-06-24 10:20:00 | admin      | test_user  | {}      |             1 | false |
++-----------+--------------+-------------------+------------+---------------------+---------------------+------------+------------+---------+---------------+-------+
 */
 ```
 
 
@@ -451,14 +451,16 @@ Schema schema = Schema.newBuilder()
 CALL sys.create_global_index(
     table => 'db.doc_embeddings',
     index_column => 'embedding',
-    index_type => 'lumina-vector-ann',
+    index_type => 'lumina',
     options => 'lumina.index.dimension=768'
 );
 
 -- Search for top-5 nearest neighbors
 SELECT * FROM vector_search('doc_embeddings', 'embedding', array(0.1f, 0.2f, ...), 5);
 ```
 
+The legacy index type `lumina-vector-ann` is still accepted for existing tables and SQL compatibility.
+
 **Why:** The [Global Index]({{< ref "append-table/global-index" >}}) with DiskANN provides high-performance ANN search.
 Vector data is stored in dedicated `.vector.lance` files optimized for dense vectors, while scalar columns stay in
 Parquet. You can also build a **BTree Index** on scalar columns for efficient filtering:
 
@@ -50,6 +50,23 @@ CREATE TABLE my_table (
 );
 ```
 
+For the `first-row` merge engine, deletion vectors are built in, so you don't need to enable them explicitly:
+
+```sql
+CREATE TABLE my_table (
+    id BIGINT,
+    dt STRING,
+    city STRING,
+    amount DOUBLE,
+    PRIMARY KEY (id) NOT ENFORCED
+) WITH (
+    'pk-clustering-override' = 'true',
+    'clustering.columns' = 'city',
+    'merge-engine' = 'first-row',
+    'bucket' = '4'
+);
+```
+
 After this, data files within each bucket will be physically sorted by `city` instead of `id`. Queries like
 `SELECT * FROM my_table WHERE city = 'Beijing'` can skip irrelevant data files by checking their min/max statistics
 on the clustering column.
@@ -60,7 +77,7 @@ on the clustering column.
 |--------|-------------|
 | `pk-clustering-override` | `true` |
 | `clustering.columns` | Must be set (one or more non-primary-key columns) |
-| `deletion-vectors.enabled` | Must be `true` |
+| `deletion-vectors.enabled` | Must be `true` (not required for `first-row` merge engine) |
 | `merge-engine` | `deduplicate` (default) or `first-row` only |
 
 ## When to Use
 
@@ -41,6 +41,7 @@ This documentation is a guide for downloading Paimon Jars.
 | Flink 1.17       | [paimon-flink-1.17-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-flink-1.17/{{< version >}}/)                                 |
 | Flink 1.16       | [paimon-flink-1.16-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-flink-1.16/{{< version >}}/)                                 |
 | Flink Action     | [paimon-flink-action-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-flink-action/{{< version >}}/)                             |
+| Spark 4.1        | [paimon-spark-4.1_2.13-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-4.1_2.13/{{< version >}}/)                         |
 | Spark 4.0        | [paimon-spark-4.0_2.13-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-4.0_2.13/{{< version >}}/)                         |
 | Spark 3.5        | [paimon-spark-3.5_2.12-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-3.5_2.12/{{< version >}}/)                         |
 | Spark 3.4        | [paimon-spark-3.4_2.12-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-3.4_2.12/{{< version >}}/)                         |
@@ -68,6 +69,7 @@ This documentation is a guide for downloading Paimon Jars.
 | Flink 1.17       | [paimon-flink-1.17-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-flink-1.17/{{< version >}}/paimon-flink-1.17-{{< version >}}.jar)                                                 |
 | Flink 1.16       | [paimon-flink-1.16-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-flink-1.16/{{< version >}}/paimon-flink-1.16-{{< version >}}.jar)                                                 |
 | Flink Action     | [paimon-flink-action-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-flink-action/{{< version >}}/paimon-flink-action-{{< version >}}.jar)                                           |
+| Spark 4.1        | [paimon-spark-4.1_2.13-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-4.1_2.13/{{< version >}}/paimon-spark-4.1_2.13-{{< version >}}.jar)                                     |
 | Spark 4.0        | [paimon-spark-4.0_2.13-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-4.0_2.13/{{< version >}}/paimon-spark-4.0_2.13-{{< version >}}.jar)                                     |
 | Spark 3.5        | [paimon-spark-3.5_2.12-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-3.5_2.12/{{< version >}}/paimon-spark-3.5_2.12-{{< version >}}.jar)                                     |
 | Spark 3.4        | [paimon-spark-3.4_2.12-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-3.4_2.12/{{< version >}}/paimon-spark-3.4_2.12-{{< version >}}.jar)                                     |