rerun-io
diff --git a/‎crates/store/re_chunk_store/src/dataframe.rs
Lines changed: 6 additions & 6 deletions b/‎crates/store/re_chunk_store/src/dataframe.rs
Lines changed: 6 additions & 6 deletions
diff --git a/‎crates/store/re_dataframe/src/engine.rs
Lines changed: 3 additions & 3 deletions b/‎crates/store/re_dataframe/src/engine.rs
Lines changed: 3 additions & 3 deletions
diff --git a/‎crates/store/re_dataframe/src/query.rs
Lines changed: 3 additions & 3 deletions b/‎crates/store/re_dataframe/src/query.rs
Lines changed: 3 additions & 3 deletions
diff --git a/‎crates/store/re_sorbet/src/chunk_batch.rs
Lines changed: 7 additions & 2 deletions b/‎crates/store/re_sorbet/src/chunk_batch.rs
Lines changed: 7 additions & 2 deletions
diff --git a/‎crates/store/re_sorbet/src/chunk_columns.rs
Lines changed: 160 additions & 0 deletions b/‎crates/store/re_sorbet/src/chunk_columns.rs
Lines changed: 160 additions & 0 deletions
diff --git a/‎crates/store/re_sorbet/src/chunk_schema.rs
Lines changed: 23 additions & 30 deletions b/‎crates/store/re_sorbet/src/chunk_schema.rs
Lines changed: 23 additions & 30 deletions
diff --git a/‎crates/store/re_sorbet/src/column_descriptor.rs
Lines changed: 13 additions & 6 deletions b/‎crates/store/re_sorbet/src/column_descriptor.rs
Lines changed: 13 additions & 6 deletions
@@ -14,8 +14,8 @@ use itertools::Itertools as _;
 use re_chunk::{LatestAtQuery, RangeQuery, TimelineName};
 use re_log_types::{EntityPath, ResolvedTimeRange, TimeInt, Timeline};
 use re_sorbet::{
-    ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor, ComponentColumnSelector,
-    IndexColumnDescriptor, SorbetColumnDescriptors, TimeColumnSelector,
+    ChunkColumnDescriptors, ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor,
+    ComponentColumnSelector, IndexColumnDescriptor, TimeColumnSelector,
 };
 use re_types_core::{ComponentDescriptor, ComponentName};
 use tap::Tap as _;
@@ -306,7 +306,7 @@ impl ChunkStore {
     /// The order of the columns is guaranteed to be in a specific order:
     /// * first, the time columns in lexical order (`frame_nr`, `log_time`, ...);
     /// * second, the component columns in lexical order (`Color`, `Radius, ...`).
-    pub fn schema(&self) -> SorbetColumnDescriptors {
+    pub fn schema(&self) -> ChunkColumnDescriptors {
         re_tracing::profile_function!();
 
         let indices = self
@@ -360,8 +360,8 @@ impl ChunkStore {
             .collect_vec()
             .tap_mut(|components| components.sort());
 
-        SorbetColumnDescriptors {
-            row_id: Some(self.row_id_descriptor()),
+        ChunkColumnDescriptors {
+            row_id: self.row_id_descriptor(),
             indices,
             components,
         }
@@ -483,7 +483,7 @@ impl ChunkStore {
     /// The order of the columns is guaranteed to be in a specific order:
     /// * first, the time columns in lexical order (`frame_nr`, `log_time`, ...);
     /// * second, the component columns in lexical order (`Color`, `Radius, ...`).
-    pub fn schema_for_query(&self, query: &QueryExpression) -> SorbetColumnDescriptors {
+    pub fn schema_for_query(&self, query: &QueryExpression) -> ChunkColumnDescriptors {
         re_tracing::profile_function!();
 
         let QueryExpression {
 
@@ -4,7 +4,7 @@ use re_chunk::EntityPath;
 use re_chunk_store::{ChunkStore, ChunkStoreConfig, ChunkStoreHandle, QueryExpression};
 use re_log_types::{EntityPathFilter, StoreId};
 use re_query::{QueryCache, QueryCacheHandle, StorageEngine, StorageEngineLike};
-use re_sorbet::SorbetColumnDescriptors;
+use re_sorbet::ChunkColumnDescriptors;
 
 use crate::QueryHandle;
 
@@ -69,7 +69,7 @@ impl<E: StorageEngineLike + Clone> QueryEngine<E> {
     /// * first, the time columns in lexical order (`frame_nr`, `log_time`, ...);
     /// * second, the component columns in lexical order (`Color`, `Radius, ...`).
     #[inline]
-    pub fn schema(&self) -> SorbetColumnDescriptors {
+    pub fn schema(&self) -> ChunkColumnDescriptors {
         self.engine.with(|store, _cache| store.schema())
     }
 
@@ -79,7 +79,7 @@ impl<E: StorageEngineLike + Clone> QueryEngine<E> {
     /// * first, the time columns in lexical order (`frame_nr`, `log_time`, ...);
     /// * second, the component columns in lexical order (`Color`, `Radius, ...`).
     #[inline]
-    pub fn schema_for_query(&self, query: &QueryExpression) -> SorbetColumnDescriptors {
+    pub fn schema_for_query(&self, query: &QueryExpression) -> ChunkColumnDescriptors {
         self.engine
             .with(|store, _cache| store.schema_for_query(query))
     }
 
@@ -32,7 +32,7 @@ use re_chunk_store::{
 use re_log_types::ResolvedTimeRange;
 use re_query::{QueryCache, StorageEngineLike};
 use re_sorbet::{
-    ColumnSelector, ComponentColumnSelector, RowIdColumnDescriptor, SorbetColumnDescriptors,
+    ChunkColumnDescriptors, ColumnSelector, ComponentColumnSelector, RowIdColumnDescriptor,
     TimeColumnSelector,
 };
 use re_types_core::{archetypes, arrow_helpers::as_array_ref, ComponentDescriptor, Loggable as _};
@@ -79,7 +79,7 @@ struct QueryHandleState {
     /// Describes the columns that make up this view.
     ///
     /// See [`QueryExpression::view_contents`].
-    view_contents: SorbetColumnDescriptors,
+    view_contents: ChunkColumnDescriptors,
 
     /// Describes the columns specifically selected to be returned from this view.
     ///
@@ -683,7 +683,7 @@ impl<E: StorageEngineLike> QueryHandle<E> {
     ///
     /// See [`QueryExpression::view_contents`].
     #[inline]
-    pub fn view_contents(&self) -> &SorbetColumnDescriptors {
+    pub fn view_contents(&self) -> &ChunkColumnDescriptors {
         &self.init().view_contents
     }
 
 
@@ -138,12 +138,17 @@ impl From<&ChunkBatch> for ArrowRecordBatch {
 impl TryFrom<&ArrowRecordBatch> for ChunkBatch {
     type Error = SorbetError;
 
-    /// Will automatically wrap data columns in `ListArrays` if they are not already.
+    /// Will perform some transformations:
+    /// * Will automatically wrap data columns in `ListArrays` if they are not already
+    /// * Will reorder columns so that Row ID comes before timelines, which come before data
+    /// * Will migrate legacy data to more modern form
     fn try_from(batch: &ArrowRecordBatch) -> Result<Self, Self::Error> {
         re_tracing::profile_function!();
 
+        let batch = crate::migration::reorder_columns(batch);
+
         Self::try_from(SorbetBatch::try_from_record_batch(
-            batch,
+            &batch,
             crate::BatchType::Chunk,
         )?)
     }
 
@@ -0,0 +1,160 @@
+use arrow::datatypes::Fields as ArrowFields;
+
+use re_log_types::EntityPath;
+
+use crate::{
+    ColumnDescriptor, ComponentColumnDescriptor, IndexColumnDescriptor, RowIdColumnDescriptor,
+    SorbetColumnDescriptors, SorbetError,
+};
+
+/// Column descriptors of a chunk, in their required order: row id, then indices, then components.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ChunkColumnDescriptors {
+    /// The single, mandatory row id column; it always comes first.
+    pub row_id: RowIdColumnDescriptor,
+
+    /// Index columns (timelines); they come after the row id and before any component column.
+    pub indices: Vec<IndexColumnDescriptor>,
+
+    /// The component (data) columns; they come last.
+    pub components: Vec<ComponentColumnDescriptor>,
+}
+
+impl ChunkColumnDescriptors {
+    /// Debug-only sanity check.
+    ///
+    /// Runs the sanity check of every component column.
+    #[inline]
+    #[track_caller]
+    pub fn sanity_check(&self) {
+        for descr in &self.components {
+            descr.sanity_check();
+        }
+    }
+
+    /// Collects every index column followed by every component column.
+    ///
+    /// The `row_id` column is not part of the result.
+    ///
+    /// See also [`Self::get_index_or_component`].
+    // TODO(#9922): stop ignoring row_id
+    pub fn indices_and_components(&self) -> Vec<ColumnDescriptor> {
+        let index_columns = self.indices.iter().cloned().map(ColumnDescriptor::Time);
+        let component_columns = self
+            .components
+            .iter()
+            .cloned()
+            .map(ColumnDescriptor::Component);
+
+        index_columns.chain(component_columns).collect()
+    }
+
+    /// Positional lookup over the index columns followed by the component columns.
+    ///
+    /// The `row_id` column is ignored completely, i.e. `get_index_or_component(0)` yields the
+    /// first index column if there is one, and the first component column otherwise.
+    /// Returns `None` when the position is past the last component column.
+    ///
+    /// See also [`Self::indices_and_components`].
+    // TODO(#9922): stop ignoring row_id
+    pub fn get_index_or_component(&self, index_ignoring_row_id: usize) -> Option<ColumnDescriptor> {
+        match self.indices.get(index_ignoring_row_id) {
+            Some(index_column) => Some(ColumnDescriptor::Time(index_column.clone())),
+
+            // Past the index columns: look the position up among the component columns instead.
+            // The subtraction cannot underflow: `get` only fails at or beyond `indices.len()`.
+            None => self
+                .components
+                .get(index_ignoring_row_id - self.indices.len())
+                .cloned()
+                .map(ColumnDescriptor::Component),
+        }
+    }
+
+    /// Drops every component column for which `keep` returns `false`.
+    ///
+    /// The row id and index columns are left untouched.
+    #[must_use]
+    #[inline]
+    pub fn filter_components(mut self, keep: impl Fn(&ComponentColumnDescriptor) -> bool) -> Self {
+        self.components.retain(keep);
+        self
+    }
+}
+
+impl ChunkColumnDescriptors {
+    /// Builds the chunk column descriptors from a set of arrow fields.
+    ///
+    /// The fields are first turned into [`SorbetColumnDescriptors`] (forwarding
+    /// `chunk_entity_path` as-is), which are then converted into [`ChunkColumnDescriptors`].
+    ///
+    /// # Errors
+    /// Fails if either of those two steps fails.
+    pub fn try_from_arrow_fields(
+        chunk_entity_path: Option<&EntityPath>,
+        fields: &ArrowFields,
+    ) -> Result<Self, SorbetError> {
+        let sorbet_columns =
+            SorbetColumnDescriptors::try_from_arrow_fields(chunk_entity_path, fields)?;
+
+        Self::try_from(sorbet_columns)
+    }
+}
+
+impl TryFrom<SorbetColumnDescriptors> for ChunkColumnDescriptors {
+    type Error = SorbetError;
+
+    /// Splits a flat list of columns into the row id, the indices and the components,
+    /// while checking that the columns show up in exactly that order.
+    ///
+    /// # Errors
+    /// * [`SorbetError::InvalidColumnOrder`] if a row id column follows an index or component
+    ///   column, or if an index column follows a component column.
+    /// * [`SorbetError::MultipleRowIdColumns`] if there is more than one row id column.
+    /// * [`SorbetError::MissingRowIdColumn`] if there is no row id column at all.
+    fn try_from(columns: SorbetColumnDescriptors) -> Result<Self, Self::Error> {
+        let SorbetColumnDescriptors { columns } = columns;
+
+        let mut row_ids = Vec::new();
+        let mut indices = Vec::new();
+        let mut components = Vec::new();
+
+        for column in &columns {
+            match column {
+                ColumnDescriptor::RowId(row_id) => {
+                    // A row id is only allowed while nothing else has been seen yet.
+                    if !indices.is_empty() || !components.is_empty() {
+                        let err = format!(
+                            "RowId column must be the first column; but the columns were: {columns:?}"
+                        );
+                        return Err(SorbetError::InvalidColumnOrder(err));
+                    }
+                    row_ids.push(row_id.clone());
+                }
+
+                ColumnDescriptor::Time(index) => {
+                    // An index is only allowed while no component has been seen yet.
+                    if !components.is_empty() {
+                        return Err(SorbetError::InvalidColumnOrder(
+                            "Index columns must come before any data columns".to_owned(),
+                        ));
+                    }
+                    indices.push(index.clone());
+                }
+
+                ColumnDescriptor::Component(component) => {
+                    components.push(component.clone());
+                }
+            }
+        }
+
+        let num_row_ids = row_ids.len();
+        if num_row_ids > 1 {
+            return Err(SorbetError::MultipleRowIdColumns(num_row_ids));
+        }
+
+        // At this point there is either zero or one row id left.
+        match row_ids.pop() {
+            Some(row_id) => Ok(Self {
+                row_id,
+                indices,
+                components,
+            }),
+            None => Err(SorbetError::MissingRowIdColumn),
+        }
+    }
+}
+
+impl From<ChunkColumnDescriptors> for SorbetColumnDescriptors {
+    /// Flattens the chunk columns into a single list: the row id column first,
+    /// then every index column, then every component column.
+    fn from(columns: ChunkColumnDescriptors) -> Self {
+        let ChunkColumnDescriptors {
+            row_id,
+            indices,
+            components,
+        } = columns;
+
+        // All three parts are owned here, so they are moved into the result rather than cloned.
+        let columns = itertools::chain!(
+            std::iter::once(ColumnDescriptor::RowId(row_id)),
+            indices.into_iter().map(ColumnDescriptor::Time),
+            components.into_iter().map(ColumnDescriptor::Component),
+        )
+        .collect();
+
+        Self { columns }
+    }
+}
@@ -6,7 +6,8 @@ use re_log_types::EntityPath;
 use re_types_core::ChunkId;
 
 use crate::{
-    ArrowBatchMetadata, ComponentColumnDescriptor, IndexColumnDescriptor, RowIdColumnDescriptor,
+    chunk_columns::ChunkColumnDescriptors, ArrowBatchMetadata, ColumnDescriptor,
+    ComponentColumnDescriptor, IndexColumnDescriptor, RowIdColumnDescriptor,
     SorbetColumnDescriptors, SorbetError, SorbetSchema,
 };
 
@@ -19,8 +20,8 @@ pub struct ChunkSchema {
     sorbet: SorbetSchema,
 
     // Some things here are also in [`SorbetSchema]`, but are duplicated
-    // here because they are non-optional:
-    row_id: RowIdColumnDescriptor,
+    // here because they have additional constraints (e.g. ordering, non-optional):
+    chunk_columns: ChunkColumnDescriptors,
     chunk_id: ChunkId,
     entity_path: EntityPath,
 }
@@ -53,15 +54,22 @@ impl ChunkSchema {
         Self {
             sorbet: SorbetSchema {
                 columns: SorbetColumnDescriptors {
-                    row_id: Some(row_id.clone()),
-                    indices,
-                    components,
+                    columns: itertools::chain!(
+                        std::iter::once(ColumnDescriptor::RowId(row_id.clone())),
+                        indices.iter().cloned().map(ColumnDescriptor::Time),
+                        components.iter().cloned().map(ColumnDescriptor::Component),
+                    )
+                    .collect(),
                 },
                 chunk_id: Some(chunk_id),
                 entity_path: Some(entity_path.clone()),
                 heap_size_bytes: None,
             },
-            row_id,
+            chunk_columns: ChunkColumnDescriptors {
+                row_id,
+                indices,
+                components,
+            },
             chunk_id,
             entity_path,
         }
@@ -103,17 +111,7 @@ impl ChunkSchema {
 
     #[inline]
     pub fn row_id_column(&self) -> &RowIdColumnDescriptor {
-        &self.row_id
-    }
-
-    #[inline]
-    pub fn index_columns(&self) -> &[IndexColumnDescriptor] {
-        &self.sorbet.columns.indices
-    }
-
-    #[inline]
-    pub fn component_columns(&self) -> &[ComponentColumnDescriptor] {
-        &self.sorbet.columns.components
+        &self.chunk_columns.row_id
     }
 
     pub fn arrow_batch_metadata(&self) -> ArrowBatchMetadata {
@@ -148,20 +146,15 @@ impl TryFrom<SorbetSchema> for ChunkSchema {
 
     fn try_from(sorbet_schema: SorbetSchema) -> Result<Self, Self::Error> {
         Ok(Self {
-            row_id: sorbet_schema
-                .columns
-                .row_id
-                .clone()
-                .ok_or_else(|| SorbetError::custom("Missing row_id column"))?,
-            chunk_id: sorbet_schema
-                .chunk_id
-                .ok_or_else(|| SorbetError::custom("Missing chunk_id"))?,
+            sorbet: sorbet_schema.clone(),
+
+            chunk_columns: ChunkColumnDescriptors::try_from(sorbet_schema.columns.clone())?,
+
+            chunk_id: sorbet_schema.chunk_id.ok_or(SorbetError::MissingChunkId)?,
+
             entity_path: sorbet_schema
                 .entity_path
-                .clone()
-                .ok_or_else(|| SorbetError::custom("Missing entity_path"))?,
-
-            sorbet: sorbet_schema,
+                .ok_or(SorbetError::MissingEntityPath)?,
         })
     }
 }
@@ -27,16 +27,23 @@ pub enum ColumnError {
     UnsupportedTimeType(#[from] crate::UnsupportedTimeType),
 }
 
-// Describes any kind of column.
-//
-// See:
-// * [`IndexColumnDescriptor`]
-// * [`ComponentColumnDescriptor`]
-//TODO(#9034): This should support RowId as well, but this has ramifications on the dataframe API.
+/// Describes any kind of column.
+///
+/// See:
+/// * [`RowIdColumnDescriptor`]
+/// * [`IndexColumnDescriptor`]
+/// * [`ComponentColumnDescriptor`]
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub enum ColumnDescriptor {
+    /// The primary row id column.
+    ///
+    /// There should usually only be one of these.
     RowId(RowIdColumnDescriptor),
+
+    /// Index columns (timelines).
     Time(IndexColumnDescriptor),
+
+    /// The actual component data
     Component(ComponentColumnDescriptor),
 }