TheRakeshPurohit
diff --git a/‎crates/cache/src/lib.rs‎
Lines changed: 30 additions & 0 deletions b/‎crates/cache/src/lib.rs‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎crates/runtime/src/datafusion/query.rs‎
Lines changed: 40 additions & 2 deletions b/‎crates/runtime/src/datafusion/query.rs‎
Lines changed: 40 additions & 2 deletions
diff --git a/‎crates/runtime/src/datafusion/query/builder.rs‎
Lines changed: 19 additions & 0 deletions b/‎crates/runtime/src/datafusion/query/builder.rs‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎crates/runtime/src/datafusion/query/cache.rs‎
Lines changed: 66 additions & 31 deletions b/‎crates/runtime/src/datafusion/query/cache.rs‎
Lines changed: 66 additions & 31 deletions
@@ -59,6 +59,31 @@ pub use utils::filter_transient_error_responses;
 pub use utils::get_logical_plan_input_tables;
 pub use utils::to_cached_record_batch_stream;
 
+/// Stable [`datafusion::logical_expr::UserDefinedLogicalNodeCore::name`] values for
+/// every Spice logical-plan extension node that performs (or dispatches) a write,
+/// a schema mutation, or any other side effect. Such nodes must not be reachable via
+/// a read-only SQL path, nor be served from or populated into the SQL results cache.
+/// Matching is by exact string equality, so each entry must equal `name()` verbatim.
+///
+/// Keep this list in sync with:
+/// - `datafusion_ddl::DdlExtensionNode` → `"DdlExtension"`
+/// - `datafusion_dml::DmlExtensionNode` → `"DmlExtension"`
+/// - `cayenne::ddl::logical_nodes::CayenneMergeNode` → `"CayenneMerge"`
+/// - `runtime::datafusion::cayenne_ddl::logical_nodes::DistributedCayenne{Insert,Update,Delete}Node`
+///   → `"CayenneInsert"` / `"CayenneUpdate"` / `"CayenneDelete"` (they reuse the
+///   non-distributed names by design)
+/// - `runtime::datafusion::cayenne_ddl::logical_nodes::DistributedCayenneMergeNode`
+///   → `"DistributedCayenneMerge"`
+pub const WRITE_CAPABLE_EXTENSION_NAMES: &[&str] = &[
+    "DdlExtension",
+    "DmlExtension",
+    "CayenneInsert",
+    "CayenneUpdate",
+    "CayenneDelete",
+    "CayenneMerge",
+    "DistributedCayenneMerge",
+];
+
 use crate::result::embeddings::CachedEmbeddingResult;
 
 #[derive(Debug, Snafu)]
@@ -551,6 +576,11 @@ impl QueryResultsCacheProvider {
                 | LogicalPlan::Dml(..)
                 | LogicalPlan::Copy { .. }
                 | LogicalPlan::Statement(..) => return false,
+                LogicalPlan::Extension(ext)
+                    if WRITE_CAPABLE_EXTENSION_NAMES.contains(&ext.node.name()) =>
+                {
+                    return false;
+                }
                 _ => {}
             }
 
 
@@ -81,7 +81,9 @@ use super::{
 
 use super::managed_runtime;
 use crate::datafusion::{
-    DataFusion, query::cache::RequestCacheManager, sql_validator::validate_sql_query_operations,
+    DataFusion,
+    query::cache::RequestCacheManager,
+    sql_validator::{validate_sql_query_operations, validate_sql_query_read_only},
 };
 use managed_runtime::ManagedRuntimeError;
 use opentelemetry::KeyValue;
@@ -190,6 +192,11 @@ pub struct Query {
     df: Arc<crate::datafusion::DataFusion>,
     sql: QueryMethod,
     tracker: Option<QueryTracker>,
+    /// When true, the validator additionally rejects DDL, DML, COPY, or any
+    /// `LogicalPlan::Statement` node (including PREPARE/EXECUTE/DEALLOCATE),
+    /// regardless of per-catalog writability. Set via [`QueryBuilder::read_only`];
+    /// used by `/v1/tools/sql` and `/v1/nsql` to contain LLM-generated SQL.
+    read_only: bool,
 }
 
 macro_rules! handle_error {
@@ -310,14 +317,17 @@ impl Query {
                 sql, parameters, ..
             } => {
                 // Use the existing get_plan_or_cached which handles all cache control,
-                // stale-while-revalidate, and query tracking
+                // stale-while-revalidate, and query tracking. `read_only` is
+                // threaded through so cached results cannot bypass
+                // `validate_sql_query_read_only` below.
                 match Query::get_plan_or_cached(
                     &self.df,
                     &session,
                     Arc::clone(&request_context),
                     sql,
                     parameters.clone(),
                     tracker,
+                    self.read_only,
                 )
                 .await?
                 {
@@ -389,6 +399,12 @@ impl Query {
             let e = find_datafusion_root(e);
             return Err(Error::UnableToExecuteQuery { source: e });
         }
+        if self.read_only
+            && let Err(e) = validate_sql_query_read_only(&plan)
+        {
+            let e = find_datafusion_root(e);
+            return Err(Error::UnableToExecuteQuery { source: e });
+        }
 
         // Get the schema from the logical plan
         let schema = Arc::new(plan.schema().as_arrow().clone());
@@ -589,6 +605,7 @@ impl Query {
                             sql,
                             parameters.clone(),
                             tracker,
+                            ctx.read_only,
                         )
                         .await?
                         {
@@ -619,6 +636,19 @@ impl Query {
                     )
                 }
 
+                if ctx.read_only
+                    && let Err(e) = validate_sql_query_read_only(&plan)
+                {
+                    let e = find_datafusion_root(e);
+                    handle_error!(
+                        tracker,
+                        &request_context,
+                        ErrorCode::QueryPlanningError,
+                        e,
+                        UnableToExecuteQuery
+                    )
+                }
+
                 // Proactively invalidate cached query state for tables affected by
                 // DML mutations (INSERT, DELETE, UPDATE).
                 // - results cache must be cleared so repeated SQL does not replay
@@ -878,6 +908,7 @@ impl Query {
             df: Arc::clone(df),
             sql: QueryMethod::Plan(Box::new(plan.clone())),
             tracker: None,
+            read_only: false,
         }
     }
 
@@ -930,6 +961,13 @@ impl Query {
             self.handle_schema_error(&request_context, &e);
             return Err(e);
         }
+        if self.read_only
+            && let Err(e) = validate_sql_query_read_only(&plan)
+        {
+            let e = find_datafusion_root(e);
+            self.handle_schema_error(&request_context, &e);
+            return Err(e);
+        }
         let dataset_schema = plan.schema().as_arrow().clone();
         let parameter_schema = parameter_schema_for_plan(&plan)?;
 
 
@@ -31,6 +31,7 @@ pub struct QueryBuilder<'a> {
     parameters: Option<ParamValues>,
     table_allowlist: Option<ResolvedTableAwareAllowlist>,
     query_id: Uuid,
+    read_only: bool,
 }
 
 impl<'a> QueryBuilder<'a> {
@@ -41,6 +42,7 @@ impl<'a> QueryBuilder<'a> {
             parameters: None,
             query_id: Uuid::new_v4(),
             table_allowlist: None,
+            read_only: false,
         }
     }
 
@@ -62,6 +64,22 @@ impl<'a> QueryBuilder<'a> {
         self
     }
 
+    /// Enforce read-only SQL execution. Disabled (`false`) unless set here.
+    ///
+    /// When enabled, the planned query is additionally checked with
+    /// [`crate::datafusion::sql_validator::validate_sql_query_read_only`] and rejected if it
+    /// contains any DDL, DML, COPY, or `LogicalPlan::Statement` node (including
+    /// `PREPARE`/`EXECUTE`/`DEALLOCATE`) — regardless of whether the target
+    /// catalogs/datasets are individually marked writable. The results cache is bypassed too.
+    ///
+    /// Used by surfaces that execute SQL on behalf of an LLM or an unauthenticated caller
+    /// (the built-in `sql` tool, `/v1/nsql`).
+    #[must_use]
+    pub fn read_only(mut self, read_only: bool) -> Self {
+        self.read_only = read_only;
+        self
+    }
+
     #[must_use]
     pub fn build(self) -> Query {
         let sql: Arc<str> = self.sql.into();
@@ -91,6 +109,7 @@ impl<'a> QueryBuilder<'a> {
                 table_allowlist: self.table_allowlist,
             },
             tracker,
+            read_only: self.read_only,
         }
     }
 }
@@ -96,13 +96,27 @@ enum CacheResult {
 
 impl Query {
     /// Returns a `LogicalPlan` if the result is not cached and needs to be executed, otherwise returns a cached `QueryResult`.
+    ///
+    /// When `read_only` is true, both the SQL-keyed and plan-keyed results-cache
+    /// lookups are skipped, and the returned [`RequestCacheManager`] is forced to
+    /// [`CacheStatus::CacheDisabled`]. This is required because the read-only
+    /// contract (enforced by [`super::validate_sql_query_read_only`]) runs on the
+    /// [`LogicalPlan`] *after* `get_plan_or_cached` returns — a cache hit would
+    /// otherwise short-circuit validation and let write-capable plans served from
+    /// a prior cache-store bypass the read-only guarantee on `/v1/tools/sql` and
+    /// `/v1/nsql`. The cache-eligibility check
+    /// ([`cache::QueryResultsCacheProvider::cache_is_enabled_for_plan`]) filters
+    /// the classic DDL/DML/Copy/Statement plan variants and only those
+    /// [`LogicalPlan::Extension`] nodes whose name is listed in
+    /// `cache::WRITE_CAPABLE_EXTENSION_NAMES`, so it is not relied on here.
     pub(super) async fn get_plan_or_cached(
         df: &Arc<DataFusion>,
         session: &SessionState,
         request_context: Arc<RequestContext>,
         sql: &str,
         parameters: Option<ParamValues>,
         tracker: Option<QueryTracker>,
+        read_only: bool,
     ) -> super::Result<PlanOrCached> {
         let cache_control = request_context.cache_control();
         let sql_cache_key = CacheKey::Query(sql, parameters.as_ref());
@@ -117,25 +131,32 @@ impl Query {
             _ => sql_cache_key,
         };
 
-        // Try to get cached results from SQL or client key
+        // Try to get cached results from SQL or client key. When `read_only` is
+        // true, skip the cache lookup entirely so read-only validation always
+        // gets a chance to run on the freshly-planned query.
         let CacheResponse {
             tracker,
             raw_key: sql_or_client_raw_key,
             ..
-        } = match Self::try_get_cached_result(
-            df,
-            &request_context,
-            tracker,
-            &sql_or_user_cache_key,
-            sql,
-        )
-        .await?
-        {
-            CacheResponse {
-                result: CacheResult::Hit(result),
-                ..
-            } => return Ok(PlanOrCached::Cached(result)),
-            response => response,
+        } = if read_only {
+            CacheResponse::from(CacheResult::MissOrSkipped, CacheStatus::CacheDisabled)
+                .with_query_tracker(tracker)
+        } else {
+            match Self::try_get_cached_result(
+                df,
+                &request_context,
+                tracker,
+                &sql_or_user_cache_key,
+                sql,
+            )
+            .await?
+            {
+                CacheResponse {
+                    result: CacheResult::Hit(result),
+                    ..
+                } => return Ok(PlanOrCached::Cached(result)),
+                response => response,
+            }
         };
 
         let sql_raw_cache_key = sql_cache_key.as_raw_key(Self::plan_hasher(df));
@@ -154,26 +175,32 @@ impl Query {
             }
         };
 
-        // Try to get cached results from plan
+        // Try to get cached results from plan (skipped for read-only, same
+        // reasoning as the SQL-keyed lookup above).
         let CacheResponse {
             mut tracker,
             raw_key: plan_raw_cache_key,
             status,
             ..
-        } = match Self::try_get_cached_result(
-            df,
-            &request_context,
-            tracker,
-            &CacheKey::LogicalPlan(&plan),
-            sql,
-        )
-        .await?
-        {
-            CacheResponse {
-                result: CacheResult::Hit(result),
-                ..
-            } => return Ok(PlanOrCached::Cached(result)),
-            response => response,
+        } = if read_only {
+            CacheResponse::from(CacheResult::MissOrSkipped, CacheStatus::CacheDisabled)
+                .with_query_tracker(tracker)
+        } else {
+            match Self::try_get_cached_result(
+                df,
+                &request_context,
+                tracker,
+                &CacheKey::LogicalPlan(&plan),
+                sql,
+            )
+            .await?
+            {
+                CacheResponse {
+                    result: CacheResult::Hit(result),
+                    ..
+                } => return Ok(PlanOrCached::Cached(result)),
+                response => response,
+            }
         };
 
         let request_raw_cache_key = match request_context.cache_control() {
@@ -185,7 +212,15 @@ impl Query {
         }
         .unwrap_or(sql_raw_cache_key);
 
-        let cache_status = Self::should_cache_results(df, &plan, status);
+        // Read-only requests must also not populate the results cache — the
+        // plan has not yet been validated at this point, and we don't want a
+        // writable surface's cached output to leak back through a read-only
+        // caller on a later identical query.
+        let cache_status = if read_only {
+            CacheStatus::CacheDisabled
+        } else {
+            Self::should_cache_results(df, &plan, status)
+        };
         tracker = tracker.map(|t| t.results_cache_hit(false));
 
         Ok(PlanOrCached::Plan(