43 changes: 13 additions & 30 deletions src/bin/index.rs
@@ -1081,21 +1081,15 @@ async fn main() -> Result<()> {
     );
 
     if total_new_emails > 0 {
-        match db_manager.check_optimization_health().await {
-            Ok((needs_optimization, message)) => {
-                if needs_optimization {
-                    println!("\n{}", message);
-                    match db_manager.optimize_database().await {
-                        Ok(_) => println!("Database optimization completed successfully"),
-                        Err(e) => error!("Failed to optimize database: {}", e),
-                    }
-                } else {
-                    println!("\n{}", message);
-                }
-            }
-            Err(e) => {
-                error!("Failed to check database health: {}", e);
-            }
+        // Compact only the lore tables. The full optimize_database()
+        // method processes every table in the database, including
+        // code-index tables and content shards that a lore run
+        // never touches. On memory-constrained systems the
+        // combined working set triggers the OOM killer.
+        println!("\nCompacting lore tables...");
+        match db_manager.compact_lore_tables().await {
+            Ok(_) => println!("Lore table compaction completed successfully"),
+            Err(e) => error!("Failed to compact lore tables: {}", e),
         }
 
         // Create FTS indices on first run; merge new rows on
@@ -1255,21 +1249,10 @@ async fn main() -> Result<()> {
     }
 
     if total_new_emails > 0 {
-        match db_manager.check_optimization_health().await {
-            Ok((needs_optimization, message)) => {
-                if needs_optimization {
-                    println!("\n{}", message);
-                    match db_manager.optimize_database().await {
-                        Ok(_) => println!("Database optimization completed successfully"),
-                        Err(e) => error!("Failed to optimize database: {}", e),
-                    }
-                } else {
-                    println!("\n{}", message);
-                }
-            }
-            Err(e) => {
-                error!("Failed to check database health: {}", e);
-            }
+        println!("\nCompacting lore tables...");
+        match db_manager.compact_lore_tables().await {
+            Ok(_) => println!("Lore table compaction completed successfully"),
+            Err(e) => error!("Failed to compact lore tables: {}", e),
         }
 
         println!("\nUpdating FTS indices for lore table...");
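Net effect of the two hunks: the conditional health check and whole-database optimize are gone, replaced by an unconditional, narrower compaction. A condensed sketch of the new flow — assuming the crate's `Result` is anyhow's, and with a stand-in `DbManager` so the snippet is self-contained rather than the real `DatabaseManager`:

```rust
use anyhow::Result;
use tracing::error;

// DbManager stands in for the real DatabaseManager; it models only
// the method this PR adds, so the sketch compiles on its own.
struct DbManager;

impl DbManager {
    async fn compact_lore_tables(&self) -> Result<()> {
        Ok(()) // real implementation: src/database/schema.rs
    }
}

// Condensed post-indexing step from the two hunks above.
async fn after_indexing(db: &DbManager, total_new_emails: usize) {
    if total_new_emails > 0 {
        // Narrow compaction: only the lore tables, never the
        // code-index tables or content shards.
        println!("\nCompacting lore tables...");
        match db.compact_lore_tables().await {
            Ok(_) => println!("Lore table compaction completed successfully"),
            Err(e) => error!("Failed to compact lore tables: {}", e),
        }
    }
}
```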
4 changes: 1 addition & 3 deletions src/bin/semcode-lsp.rs
@@ -343,9 +343,7 @@ impl LanguageServer for SemcodeLspBackend {
             .map(|f| &f.uri);
         #[allow(deprecated)]
         let workspace_uri = workspace_uri.or(params.root_uri.as_ref());
-        let _ = self
-            .ensure_database_connection(workspace_uri)
-            .await;
+        let _ = self.ensure_database_connection(workspace_uri).await;
 
         Ok(InitializeResult {
             server_info: Some(ServerInfo {
4 changes: 4 additions & 0 deletions src/database/connection.rs
@@ -504,6 +504,10 @@ impl DatabaseManager {
         self.schema_manager.compact_and_cleanup().await
     }
 
+    pub async fn compact_lore_tables(&self) -> Result<()> {
+        self.schema_manager.compact_lore_tables().await
+    }
+
     /// Drop and recreate all tables for maximum space savings
     pub async fn drop_and_recreate_tables(&self) -> Result<()> {
         self.schema_manager.drop_and_recreate_tables().await
83 changes: 83 additions & 0 deletions src/database/schema.rs
@@ -990,6 +990,34 @@ impl SchemaManager {
     /// even before this call. Running optimize merges those rows
     /// into the inverted index structure, eliminating the scan cost.
     pub async fn optimize_lore_fts_indices(&self) -> Result<()> {
+        // Guard against running on a table with a large _indices/
+        // backlog. lance/index/append.rs opens every delta fragment
+        // for a column before merging any, so peak memory scales
+        // linearly with the number of fragments per column. On
+        // memory-constrained systems a backlog in the thousands
+        // drives semcode-index into swap and gets it OOM-killed.
+        // Query correctness is preserved regardless: unindexed rows
+        // still fall back to a brute-force scan.
+        const MAX_LORE_INDEX_FRAGMENTS: usize = 100;
+        let uri = self.connection.uri();
+        let indices_dir = std::path::Path::new(uri)
+            .join("lore.lance")
+            .join("_indices");
+        if let Ok(rd) = std::fs::read_dir(&indices_dir) {
+            let count = rd.count();
+            if count > MAX_LORE_INDEX_FRAGMENTS {
+                tracing::warn!(
+                    "Skipping lore FTS index optimization: \
+                     {} _indices/ fragments exceed the {} threshold. \
+                     Queries remain correct via brute-force fallback. \
+                     Rebuild the lore table on a larger host to recover.",
+                    count,
+                    MAX_LORE_INDEX_FRAGMENTS
+                );
+                return Ok(());
+            }
+        }
+
         let table = self.connection.open_table("lore").execute().await?;
         let start_time = std::time::Instant::now();
 
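The guard reduces to counting directory entries, so the same check can be run by hand before an index run. A minimal standalone sketch under the same layout assumption as the guard above (one entry per delta fragment under `lore.lance/_indices/`; the database path is illustrative):

```rust
use std::path::Path;

/// Count delta index fragments under the lore table's _indices/
/// directory — the same signal the guard above checks. A missing
/// directory reports zero rather than erroring.
fn lore_index_backlog(db_path: &Path) -> usize {
    let indices_dir = db_path.join("lore.lance").join("_indices");
    std::fs::read_dir(&indices_dir)
        .map(|rd| rd.count())
        .unwrap_or(0)
}

fn main() {
    // Illustrative path; point this at the real database root.
    let backlog = lore_index_backlog(Path::new("/var/lib/semcode/db"));
    println!("lore _indices/ fragments: {}", backlog);
}
```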
@@ -1189,6 +1217,43 @@ impl SchemaManager {
         Ok(())
     }
 
+    /// Compact only the tables modified by lore indexing.
+    ///
+    /// The full `compact_and_cleanup` method processes every table in
+    /// the database, including code-index tables and content shards
+    /// that a lore run never touches. On memory-constrained systems
+    /// the combined working set of those compactions triggers the OOM
+    /// killer. This method limits work to the two lore tables and
+    /// processes them sequentially to keep peak memory low.
+    pub async fn compact_lore_tables(&self) -> Result<()> {
+        tracing::info!("Running compaction for lore tables...");
+
+        let table_names = self.connection.table_names().execute().await?;
+        let lore_tables = ["lore", "lore_indexed_commits"];
+
+        for name in &lore_tables {
+            if !table_names.iter().any(|n| n == name) {
+                continue;
+            }
+            match Self::optimize_single_table(&self.connection, name).await {
+                Ok(OptimizeOutcome::Optimized) => {
+                    tracing::info!("Compacted table {}", name);
+                }
+                Ok(OptimizeOutcome::Skipped) => {
+                    tracing::info!("Skipped table {} (too small)", name);
+                }
+                Ok(OptimizeOutcome::PartialFailure) => {
+                    tracing::warn!("Partial failure compacting table {}", name);
+                }
+                Err(e) => {
+                    tracing::warn!("Failed to compact table {}: {}", name, e);
+                }
+            }
+        }
+
+        Ok(())
+    }
+
     /// Optimize a single table - runs compact, prune, and index operations
     ///
     /// Tables with fewer than 1000 rows are skipped since the overhead of
@@ -1197,6 +1262,24 @@
         connection: &Connection,
         table_name: &str,
     ) -> Result<OptimizeOutcome> {
+        // The lore table is indexed incrementally via
+        // ensure_lore_fts_indices() + optimize_lore_fts_indices().
+        // Running the generic optimize path here does no useful work
+        // that those helpers have not already done, and for large
+        // lore archives its Compact phase walks every delta index
+        // fragment under _indices/, holding per-fragment state until
+        // the operation completes. On memory-constrained systems the
+        // resident set grows into swap and the OOM killer terminates
+        // semcode-index before compaction finishes, leaving fresh
+        // delta fragments behind each time. Skip the table entirely.
+        if table_name == "lore" {
+            tracing::info!(
+                "Skipping generic optimization for lore table \
+                 (handled by optimize_lore_fts_indices)"
+            );
+            return Ok(OptimizeOutcome::Skipped);
+        }
+
         // Minimum row count for optimization to be worthwhile
         const MIN_ROWS_FOR_OPTIMIZATION: usize = 1000;
 
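For reference, these are the `OptimizeOutcome` variants matched throughout this diff, reconstructed from usage — the real definition in schema.rs may carry payload data or extra variants:

```rust
// Inferred from the match arms in this PR; illustrative only.
enum OptimizeOutcome {
    /// Compact, prune, and index all ran.
    Optimized,
    /// Table skipped — too few rows, or the lore table per this PR.
    Skipped,
    /// At least one phase failed; others may have completed.
    PartialFailure,
}
```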