Commit d2690bf

sort snapshots to the front in loadDoc (#414)
1 parent a0aae79 · commit d2690bf
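Context, not part of the commit message: automerge-repo persists each document as a set of chunks stored under keys of the form [documentId, chunkType, hash], where chunkType is "snapshot" or "incremental". Previously, loadDoc concatenated chunks in whatever order loadRange([documentId]) returned them; this change loads the two chunk types with separate ranged queries so that snapshot bytes always precede incremental bytes in the merged binary. A minimal sketch of the concatenation step follows; the real helper of the same name lives in the package, and this implementation is illustrative only.

// Illustrative sketch: concatenates byte arrays in the order given, which is
// why putting snapshot chunks first changes what the merged binary starts with.
function mergeArrays(arrays: Uint8Array[]): Uint8Array {
  const totalLength = arrays.reduce((sum, a) => sum + a.length, 0)
  const merged = new Uint8Array(totalLength)
  let offset = 0
  for (const a of arrays) {
    merged.set(a, offset)
    offset += a.length
  }
  return merged
}

// Snapshot bytes first, then incrementals, as loadDocData now guarantees:
// const binary = mergeArrays([...snapshotBinaries, ...incrementalBinaries])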

File tree

3 files changed: +125 −15 lines

packages/automerge-repo/src/storage/StorageSubsystem.ts

Lines changed: 44 additions & 14 deletions
@@ -6,7 +6,6 @@ import { type DocumentId } from "../types.js"
 import { StorageAdapterInterface } from "./StorageAdapterInterface.js"
 import { ChunkInfo, StorageKey, StorageId } from "./types.js"
 import { keyHash, headsHash } from "./keyHash.js"
-import { chunkTypeFromKey } from "./chunkTypeFromKey.js"
 import * as Uuid from "uuid"
 import { EventEmitter } from "eventemitter3"
 import { encodeHeads } from "../AutomergeUrl.js"
@@ -113,33 +112,63 @@ export class StorageSubsystem extends EventEmitter<StorageSubsystemEvents> {
   // AUTOMERGE DOCUMENT STORAGE
 
   /**
-   * Loads the Automerge document with the given ID from storage.
+   * Loads and combines document chunks from storage, with snapshots first.
    */
-  async loadDoc<T>(documentId: DocumentId): Promise<A.Doc<T> | null> {
-    // Load all the chunks for this document
-    const chunks = await this.#storageAdapter.loadRange([documentId])
-    const binaries = []
+  async loadDocData(documentId: DocumentId): Promise<Uint8Array | null> {
+    // Load snapshots first
+    const snapshotChunks = await this.#storageAdapter.loadRange([
+      documentId,
+      "snapshot",
+    ])
+    const incrementalChunks = await this.#storageAdapter.loadRange([
+      documentId,
+      "incremental",
+    ])
+
+    const binaries: Uint8Array[] = []
     const chunkInfos: ChunkInfo[] = []
 
-    for (const chunk of chunks) {
-      // chunks might have been deleted in the interim
+    // Process snapshots first
+    for (const chunk of snapshotChunks) {
       if (chunk.data === undefined) continue
+      chunkInfos.push({
+        key: chunk.key,
+        type: "snapshot",
+        size: chunk.data.length,
+      })
+      binaries.push(chunk.data)
+    }
 
-      const chunkType = chunkTypeFromKey(chunk.key)
-      if (chunkType == null) continue
-
+    // Then process incrementals
+    for (const chunk of incrementalChunks) {
+      if (chunk.data === undefined) continue
       chunkInfos.push({
         key: chunk.key,
-        type: chunkType,
+        type: "incremental",
         size: chunk.data.length,
       })
       binaries.push(chunk.data)
     }
+
+    // Store chunk infos for future reference
     this.#chunkInfos.set(documentId, chunkInfos)
 
+    // If no chunks were found, return null
+    if (binaries.length === 0) {
+      return null
+    }
+
     // Merge the chunks into a single binary
-    const binary = mergeArrays(binaries)
-    if (binary.length === 0) return null
+    return mergeArrays(binaries)
+  }
+
+  /**
+   * Loads the Automerge document with the given ID from storage.
+   */
+  async loadDoc<T>(documentId: DocumentId): Promise<A.Doc<T> | null> {
+    // Load and combine chunks
+    const binary = await this.loadDocData(documentId)
+    if (!binary) return null
 
     // Load into an Automerge document
     const start = performance.now()
@@ -169,6 +198,7 @@ export class StorageSubsystem extends EventEmitter<StorageSubsystemEvents> {
     if (!this.#shouldSave(documentId, doc)) return
 
     const sourceChunks = this.#chunkInfos.get(documentId) ?? []
+
    if (this.#shouldCompact(sourceChunks)) {
      await this.#saveTotal(documentId, doc, sourceChunks)
    } else {
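For orientation, a usage sketch of the two load paths. Assumptions: the import paths mirror the test files below, "example-doc" is a hypothetical document ID, and DummyStorageAdapter is the in-memory test helper used in StorageSubsystem.test.ts; saveDoc is used exactly as in the new tests.

import * as A from "@automerge/automerge/next"
import { StorageSubsystem } from "../src/storage/StorageSubsystem.js"
import { DummyStorageAdapter } from "../src/helpers/DummyStorageAdapter.js"
import { DocumentId } from "../src/types.js"

async function demo() {
  const storage = new StorageSubsystem(new DummyStorageAdapter())
  const documentId = "example-doc" as DocumentId

  // Save a document, then read it back two ways.
  const doc = A.change(A.init<{ n: number }>(), d => {
    d.n = 1
  })
  await storage.saveDoc(documentId, doc)

  // Raw bytes: snapshot chunks first, then incrementals (null if empty).
  const bytes = await storage.loadDocData(documentId)

  // Hydrated document, built from those same bytes.
  const loaded = await storage.loadDoc<{ n: number }>(documentId)
  console.log(bytes?.length, loaded?.n)
}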

packages/automerge-repo/test/Repo.test.ts

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ import {
 import { getRandomItem } from "./helpers/getRandomItem.js"
 import { TestDoc } from "./types.js"
 import { StorageId, StorageKey } from "../src/storage/types.js"
+import { chunkTypeFromKey } from "../src/storage/chunkTypeFromKey.js"
 
 describe("Repo", () => {
   describe("constructor", () => {

packages/automerge-repo/test/StorageSubsystem.test.ts

Lines changed: 80 additions & 1 deletion
@@ -4,13 +4,15 @@ import assert from "assert"
 import fs from "fs"
 import os from "os"
 import path from "path"
-import { describe, it } from "vitest"
+import { describe, it, expect } from "vitest"
 import { generateAutomergeUrl, parseAutomergeUrl } from "../src/AutomergeUrl.js"
 import { PeerId, cbor } from "../src/index.js"
 import { StorageSubsystem } from "../src/storage/StorageSubsystem.js"
 import { StorageId } from "../src/storage/types.js"
 import { DummyStorageAdapter } from "../src/helpers/DummyStorageAdapter.js"
 import * as Uuid from "uuid"
+import { chunkTypeFromKey } from "../src/storage/chunkTypeFromKey.js"
+import { DocumentId } from "../src/types.js"
 
 const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "automerge-repo-tests"))
 
@@ -243,6 +245,83 @@
       assert.strictEqual(id1, id2)
     })
   })
+
+  describe("loadDoc", () => {
+    it("maintains correct document state when loading chunks in order", async () => {
+      const storageAdapter = new DummyStorageAdapter()
+      const storage = new StorageSubsystem(storageAdapter)
+
+      // Create a document with multiple changes
+      const doc = A.init<{ foo: string }>()
+      const doc1 = A.change(doc, d => {
+        d.foo = "first"
+      })
+      const doc2 = A.change(doc1, d => {
+        d.foo = "second"
+      })
+      const doc3 = A.change(doc2, d => {
+        d.foo = "third"
+      })
+
+      // Save the document with multiple changes
+      const documentId = "test-doc" as DocumentId
+      await storage.saveDoc(documentId, doc3)
+
+      // Load the document
+      const loadedDoc = await storage.loadDoc<{ foo: string }>(documentId)
+
+      // Verify the document state is correct
+      expect(loadedDoc?.foo).toBe("third")
+    })
+
+    it("combines chunks with snapshot first", async () => {
+      const storageAdapter = new DummyStorageAdapter()
+      const storage = new StorageSubsystem(storageAdapter)
+
+      // Create a document with multiple changes
+      const doc = A.init<{ foo: string }>()
+      const doc1 = A.change(doc, d => {
+        d.foo = "first"
+      })
+      const doc2 = A.change(doc1, d => {
+        d.foo = Array(10000)
+          .fill(0)
+          .map(() =>
+            String.fromCharCode(Math.floor(Math.random() * 26) + 97)
+          )
+          .join("")
+      })
+
+      // Save the document with multiple changes
+      const documentId = "test-doc" as DocumentId
+      await storage.saveDoc(documentId, doc2)
+
+      const doc3 = A.change(doc2, d => {
+        d.foo = "third"
+      })
+      await storage.saveDoc(documentId, doc3)
+
+      // Load the document
+      const loadedDoc = await storage.loadDoc<{ foo: string }>(documentId)
+
+      // Verify the document state is correct
+      expect(loadedDoc?.foo).toBe(doc3.foo)
+
+      // Get the raw binary data from storage
+      const binary = await storage.loadDocData(documentId)
+      expect(binary).not.toBeNull()
+      if (!binary) return
+
+      // Verify the binary starts with the Automerge magic value
+      expect(binary[0]).toBe(0x85)
+      expect(binary[1]).toBe(0x6f)
+      expect(binary[2]).toBe(0x4a)
+      expect(binary[3]).toBe(0x83)
+
+      // Verify the chunk type is CHUNK_TYPE_DOCUMENT (0x00)
+      expect(binary[8]).toBe(0x00)
+    })
+  })
 })
 }
 })
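Background on the header assertions, not from the commit itself: the Automerge binary format begins with the four magic bytes 0x85 0x6f 0x4a 0x83, followed by a four-byte checksum and then a one-byte chunk type, so byte 8 is 0x00 exactly when the binary starts with a full document (snapshot) chunk. A small hedged helper capturing the same check, with startsWithDocumentChunk as a hypothetical name:

// Sketch only: true when `binary` begins with an Automerge document chunk,
// mirroring the expectations in the test above.
const MAGIC = [0x85, 0x6f, 0x4a, 0x83]
const CHUNK_TYPE_DOCUMENT = 0x00 // a change (incremental) chunk would be 0x01

function startsWithDocumentChunk(binary: Uint8Array): boolean {
  // Bytes 0-3: magic value; bytes 4-7: checksum; byte 8: chunk type.
  return (
    binary.length > 8 &&
    MAGIC.every((byte, i) => binary[i] === byte) &&
    binary[8] === CHUNK_TYPE_DOCUMENT
  )
}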
