Implement resnapshot to handle consistency edge cases with initial replication.
rkistner committed Dec 12, 2024
1 parent a749549 commit 50ec4db
Showing 10 changed files with 387 additions and 195 deletions.
75 changes: 73 additions & 2 deletions modules/module-postgres/src/replication/SnapshotQuery.ts
@@ -1,14 +1,27 @@
import { ColumnDescriptor, SourceTable } from '@powersync/service-core';
import { PgChunk, PgConnection, PgTypeOid, StatementParam } from '@powersync/service-jpgwire';
import { PgChunk, PgConnection, PgType, PgTypeOid, StatementParam } from '@powersync/service-jpgwire';
import { escapeIdentifier } from '../utils/pgwire_utils.js';
import { logger } from '@powersync/lib-services-framework';
import { SqliteValue } from '@powersync/service-sync-rules';

export interface SnapshotQuery {
initialize(): Promise<void>;
nextChunk(): AsyncIterableIterator<PgChunk>;
}

export type PrimaryKeyValue = Record<string, SqliteValue>;

export interface MissingRow {
table: SourceTable;
key: PrimaryKeyValue;
}

/**
* Snapshot query using a plain SELECT * FROM table; chunked using
* DECLARE CURSOR / FETCH.
*
* This supports all tables, but does not efficiently resume the snapshot
* if the process is restarted.
*/
export class SimpleSnapshotQuery {
public constructor(
private readonly connection: PgConnection,
@@ -25,6 +38,16 @@ export class SimpleSnapshotQuery {
}
}
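
For illustration only, the DECLARE CURSOR / FETCH chunking described in the doc comment above can be sketched as follows. This is a simplified reading of the approach rather than the class's actual implementation: the table identifier is assumed to be escaped already, and db.query is assumed to return { rows } as it does elsewhere in this diff.

async function* cursorChunks(db: PgConnection, escapedTable: string, chunkSize: number) {
  // Coarse level: declare a cursor for the full table scan inside the surrounding transaction.
  await db.query(`DECLARE snapshot_cursor CURSOR FOR SELECT * FROM ${escapedTable}`);
  while (true) {
    // Fetch up to chunkSize rows per round-trip.
    const rs = await db.query(`FETCH ${chunkSize} FROM snapshot_cursor`);
    if (rs.rows.length == 0) {
      break;
    }
    // Fine level: the real class streams PgChunk objects from each fetch instead of buffering rows.
    yield rs.rows;
  }
}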

/**
* Performs a table snapshot query, chunking by ranges of primary key data.
*
* This may miss some rows if they are modified during the snapshot query.
* In that case, logical replication will pick up those rows afterwards,
* possibly resulting in an IdSnapshotQuery.
*
* Currently, this only supports a table with a single primary key column,
* of a select few types.
*/
export class ChunkedSnapshotQuery {
/**
* Primary key types that we support for chunked snapshots.
@@ -104,3 +127,51 @@ export class ChunkedSnapshotQuery {
}
}
}
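
For illustration, the primary-key chunking described above amounts to keyset pagination: each chunk records the last key it returned, and the next chunk filters on it. A rough sketch of the per-chunk statement (hypothetical helper, not part of this class; the real implementation tracks the key as a typed statement parameter and escapes identifiers itself):

function nextChunkStatement(escapedTable: string, escapedKey: string, chunkSize: number, lastKey?: unknown): string {
  // The first chunk has no lower bound; later chunks resume strictly after the last key seen.
  const where = lastKey === undefined ? '' : `WHERE ${escapedKey} > $1 `;
  return `SELECT * FROM ${escapedTable} ${where}ORDER BY ${escapedKey} LIMIT ${chunkSize}`;
}

Because each chunk is an independent statement, rows modified while the snapshot runs can be missed, which is exactly the gap that logical replication plus IdSnapshotQuery (below) closes.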

/**
* This performs a snapshot query using a list of primary keys.
*/
export class IdSnapshotQuery {
private didChunk = false;

static supports(table: SourceTable) {
// We have the same requirements as ChunkedSnapshotQuery.
// This is typically only used as a fallback when ChunkedSnapshotQuery
// skipped some rows.
return ChunkedSnapshotQuery.supports(table);
}

public constructor(
private readonly connection: PgConnection,
private readonly table: SourceTable,
private readonly keys: PrimaryKeyValue[]
) {}

public async initialize(): Promise<void> {
// No-op
}

public async *nextChunk(): AsyncIterableIterator<PgChunk> {
// Only produce one chunk
if (this.didChunk) {
return;
}
this.didChunk = true;

const keyDefinition = this.table.replicaIdColumns[0];
const ids = this.keys.map((record) => record[keyDefinition.name]);
const type = PgType.getArrayType(keyDefinition.typeId!);
if (type == null) {
throw new Error(`Cannot determine primary key array type for ${JSON.stringify(keyDefinition)}`);
}
yield* this.connection.stream({
statement: `SELECT * FROM ${this.table.escapedIdentifier} WHERE ${escapeIdentifier(keyDefinition.name)} = ANY($1)`,
params: [
{
type: type,
value: ids
}
]
});
}
}
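
A hypothetical usage sketch of IdSnapshotQuery, re-fetching two specific rows by primary key. Here db, table and processChunk are stand-ins, and the key values assume a single bigint "id" replica identity column:

const query = new IdSnapshotQuery(db, table, [{ id: 17n }, { id: 42n }]);
await query.initialize(); // no-op, but keeps the SnapshotQuery interface uniform
for await (const chunk of query.nextChunk()) {
  // A single chunk is produced, containing the rows matched by "id" = ANY($1).
  processChunk(chunk);
}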
166 changes: 129 additions & 37 deletions modules/module-postgres/src/replication/WalStream.ts
@@ -1,12 +1,26 @@
import { container, errors, logger } from '@powersync/lib-services-framework';
import { getUuidReplicaIdentityBson, Metrics, SourceEntityDescriptor, storage } from '@powersync/service-core';
import {
BucketStorageBatch,
getUuidReplicaIdentityBson,
Metrics,
SaveUpdate,
SourceEntityDescriptor,
storage
} from '@powersync/service-core';
import * as pgwire from '@powersync/service-jpgwire';
import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern, toSyncRulesRow } from '@powersync/service-sync-rules';
import * as pg_utils from '../utils/pgwire_utils.js';
import { PgManager } from './PgManager.js';
import { getPgOutputRelation, getRelId } from './PgRelation.js';
import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
import { ChunkedSnapshotQuery, SimpleSnapshotQuery, SnapshotQuery } from './SnapshotQuery.js';
import {
ChunkedSnapshotQuery,
IdSnapshotQuery,
MissingRow,
PrimaryKeyValue,
SimpleSnapshotQuery,
SnapshotQuery
} from './SnapshotQuery.js';

export const ZERO_LSN = '00000000/00000000';
export const PUBLICATION_NAME = 'powersync';
@@ -359,19 +373,8 @@ WHERE oid = $1::regclass`,
logger.info(`${this.slot_name} Skipping ${table.qualifiedName} - snapshot already done`);
continue;
}
let tableLsnNotBefore: string;
await db.query('BEGIN');
try {
await this.snapshotTable(batch, db, table);

const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
tableLsnNotBefore = rs.rows[0][0];
} finally {
// Read-only transaction, commit does not actually do anything.
await db.query('COMMIT');
}

await batch.markSnapshotDone([table], tableLsnNotBefore);
await this.snapshotTableInTx(batch, db, table);
await touch();
}
}
@@ -391,7 +394,38 @@
}
}

private async snapshotTable(batch: storage.BucketStorageBatch, db: pgwire.PgConnection, table: storage.SourceTable) {
private async snapshotTableInTx(
batch: storage.BucketStorageBatch,
db: pgwire.PgConnection,
table: storage.SourceTable,
limited?: PrimaryKeyValue[]
): Promise<storage.SourceTable> {
await db.query('BEGIN');
try {
let tableLsnNotBefore: string;
await this.snapshotTable(batch, db, table, limited);

// Get the current LSN.
// The data will only be consistent once incremental replication
// has passed that point.
// We have to get this LSN _after_ we have started the snapshot query.
const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
tableLsnNotBefore = rs.rows[0][0];
await db.query('COMMIT');
const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore);
return resultTable;
} catch (e) {
await db.query('ROLLBACK');
throw e;
}
}

private async snapshotTable(
batch: storage.BucketStorageBatch,
db: pgwire.PgConnection,
table: storage.SourceTable,
limited?: PrimaryKeyValue[]
) {
logger.info(`${this.slot_name} Replicating ${table.qualifiedName}`);
const estimatedCount = await this.estimatedCount(db, table);
let at = 0;
@@ -401,13 +435,16 @@
// We do streaming on two levels:
// 1. Coarse level: DECLARE CURSOR, FETCH 10000 at a time.
// 2. Fine level: Stream chunks from each fetch call.
if (ChunkedSnapshotQuery.supports(table)) {
if (limited) {
q = new IdSnapshotQuery(db, table, limited);
} else if (ChunkedSnapshotQuery.supports(table)) {
// Single primary key - we can use the primary key for chunking
const orderByKey = table.replicaIdColumns[0];
logger.info(`Chunking ${table.qualifiedName} by ${orderByKey.name}`);
q = new ChunkedSnapshotQuery(db, table, 10_000);
q = new ChunkedSnapshotQuery(db, table, 1000);
} else {
// Fallback case - query the entire table
logger.info(`Snapshot ${table.qualifiedName} without chunking`);
q = new SimpleSnapshotQuery(db, table, 10_000);
}
await q.initialize();
@@ -501,37 +538,52 @@ WHERE oid = $1::regclass`,
// Truncate this table, in case a previous snapshot was interrupted.
await batch.truncate([result.table]);

let lsn: string = ZERO_LSN;
// Start the snapshot inside a transaction.
// We use a dedicated connection for this.
const db = await this.connections.snapshotConnection();
try {
await db.query('BEGIN');
try {
await this.snapshotTable(batch, db, result.table);

// Get the current LSN.
// The data will only be consistent once incremental replication
// has passed that point.
// We have to get this LSN _after_ we have started the snapshot query.
const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
lsn = rs.rows[0][0];

await db.query('COMMIT');
} catch (e) {
await db.query('ROLLBACK');
throw e;
}
const table = await this.snapshotTableInTx(batch, db, result.table);
return table;
} finally {
await db.end();
}
const [table] = await batch.markSnapshotDone([result.table], lsn);
return table;
}

return result.table;
}

/**
* Process rows that have missing TOAST values.
*
* This can happen during edge cases in the chunked initial snapshot process.
*
* We handle this similarly to an inline table snapshot, but limited to the specific
* set of rows.
*/
private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) {
const byTable = new Map<string | number, MissingRow[]>();
for (let row of rows) {
if (!byTable.has(row.table.objectId)) {
byTable.set(row.table.objectId, []);
}
byTable.get(row.table.objectId)!.push(row);
}
const db = await this.connections.snapshotConnection();
try {
for (let rows of byTable.values()) {
const table = rows[0].table;
await this.snapshotTableInTx(
batch,
db,
table,
rows.map((r) => r.key)
);
}
} finally {
await db.end();
}
}
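
To tie the pieces of this commit together, a simplified sketch of the intended flow (the names match this commit, but the example values are hypothetical):

// 1. During incremental replication, the storage batch is assumed to call
//    markRecordUnavailable(record) for rows whose TOAST values are missing,
//    and the callback collects entries such as { table: listsTable, key: { id: 17n } }.
// 2. On the next 'commit' message, after batch.flush(), the collected keys are drained:
//      await this.resnapshot(batch, resnapshot);
// 3. resnapshot() groups the keys per table and runs one IdSnapshotQuery per group,
//    roughly SELECT * FROM "lists" WHERE "id" = ANY($1) with $1 = [17, 42],
//    before batch.commit() persists the replication progress.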

private getTable(relationId: number): storage.SourceTable {
const table = this.relation_cache.get(relationId);
if (table == null) {
@@ -640,8 +692,38 @@ WHERE oid = $1::regclass`,
// Auto-activate as soon as initial replication is done
await this.storage.autoActivate();

let resnapshot: { table: storage.SourceTable; key: PrimaryKeyValue }[] = [];

const markRecordUnavailable = (record: SaveUpdate) => {
if (!IdSnapshotQuery.supports(record.sourceTable)) {
// If it's not supported, it's also safe to ignore
return;
}
let key: PrimaryKeyValue = {};
for (let column of record.sourceTable.replicaIdColumns) {
const name = column.name;
const value = record.after[name];
if (value == null) {
// We don't expect this to actually happen.
// The key should always be present in the "after" record.
return;
}
key[name] = value;
}
resnapshot.push({
table: record.sourceTable,
key: key
});
};

await this.storage.startBatch(
{ zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, storeCurrentData: true, skipExistingRows: false },
{
zeroLSN: ZERO_LSN,
defaultSchema: POSTGRES_DEFAULT_SCHEMA,
storeCurrentData: true,
skipExistingRows: false,
markRecordUnavailable
},
async (batch) => {
// Replication never starts in the middle of a transaction
let inTx = false;
@@ -665,6 +747,16 @@
} else if (msg.tag == 'commit') {
Metrics.getInstance().transactions_replicated_total.add(1);
inTx = false;
// flush() must happen before the resnapshot check - flushing is
// typically what reports the records that need a resnapshot.
await batch.flush();
// This _must_ be checked after the flush(), and before
// commit() or ack(). We never persist the resnapshot list,
// so we have to process it before marking our progress.
if (resnapshot.length > 0) {
await this.resnapshot(batch, resnapshot);
resnapshot = [];
}
await batch.commit(msg.lsn!);
await this.ack(msg.lsn!, replicationStream);
} else {
