Skip to content

Commit 894cdf0

Browse files
authored
fix: replace OFFSET with cursor-based pagination on idlink_va (#10)
* fix: replace OFFSET with cursor-based pagination on idlink_va * tests
1 parent f122299 commit 894cdf0

2 files changed

Lines changed: 51 additions & 15 deletions

File tree

src/lib.mjs

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -106,15 +106,15 @@ export const fetchValidActionIds = async (
106106
log = console.log,
107107
) => {
108108
const validActions = new Set();
109-
let actionOffset = 0;
109+
let lastIdlinkVa = 0;
110+
let totalFetched = 0;
110111

111112
log(" - fetching valid action IDs...");
112113

113114
while (true) {
114115
const batch = await sourceDB
115116
.selectFrom("matomo_log_link_visit_action")
116-
.select(["idaction_name", "idaction_url"])
117-
.distinct()
117+
.select(["idlink_va", "idaction_name", "idaction_url"])
118118
.where("idsite", "=", idsite)
119119
.where("server_time", ">=", new Date("2025-01-01"))
120120
.where(({ eb }) =>
@@ -123,20 +123,23 @@ export const fetchValidActionIds = async (
123123
eb("idaction_url", ">", lastActionId),
124124
]),
125125
)
126+
.where("idlink_va", ">", lastIdlinkVa)
127+
.orderBy("idlink_va")
126128
.limit(ACTION_BATCH)
127-
.offset(actionOffset)
128129
.execute();
129130

130131
if (batch.length === 0) break;
131132

133+
lastIdlinkVa = batch[batch.length - 1].idlink_va;
134+
totalFetched += batch.length;
135+
132136
for (const r of batch) {
133137
if (r.idaction_name !== null && r.idaction_name > lastActionId)
134138
validActions.add(r.idaction_name);
135139
if (r.idaction_url !== null && r.idaction_url > lastActionId)
136140
validActions.add(r.idaction_url);
137141
}
138-
actionOffset += ACTION_BATCH;
139-
log(` - fetched ${actionOffset} rows, ${validActions.size} unique actions`);
142+
log(` - fetched ${totalFetched} rows, ${validActions.size} unique actions`);
140143
}
141144

142145
return validActions;
@@ -288,7 +291,12 @@ export const importSite = async ({
288291

289292
// Copy actions
290293
log(`copy matomo_log_action from ${lastActionId}...`);
291-
const validActions = await fetchValidActionIds(sourceDB, idsite, lastActionId, log);
294+
const validActions = await fetchValidActionIds(
295+
sourceDB,
296+
idsite,
297+
lastActionId,
298+
log,
299+
);
292300
await copyActions({
293301
sourceDB,
294302
targetDB,

src/lib.test.mjs

Lines changed: 36 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -254,10 +254,11 @@ describe("fetchValidActionIds", () => {
254254
]);
255255
assert.strictEqual(sourceDB._calls.select.length, 1);
256256
assert.deepStrictEqual(sourceDB._calls.select[0], [
257+
"idlink_va",
257258
"idaction_name",
258259
"idaction_url",
259260
]);
260-
assert.deepStrictEqual(sourceDB._calls.distinct, [true]);
261+
assert.deepStrictEqual(sourceDB._calls.distinct, []); // no distinct
261262
});
262263

263264
it("should apply idsite and date filters", async () => {
@@ -271,21 +272,48 @@ describe("fetchValidActionIds", () => {
271272
assert.strictEqual(sourceDB._calls.where[1][1], ">=");
272273
});
273274

274-
it("should use pagination with limit and offset", async () => {
275+
it("should use cursor-based pagination on idlink_va", async () => {
275276
const sourceDB = createMockDB({ selectResults: [[]] });
276277
await fetchValidActionIds(sourceDB, 1, 0, () => {});
277278

278279
assert.deepStrictEqual(sourceDB._calls.limit, [1000]);
279-
assert.deepStrictEqual(sourceDB._calls.offset, [0]);
280+
assert.deepStrictEqual(sourceDB._calls.offset, []); // no offset
281+
assert.deepStrictEqual(sourceDB._calls.orderBy, ["idlink_va"]);
282+
// cursor where: idlink_va > 0
283+
const cursorWhere = sourceDB._calls.where.find(
284+
(w) => Array.isArray(w) && w[0] === "idlink_va",
285+
);
286+
assert.deepStrictEqual(cursorWhere, ["idlink_va", ">", 0]);
287+
});
288+
289+
it("should advance cursor using last idlink_va of each batch", async () => {
290+
const sourceDB = createMockDB({
291+
selectResults: [
292+
[
293+
{ idlink_va: 10, idaction_name: 1, idaction_url: 2 },
294+
{ idlink_va: 20, idaction_name: 3, idaction_url: 4 },
295+
],
296+
[], // End pagination
297+
],
298+
});
299+
300+
await fetchValidActionIds(sourceDB, 1, 0, () => {});
301+
302+
// Second call should use cursor idlink_va > 20
303+
const cursorWheres = sourceDB._calls.where.filter(
304+
(w) => Array.isArray(w) && w[0] === "idlink_va",
305+
);
306+
assert.deepStrictEqual(cursorWheres[0], ["idlink_va", ">", 0]);
307+
assert.deepStrictEqual(cursorWheres[1], ["idlink_va", ">", 20]);
280308
});
281309

282310
it("should collect unique action IDs from multiple batches", async () => {
283311
const sourceDB = createMockDB({
284312
selectResults: [
285313
[
286-
{ idaction_name: 1, idaction_url: 2 },
287-
{ idaction_name: 1, idaction_url: 3 },
288-
{ idaction_name: null, idaction_url: 4 },
314+
{ idlink_va: 1, idaction_name: 1, idaction_url: 2 },
315+
{ idlink_va: 2, idaction_name: 1, idaction_url: 3 },
316+
{ idlink_va: 3, idaction_name: null, idaction_url: 4 },
289317
],
290318
[], // End pagination
291319
],
@@ -304,8 +332,8 @@ describe("fetchValidActionIds", () => {
304332
const sourceDB = createMockDB({
305333
selectResults: [
306334
[
307-
{ idaction_name: 5, idaction_url: 10 },
308-
{ idaction_name: 3, idaction_url: 15 },
335+
{ idlink_va: 1, idaction_name: 5, idaction_url: 10 },
336+
{ idlink_va: 2, idaction_name: 3, idaction_url: 15 },
309337
],
310338
[],
311339
],

0 commit comments

Comments
 (0)