Skip to content

Commit bcc3fa2

Browse files
authored
feat: allow scan limit override (#866)
## Problem Find Single Row action does a full table scan which get very expensive for high volume pipes and large tiles. ## Solution For some high volume cases, there is no need to scan the entire table but just the most recent ones. This PR allows the overriding of the scanned limit by modifying the step config. ## Changes - Adding `stepConfig` as an optional param to `getTableRows` function to allow limit the number rows scanned. - Adding `order` as a param to `getTableRows` which determines whether to traverse the createdAt index by ascending or descending order. - Prevent clearing the entire stepConfig on test step, we should only be deleting the `templateConfig` key in the step config
1 parent 3901904 commit bcc3fa2

File tree

8 files changed

+132
-14
lines changed

8 files changed

+132
-14
lines changed

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/backend/src/apps/tiles/__tests__/actions/find-single-row.itest.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
generateMockTableRowData,
1111
} from '@/graphql/__tests__/mutations/tiles/table.mock'
1212
import { createTableRow, TableRowFilter } from '@/models/dynamodb/table-row'
13+
import * as tableFunctions from '@/models/dynamodb/table-row/functions'
1314
import TableColumnMetadata from '@/models/table-column-metadata'
1415
import TableMetadata from '@/models/table-metadata'
1516
import User from '@/models/user'
@@ -18,6 +19,24 @@ import Context from '@/types/express/context'
1819
import tiles from '../..'
1920
import findSingleRowAction from '../../actions/find-single-row'
2021

22+
const mocks = vi.hoisted(() => ({
23+
stepQueryResult: vi.fn().mockResolvedValue({
24+
config: {
25+
adminOverride: {
26+
tileScanLimit: 100,
27+
},
28+
},
29+
}),
30+
}))
31+
32+
vi.mock('@/models/step', () => ({
33+
default: {
34+
query: vi.fn().mockReturnThis(),
35+
findById: vi.fn().mockReturnThis(),
36+
throwIfNotFound: mocks.stepQueryResult,
37+
},
38+
}))
39+
2140
describe('tiles create row action', () => {
2241
let context: Context
2342
let dummyTable: TableMetadata
@@ -107,4 +126,37 @@ describe('tiles create row action', () => {
107126
.where({ table_id: $.step.parameters.tableId })
108127
await expect(findSingleRowAction.run($)).rejects.toThrow(StepError)
109128
})
129+
130+
it('should call getTableRows with scan limit if exists in step config', async () => {
131+
const getTableRowsSpy = vi
132+
.spyOn(tableFunctions, 'getTableRows')
133+
.mockResolvedValueOnce({
134+
rows: [],
135+
stringifiedCursor: undefined,
136+
})
137+
await findSingleRowAction.run($)
138+
expect(getTableRowsSpy).toHaveBeenCalledWith({
139+
tableId: $.step.parameters.tableId,
140+
filters: $.step.parameters.filters,
141+
scanLimit: 100,
142+
order: 'asc',
143+
})
144+
})
145+
146+
it('should call getTableRows with scan limit and order', async () => {
147+
const getTableRowsSpy = vi
148+
.spyOn(tableFunctions, 'getTableRows')
149+
.mockResolvedValueOnce({
150+
rows: [],
151+
stringifiedCursor: undefined,
152+
})
153+
$.step.parameters.returnLastRow = true
154+
await findSingleRowAction.run($)
155+
expect(getTableRowsSpy).toHaveBeenCalledWith({
156+
tableId: $.step.parameters.tableId,
157+
filters: $.step.parameters.filters,
158+
scanLimit: 100,
159+
order: 'desc',
160+
})
161+
})
110162
})

packages/backend/src/apps/tiles/actions/find-single-row/index.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
TableRowFilter,
99
TableRowFilterOperator,
1010
} from '@/models/dynamodb/table-row'
11+
import Step from '@/models/step'
1112
import TableCollaborator from '@/models/table-collaborators'
1213
import TableColumnMetadata from '@/models/table-column-metadata'
1314

@@ -149,6 +150,7 @@ const action: IRawAction = {
149150
returnLastRow: boolean | undefined
150151
}
151152

153+
const step = await Step.query().findById($.step.id).throwIfNotFound()
152154
/**
153155
* Check for columns first, there will not be any columns if the tile has been deleted.
154156
*/
@@ -175,13 +177,16 @@ const action: IRawAction = {
175177
$.app.name,
176178
)
177179
}
180+
// Retrieve the manual scan limit override, converting it to a number.
181+
// If the conversion results in NaN, we set scanLimit to undefined.
182+
const scanLimitRaw = +step.config?.adminOverride?.tileScanLimit
183+
const scanLimit = isNaN(scanLimitRaw) ? undefined : scanLimitRaw
178184

179-
/**
180-
* When columnIds are not provided, we only return rowId
181-
*/
182185
const { rows } = await getTableRows({
183186
tableId,
184187
filters,
188+
order: returnLastRow ? 'desc' : 'asc',
189+
scanLimit,
185190
})
186191

187192
if (!rows || !rows.length) {
@@ -192,9 +197,7 @@ const action: IRawAction = {
192197
})
193198
return
194199
}
195-
const rowIdToUse = returnLastRow
196-
? rows[rows.length - 1].rowId
197-
: rows[0].rowId
200+
const rowIdToUse = rows[0].rowId
198201

199202
/**
200203
* We use raw row data instead of mapped column names as we want them to

packages/backend/src/graphql/mutations/execute-step.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { raw } from 'objection'
2+
13
import testStep from '@/services/test-step'
24

35
import type { MutationResolvers } from '../__generated__/types.generated'
@@ -29,7 +31,8 @@ const executeStep: MutationResolvers['executeStep'] = async (
2931
if (!executionStep.isFailed) {
3032
await stepToTest.$query().patch({
3133
status: 'completed',
32-
config: {}, // clear template step config when step is tested successfully
34+
// clear templateConfig in config when step is tested successfully
35+
config: raw(`config - 'templateConfig'`),
3336
})
3437
}
3538

packages/backend/src/models/dynamodb/__tests__/table-row/functions.itest.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,49 @@ describe('dynamodb table row functions', () => {
9090
expect(Object.keys(rows[0])).not.toContain('randomcol')
9191
})
9292

93+
describe('get table rows with scan limit', () => {
94+
const TEN_THOUSAND = 10000
95+
// i tried using beforeAll hook to cut down the setup time, but couldnt get it to work for this nested describe
96+
beforeEach(async () => {
97+
const dataArray = []
98+
for (let i = 0; i < TEN_THOUSAND; i++) {
99+
dataArray.push(
100+
generateMockTableRowData({ columnIds: dummyColumnIds }),
101+
)
102+
}
103+
await createTableRows({ tableId: dummyTable.id, dataArray })
104+
})
105+
it(
106+
'should be able to paginate and get a large number of rows',
107+
async () => {
108+
const { rows } = await getTableRows({
109+
tableId: dummyTable.id,
110+
columnIds: dummyColumnIds,
111+
})
112+
expect(rows).toHaveLength(TEN_THOUSAND)
113+
},
114+
{
115+
timeout: 20000,
116+
},
117+
)
118+
119+
it(
120+
'should be able to paginate and get a large number of rows with a scan limit',
121+
async () => {
122+
const SCAN_LIMIT = 5789
123+
const { rows } = await getTableRows({
124+
tableId: dummyTable.id,
125+
columnIds: dummyColumnIds,
126+
scanLimit: SCAN_LIMIT,
127+
})
128+
expect(rows).toHaveLength(SCAN_LIMIT)
129+
},
130+
{
131+
timeout: 20000,
132+
},
133+
)
134+
})
135+
93136
it('should get relevant rows based on a single filter', async () => {
94137
const dataArray = []
95138
for (let i = 0; i < 1000; i++) {

packages/backend/src/models/dynamodb/table-row/functions.ts

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -293,20 +293,31 @@ export const getTableRows = async ({
293293
tableId,
294294
columnIds,
295295
filters,
296+
order = 'asc',
296297
stringifiedCursor,
298+
scanLimit,
297299
}: {
298300
tableId: string
299301
columnIds?: string[]
300302
filters?: TableRowFilter[]
303+
order?: 'asc' | 'desc'
301304
/**
302305
* if stringifiedCursor is 'start', we will fetch the first page of results
303306
* if undefined, we will auto-paginate
304307
*/
305308
stringifiedCursor?: string | 'start'
309+
/**
310+
* Optional limit on the total number of rows scanned.
311+
*/
312+
scanLimit?: number
306313
}): Promise<{
307314
rows: TableRowOutput[]
308315
stringifiedCursor?: string
309316
}> => {
317+
if (stringifiedCursor && scanLimit) {
318+
throw new Error('stringifiedCursor and scanLimit cannot both be provided')
319+
}
320+
310321
try {
311322
// need to use ProjectionExpression to select nested attributes
312323
const { ProjectionExpression, ExpressionAttributeNames } =
@@ -316,6 +327,7 @@ export const getTableRows = async ({
316327
indexUsed: 'byCreatedAt',
317328
})
318329
const tableRows = []
330+
let remainingScanLimit = scanLimit ?? Infinity
319331
let cursor: any =
320332
stringifiedCursor && stringifiedCursor !== 'start'
321333
? JSON.parse(stringifiedCursor)
@@ -326,34 +338,39 @@ export const getTableRows = async ({
326338
addFiltersToQuery(query, filters)
327339
}
328340
const response = await query.go({
329-
order: 'asc',
341+
order,
330342
pages: 'all', // this is ignored, we need to paginate manually
331343
cursor,
332344
params: {
333345
ProjectionExpression,
334346
ExpressionAttributeNames,
347+
Limit: remainingScanLimit,
335348
},
336349
// use data:'raw' to bypass electrodb formatting, since we're using ProjectionExpression to select nested attributes
337350
// ref: https://electrodb.dev/en/queries/get/#execution-options
338351
data: 'raw',
339352
pager: 'raw',
340353
ignoreOwnership: true,
341354
})
355+
342356
// need to explicitly cast to DynamoDB's raw output because of the 'raw' option
343357
const data = response.data as unknown as QueryCommandOutput & {
344358
Items: TableRowOutput[]
345359
}
346360
tableRows.push(...data.Items)
361+
remainingScanLimit -= data.ScannedCount
347362
cursor = data.LastEvaluatedKey
348363
// loop only if cursor is
349-
} while (cursor && !stringifiedCursor)
364+
} while (cursor && !stringifiedCursor && remainingScanLimit > 0)
350365

351366
return {
352367
rows: tableRows.map((row) => ({
353368
...row,
354369
data: row.data || {}, // data can be undefined if values are empty
355370
})),
356-
stringifiedCursor: cursor ? JSON.stringify(cursor) : undefined,
371+
stringifiedCursor:
372+
cursor && stringifiedCursor ? JSON.stringify(cursor) : undefined,
373+
// if no cursor was passed in, we should not return
357374
}
358375
} catch (e: unknown) {
359376
handleDynamoDBError(e)

packages/backend/src/models/step.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
import type { IJSONObject, IStep } from '@plumber/types'
1+
import type { IJSONObject, IStep, IStepConfig } from '@plumber/types'
22

33
import { type StaticHookArguments, ValidationError } from 'objection'
44
import { URL } from 'url'
55

66
import apps from '@/apps'
77
import appConfig from '@/config/app'
8-
import type { StepConfig } from '@/graphql/__generated__/types.generated'
98

109
import Base from './base'
1110
import Connection from './connection'
@@ -29,7 +28,7 @@ class Step extends Base {
2928
connection?: Connection
3029
flow: Flow
3130
executionSteps: ExecutionStep[]
32-
config: StepConfig
31+
config: IStepConfig
3332

3433
static tableName = 'steps'
3534

packages/types/index.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ export interface IExecution {
135135

136136
export interface IStepConfig {
137137
templateConfig?: IStepTemplateConfig
138+
adminOverride?: IJSONObject
138139
}
139140

140141
export interface IStepTemplateConfig {

0 commit comments

Comments
 (0)