Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/rude-steaks-jam.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@transcend-io/cli': patch
---

Improve request export and manual enrichment performance by switching request identifier fetching to cursor pagination and batched lookups.
19 changes: 13 additions & 6 deletions packages/cli/src/lib/graphql/fetchAllRequestIdentifierMetadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export interface RequestIdentifierMetadata {
isVerifiedAtLeastOnce: boolean;
}

const PAGE_SIZE = 50;
const PAGE_SIZE = 2000;

/**
* Fetch all request identifier metadata for a particular request
Expand Down Expand Up @@ -41,29 +41,36 @@ export async function fetchAllRequestIdentifierMetadata(
): Promise<RequestIdentifierMetadata[]> {
const resolvedRequestIds = requestIds ?? (requestId ? [requestId] : undefined);
const requestIdentifiers: RequestIdentifierMetadata[] = [];
let offset = 0;
let cursor: string | undefined;

// Paginate
let shouldContinue = false;
do {
const {
requestIdentifiers: { nodes },
requestIdentifiers: { nodes, pageInfo },
} = await makeGraphQLRequest<{
/** Request Identifiers */
requestIdentifiers: {
/** List */
nodes: RequestIdentifierMetadata[];
/** Pagination info */
pageInfo: {
/** Cursor for the last item */
endCursor: string | null;
/** Whether more pages exist */
hasNextPage: boolean;
};
};
}>(client, REQUEST_IDENTIFIERS, {
first: PAGE_SIZE,
offset,
after: cursor,
requestIds: resolvedRequestIds,
updatedAtBefore: updatedAtBefore ? updatedAtBefore.toISOString() : undefined,
updatedAtAfter: updatedAtAfter ? updatedAtAfter.toISOString() : undefined,
});
requestIdentifiers.push(...nodes);
offset += PAGE_SIZE;
shouldContinue = nodes.length === PAGE_SIZE;
cursor = pageInfo.endCursor ?? undefined;
shouldContinue = pageInfo.hasNextPage;
} while (shouldContinue);

return requestIdentifiers;
Expand Down
111 changes: 102 additions & 9 deletions packages/cli/src/lib/graphql/fetchAllRequestIdentifiers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,29 @@ const RequestIdentifier = t.type({
/** Type override */
export type RequestIdentifier = t.TypeOf<typeof RequestIdentifier>;

const PAGE_SIZE = 50;
const PAGE_SIZE = 100;

const PageInfo = t.type({
endCursor: t.union([t.string, t.null]),
hasNextPage: t.boolean,
});

export const RequestIdentifiersResponse = t.type({
identifiers: t.array(RequestIdentifier),
pageInfo: PageInfo,
});

const BatchRequestIdentifier = t.type({
id: t.string,
name: t.string,
value: t.string,
type: valuesOf(IdentifierType),
requestId: t.string,
});

const BatchRequestIdentifiersResponse = t.type({
identifiers: t.array(BatchRequestIdentifier),
pageInfo: PageInfo,
});

/**
Expand Down Expand Up @@ -82,24 +101,31 @@ export async function fetchAllRequestIdentifiers(
},
): Promise<RequestIdentifier[]> {
const requestIdentifiers: RequestIdentifier[] = [];
let offset = 0;
let shouldContinue = false;
let endCursor: string | undefined;
let shouldContinue = true;

if (!skipSombraCheck) {
await validateSombraVersion(client);
}

do {
while (shouldContinue) {
let response: unknown;
try {
response = await sombra!
.post<{
/** Decrypted identifiers */
identifiers: RequestIdentifier[];
/** Pagination info */
pageInfo: {
/** Cursor for the last item */
endCursor: string | null;
/** Whether more pages exist */
hasNextPage: boolean;
};
}>('v1/request-identifiers', {
json: {
first: PAGE_SIZE,
offset,
after: endCursor ?? undefined,
requestId,
},
})
Expand All @@ -110,13 +136,80 @@ export async function fetchAllRequestIdentifiers(
);
}

const { identifiers: nodes } = decodeCodec(RequestIdentifiersResponse, response);
const { identifiers: nodes, pageInfo } = decodeCodec(RequestIdentifiersResponse, response);

requestIdentifiers.push(...nodes);

offset += PAGE_SIZE;
shouldContinue = nodes.length === PAGE_SIZE;
} while (shouldContinue);
endCursor = pageInfo.endCursor ?? undefined;
shouldContinue = pageInfo.hasNextPage;
}

return requestIdentifiers;
}

/**
* Fetch request identifiers for multiple requests in a single paginated call.
* Returns a Map keyed by requestId so callers can look up identifiers per request.
*
* @param sombra - Sombra client
* @param options - Options
* @returns Map of requestId to its identifiers
*/
export async function fetchRequestIdentifiersBatch(
sombra: Got,
{
requestIds,
}: {
/** IDs of requests to fetch identifiers for */
requestIds: string[];
},
): Promise<Map<string, RequestIdentifier[]>> {
const result = new Map<string, RequestIdentifier[]>();

if (requestIds.length === 0) {
return result;
}

// Ensure every requested ID has an entry even if Sombra returns nothing for it
for (const id of requestIds) {
result.set(id, []);
}

let cursor: string | undefined;
let shouldContinue = true;

while (shouldContinue) {
let response: unknown;
try {
response = await sombra
.post('v1/request-identifiers', {
json: {
first: PAGE_SIZE,
after: cursor ?? undefined,
requestIds,
},
})
.json();
} catch (err) {
throw new Error(
`Failed to fetch request identifiers: ${err?.response?.body || err?.message}`,
);
}

const { identifiers: nodes, pageInfo } = decodeCodec(BatchRequestIdentifiersResponse, response);

for (const { requestId, ...identifier } of nodes) {
const list = result.get(requestId);
if (list) {
list.push(identifier);
} else {
result.set(requestId, [identifier]);
}
}

cursor = pageInfo.endCursor ?? undefined;
shouldContinue = pageInfo.hasNextPage;
}

return result;
}
15 changes: 8 additions & 7 deletions packages/cli/src/lib/graphql/gqls/RequestIdentifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,16 @@ export const REMOVE_REQUEST_IDENTIFIERS = gql`
export const REQUEST_IDENTIFIERS = gql`
query TranscendCliRequestIdentifiers(
$first: Int!
$offset: Int!
$after: String
$requestIds: [ID!]
$updatedAtBefore: Date
$updatedAtAfter: Date
) {
requestIdentifiers(
input: {
requestIds: $requestIds
updatedAtBefore: $updatedAtBefore
updatedAtAfter: $updatedAtAfter
}
input: { requestIds: $requestIds }
filterBy: { updatedAtBefore: $updatedAtBefore, updatedAtAfter: $updatedAtAfter }
first: $first
offset: $offset
after: $after
useMaster: false
orderBy: [{ field: createdAt, direction: ASC }, { field: name, direction: ASC }]
) {
Expand All @@ -35,6 +32,10 @@ export const REQUEST_IDENTIFIERS = gql`
isVerifiedAtLeastOnce
}
totalCount
pageInfo {
endCursor
hasNextPage
}
}
}
`;
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {
buildTranscendGraphQLClient,
createSombraGotInstance,
fetchAllRequestEnrichers,
fetchAllRequestIdentifiers,
fetchRequestIdentifiersBatch,
fetchAllRequests,
validateSombraVersion,
} from '../graphql/index.js';
Expand Down Expand Up @@ -70,39 +70,35 @@ export async function pullManualEnrichmentIdentifiersToCsv({

await validateSombraVersion(client);

// Requests to save
const savedRequests: PrivacyRequestWithIdentifiers[] = [];

// Filter down requests to what is needed
await map(
// Fetch enrichers for all requests in parallel
const requestsWithEnrichers = await map(
allRequests,
async (request) => {
// Fetch enrichers
const requestEnrichers = await fetchAllRequestEnrichers(client, {
async (request) => ({
request,
requestEnrichers: await fetchAllRequestEnrichers(client, {
requestId: request.id,
});
}),
}),
{ concurrency },
);

// Filter to requests that have manual enrichment
const manualEnrichmentRequests = requestsWithEnrichers.filter(
({ requestEnrichers }) =>
requestEnrichers.filter(({ status }) => status === 'ACTION_REQUIRED').length > 0,
);

// Check if manual enrichment exists for that request
const hasManualEnrichment = requestEnrichers.filter(
({ status }) => status === 'ACTION_REQUIRED',
);
// Batch-fetch identifiers for all qualifying requests at once
const identifiersByRequest = await fetchRequestIdentifiersBatch(sombra, {
requestIds: manualEnrichmentRequests.map(({ request }) => request.id),
});

// Save request to queue
if (hasManualEnrichment) {
const requestIdentifiers = await fetchAllRequestIdentifiers(client, sombra, {
requestId: request.id,
skipSombraCheck: true,
});
savedRequests.push({
...request,
requestIdentifiers,
requestEnrichers,
});
}
},
{
concurrency,
},
const savedRequests: PrivacyRequestWithIdentifiers[] = manualEnrichmentRequests.map(
({ request, requestEnrichers }) => ({
...request,
requestIdentifiers: identifiersByRequest.get(request.id) ?? [],
requestEnrichers,
}),
);

const data = savedRequests.map(
Expand Down
13 changes: 7 additions & 6 deletions packages/cli/src/lib/requests/bulkRestartRequests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ import { DEFAULT_TRANSCEND_API } from '../../constants.js';
import { logger } from '../../logger.js';
import { map } from '../bluebird.js';
import {
RequestIdentifier,
buildTranscendGraphQLClient,
createSombraGotInstance,
fetchAllRequestIdentifiers,
fetchRequestIdentifiersBatch,
fetchAllRequests,
validateSombraVersion,
} from '../graphql/index.js';
Expand Down Expand Up @@ -156,8 +157,12 @@ export async function bulkRestartRequests({
}
}

let identifiersByRequest: Map<string, RequestIdentifier[]> | undefined;
if (copyIdentifiers) {
await validateSombraVersion(client);
identifiersByRequest = await fetchRequestIdentifiersBatch(sombra, {
requestIds: requests.map((r) => r.id),
});
}

// Map over the requests
Expand All @@ -167,12 +172,8 @@ export async function bulkRestartRequests({
requests,
async (request, ind) => {
try {
// Pull the request identifiers
const requestIdentifiers = copyIdentifiers
? await fetchAllRequestIdentifiers(client, sombra, {
requestId: request.id,
skipSombraCheck: true,
})
? (identifiersByRequest!.get(request.id) ?? [])
: [];

// Make the GraphQL request to restart the request
Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/lib/requests/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ export * from './pullPrivacyRequests.js';
export * from './streamPrivacyRequestsToCsv.js';
export * from './skipRequestDataSilos.js';
export * from './removeUnverifiedRequestIdentifiers.js';
export * from './splitDateRange.js';
Loading
Loading