diff --git a/DESIGN.md b/DESIGN.md index 583c908a..46d7107c 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -21,9 +21,19 @@ The Stack is updated to be more familiar to modern JavaScript developers, includ * GraphQL SDL * graphql-codegen to generate TypeScript interfaces -## Catalog +## Catalog Server -The catalog is a standalone backend service with a GraphQL API that is accessed by the site's frontend server. +The catalog server is a standalone backend service with a GraphQL API that is accessed by the site's frontend server. + +## Access Control + +The server is intended to eventually have a public-facing GraphQL service. It also has admin functionality such as HTTP endpoints for updating the packages in the catalog that need to be reachable by scheduling services (such as [Google Cloud Scheduler](https://cloud.google.com/scheduler) or [Google Cloud Tasks](https://cloud.google.com/tasks)). These admin endpoints are attractive for denial-of-service attacks because they initiate expensive bulk I/O and database operations. + +The admin endpoints must have restricted access control and not be public. For now, the easiest way to do this without a third service (a separate admin service) is to restrict access to the whole catalog server service which also implements the admin endpoints. Later, the catalog server can be refactored to contain only a public API and the admin endpoints moved to a separate service. + +Access control to the catalog service can be implemented in Google Cloud Run by setting up a service account to make requests and limiting the service to non-public visibility, [as documented here](https://cloud.google.com/run/docs/authenticating/service-to-service). + +As the article says, this should be environment agnostic, so it works outside of Google Cloud - though each environment will have its own way of setting the access control of the catalog service. 
### Custom Elements Manifest diff --git a/packages/catalog-server/src/lib/catalog.ts b/packages/catalog-server/src/lib/catalog.ts index 28099ab4..f5817ad5 100644 --- a/packages/catalog-server/src/lib/catalog.ts +++ b/packages/catalog-server/src/lib/catalog.ts @@ -42,6 +42,11 @@ const toTemporalInstant = (date: Date) => { */ const defaultPackageRefreshInterval = Temporal.Duration.from({minutes: 5}); +/** + * The default amount of time between automated bulk updates of packages. + */ +const defaultPackageUpdateInterval = Temporal.Duration.from({hours: 6}); + export interface CatalogInit { repository: Repository; files: PackageFiles; @@ -77,7 +82,7 @@ export class Catalog { packageVersion?: PackageVersion; problems?: ValidationProblem[]; }> { - console.log('Catalog.importPackage'); + console.log('Catalog.importPackage', packageName); const currentPackageInfo = await this.#repository.getPackageInfo( packageName @@ -347,4 +352,17 @@ export class Catalog { // to the repository return this.#repository.queryElements({query, limit}); } + + async getPackagesToUpdate(notUpdatedSince?: Temporal.Instant) { + if (notUpdatedSince === undefined) { + const now = Temporal.Now.instant(); + notUpdatedSince = now.subtract(defaultPackageUpdateInterval); + } + + const packages = await this.#repository.getPackagesToUpdate( + notUpdatedSince, + 100 + ); + return packages; + } } diff --git a/packages/catalog-server/src/lib/firestore/firestore-repository.ts b/packages/catalog-server/src/lib/firestore/firestore-repository.ts index e36ad628..72744980 100644 --- a/packages/catalog-server/src/lib/firestore/firestore-repository.ts +++ b/packages/catalog-server/src/lib/firestore/firestore-repository.ts @@ -15,6 +15,7 @@ import { CollectionReference, CollectionGroup, UpdateData, + Timestamp, } from '@google-cloud/firestore'; import {Firestore} from '@google-cloud/firestore'; import firebase from 'firebase-admin'; @@ -55,6 +56,7 @@ import { } from './package-version-converter.js'; import 
{customElementConverter} from './custom-element-converter.js'; import {validationProblemConverter} from './validation-problem-converter.js'; +import type {Temporal} from '@js-temporal/polyfill'; const projectId = process.env['GCP_PROJECT_ID'] || 'wc-catalog'; firebase.initializeApp({projectId}); @@ -577,13 +579,37 @@ export class FirestoreRepository implements Repository { return result; } - getPackageRef(packageName: string) { + async getPackagesToUpdate( + notUpdatedSince: Temporal.Instant, + limit = 100 + ): Promise> { + const date = new Date(notUpdatedSince.epochMilliseconds); + const notUpdatedSinceTimestamp = Timestamp.fromDate(date); + + // Only query 'READY', 'ERROR', and 'NOT_FOUND' packages. + // INITIALIZING and UPDATING packages are being updated, possibly by the + // batch update task calling this method. + // ERROR and NOT_FOUND are "recoverable" errors, so we should try to import + // them again. + const result = await this.getPackageCollectionRef() + .where('status', 'in', ['READY', 'ERROR', 'NOT_FOUND']) + .where('lastUpdate', '<', notUpdatedSinceTimestamp) + .limit(limit) + .get(); + const packages = result.docs.map((d) => d.data()); + return packages; + } + + getPackageCollectionRef() { return db .collection('packages' + (this.namespace ? 
`-${this.namespace}` : '')) - .doc(packageNameToId(packageName)) .withConverter(packageInfoConverter); } + getPackageRef(packageName: string) { + return this.getPackageCollectionRef().doc(packageNameToId(packageName)); + } + getPackageVersionCollectionRef(packageName: string) { return this.getPackageRef(packageName) .collection('versions') diff --git a/packages/catalog-server/src/lib/repository.ts b/packages/catalog-server/src/lib/repository.ts index 97f469b4..d4f90f85 100644 --- a/packages/catalog-server/src/lib/repository.ts +++ b/packages/catalog-server/src/lib/repository.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type {Temporal} from '@js-temporal/polyfill'; import type { CustomElement, PackageInfo, @@ -150,4 +151,12 @@ export interface Repository { packageName: string, version: string ): Promise; + + /** + * Returns packages that have not been updated since the date given. + */ + getPackagesToUpdate( + notUpdatedSince: Temporal.Instant, + limit: number + ): Promise>; } diff --git a/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts b/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts index cc8779b0..68d6bb6a 100644 --- a/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts +++ b/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts @@ -22,6 +22,10 @@ export const makeBootstrapPackagesRoute = const bootstrapListFile = await readFile(bootstrapListFilePath, 'utf-8'); const bootstrapList = JSON.parse(bootstrapListFile); const packageNames = bootstrapList['packages'] as Array; + + // TODO (justinfagnani): rather than import the packages directly, add them + // to the DB in a non-imported state, then kick off the standard update + // workflow, which will import them all. 
const results = await Promise.all( packageNames.map( async ( diff --git a/packages/catalog-server/src/lib/server/routes/update-packages.ts b/packages/catalog-server/src/lib/server/routes/update-packages.ts new file mode 100644 index 00000000..6487680c --- /dev/null +++ b/packages/catalog-server/src/lib/server/routes/update-packages.ts @@ -0,0 +1,95 @@ +import {Temporal} from '@js-temporal/polyfill'; +import {PackageInfo} from '@webcomponents/catalog-api/lib/schema.js'; +import type Koa from 'koa'; +import type {Catalog} from '../../catalog.js'; + +// Google Cloud Run default request timeout is 5 minutes, so to do longer +// imports we need to configure the timeout. +const maxImportDuration = Temporal.Duration.from({minutes: 5}); + +export const makeUpdatePackagesRoute = + (catalog: Catalog) => async (context: Koa.Context) => { + // TODO (justinfagnani): DO_NOT_LAUNCH check that the request is from one + // of our service accounts + + const startInstant = Temporal.Now.instant(); + // If the `force` query parameter is present we force updating of all + // packages by setting the `notUpdatedSince` parameter to `startInstant` so + // that we get all packages last updated before now. We calculate the + // `notUpdatedSince` time once before updates so that we don't retrieve + // packages that we update in this operation. + // `force` is useful for development and testing as we may be trying to + // update packages that were just imported. + // TODO (justinfagnani): check a DEV mode also so this isn't available + // in production? + const force = 'force' in context.query; + const notUpdatedSince = force ? startInstant : undefined; + + // If `force` is true, override the default packageUpdateInterval + // TODO: how do we make an actually 0 duration? + const packageUpdateInterval = force + ? 
Temporal.Duration.from({microseconds: 1}) + : undefined; + + console.log('Starting package update at', startInstant, `force: ${force}`); + + let packagesToUpdate!: Array; + let packagesUpdated = 0; + let iteration = 0; + + // Loop through batches of packages to update. + // We batch here so that we can pause and check that we're still within the + // maxImportDuration, and use small enough batches so that we can ensure at + // least one batch in that time. + do { + // getPackagesToUpdate() queries the first N (default 100) packages that + // have not been updated since the update interval (default 6 hours). + // When a package is imported it's lastUpdate date will be updated and the + // next call to getPackagesToUpdate() will return the next 100 packages. + // This way we don't need a DB cursor to make progress through the + // package list. + packagesToUpdate = await catalog.getPackagesToUpdate(notUpdatedSince); + + if (packagesToUpdate.length === 0) { + // No more packages to update + if (iteration === 0) { + console.log('No packages to update'); + } + break; + } + + await Promise.allSettled( + packagesToUpdate.map(async (pkg) => { + try { + return await catalog.importPackage(pkg.name, packageUpdateInterval); + } catch (e) { + console.error(e); + throw e; + } + }) + ); + packagesUpdated += packagesToUpdate.length; + + const now = Temporal.Now.instant(); + const timeSinceStart = now.since(startInstant); + // If the time since the update started is not less than that max import + // duration, stop. + // TODO (justinfagnani): we need a way to test this + if (Temporal.Duration.compare(timeSinceStart, maxImportDuration) !== -1) { + break; + } + } while (true); + console.log(`Updated ${packagesUpdated} packages`); + + if (packagesToUpdate.length > 0) { + // TODO (justinfagnani): kick off new update request + console.log(`Not all packages were updated (${packagesToUpdate.length})`); + } + + context.status = 200; + context.type = 'html'; + context.body = ` +

Update Results

+

Updated ${packagesUpdated} package

+ `; + }; diff --git a/packages/catalog-server/src/lib/server/server.ts b/packages/catalog-server/src/lib/server/server.ts index 1b0fc92d..36400f99 100644 --- a/packages/catalog-server/src/lib/server/server.ts +++ b/packages/catalog-server/src/lib/server/server.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2021 Google LLC + * Copyright 2022 Google LLC * SPDX-License-Identifier: BSD-3-Clause */ @@ -17,6 +17,7 @@ import {NpmAndUnpkgFiles} from '@webcomponents/custom-elements-manifest-tools/li import {makeGraphQLRoute} from './routes/graphql.js'; import {makeBootstrapPackagesRoute} from './routes/bootstrap-packages.js'; +import {makeUpdatePackagesRoute} from './routes/update-packages.js'; export const makeServer = async () => { const files = new NpmAndUnpkgFiles(); @@ -32,6 +33,8 @@ export const makeServer = async () => { router.get('/bootstrap-packages', makeBootstrapPackagesRoute(catalog)); + router.get('/update-packages', makeUpdatePackagesRoute(catalog)); + router.get('/', async (ctx) => { ctx.status = 200; ctx.type = 'html';