11import type { Dictionary } from '@crawlee/types' ;
2+ import type { StandardSchemaV1 } from '@standard-schema/spec' ;
23
34import type { CrawlingContext , LoadedRequest , RestrictedCrawlingContext } from './crawlers/crawler_commons.js' ;
4- import { MissingRouteError } from './errors.js' ;
5+ import { MissingRouteError , RequestValidationError } from './errors.js' ;
56import type { Request } from './request.js' ;
67import type { Awaitable } from './typedefs.js' ;
78
89const defaultRoute = Symbol ( 'default-route' ) ;
910
/**
 * Describes, for every request label, the shape `request.userData` is expected to have for that label.
 * Supply it as the `Routes` type argument of {@apilink Router} (or of a `createXRouter` factory) so that
 * {@apilink Router.addHandler} can check and autocomplete labels and type `request.userData` per label.
 *
 * ```ts
 * interface MyRoutes {
 *     PRODUCT: { sku: string; price: number };
 *     CATEGORY: { categoryId: string };
 * }
 * ```
 */
export type RouteMap = Record<string, Dictionary>;
24+
/**
 * Associates each request label with a [Standard Schema](https://standardschema.dev) (Zod, Valibot,
 * ArkType, …) describing that label's `request.userData`. Handing such a map to {@apilink Router.create}
 * or a `createXRouter` factory yields per-label `request.userData` types *and* runtime validation that
 * happens before the matching handler is invoked.
 */
export type RouteSchemas = Record<string, StandardSchemaV1>;
32+
/**
 * Turns a {@apilink RouteSchemas} map into the matching {@apilink RouteMap}: every label keeps its key and
 * is mapped to the output type inferred from its schema. When that output type is not assignable to
 * {@apilink Dictionary} (i.e. it is not object-shaped), the label is typed as a plain `Dictionary` instead.
 */
export type RoutesFromSchemas<Schemas extends RouteSchemas> = {
    [Key in keyof Schemas]: StandardSchemaV1.InferOutput<Schemas[Key]> extends Dictionary
        ? StandardSchemaV1.InferOutput<Schemas[Key]>
        : Dictionary;
};
42+
/**
 * Crawling context as seen by a route handler: identical to `Context`, except that its `request`
 * property is replaced by a {@apilink LoadedRequest} whose `userData` is typed as `UserData`.
 */
export type RouterHandlerContext<Context, UserData extends Dictionary> = Omit<Context, 'request'> & {
    request: LoadedRequest<Request<UserData>>;
};
49+
/**
 * Labels that {@apilink Router.addHandler} accepts for a given route map.
 *
 * - Open map (`string` is a key of `Routes`, as with the default `Record<string, ...>`): every string
 *   or symbol is allowed, which matches the behaviour of an untyped router.
 * - Concrete map (e.g. `{ PRODUCT: ...; CATEGORY: ... }`): only the declared string keys, plus symbols,
 *   are allowed, so an unknown label is reported at compile time.
 */
export type RouterLabel<Routes extends Record<keyof Routes, Dictionary>> = string extends keyof Routes
    ? string | symbol
    : (keyof Routes & string) | symbol;
59+
/**
 * A {@apilink Router} that is also callable: invoking it with a crawling context has the call signature
 * `(ctx: Context) => Awaitable<void>`, while all `Router` members remain available on the same value.
 *
 * `Routes` defaults to an open map whose `userData` type is taken from `Context['request']`.
 */
export interface RouterHandler<
    Context extends Omit<RestrictedCrawlingContext, 'enqueueLinks'> = CrawlingContext,
    Routes extends Record<keyof Routes, Dictionary> = Record<string, GetUserDataFromRequest<Context['request']>>,
> extends Router<Context, Routes> {
    (ctx: Context): Awaitable<void>;
}
1566
/**
 * Extracts the `UserData` type argument from a {@apilink Request} type; resolves to `never` when `T`
 * is not a `Request`.
 */
export type GetUserDataFromRequest<T> = T extends Request<infer UserData> ? UserData : never;
1768
/**
 * Object form of a route table: one handler per label of `Routes`, each receiving `Context` with its
 * `request` narrowed to that label's `userData` type.
 *
 * NOTE(review): the handler's `request` is typed as a plain `Request` here, whereas
 * `RouterHandlerContext` uses `LoadedRequest<Request<...>>` — confirm whether the difference is intended.
 */
export type RouterRoutes<Context, Routes extends Record<keyof Routes, Dictionary>> = {
    [RouteLabel in keyof Routes]: (
        ctx: Omit<Context, 'request'> & { request: Request<Routes[RouteLabel]> },
    ) => Awaitable<void>;
};
2172
2273/**
@@ -83,9 +134,57 @@ export type RouterRoutes<Context, UserData extends Dictionary> = {
83134 * ctx.log.info('...');
84135 * });
85136 * ```
137+ *
138+ * ## Typed labels
139+ *
140+ * To get `request.userData` typed per label, declare a {@apilink RouteMap} and pass it as the second
141+ * type argument. The label passed to {@apilink Router.addHandler} then drives the type of
142+ * `request.userData`, and unknown labels are rejected at compile time:
143+ *
144+ * ```ts
145+ * import { createCheerioRouter, CheerioCrawlingContext } from 'crawlee';
146+ *
147+ * interface Routes {
148+ * PRODUCT: { sku: string; price: number };
149+ * CATEGORY: { categoryId: string };
150+ * }
151+ *
152+ * const router = createCheerioRouter<CheerioCrawlingContext, Routes>();
153+ *
154+ * router.addHandler('PRODUCT', async ({ request }) => {
155+ * request.userData.sku; // string
156+ * request.userData.price; // number
157+ * });
158+ *
159+ * router.addHandler('TYPO', async () => {}); // compile error: not a known label
160+ * ```
161+ *
162+ * ## Schema-validated labels
163+ *
164+ * Passing a [Standard Schema](https://standardschema.dev) per label both infers the `request.userData`
165+ * types *and* validates them at runtime before the handler runs (replacing `request.userData` with the
166+ * parsed value). A failing request throws a {@apilink RequestValidationError}.
167+ *
168+ * ```ts
169+ * import { z } from 'zod';
170+ * import { createCheerioRouter } from 'crawlee';
171+ *
172+ * const router = createCheerioRouter({
173+ * PRODUCT: z.object({ sku: z.string(), price: z.number() }),
174+ * CATEGORY: z.object({ categoryId: z.string() }),
175+ * });
176+ *
177+ * router.addHandler('PRODUCT', async ({ request }) => {
178+ * request.userData.price; // number, inferred from the schema and validated at runtime
179+ * });
180+ * ```
86181 */
87- export class Router < Context extends Omit < RestrictedCrawlingContext , 'enqueueLinks' > > {
182+ export class Router <
183+ Context extends Omit < RestrictedCrawlingContext , 'enqueueLinks' > ,
184+ Routes extends Record < keyof Routes , Dictionary > = Record < string , GetUserDataFromRequest < Context [ 'request' ] > > ,
185+ > {
88186 private readonly routes : Map < string | symbol , ( ctx : any ) => Awaitable < void > > = new Map ( ) ;
187+ private readonly schemas : Map < string | symbol , StandardSchemaV1 > = new Map ( ) ;
89188 private readonly middlewares : ( ( ctx : Context ) => Awaitable < void > ) [ ] = [ ] ;
90189
91190 /**
@@ -95,26 +194,52 @@ export class Router<Context extends Omit<RestrictedCrawlingContext, 'enqueueLink
95194 protected constructor ( ) { }
96195
/**
 * Adds a route handler for one of the labels declared in the router's {@apilink RouteMap}.
 * The handler's `request.userData` is typed as `Routes[Label]`.
 *
 * @param label A string key of `Routes`.
 * @param handler Function invoked with the crawling context for requests routed to `label`.
 */
addHandler<Label extends keyof Routes & string>(
    label: Label,
    handler: (ctx: RouterHandlerContext<Context, Routes[Label]>) => Awaitable<void>,
): void;

/**
 * Adds a route handler while stating the `request.userData` type explicitly through `UserData`.
 * This overload also accepts symbol labels and, when the route map is open, arbitrary strings
 * (see {@apilink RouterLabel}).
 *
 * @param label Label accepted by `RouterLabel<Routes>`.
 * @param handler Function invoked with the crawling context for requests routed to `label`.
 */
addHandler<UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(
    label: RouterLabel<Routes>,
    handler: (ctx: RouterHandlerContext<Context, UserData>) => Awaitable<void>,
): void;

// Shared implementation of both overloads: run `this.validate(label)` first, then store the handler
// under that label in `this.routes`.
addHandler(label: string | symbol, handler: (ctx: any) => Awaitable<void>): void {
    this.validate(label);
    this.routes.set(label, handler);
}
107218
108219 /**
109- * Registers default route handler.
220+ * Registers default route handler. By default `request.userData` is typed as the union of all
221+ * `userData` shapes declared in the router's {@apilink RouteMap}.
110222 */
111- addDefaultHandler < UserData extends Dictionary = GetUserDataFromRequest < Context [ 'request' ] > > (
112- handler : ( ctx : Omit < Context , 'request' > & { request : LoadedRequest < Request < UserData > > } ) => Awaitable < void > ,
223+ addDefaultHandler < UserData extends Dictionary = Routes [ keyof Routes ] > (
224+ handler : ( ctx : RouterHandlerContext < Context , UserData > ) => Awaitable < void > ,
113225 ) {
114226 this . validate ( defaultRoute ) ;
115227 this . routes . set ( defaultRoute , handler ) ;
116228 }
117229
/**
 * Stores {@apilink RouteSchemas|Standard Schema} validators, keyed by label, in `this.schemas`.
 * Entries whose value is falsy (e.g. `undefined` in a partial map) are ignored, and a schema given for
 * a label that already has one replaces the earlier schema.
 *
 * @param schemas Map of label to schema; any subset of the router's string labels may be present.
 */
addSchemas(schemas: Partial<Record<keyof Routes & string, StandardSchemaV1>>) {
    Object.entries(schemas).forEach(([routeLabel, routeSchema]) => {
        if (!routeSchema) {
            return;
        }

        this.schemas.set(routeLabel, routeSchema as StandardSchemaV1);
    });
}
242+
118243 /**
119244 * Registers a middleware that will be fired before the matching route handler.
120245 * Multiple middlewares can be registered, they will be fired in the same order.
@@ -142,6 +267,27 @@ export class Router<Context extends Omit<RestrictedCrawlingContext, 'enqueueLink
142267 ) ;
143268 }
144269
/**
 * Validates `request.userData` against the schema stored for the request's label and, on success,
 * replaces `request.userData` with the value returned by the schema.
 *
 * Does nothing when the request has no label or when no schema is stored for that label.
 *
 * @param context Crawling context whose `request` is validated and updated in place.
 * @throws {RequestValidationError} When the schema reports issues; constructed with the label and
 * the reported issues.
 */
private async validateRequest(context: Context): Promise<void> {
    const { request } = context;
    const { label } = request;

    // Guard instead of a non-null assertion: past this point `label` is known to be defined.
    if (label == null) {
        return;
    }

    const schema = this.schemas.get(label);

    if (!schema) {
        return;
    }

    const result = await schema['~standard'].validate(request.userData);

    if (result.issues) {
        throw new RequestValidationError(label, result.issues);
    }

    request.userData = result.value as Dictionary;

    // A schema that strips unknown keys returns a value without `label`.
    // NOTE(review): presumably `request.label` is backed by `request.userData.label`, in which case the
    // assignment above would drop the label the router needs to pick the handler — confirm against the
    // `Request` class. Restoring it only when it went missing makes this a no-op otherwise.
    if (request.label == null) {
        request.label = label;
    }
}
290+
145291 /**
146292 * Throws when the label already exists in our registry.
147293 */
@@ -174,27 +320,55 @@ export class Router<Context extends Omit<RestrictedCrawlingContext, 'enqueueLink
174320 * });
175321 * await crawler.run();
176322 * ```
323+ *
324+ * Passing a {@apilink RouteSchemas|Standard Schema} per label instead of handlers infers the
325+ * `request.userData` types and validates them at runtime:
326+ *
327+ * ```ts
328+ * import { z } from 'zod';
329+ *
330+ * const router = Router.create({
331+ * PRODUCT: z.object({ sku: z.string() }),
332+ * });
333+ * ```
177334 */
178335 static create <
179336 Context extends Omit < RestrictedCrawlingContext , 'enqueueLinks' > = CrawlingContext ,
180337 UserData extends Dictionary = GetUserDataFromRequest < Context [ 'request' ] > ,
181- > ( routes ?: RouterRoutes < Context , UserData > ) : RouterHandler < Context > {
182- const router = new Router < Context > ( ) ;
338+ Routes extends Record < keyof Routes , Dictionary > = Record < string , UserData > ,
339+ > ( routes ?: RouterRoutes < Context , Routes > ) : RouterHandler < Context , Routes > ;
340+
341+ static create <
342+ Context extends Omit < RestrictedCrawlingContext , 'enqueueLinks' > = CrawlingContext ,
343+ const Schemas extends RouteSchemas = RouteSchemas ,
344+ > ( schemas : Schemas ) : RouterHandler < Context , RoutesFromSchemas < Schemas > > ;
345+
346+ static create < Context extends Omit < RestrictedCrawlingContext , 'enqueueLinks' > = CrawlingContext > (
347+ routesOrSchemas ?: Record < string , ( ( ctx : any ) => Awaitable < void > ) | StandardSchemaV1 > ,
348+ ) : RouterHandler < Context , any > {
349+ const router = new Router < Context , any > ( ) ;
183350 const obj = Object . create ( Function . prototype ) ;
184351
185352 obj . addHandler = router . addHandler . bind ( router ) ;
186353 obj . addDefaultHandler = router . addDefaultHandler . bind ( router ) ;
354+ obj . addSchemas = router . addSchemas . bind ( router ) ;
187355 obj . getHandler = router . getHandler . bind ( router ) ;
188356 obj . use = router . use . bind ( router ) ;
189357
190- for ( const [ label , handler ] of Object . entries ( routes ?? { } ) ) {
191- router . addHandler ( label , handler ) ;
358+ for ( const [ label , value ] of Object . entries ( routesOrSchemas ?? { } ) ) {
359+ if ( typeof value === 'function' ) {
360+ router . addHandler ( label as keyof Context & string , value as ( ctx : any ) => Awaitable < void > ) ;
361+ } else {
362+ router . schemas . set ( label , value ) ;
363+ }
192364 }
193365
194366 const func = async function ( context : Context ) {
195367 const { url, loadedUrl, label } = context . request ;
196368 context . log . debug ( 'Page opened.' , { label, url : loadedUrl ?? url } ) ;
197369
370+ await router . validateRequest ( context ) ;
371+
198372 for ( const middleware of router . middlewares ) {
199373 await middleware ( context ) ;
200374 }
@@ -204,6 +378,6 @@ export class Router<Context extends Omit<RestrictedCrawlingContext, 'enqueueLink
204378
205379 Object . setPrototypeOf ( func , obj ) ;
206380
207- return func as unknown as RouterHandler < Context > ;
381+ return func as unknown as RouterHandler < Context , any > ;
208382 }
209383}
0 commit comments