-
-
Notifications
You must be signed in to change notification settings - Fork 9.5k
fix(scan): isolate detect() exceptions, clarify timeout errors, fix resolve error count #599
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
ac3190c
0a67e21
c9978e9
35ded27
ff09198
37b6359
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| // HTTP transport helpers shared across providers. | ||
| // Files prefixed with _ are never loaded as providers by scan.mjs. | ||
|
|
||
const DEFAULT_TIMEOUT_MS = 10_000;
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (compatible; career-ops/1.3)';

/**
 * Perform a fetch whose whole lifetime — connection, headers AND body
 * consumption — is bounded by a single timeout.
 *
 * @param {string} url Request URL.
 * @param {object} [opts]
 * @param {number} [opts.timeoutMs] Abort after this many milliseconds.
 * @param {Object<string,string>} [opts.headers] Extra headers; these override
 *   the default user-agent regardless of header-name casing.
 * @param {string} [opts.method] HTTP method, default 'GET'.
 * @param {(string|null)} [opts.body] Request body, default null.
 * @param {(res: Response) => any} [consume] Reads the response while the
 *   timeout is still armed. Defaults to returning the Response untouched.
 * @returns {Promise<any>} Whatever `consume` returns.
 * @throws {Error} `HTTP <status>[: <snippet>]` (with `.status` and `.body`)
 *   for non-2xx responses; `Request timed out after <n>ms` (with `.cause`)
 *   when the abort timer fires; any other fetch/consume error unchanged.
 */
async function fetchWithTimeout(
  url,
  { timeoutMs = DEFAULT_TIMEOUT_MS, headers = {}, method = 'GET', body = null } = {},
  consume = (res) => res,
) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    // Merge through Headers so a caller-supplied 'User-Agent' replaces the
    // default instead of being appended to it under a differently-cased key.
    const merged = new Headers({ 'user-agent': DEFAULT_USER_AGENT });
    for (const [name, value] of new Headers(headers ?? {})) {
      merged.set(name, value);
    }

    const res = await fetch(url, {
      method,
      headers: merged,
      body,
      signal: controller.signal,
    });
    if (!res.ok) {
      const responseText = await res.text().catch(() => '');
      const snippet = responseText.replace(/\s+/g, ' ').trim().slice(0, 300);
      const err = new Error(snippet ? `HTTP ${res.status}: ${snippet}` : `HTTP ${res.status}`);
      err.status = res.status;
      err.body = responseText;
      throw err;
    }
    // Await inside the try so the timer is still running while the body is
    // read; a stalled body stream is aborted like a stalled connection.
    return await consume(res);
  } catch (e) {
    if (e?.name === 'AbortError') {
      throw new Error(`Request timed out after ${timeoutMs}ms`, { cause: e });
    }
    throw e;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Fetch a URL and parse the response body as JSON.
 *
 * @param {string} url
 * @param {object} [opts] Same options as fetchWithTimeout.
 * @returns {Promise<unknown>} The parsed JSON value.
 */
export async function fetchJson(url, opts = {}) {
  return fetchWithTimeout(url, opts, (res) => res.json());
}

/**
 * Fetch a URL and return the response body as text.
 *
 * @param {string} url
 * @param {object} [opts] Same options as fetchWithTimeout.
 * @returns {Promise<string>} The response body.
 */
export async function fetchText(url, opts = {}) {
  return fetchWithTimeout(url, opts, (res) => res.text());
}
|
|
||
/**
 * Bundle the HTTP helpers of this module into a single context object,
 * tagged with `transport: 'http'`.
 *
 * @returns {{ transport: string, fetchJson: Function, fetchText: Function }}
 */
export function makeHttpCtx() {
  const ctx = { transport: 'http', fetchJson, fetchText };
  return ctx;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| // Type catalog for the provider plugin contract. | ||
| // | ||
| // This file is documentation-only — pure JSDoc @typedef annotations. The | ||
| // project is plain ESM JavaScript with no build step; provider authors can | ||
| // reference these types via `/** @typedef {import('./_types.js').Provider} Provider */` | ||
| // at the top of a `// @ts-check`-enabled file to get IDE hints. The runtime | ||
| // contract is enforced by scan.mjs (id presence, fetch is a function, fetch | ||
| // returns an array), not by these annotations. | ||
| // | ||
| // Files prefixed with _ are never loaded as providers by scan.mjs. | ||
|
|
||
| /** | ||
| * Normalized job posting — the unit of currency throughout the scanner. | ||
| * | ||
| * @typedef {object} Job | ||
| * @property {string} title Required, non-empty after trim. | ||
| * @property {string} url Required, absolute URL — used as the dedup key. | ||
| * @property {string} company May be empty when the source can't expose it | ||
| * at the list-page level; populated downstream. | ||
| * @property {string} location May be empty. | ||
| */ | ||
|
|
||
| /** | ||
| * A single `tracked_companies` entry from `portals.yml`. | ||
| * | ||
| * Provider-specific fields are opaque to scan.mjs and validated by the | ||
| * provider itself. Examples in current providers: `api`, `careers_url`. | ||
| * Providers read these directly off the entry object — no schema enforcement | ||
| * at the framework level. | ||
| * | ||
| * @typedef {object} PortalEntry | ||
| * @property {string} name User-facing label; appears in logs and placeholders. | ||
| * @property {boolean} [enabled] Default: true. | ||
| * @property {string} [careers_url] Public listing URL; consumed by detect(). | ||
| * @property {string} [api] Explicit API endpoint URL; read by the greenhouse provider. | ||
| * @property {string} [provider] Explicit provider id — bypasses detect(). | ||
| * @property {('http')} [transport] Default: 'http'. Reserved for future transports. | ||
| */ | ||
|
|
||
| /** | ||
| * Returned by `detect()` when a provider claims an entry. `url` is | ||
| * informational (used in logs); routing only checks for a non-null return. | ||
| * | ||
| * @typedef {object} DetectHit | ||
| * @property {string} url | ||
| */ | ||
|
|
||
| /** | ||
| * Options forwarded to the underlying `fetch` call. | ||
| * | ||
| * @typedef {object} FetchOptions | ||
| * @property {number} [timeoutMs] | ||
| * @property {Object<string,string>} [headers] | ||
| * @property {string} [method] | ||
| * @property {(string|null)} [body] | ||
| */ | ||
|
|
||
| /** | ||
| * What scan.mjs hands to provider.fetch(). For Phase A only `transport: 'http'` | ||
| * is implemented; the shape reserves room for future transports without | ||
| * breaking the contract. | ||
| * | ||
| * @typedef {object} Context | ||
| * @property {('http')} transport | ||
| * @property {(url: string, opts?: FetchOptions) => Promise<string>} fetchText | ||
| * @property {(url: string, opts?: FetchOptions) => Promise<unknown>} fetchJson | ||
| */ | ||
|
|
||
| /** | ||
| * The provider contract — the default export of every providers/*.mjs file | ||
| * (excluding _-prefixed shared helpers). | ||
| * | ||
| * @typedef {object} Provider | ||
| * @property {string} id Unique across all loaded providers. | ||
| * @property {((entry: PortalEntry) => (DetectHit | null))} [detect] Optional auto-detection. | ||
| * @property {(entry: PortalEntry, ctx: Context) => Promise<Job[]>} fetch Required. | ||
| */ | ||
|
|
||
| export {}; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| // @ts-check | ||
| /** @typedef {import('./_types.js').Provider} Provider */ | ||
|
|
||
| // Ashby provider — hits the public posting-api endpoint. | ||
| // Auto-detects from careers_url pattern `https://jobs.ashbyhq.com/<slug>`. | ||
|
|
||
/**
 * Derive the Ashby posting-api URL from an entry's `careers_url`.
 *
 * The host must be `jobs.ashbyhq.com` at the start of the URL (scheme
 * optional), so a URL that merely mentions that host in its path or query
 * is not claimed. The first path segment is used as the board slug.
 *
 * @param {{ careers_url?: unknown }} entry
 * @returns {(string|null)} API URL, or null when `careers_url` is missing,
 *   not a string, or not an Ashby job-board URL.
 */
function resolveApiUrl(entry) {
  const careersUrl = entry.careers_url;
  // Config values come from user-edited YAML; a non-string must not throw.
  if (typeof careersUrl !== 'string') return null;
  const match = careersUrl.trim().match(/^(?:https?:\/\/)?jobs\.ashbyhq\.com\/([^/?#]+)/i);
  if (!match) return null;
  return `https://api.ashbyhq.com/posting-api/job-board/${match[1]}?includeCompensation=true`;
}
|
|
||
| /** @type {Provider} */ | ||
export default {
  id: 'ashby',

  // Claims the entry when an Ashby API URL can be derived from it.
  detect(entry) {
    const url = resolveApiUrl(entry);
    if (!url) return null;
    return { url };
  },

  // Fetches the job board JSON and maps each posting to a normalized job.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`ashby: cannot derive API URL for ${entry.name}`);
    }

    const payload = await ctx.fetchJson(endpoint);
    const postings = Array.isArray(payload?.jobs) ? payload.jobs : [];

    const results = [];
    for (const posting of postings) {
      results.push({
        title: posting.title || '',
        url: posting.jobUrl || '',
        company: entry.name,
        location: posting.location || '',
      });
    }
    return results;
  },
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| // @ts-check | ||
| /** @typedef {import('./_types.js').Provider} Provider */ | ||
|
|
||
| // Greenhouse provider — hits the public boards-api JSON endpoint. | ||
| // Handles both explicit `api:` URLs and auto-detection from `careers_url`. | ||
|
|
||
| function resolveApiUrl(entry) { | ||
| if (entry.api && entry.api.includes('greenhouse')) return entry.api; | ||
| const url = entry.careers_url || ''; | ||
| const match = url.match(/job-boards(?:\.eu)?\.greenhouse\.io\/([^/?#]+)/); | ||
| if (match) return `https://boards-api.greenhouse.io/v1/boards/${match[1]}/jobs`; | ||
| return null; | ||
|
Comment on lines
+7
to
+12
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Tighten Greenhouse URL validation before fetching.
Suggested hardening function resolveApiUrl(entry) {
- if (entry.api && entry.api.includes('greenhouse')) return entry.api;
- const url = entry.careers_url || '';
- const match = url.match(/job-boards(?:\.eu)?\.greenhouse\.io\/([^/?#]+)/);
- if (match) return `https://boards-api.greenhouse.io/v1/boards/${match[1]}/jobs`;
+ const parse = (value) => {
+ try {
+ return new URL(value);
+ } catch {
+ return null;
+ }
+ };
+
+ const explicit = entry.api ? parse(entry.api) : null;
+ if (explicit?.protocol === 'https:' && explicit.hostname === 'boards-api.greenhouse.io') {
+ return explicit.toString();
+ }
+
+ const careers = entry.careers_url ? parse(entry.careers_url) : null;
+ if (!careers || careers.protocol !== 'https:') return null;
+ if (!['job-boards.greenhouse.io', 'job-boards.eu.greenhouse.io', 'boards.greenhouse.io'].includes(careers.hostname)) {
+ return null;
+ }
+
+ const [slug] = careers.pathname.split('/').filter(Boolean);
+ return slug ? `https://boards-api.greenhouse.io/v1/boards/${slug}/jobs` : null;
- return null;
}As per coding guidelines 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| /** @type {Provider} */ | ||
export default {
  id: 'greenhouse',

  // Claims the entry when a Greenhouse API URL can be resolved for it.
  detect(entry) {
    const url = resolveApiUrl(entry);
    if (!url) return null;
    return { url };
  },

  // Fetches the board JSON and maps each posting to a normalized job.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`greenhouse: cannot derive API URL for ${entry.name}`);
    }

    const payload = await ctx.fetchJson(endpoint);
    if (!Array.isArray(payload?.jobs)) {
      return [];
    }

    const toJob = (posting) => ({
      title: posting.title || '',
      url: posting.absolute_url || '',
      company: entry.name,
      location: posting.location?.name || '',
    });
    return payload.jobs.map((posting) => toJob(posting));
  },
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| // @ts-check | ||
| /** @typedef {import('./_types.js').Provider} Provider */ | ||
|
|
||
| // Lever provider — hits the public postings endpoint. | ||
| // Auto-detects from careers_url pattern `https://jobs.lever.co/<slug>`. | ||
|
|
||
/**
 * Derive the Lever postings API URL from an entry's `careers_url`.
 *
 * The host must be `jobs.lever.co` at the start of the URL (scheme
 * optional), so a URL that merely mentions that host in its path or query
 * is not claimed. The first path segment is used as the company slug.
 *
 * @param {{ careers_url?: unknown }} entry
 * @returns {(string|null)} API URL, or null when `careers_url` is missing,
 *   not a string, or not a Lever job-board URL.
 */
function resolveApiUrl(entry) {
  const careersUrl = entry.careers_url;
  // Config values come from user-edited YAML; a non-string must not throw.
  if (typeof careersUrl !== 'string') return null;
  const match = careersUrl.trim().match(/^(?:https?:\/\/)?jobs\.lever\.co\/([^/?#]+)/i);
  if (!match) return null;
  return `https://api.lever.co/v0/postings/${match[1]}`;
}
|
|
||
| /** @type {Provider} */ | ||
export default {
  id: 'lever',

  // Claims the entry when a Lever API URL can be derived from it.
  detect(entry) {
    const url = resolveApiUrl(entry);
    if (!url) return null;
    return { url };
  },

  // Fetches the postings array and maps each posting to a normalized job.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`lever: cannot derive API URL for ${entry.name}`);
    }

    const payload = await ctx.fetchJson(endpoint);
    const postings = Array.isArray(payload) ? payload : [];

    const results = [];
    for (const posting of postings) {
      results.push({
        title: posting.text || '',
        url: posting.hostedUrl || '',
        company: entry.name,
        location: posting.categories?.location || '',
      });
    }
    return results;
  },
};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial | ⚡ Quick win
🧩 Analysis chain
🏁 Script executed:
Repository: santifer/career-ops
Length of output: 15091
🏁 Script executed:
Repository: santifer/career-ops
Length of output: 452
🏁 Script executed:
Repository: santifer/career-ops
Length of output: 10379
🏁 Script executed:
Repository: santifer/career-ops
Length of output: 78
🏁 Script executed:
Repository: santifer/career-ops
Length of output: 835
Document `api` on `PortalEntry`.
The JSDoc prose mentions `api` as a current provider field example, and `providers/greenhouse.mjs` reads `entry.api`, but the typedef omits it. Adding `@property {string} [api]` would complete the shared contract and avoid TypeScript diagnostics for providers using this field.
🤖 Prompt for AI Agents