Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions providers/_http.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// HTTP transport helpers shared across providers.
// Files prefixed with _ are never loaded as providers by scan.mjs.

const DEFAULT_TIMEOUT_MS = 10_000;
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (compatible; career-ops/1.3)';

/**
 * Issues a `fetch` request that is aborted if no response arrives in time.
 *
 * The abort timer is cleared as soon as this function settles, so it covers
 * the wait for the response (and, for non-2xx responses, reading the error
 * body) but not body reads performed later by the caller.
 *
 * @param {string} url Request URL.
 * @param {object} [options]
 * @param {number} [options.timeoutMs] Milliseconds before the request is aborted.
 * @param {HeadersInit} [options.headers] Extra request headers. A `user-agent`
 *   supplied here (in any letter case) replaces the default one.
 * @param {string} [options.method] HTTP method.
 * @param {(string|null)} [options.body] Request body.
 * @returns {Promise<Response>} The response, guaranteed to have `ok === true`.
 * @throws {Error} `HTTP <status>[: <snippet>]` for non-2xx responses, carrying
 *   `status` and the full response text in `body`; `Request timed out after
 *   <n>ms` when the abort timer fires (original abort error in `cause`).
 */
async function fetchWithTimeout(url, { timeoutMs = DEFAULT_TIMEOUT_MS, headers = {}, method = 'GET', body = null } = {}) {
  // Header names are case-insensitive. Merging through `Headers` (instead of an
  // object spread) keeps a caller-supplied `User-Agent` from being sent
  // alongside the default and combined into a single "default, custom" value.
  const requestHeaders = new Headers(headers ?? {});
  if (!requestHeaders.has('user-agent')) {
    requestHeaders.set('user-agent', DEFAULT_USER_AGENT);
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      method,
      headers: requestHeaders,
      body,
      signal: controller.signal,
    });
    if (!res.ok) {
      // Best effort: an unreadable error body must not mask the HTTP status.
      const responseText = await res.text().catch(() => '');
      // Collapse whitespace and cap the length so the message stays log-friendly.
      const snippet = responseText.replace(/\s+/g, ' ').trim().slice(0, 300);
      const err = new Error(snippet ? `HTTP ${res.status}: ${snippet}` : `HTTP ${res.status}`);
      err.status = res.status;
      err.body = responseText;
      throw err;
    }
    return res;
  } catch (e) {
    if (e?.name === 'AbortError') {
      throw new Error(`Request timed out after ${timeoutMs}ms`, { cause: e });
    }
    throw e;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Requests `url` through `fetchWithTimeout` and decodes the body as JSON.
 *
 * @param {string} url Request URL.
 * @param {object} [opts] Passed unchanged to `fetchWithTimeout`.
 * @returns {Promise<unknown>} The parsed JSON payload.
 */
export async function fetchJson(url, opts = {}) {
  const response = await fetchWithTimeout(url, opts);
  const payload = await response.json();
  return payload;
}

/**
 * Requests `url` through `fetchWithTimeout` and reads the body as text.
 *
 * @param {string} url Request URL.
 * @param {object} [opts] Passed unchanged to `fetchWithTimeout`.
 * @returns {Promise<string>} The response body.
 */
export async function fetchText(url, opts = {}) {
  const response = await fetchWithTimeout(url, opts);
  const text = await response.text();
  return text;
}

/**
 * Builds the context object for the HTTP transport: a `transport` tag plus
 * this module's `fetchJson` and `fetchText` helpers.
 *
 * @returns {{ transport: 'http', fetchJson: typeof fetchJson, fetchText: typeof fetchText }}
 */
export function makeHttpCtx() {
  const ctx = { transport: 'http', fetchJson, fetchText };
  return ctx;
}
78 changes: 78 additions & 0 deletions providers/_types.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Type catalog for the provider plugin contract.
//
// This file is documentation-only — pure JSDoc @typedef annotations. The
// project is plain ESM JavaScript with no build step; provider authors can
// reference these types via `/** @typedef {import('./_types.js').Provider} Provider */`
// at the top of a `// @ts-check`-enabled file to get IDE hints. The runtime
// contract is enforced by scan.mjs (id presence, fetch is a function, fetch
// returns an array), not by these annotations.
//
// Files prefixed with _ are never loaded as providers by scan.mjs.

/**
* Normalized job posting — the unit of currency throughout the scanner.
*
* @typedef {object} Job
* @property {string} title Required, non-empty after trim.
* @property {string} url Required, absolute URL — used as the dedup key.
* @property {string} company May be empty when the source can't expose it
* at the list-page level; populated downstream.
* @property {string} location May be empty.
*/

/**
 * A single `tracked_companies` entry from `portals.yml`.
 *
 * Provider-specific fields are opaque to scan.mjs and validated by the
 * provider itself. Examples in current providers: `api`, `careers_url`.
 * Providers read these directly off the entry object — no schema enforcement
 * at the framework level.
 *
 * @typedef {object} PortalEntry
 * @property {string} name User-facing label; appears in logs and placeholders.
 * @property {boolean} [enabled] Default: true.
 * @property {string} [careers_url] Public listing URL; consumed by detect().
 * @property {string} [api] Explicit API endpoint URL; read by the greenhouse provider.
 * @property {string} [provider] Explicit provider id — bypasses detect().
 * @property {('http')} [transport] Default: 'http'. Reserved for future transports.
 */
Comment on lines +23 to +37
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify that PortalEntry omits fields used by current providers/config.
rg -n -C2 '@typedef \{object\} PortalEntry|@property .*careers_url|@property .*provider|@property .*transport' providers/_types.js
rg -n -C2 '\bentry\.api\b|^\s+api:' providers/greenhouse.mjs templates/portals.example.yml

Repository: santifer/career-ops

Length of output: 15091


🏁 Script executed:

#!/bin/bash
# Check if other providers access entry.api besides greenhouse
rg -l 'entry\.api' providers/
# List all provider files
fd -e mjs providers/ | head -20

Repository: santifer/career-ops

Length of output: 452


🏁 Script executed:

#!/bin/bash
# Search for other entry.* accesses in providers to find undocumented fields
rg 'entry\.[a-zA-Z_]+' providers/ --only-matching -h | sort | uniq -c | sort -rn

Repository: santifer/career-ops

Length of output: 10379


🏁 Script executed:

#!/bin/bash
# Search for entry.* patterns in provider files more carefully
rg 'entry\.' providers/*.mjs -o | grep -o 'entry\.[a-zA-Z_]*' | sort | uniq -c | sort -rn

Repository: santifer/career-ops

Length of output: 78


🏁 Script executed:

#!/bin/bash
# Find all entry.FIELDNAME accesses in providers
rg -n 'entry\.[a-zA-Z_]+' providers/*.mjs

Repository: santifer/career-ops

Length of output: 835


Document api on PortalEntry.

The JSDoc prose mentions api as a current provider field example, and providers/greenhouse.mjs reads entry.api, but the typedef omits it. Adding @property {string} [api] would complete the shared contract and avoid TypeScript diagnostics for providers using this field.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@providers/_types.js` around lines 23 - 37, Add the missing JSDoc property for
the optional api field on the PortalEntry typedef: declare `@property` {string}
[api] with a brief description so providers that read entry.api (e.g.,
providers/greenhouse.mjs) and TypeScript consumers won't error; update the
existing PortalEntry block (the typedef named PortalEntry) to include this
property alongside name, enabled, careers_url, provider, and transport.


/**
* Returned by `detect()` when a provider claims an entry. `url` is
* informational (used in logs); routing only checks for a non-null return.
*
* @typedef {object} DetectHit
* @property {string} url
*/

/**
 * Options accepted by the context's `fetchText` / `fetchJson` helpers.
 * `headers`, `method` and `body` are forwarded to the underlying `fetch`
 * call; `timeoutMs` drives the abort timer instead.
 *
 * @typedef {object} FetchOptions
 * @property {number} [timeoutMs] Milliseconds before the request is aborted. Default: 10000.
 * @property {Object<string,string>} [headers] Extra request headers; the transport supplies a default `user-agent`.
 * @property {string} [method] Default: 'GET'.
 * @property {(string|null)} [body] Default: null.
 */

/**
* What scan.mjs hands to provider.fetch(). For Phase A only `transport: 'http'`
* is implemented; the shape reserves room for future transports without
* breaking the contract.
*
* @typedef {object} Context
* @property {('http')} transport
* @property {(url: string, opts?: FetchOptions) => Promise<string>} fetchText
* @property {(url: string, opts?: FetchOptions) => Promise<unknown>} fetchJson
*/

/**
* The provider contract — the default export of every providers/*.mjs file
* (excluding _-prefixed shared helpers).
*
* @typedef {object} Provider
* @property {string} id Unique across all loaded providers.
* @property {((entry: PortalEntry) => (DetectHit | null))} [detect] Optional auto-detection.
* @property {(entry: PortalEntry, ctx: Context) => Promise<Job[]>} fetch Required.
*/

export {};
35 changes: 35 additions & 0 deletions providers/ashby.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// @ts-check
/** @typedef {import('./_types.js').Provider} Provider */

// Ashby provider — hits the public posting-api endpoint.
// Auto-detects from careers_url pattern `https://jobs.ashbyhq.com/<slug>`.

/**
 * Derives the Ashby posting-api URL from an entry's `careers_url`.
 *
 * The pattern is anchored to the start of the URL so that only the
 * `jobs.ashbyhq.com` host itself matches — not a look-alike host or an
 * unrelated URL that merely contains that text in its path or query string.
 * A missing scheme is tolerated.
 *
 * @param {{ careers_url?: string }} entry Portal entry.
 * @returns {(string|null)} API URL for the board slug, or null when
 *   `careers_url` is absent, not a string, or not an Ashby board URL.
 */
function resolveApiUrl(entry) {
  const careersUrl = entry.careers_url;
  if (typeof careersUrl !== 'string') return null;
  const match = careersUrl.trim().match(/^(?:https?:\/\/)?jobs\.ashbyhq\.com\/([^/?#]+)/i);
  if (!match) return null;
  return `https://api.ashbyhq.com/posting-api/job-board/${match[1]}?includeCompensation=true`;
}

/**
 * Ashby provider: lists postings from the board's posting-api endpoint and
 * maps each one onto the normalized job shape.
 *
 * @type {Provider}
 */
export default {
  id: 'ashby',

  // Claims the entry whenever an API URL can be derived from it.
  detect(entry) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) return null;
    return { url: endpoint };
  },

  // Fetches the board and normalizes its postings; a payload without a
  // `jobs` array yields an empty list.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`ashby: cannot derive API URL for ${entry.name}`);
    }
    const payload = await ctx.fetchJson(endpoint);
    if (!Array.isArray(payload?.jobs)) return [];
    return payload.jobs.map((posting) => ({
      title: posting.title || '',
      url: posting.jobUrl || '',
      company: entry.name,
      location: posting.location || '',
    }));
  },
};
36 changes: 36 additions & 0 deletions providers/greenhouse.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// @ts-check
/** @typedef {import('./_types.js').Provider} Provider */

// Greenhouse provider — hits the public boards-api JSON endpoint.
// Handles both explicit `api:` URLs and auto-detection from `careers_url`.

/**
 * Reports whether `value` is an http(s) URL whose host is `greenhouse.io` or
 * one of its subdomains.
 *
 * @param {string} value Candidate URL.
 * @returns {boolean}
 */
function isGreenhouseUrl(value) {
  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    return false;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return false;
  const host = parsed.hostname;
  return host === 'greenhouse.io' || host.endsWith('.greenhouse.io');
}

/**
 * Resolves the Greenhouse boards-api URL for an entry.
 *
 * An explicit `api` URL wins, but only when its parsed host belongs to
 * greenhouse.io — a substring check would also accept hosts such as
 * `greenhouse.attacker.tld`. Otherwise the board slug is taken from a
 * `careers_url` on `job-boards.greenhouse.io` / `job-boards.eu.greenhouse.io`;
 * the pattern is anchored so that text appearing later in an unrelated URL
 * cannot match. A missing scheme on `careers_url` is tolerated.
 *
 * @param {{ api?: string, careers_url?: string }} entry Portal entry.
 * @returns {(string|null)} API URL, or null when none can be derived.
 */
function resolveApiUrl(entry) {
  const { api, careers_url: careersUrl } = entry;
  if (typeof api === 'string' && isGreenhouseUrl(api)) return api;
  if (typeof careersUrl !== 'string') return null;
  const match = careersUrl.trim().match(/^(?:https?:\/\/)?job-boards(?:\.eu)?\.greenhouse\.io\/([^/?#]+)/i);
  if (match) return `https://boards-api.greenhouse.io/v1/boards/${match[1]}/jobs`;
  return null;
}

/**
 * Greenhouse provider: lists postings from the boards-api endpoint and maps
 * each one onto the normalized job shape.
 *
 * @type {Provider}
 */
export default {
  id: 'greenhouse',

  // Claims the entry whenever an API URL can be derived from it.
  detect(entry) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) return null;
    return { url: endpoint };
  },

  // Fetches the board and normalizes its postings; a payload without a
  // `jobs` array yields an empty list.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`greenhouse: cannot derive API URL for ${entry.name}`);
    }
    const payload = await ctx.fetchJson(endpoint);
    if (!Array.isArray(payload?.jobs)) return [];
    return payload.jobs.map((posting) => ({
      title: posting.title || '',
      url: posting.absolute_url || '',
      company: entry.name,
      location: posting.location?.name || '',
    }));
  },
};
35 changes: 35 additions & 0 deletions providers/lever.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// @ts-check
/** @typedef {import('./_types.js').Provider} Provider */

// Lever provider — hits the public postings endpoint.
// Auto-detects from careers_url pattern `https://jobs.lever.co/<slug>`.

/**
 * Derives the Lever postings API URL from an entry's `careers_url`.
 *
 * The pattern is anchored to the start of the URL so that only the
 * `jobs.lever.co` host itself matches — not a look-alike host or an
 * unrelated URL that merely contains that text in its path or query string.
 * A missing scheme is tolerated.
 *
 * @param {{ careers_url?: string }} entry Portal entry.
 * @returns {(string|null)} API URL for the company slug, or null when
 *   `careers_url` is absent, not a string, or not a Lever board URL.
 */
function resolveApiUrl(entry) {
  const careersUrl = entry.careers_url;
  if (typeof careersUrl !== 'string') return null;
  const match = careersUrl.trim().match(/^(?:https?:\/\/)?jobs\.lever\.co\/([^/?#]+)/i);
  if (!match) return null;
  return `https://api.lever.co/v0/postings/${match[1]}`;
}

/**
 * Lever provider: lists postings from the public postings endpoint and maps
 * each one onto the normalized job shape.
 *
 * @type {Provider}
 */
export default {
  id: 'lever',

  // Claims the entry whenever an API URL can be derived from it.
  detect(entry) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) return null;
    return { url: endpoint };
  },

  // Fetches the postings and normalizes them; a non-array payload yields an
  // empty list.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`lever: cannot derive API URL for ${entry.name}`);
    }
    const postings = await ctx.fetchJson(endpoint);
    if (Array.isArray(postings)) {
      return postings.map((posting) => ({
        title: posting.text || '',
        url: posting.hostedUrl || '',
        company: entry.name,
        location: posting.categories?.location || '',
      }));
    }
    return [];
  },
};
Loading