Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions providers/recruitee.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// @ts-check
/** @typedef {import('./_types.js').Provider} Provider */

// Recruitee provider — hits the public per-tenant offers API.
// Auto-detects from careers_url pattern `https://<slug>.recruitee.com`.
// Per-tenant subdomains are the variable part — SSRF defence uses a
// regex match on `<safe-slug>.recruitee.com` rather than a static
// allowlist.

// One leading alphanumeric, then alphanumerics/hyphens, then `.recruitee.com`;
// anchored at both ends so nothing may precede or follow the tenant host.
const RECRUITEE_HOST_RE = /^[a-z0-9][a-z0-9-]*\.recruitee\.com$/;

/**
 * Guard run before any network call: accept only HTTPS URLs whose hostname
 * matches `RECRUITEE_HOST_RE`.
 *
 * @param {string} url — candidate URL
 * @returns {string} the same `url` value, unchanged, when it passes
 * @throws {Error} when `url` cannot be parsed, is not HTTPS, or has a
 *   hostname outside `<slug>.recruitee.com`
 */
function assertRecruiteeUrl(url) {
  let target = null;
  try {
    target = new URL(url);
  } catch {
    // Unparsable input is reported just below with a provider-prefixed message.
  }
  if (target === null) {
    throw new Error(`recruitee: invalid URL: ${url}`);
  }

  const { protocol, hostname } = target;
  if (protocol !== 'https:') {
    throw new Error(`recruitee: URL must use HTTPS: ${url}`);
  }
  if (RECRUITEE_HOST_RE.test(hostname)) {
    return url;
  }
  throw new Error(`recruitee: untrusted hostname "${hostname}" — must match <slug>.recruitee.com`);
}

/**
 * Derive the Recruitee offers API URL for a tracked-company entry.
 *
 * The careers URL is parsed and its *hostname* must be exactly
 * `<slug>.recruitee.com`. Matching the pattern as a substring of the whole
 * string would also accept look-alikes such as
 * `https://evil.example/?next=acme.recruitee.com`, and would mis-read
 * `Acme.recruitee.com` as slug `cme` because the uppercase letter is skipped.
 *
 * @param {{careers_url?: string}} entry — tracked-company entry
 * @returns {string | null} `https://<slug>.recruitee.com/api/offers/`, or
 *   null when the entry does not point at a Recruitee tenant
 */
function resolveApiUrl(entry) {
  const raw = entry?.careers_url;
  if (typeof raw !== 'string') return null;
  const candidate = raw.trim();
  if (candidate === '') return null;

  // Scheme-less values (`acme.recruitee.com/o/role`) are still accepted.
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(candidate);
  let parsed;
  try {
    parsed = new URL(hasScheme ? candidate : `https://${candidate}`);
  } catch {
    return null;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;

  // URL parsing lowercases the hostname, so the slug class stays lowercase-only.
  const match = /^([a-z0-9][a-z0-9-]*)\.recruitee\.com$/.exec(parsed.hostname);
  if (!match) return null;
  return `https://${match[1]}.recruitee.com/api/offers/`;
}

/** @type {Provider} */
export default {
  id: 'recruitee',

  // Claims the entry only when an API URL can be derived from it.
  detect(entry) {
    const derived = resolveApiUrl(entry);
    if (!derived) return null;
    return { url: derived };
  },

  // Derives the offers endpoint, validates it, then fetches and normalises it.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`recruitee: cannot derive API URL for ${entry.name}`);
    }
    // Throws on anything that is not an HTTPS <slug>.recruitee.com URL.
    assertRecruiteeUrl(endpoint);
    const payload = await ctx.fetchJson(endpoint, { redirect: 'error' });
    return parseRecruiteeResponse(payload, entry.name);
  },
};

/**
 * Parse a Recruitee /api/offers/ response. Exported for unit tests.
 *
 * Recruitee returns:
 *   { offers: [{ title, careers_url?, url?, city?, country?, remote?, location? }] }
 *
 * - url: prefer `careers_url`, fall back to `url`, empty string otherwise.
 * - location: prefer the explicit `location` field; else assemble from
 *   city/country, appending "Remote" when `remote` is true.
 * - Entries of `offers` that are not objects (null, strings, numbers) are
 *   skipped instead of throwing or producing an empty row.
 *
 * @param {any} json — decoded response body; anything without an `offers`
 *   array yields an empty result
 * @param {string} companyName — value to write into job.company
 * @returns {Array<{title: string, url: string, company: string, location: string}>}
 */
export function parseRecruiteeResponse(json, companyName) {
  const offers = json?.offers;
  if (!Array.isArray(offers)) return [];
  return offers
    .filter(offer => offer !== null && typeof offer === 'object')
    .map(offer => {
      // Only used when the offer carries no explicit `location`.
      const assembled = [offer.city, offer.country, offer.remote ? 'Remote' : '']
        .filter(Boolean)
        .join(', ');
      return {
        title: offer.title || '',
        url: offer.careers_url || offer.url || '',
        location: offer.location || assembled,
        company: companyName,
      };
    });
}
81 changes: 81 additions & 0 deletions providers/smartrecruiters.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// @ts-check
/** @typedef {import('./_types.js').Provider} Provider */

// SmartRecruiters provider — hits the public postings API.
// Auto-detects from careers_url pattern
// `https://(careers|jobs).smartrecruiters.com/<slug>`. A tracked_companies
// entry can also set `provider: smartrecruiters` explicitly to bypass
// detection (useful when the public careers URL is a branded custom domain).

// The only host this provider is allowed to request.
const ALLOWED_SMARTRECRUITERS_HOSTS = new Set(['api.smartrecruiters.com']);

/**
 * Guard run before any network call: accept only HTTPS URLs whose hostname
 * is a member of `ALLOWED_SMARTRECRUITERS_HOSTS`.
 *
 * @param {string} url — candidate URL
 * @returns {string} the same `url` value, unchanged, when it passes
 * @throws {Error} when `url` cannot be parsed, is not HTTPS, or targets a
 *   host outside the allowlist
 */
function assertSmartRecruitersUrl(url) {
  const target = (() => {
    try {
      return new URL(url);
    } catch {
      throw new Error(`smartrecruiters: invalid URL: ${url}`);
    }
  })();

  if (target.protocol !== 'https:') {
    throw new Error(`smartrecruiters: URL must use HTTPS: ${url}`);
  }

  const { hostname } = target;
  if (ALLOWED_SMARTRECRUITERS_HOSTS.has(hostname)) {
    return url;
  }
  throw new Error(`smartrecruiters: untrusted hostname "${hostname}" — must be one of: ${[...ALLOWED_SMARTRECRUITERS_HOSTS].join(', ')}`);
}

/**
 * Derive the SmartRecruiters postings API URL for a tracked-company entry.
 *
 * The careers URL is parsed; its hostname must be exactly
 * `careers.smartrecruiters.com` or `jobs.smartrecruiters.com`, and the
 * company slug is the first path segment. Working on the parsed URL (rather
 * than a substring match over the raw string) rejects look-alikes such as
 * `https://evil.example/jobs.smartrecruiters.com/x`, and means the slug is
 * taken from a path the URL parser has already normalised, so a raw `..`
 * never reaches the API path.
 *
 * @param {{careers_url?: string}} entry — tracked-company entry
 * @returns {string | null} the postings API URL, or null when the entry does
 *   not point at a SmartRecruiters careers page with a company slug
 */
function resolveApiUrl(entry) {
  const raw = entry?.careers_url;
  if (typeof raw !== 'string') return null;
  const candidate = raw.trim();
  if (candidate === '') return null;

  // Scheme-less values (`jobs.smartrecruiters.com/Acme`) are still accepted.
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(candidate);
  let parsed;
  try {
    parsed = new URL(hasScheme ? candidate : `https://${candidate}`);
  } catch {
    return null;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;

  const { hostname, pathname } = parsed;
  if (hostname !== 'careers.smartrecruiters.com' && hostname !== 'jobs.smartrecruiters.com') {
    return null;
  }

  // pathname always starts with '/', so index 1 is the first segment.
  const slug = pathname.split('/')[1];
  if (!slug) return null;
  return `https://api.smartrecruiters.com/v1/companies/${slug}/postings?limit=100&offset=0&status=PUBLIC`;
}

/** @type {Provider} */
export default {
  id: 'smartrecruiters',

  // Claims the entry only when an API URL can be derived from it.
  detect(entry) {
    const derived = resolveApiUrl(entry);
    if (!derived) return null;
    return { url: derived };
  },

  // Derives the postings endpoint, validates it, then fetches and normalises it.
  // NOTE(review): the URL is always derived from entry.careers_url, so an
  // entry with a branded custom domain throws here even when `provider:` is
  // set explicitly — confirm how such entries are meant to supply the slug.
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`smartrecruiters: cannot derive API URL for ${entry.name}`);
    }
    // Throws on anything that is not an HTTPS allowlisted-host URL.
    assertSmartRecruitersUrl(endpoint);
    const payload = await ctx.fetchJson(endpoint, { redirect: 'error' });
    return parseSmartRecruitersResponse(payload, entry.name);
  },
};

/**
 * Parse a SmartRecruiters /postings response. Exported for unit tests.
 *
 * SmartRecruiters returns:
 *   { content: [{ id, name, ref, location: { fullLocation?, city?, region?, country?, remote? } }] }
 *
 * - location: prefer `fullLocation`; else assemble from city/region/country
 *   parts (skipping empties); append "Remote" when `location.remote` is true.
 * - url: when `ref` is a non-empty string, its
 *   `api.smartrecruiters.com/v1/companies/` prefix is rewritten to
 *   `jobs.smartrecruiters.com/`.
 *   Otherwise a URL is synthesised as
 *   `https://jobs.smartrecruiters.com/<company>/<id>[-<title-slug>]`, where
 *   `<company>` is the lowercased, percent-encoded `companyName`. When the
 *   company name or the posting id is missing the url is the empty string
 *   rather than a link containing `undefined` or an empty path segment.
 *
 * @param {any} json — decoded response body; anything without a `content`
 *   array yields an empty result
 * @param {string} companyName — value to write into job.company
 * @returns {Array<{title: string, url: string, company: string, location: string}>}
 */
export function parseSmartRecruitersResponse(json, companyName) {
  const items = json?.content;
  if (!Array.isArray(items)) return [];

  // Display names may contain spaces or other characters unsafe in a path.
  const companySegment = encodeURIComponent(String(companyName ?? '').trim().toLowerCase());

  return items.map(j => {
    const loc = j.location || {};
    const place = loc.fullLocation || [loc.city, loc.region, loc.country].filter(Boolean).join(', ');
    const location = [place, loc.remote ? 'Remote' : ''].filter(Boolean).join(', ');

    let url = '';
    if (typeof j.ref === 'string' && j.ref !== '') {
      url = j.ref.replace('api.smartrecruiters.com/v1/companies/', 'jobs.smartrecruiters.com/');
    } else if (companySegment !== '' && j.id != null && j.id !== '') {
      const postingId = encodeURIComponent(String(j.id));
      const titleSlug = (typeof j.name === 'string' ? j.name : '')
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-|-$/g, '');
      const leaf = titleSlug ? `${postingId}-${titleSlug}` : postingId;
      url = `https://jobs.smartrecruiters.com/${companySegment}/${leaf}`;
    }

    return { title: j.name || '', url, location, company: companyName };
  });
}
81 changes: 81 additions & 0 deletions providers/workable.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// @ts-check
/** @typedef {import('./_types.js').Provider} Provider */

// Workable provider — hits the public markdown feed at /<slug>/jobs.md.
// Workable's documented JSON API requires an auth token; the markdown feed
// is the only no-auth public surface. Auto-detects from careers_url pattern
// `https://apply.workable.com/<slug>`. A tracked_companies entry can also
// set `provider: workable` explicitly to bypass detection.

// The only host this provider is allowed to request.
const ALLOWED_WORKABLE_HOSTS = new Set(['apply.workable.com']);

/**
 * Guard run before any network call: accept only HTTPS URLs whose hostname
 * is a member of `ALLOWED_WORKABLE_HOSTS`.
 *
 * @param {string} url — candidate URL
 * @returns {string} the same `url` value, unchanged, when it passes
 * @throws {Error} when `url` cannot be parsed, is not HTTPS, or targets a
 *   host outside the allowlist
 */
function assertWorkableUrl(url) {
  let target = null;
  try {
    target = new URL(url);
  } catch {
    // Unparsable input is reported just below with a provider-prefixed message.
  }
  if (target === null) {
    throw new Error(`workable: invalid URL: ${url}`);
  }

  const { protocol, hostname } = target;
  if (protocol !== 'https:') {
    throw new Error(`workable: URL must use HTTPS: ${url}`);
  }
  if (ALLOWED_WORKABLE_HOSTS.has(hostname)) {
    return url;
  }
  throw new Error(`workable: untrusted hostname "${hostname}" — must be one of: ${[...ALLOWED_WORKABLE_HOSTS].join(', ')}`);
}

/**
 * Derive the Workable markdown feed URL for a tracked-company entry.
 *
 * The careers URL is parsed; its hostname must be exactly
 * `apply.workable.com` and the account slug is the first path segment.
 * Working on the parsed URL (rather than a substring match over the raw
 * string) rejects look-alikes such as
 * `https://evil.example/apply.workable.com/acme` and accepts hosts written
 * in mixed case.
 *
 * @param {{careers_url?: string}} entry — tracked-company entry
 * @returns {string | null} `https://apply.workable.com/<slug>/jobs.md`, or
 *   null when the entry does not point at a Workable careers page
 */
function resolveFeedUrl(entry) {
  const raw = entry?.careers_url;
  if (typeof raw !== 'string') return null;
  const candidate = raw.trim();
  if (candidate === '') return null;

  // Scheme-less values (`apply.workable.com/acme`) are still accepted.
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(candidate);
  let parsed;
  try {
    parsed = new URL(hasScheme ? candidate : `https://${candidate}`);
  } catch {
    return null;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;
  if (parsed.hostname !== 'apply.workable.com') return null;

  // pathname always starts with '/', so index 1 is the first segment.
  const slug = parsed.pathname.split('/')[1];
  if (!slug) return null;
  return `https://apply.workable.com/${slug}/jobs.md`;
}

/** @type {Provider} */
export default {
  id: 'workable',

  // Claims the entry only when a feed URL can be derived from it.
  detect(entry) {
    const derived = resolveFeedUrl(entry);
    if (!derived) return null;
    return { url: derived };
  },

  // Derives the markdown feed URL, validates it, then fetches and parses it.
  // NOTE(review): the URL is always derived from entry.careers_url, so an
  // entry with a branded custom domain throws here even when `provider:` is
  // set explicitly — confirm how such entries are meant to supply the slug.
  async fetch(entry, ctx) {
    const feed = resolveFeedUrl(entry);
    if (!feed) {
      throw new Error(`workable: cannot derive feed URL for ${entry.name}`);
    }
    // Host allowlist check first; redirect:'error' then keeps the request
    // from being bounced to a host that was never checked.
    assertWorkableUrl(feed);
    const body = await ctx.fetchText(feed, { redirect: 'error' });
    return parseWorkableMarkdown(body, entry.name);
  },
};

/**
 * Parse Workable's public markdown feed. Exported as a named export for unit
 * tests. The feed exposes a table:
 *   | Title | Department | Location | Type | Salary | Posted | Details |
 * where `Details` holds a markdown link
 *   [View](https://apply.workable.com/<slug>/jobs/view/<id>.md)
 *
 * Only rows that start with `|` (after trimming surrounding whitespace, so
 * indented rows and CRLF line endings are tolerated) and contain `[View]`
 * are considered. Rows with fewer than seven cells, an empty title, or the
 * literal header title `Title` are skipped. A trailing `.md` is stripped
 * from the link target; a row whose Details cell has no `(...)` target gets
 * an empty url.
 *
 * @param {string} text — markdown body; any non-string yields an empty result
 * @param {string} companyName — value to write into job.company
 * @returns {Array<{title: string, url: string, company: string, location: string}>}
 */
export function parseWorkableMarkdown(text, companyName) {
  if (typeof text !== 'string') return [];
  const jobs = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line.startsWith('|') || !line.includes('[View]')) continue;

    // Cells: ['', title, dept, location, type, salary, posted, '[View](url.md)', '']
    const cells = line.split('|').map(cell => cell.trim());
    if (cells.length < 8) continue;

    const title = cells[1];
    if (!title || title === 'Title') continue;

    const location = cells[3] || '';
    const link = /\(([^)]+)\)/.exec(cells[7]);
    let url = link ? link[1] : '';
    if (url.endsWith('.md')) url = url.slice(0, -3);

    jobs.push({ title, url, location, company: companyName });
  }
  return jobs;
}
15 changes: 15 additions & 0 deletions templates/portals.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,21 @@ search_queries:
# provider's `id`.
# transport: http — reserved for future transports. Defaults to http.

# ── Provider auto-detection ───────────────────────────────────────
# scan.mjs auto-loads everything in providers/*.mjs and tries each
# provider's detect() in order. URL patterns recognized:
#
# greenhouse job-boards(.eu)?.greenhouse.io/<slug> (or api: field)
# ashby jobs.ashbyhq.com/<slug>
# lever jobs.lever.co/<slug>
# workable apply.workable.com/<slug>
# smartrecruiters (careers|jobs).smartrecruiters.com/<slug>
# recruitee <slug>.recruitee.com
#
# When the public careers URL is a branded custom domain (e.g.
# careers.adyen.com), set `provider: smartrecruiters` explicitly to
# bypass detect(). The `provider:` field wins over auto-detection.

tracked_companies:

# -- AI Labs & LLM providers --
Expand Down
Loading