-
-
Notifications
You must be signed in to change notification settings - Fork 9.5k
feat(scan): add Workable, SmartRecruiters, Recruitee ATS parsers #653
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
jrojomartinez
wants to merge
7
commits into
santifer:main
Choose a base branch
from
jrojomartinez:feat/ats-parsers-workable-smartrecruiters-recruitee
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+776
−0
Open
Changes from 4 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
8b96b1f
feat(providers): add Workable provider
jrojomartinez 6448abd
feat(providers): add SmartRecruiters provider
jrojomartinez 148551c
feat(providers): add Recruitee provider
jrojomartinez a67e794
docs(portals): document Workable, SmartRecruiters, Recruitee URL patt…
jrojomartinez fcab2cc
fix(providers): defensive input normalization + edge cases
jrojomartinez 09b6f2b
fix(providers): strict URL parsing + ref validation (review)
jrojomartinez 434375b
fix(providers): validate parsed URLs + paginate SmartRecruiters (review)
jrojomartinez File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| // @ts-check | ||
| /** @typedef {import('./_types.js').Provider} Provider */ | ||
|
|
||
| // Recruitee provider — hits the public per-tenant offers API. | ||
| // Auto-detects from careers_url pattern `https://<slug>.recruitee.com`. | ||
| // Per-tenant subdomains are the variable part — SSRF defence uses a | ||
| // regex match on `<safe-slug>.recruitee.com` rather than a static | ||
| // allowlist. | ||
|
|
||
// A trusted host is exactly one slug label (lowercase alphanumerics and
// hyphens, not starting with a hyphen) directly under recruitee.com.
const RECRUITEE_HOST_RE = /^[a-z0-9][a-z0-9-]*\.recruitee\.com$/;

/**
 * Validate that `url` is an HTTPS URL whose hostname matches
 * `<slug>.recruitee.com`.
 *
 * @param {string} url - URL to validate.
 * @returns {string} The same `url` value that was passed in.
 * @throws {Error} When the URL cannot be parsed, is not HTTPS, or its
 *   hostname does not match RECRUITEE_HOST_RE.
 */
function assertRecruiteeUrl(url) {
  const target = (() => {
    try {
      return new URL(url);
    } catch {
      throw new Error(`recruitee: invalid URL: ${url}`);
    }
  })();

  if (target.protocol !== 'https:') {
    throw new Error(`recruitee: URL must use HTTPS: ${url}`);
  }

  const { hostname } = target;
  if (RECRUITEE_HOST_RE.test(hostname) === false) {
    throw new Error(`recruitee: untrusted hostname "${hostname}" — must match <slug>.recruitee.com`);
  }
  return url;
}
|
|
||
/**
 * Derive the Recruitee offers API endpoint from an entry's `careers_url`.
 *
 * The URL is parsed first and only its hostname is inspected, so a
 * `<slug>.recruitee.com` string appearing in the path or query of another
 * site is not mistaken for a Recruitee tenant. Parsing also lowercases the
 * hostname, so `Acme.recruitee.com` resolves to the `acme` tenant.
 *
 * @param {{ careers_url?: unknown } | null | undefined} entry
 * @returns {string | null} `https://<slug>.recruitee.com/api/offers/`, or
 *   `null` when `careers_url` is missing or is not a Recruitee tenant URL.
 */
function resolveApiUrl(entry) {
  const raw = typeof entry?.careers_url === 'string' ? entry.careers_url.trim() : '';
  if (!raw) return null;

  // Tolerate scheme-less values such as "acme.recruitee.com/jobs".
  const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(raw) ? raw : `https://${raw}`;

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    // An unparseable careers_url simply is not a Recruitee URL.
    return null;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;

  const match = /^([a-z0-9][a-z0-9-]*)\.recruitee\.com$/.exec(parsed.hostname);
  if (!match) return null;
  return `https://${match[1]}.recruitee.com/api/offers/`;
}
|
|
||
/**
 * Recruitee provider: resolves the per-tenant offers endpoint from the
 * entry and turns the JSON response into job records.
 *
 * @type {Provider}
 */
export default {
  id: 'recruitee',

  /**
   * @returns {{ url: string } | null} The resolved API URL wrapped in an
   *   object, or `null` when resolveApiUrl yields nothing for this entry.
   */
  detect(entry) {
    const url = resolveApiUrl(entry);
    if (!url) return null;
    return { url };
  },

  /**
   * Fetch and parse the offers for `entry`.
   * Throws when no API URL can be derived or the URL fails validation.
   */
  async fetch(entry, ctx) {
    const endpoint = resolveApiUrl(entry);
    if (!endpoint) {
      throw new Error(`recruitee: cannot derive API URL for ${entry.name}`);
    }
    assertRecruiteeUrl(endpoint);
    // NOTE(review): `redirect: 'error'` is forwarded to ctx.fetchJson;
    // presumably it makes the request reject on redirects — confirm against ctx.
    const payload = await ctx.fetchJson(endpoint, { redirect: 'error' });
    return parseRecruiteeResponse(payload, entry.name);
  },
};
|
|
||
/**
 * Parse a Recruitee /api/offers/ response. Exported for unit tests.
 *
 * Recruitee returns:
 *   { offers: [{ title, careers_url?, url?, city?, country?, remote?, location? }] }
 *
 * - url: prefer `careers_url`, fall back to `url`, empty string otherwise.
 * - location: prefer the explicit `location` field; else assemble from
 *   city/country, appending "Remote" when `remote` is truthy.
 * - Entries that are `null` or not objects are skipped instead of throwing,
 *   so one malformed offer does not discard the whole response.
 *
 * @param {any} json - Decoded response body; anything without an `offers`
 *   array yields an empty result.
 * @param {string} companyName - Value written into each job's `company`.
 * @returns {Array<{title: string, url: string, company: string, location: string}>}
 */
export function parseRecruiteeResponse(json, companyName) {
  const offers = json?.offers;
  if (!Array.isArray(offers)) return [];
  return offers
    .filter((offer) => offer !== null && typeof offer === 'object')
    .map((offer) => {
      const assembled = [offer.city, offer.country, offer.remote ? 'Remote' : '']
        .filter(Boolean)
        .join(', ');
      return {
        title: offer.title || '',
        url: offer.careers_url || offer.url || '',
        location: offer.location || assembled,
        company: companyName,
      };
    });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| // @ts-check | ||
| /** @typedef {import('./_types.js').Provider} Provider */ | ||
|
|
||
| // SmartRecruiters provider — hits the public postings API. | ||
| // Auto-detects from careers_url pattern | ||
| // `https://(careers|jobs).smartrecruiters.com/<slug>`. A tracked_companies | ||
| // entry can also set `provider: smartrecruiters` explicitly to bypass | ||
| // detection (useful when the public careers URL is a branded custom domain). | ||
|
|
||
// Static allowlist of hosts this provider may send requests to.
const ALLOWED_SMARTRECRUITERS_HOSTS = new Set(['api.smartrecruiters.com']);

/**
 * Validate that `url` is an HTTPS URL on an allowlisted SmartRecruiters host.
 *
 * @param {string} url - URL to validate.
 * @returns {string} The same `url` value that was passed in.
 * @throws {Error} When the URL cannot be parsed, is not HTTPS, or its
 *   hostname is not in ALLOWED_SMARTRECRUITERS_HOSTS.
 */
function assertSmartRecruitersUrl(url) {
  const target = (() => {
    try {
      return new URL(url);
    } catch {
      throw new Error(`smartrecruiters: invalid URL: ${url}`);
    }
  })();

  if (target.protocol !== 'https:') {
    throw new Error(`smartrecruiters: URL must use HTTPS: ${url}`);
  }

  const { hostname } = target;
  if (ALLOWED_SMARTRECRUITERS_HOSTS.has(hostname) === false) {
    throw new Error(`smartrecruiters: untrusted hostname "${hostname}" — must be one of: ${Array.from(ALLOWED_SMARTRECRUITERS_HOSTS).join(', ')}`);
  }
  return url;
}
|
|
||
/**
 * Derive the SmartRecruiters postings API URL from an entry's `careers_url`.
 *
 * The URL is parsed and the hostname compared exactly against
 * `careers.smartrecruiters.com` / `jobs.smartrecruiters.com`, so the host
 * appearing in another site's path or query (or as a suffix of a longer
 * label) is not matched. The company slug is the first non-empty path
 * segment as serialised by the URL parser, which keeps it percent-encoded
 * and free of `?`/`#`.
 *
 * @param {{ careers_url?: unknown } | null | undefined} entry
 * @returns {string | null} First-page postings URL, or `null` when
 *   `careers_url` is missing or is not a SmartRecruiters careers URL.
 */
function resolveApiUrl(entry) {
  const raw = typeof entry?.careers_url === 'string' ? entry.careers_url.trim() : '';
  if (!raw) return null;

  // Tolerate scheme-less values such as "jobs.smartrecruiters.com/Acme".
  const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(raw) ? raw : `https://${raw}`;

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    // An unparseable careers_url simply is not a SmartRecruiters URL.
    return null;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;

  const { hostname } = parsed;
  if (hostname !== 'careers.smartrecruiters.com' && hostname !== 'jobs.smartrecruiters.com') {
    return null;
  }

  const slug = parsed.pathname.split('/').find(Boolean);
  if (!slug) return null;
  return `https://api.smartrecruiters.com/v1/companies/${slug}/postings?limit=100&offset=0&status=PUBLIC`;
}
|
|
||
// Hard cap on pages requested per company so a misbehaving response can
// never cause an unbounded request loop.
const SMARTRECRUITERS_MAX_PAGES = 50;

/**
 * SmartRecruiters provider: resolves the postings endpoint from the entry
 * and collects every page of postings into job records.
 *
 * @type {Provider}
 */
export default {
  id: 'smartrecruiters',

  /**
   * @returns {{ url: string } | null} The resolved API URL wrapped in an
   *   object, or `null` when resolveApiUrl yields nothing for this entry.
   */
  detect(entry) {
    const apiUrl = resolveApiUrl(entry);
    return apiUrl ? { url: apiUrl } : null;
  },

  /**
   * Fetch all postings for `entry`, following `offset` pagination.
   *
   * The page size is read from the `limit` query parameter of the resolved
   * URL. Paging stops when a page returns fewer items than the page size,
   * when a numeric `totalFound` in the response has been reached, or when
   * SMARTRECRUITERS_MAX_PAGES pages have been requested.
   *
   * Throws when no API URL can be derived or a page URL fails validation.
   */
  async fetch(entry, ctx) {
    const apiUrl = resolveApiUrl(entry);
    if (!apiUrl) throw new Error(`smartrecruiters: cannot derive API URL for ${entry.name}`);
    assertSmartRecruitersUrl(apiUrl);

    const pageUrl = new URL(apiUrl);
    const pageSize = Number(pageUrl.searchParams.get('limit')) || 100;
    const jobs = [];

    for (let page = 0; page < SMARTRECRUITERS_MAX_PAGES; page += 1) {
      const offset = page * pageSize;
      pageUrl.searchParams.set('offset', String(offset));
      const href = pageUrl.href;
      // Re-validate every page URL before it is requested.
      assertSmartRecruitersUrl(href);
      // NOTE(review): `redirect: 'error'` is forwarded to ctx.fetchJson;
      // presumably it makes the request reject on redirects — confirm against ctx.
      const json = await ctx.fetchJson(href, { redirect: 'error' });
      const received = Array.isArray(json?.content) ? json.content.length : 0;
      jobs.push(...parseSmartRecruitersResponse(json, entry.name));

      if (received < pageSize) break;
      const total = json?.totalFound;
      if (typeof total === 'number' && offset + received >= total) break;
    }
    return jobs;
  },
};
|
|
||
/**
 * Turn a posting's `ref` into a public URL, or return `null` when `ref`
 * cannot be trusted (not a string, unparseable, non-HTTP(S), or on a host
 * outside smartrecruiters.com).
 *
 * - `api.smartrecruiters.com/v1/companies/<rest>` is rewritten to
 *   `https://jobs.smartrecruiters.com/<rest>`.
 * - Any other smartrecruiters.com URL is returned unchanged.
 *
 * @param {unknown} ref
 * @returns {string | null}
 */
function toPublicSmartRecruitersUrl(ref) {
  if (typeof ref !== 'string' || ref === '') return null;
  let parsed;
  try {
    parsed = new URL(ref);
  } catch {
    // Malformed ref: caller falls back to a synthesised URL.
    return null;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;

  const { hostname, pathname, search } = parsed;
  const apiPrefix = '/v1/companies/';
  if (hostname === 'api.smartrecruiters.com') {
    if (!pathname.startsWith(apiPrefix)) return null;
    return `https://jobs.smartrecruiters.com/${pathname.slice(apiPrefix.length)}${search}`;
  }
  if (hostname === 'smartrecruiters.com' || hostname.endsWith('.smartrecruiters.com')) {
    return ref;
  }
  return null;
}

/**
 * Parse a SmartRecruiters /postings response. Exported for unit tests.
 *
 * SmartRecruiters returns:
 *   { content: [{ id, name, ref, location: { fullLocation?, city?, region?, country?, remote? } }] }
 *
 * - location: prefer `fullLocation`; else assemble from city/region/country
 *   parts (skipping empties); append "Remote" when `location.remote` is truthy.
 * - url: a valid `ref` on `api.smartrecruiters.com/v1/companies/...` is
 *   rewritten to the public `jobs.smartrecruiters.com/...` form. When `ref`
 *   is missing or untrusted, a URL is synthesised from a company slug, the
 *   posting id and a slug of the posting name; without an id the url is ''.
 *   The company slug is `item.company.identifier` when the item carries one
 *   (assumed to be the tenant slug — TODO confirm against the API), else the
 *   lowercased, URL-encoded `companyName`.
 * - Items that are `null` or not objects are skipped instead of throwing.
 *
 * @param {any} json - Decoded response body; anything without a `content`
 *   array yields an empty result.
 * @param {string} companyName - Value written into each job's `company`.
 * @returns {Array<{title: string, url: string, company: string, location: string}>}
 */
export function parseSmartRecruitersResponse(json, companyName) {
  const items = json?.content;
  if (!Array.isArray(items)) return [];

  const nameSlug = encodeURIComponent(String(companyName ?? '').toLowerCase());

  return items
    .filter((item) => item !== null && typeof item === 'object')
    .map((item) => {
      const loc = item.location || {};
      const place = loc.fullLocation || [loc.city, loc.region, loc.country].filter(Boolean).join(', ');
      const location = [place, loc.remote ? 'Remote' : ''].filter(Boolean).join(', ');
      const title = item.name || '';

      let url = toPublicSmartRecruitersUrl(item.ref);
      if (url === null) {
        const hasId = item.id !== undefined && item.id !== null && item.id !== '';
        if (hasId) {
          const companySlug = item.company?.identifier
            ? encodeURIComponent(String(item.company.identifier))
            : nameSlug;
          const titleSlug = String(title)
            .toLowerCase()
            .replace(/[^a-z0-9]+/g, '-')
            .replace(/^-|-$/g, '');
          const tail = titleSlug ? `-${titleSlug}` : '';
          url = `https://jobs.smartrecruiters.com/${companySlug}/${encodeURIComponent(String(item.id))}${tail}`;
        } else {
          // No ref and no id: there is nothing meaningful to link to.
          url = '';
        }
      }

      return { title, url, location, company: companyName };
    });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| // @ts-check | ||
| /** @typedef {import('./_types.js').Provider} Provider */ | ||
|
|
||
| // Workable provider — hits the public markdown feed at /<slug>/jobs.md. | ||
| // Workable's documented JSON API requires an auth token; the markdown feed | ||
| // is the only no-auth public surface. Auto-detects from careers_url pattern | ||
| // `https://apply.workable.com/<slug>`. A tracked_companies entry can also | ||
| // set `provider: workable` explicitly to bypass detection. | ||
|
|
||
// Static allowlist of hosts this provider may send requests to.
const ALLOWED_WORKABLE_HOSTS = new Set(['apply.workable.com']);

/**
 * Validate that `url` is an HTTPS URL on an allowlisted Workable host.
 *
 * @param {string} url - URL to validate.
 * @returns {string} The same `url` value that was passed in.
 * @throws {Error} When the URL cannot be parsed, is not HTTPS, or its
 *   hostname is not in ALLOWED_WORKABLE_HOSTS.
 */
function assertWorkableUrl(url) {
  const target = (() => {
    try {
      return new URL(url);
    } catch {
      throw new Error(`workable: invalid URL: ${url}`);
    }
  })();

  if (target.protocol !== 'https:') {
    throw new Error(`workable: URL must use HTTPS: ${url}`);
  }

  const { hostname } = target;
  if (ALLOWED_WORKABLE_HOSTS.has(hostname) === false) {
    throw new Error(`workable: untrusted hostname "${hostname}" — must be one of: ${Array.from(ALLOWED_WORKABLE_HOSTS).join(', ')}`);
  }
  return url;
}
|
|
||
/**
 * Derive the Workable markdown feed URL from an entry's `careers_url`.
 *
 * The URL is parsed and the hostname compared exactly against
 * `apply.workable.com`, so that host appearing inside another site's path
 * or query is not matched. The account slug is the first non-empty path
 * segment as serialised by the URL parser.
 *
 * @param {{ careers_url?: unknown } | null | undefined} entry
 * @returns {string | null} `https://apply.workable.com/<slug>/jobs.md`, or
 *   `null` when `careers_url` is missing or is not a Workable careers URL.
 */
function resolveFeedUrl(entry) {
  const raw = typeof entry?.careers_url === 'string' ? entry.careers_url.trim() : '';
  if (!raw) return null;

  // Tolerate scheme-less values such as "apply.workable.com/acme".
  const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(raw) ? raw : `https://${raw}`;

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    // An unparseable careers_url simply is not a Workable URL.
    return null;
  }
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;
  if (parsed.hostname !== 'apply.workable.com') return null;

  const slug = parsed.pathname.split('/').find(Boolean);
  if (!slug) return null;
  return `https://apply.workable.com/${slug}/jobs.md`;
}
|
|
||
/**
 * Workable provider: resolves the markdown feed URL from the entry and
 * turns the feed text into job records.
 *
 * @type {Provider}
 */
export default {
  id: 'workable',

  /**
   * @returns {{ url: string } | null} The resolved feed URL wrapped in an
   *   object, or `null` when resolveFeedUrl yields nothing for this entry.
   */
  detect(entry) {
    const url = resolveFeedUrl(entry);
    if (!url) return null;
    return { url };
  },

  /**
   * Fetch and parse the markdown feed for `entry`.
   * Throws when no feed URL can be derived or the URL fails validation.
   */
  async fetch(entry, ctx) {
    const target = resolveFeedUrl(entry);
    if (!target) {
      throw new Error(`workable: cannot derive feed URL for ${entry.name}`);
    }
    assertWorkableUrl(target);
    // The host is validated above; `redirect: 'error'` is passed so that a
    // server-side redirect cannot move the request off the allowlisted host.
    const body = await ctx.fetchText(target, { redirect: 'error' });
    return parseWorkableMarkdown(body, entry.name);
  },
};
|
|
||
/**
 * Parse Workable's public markdown feed. Exported as a named export for unit
 * tests. The feed exposes a table:
 *   | Title | Department | Location | Type | Salary | Posted | Details |
 * where `Details` holds a markdown link
 *   [View](https://apply.workable.com/<slug>/jobs/view/<id>.md)
 *
 * Parsing rules:
 * - Only rows that start with `|` (after trimming) and contain `[View]` are
 *   considered; rows with fewer than 8 split cells are skipped.
 * - Cells are split on unescaped pipes, and `\|` inside a cell is unescaped
 *   to `|`, so a title containing an escaped pipe does not shift columns.
 * - The job URL is the target of the `[View](...)` link in the Details
 *   cell; a trailing `.md` is stripped. It is '' when no such link exists.
 *
 * @param {string} text — markdown body; non-strings yield an empty result
 * @param {string} companyName — value to write into job.company
 * @returns {Array<{title: string, url: string, company: string, location: string}>}
 */
export function parseWorkableMarkdown(text, companyName) {
  if (typeof text !== 'string') return [];
  const jobs = [];
  for (const rawLine of text.split(/\r?\n/)) {
    // Trim so indented rows and stray carriage returns are still recognised.
    const line = rawLine.trim();
    if (!line.startsWith('|') || !line.includes('[View]')) continue;

    // Cols: ['', title, dept, location, type, salary, posted, '[View](url.md)', '']
    const cols = line
      .split(/(?<!\\)\|/)
      .map((cell) => cell.trim().replace(/\\\|/g, '|'));
    if (cols.length < 8) continue;

    const title = cols[1];
    if (!title || title === 'Title') continue;

    // Anchor on the [View] label so other parenthesised text is ignored.
    const link = /\[View\]\(\s*([^)\s]+)[^)]*\)/.exec(cols[7]);
    let url = link ? link[1] : '';
    if (url.endsWith('.md')) url = url.slice(0, -3);

    jobs.push({ title, url, location: cols[3] || '', company: companyName });
  }
  return jobs;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.