Skip to content

Commit f88f2a2

Browse files
committed
feat(scan): add Workable, SmartRecruiters, Recruitee ATS parsers
The zero-token tier only detected Greenhouse, Ashby, and Lever. Many EU-native scaleups run on Workable, SmartRecruiters, or Recruitee — for those, detectApi returned null and the company was silently skipped. SmartRecruiters and Recruitee expose JSON APIs and reuse fetchJson. Workable's only no-auth public feed is a markdown document at apply.workable.com/<slug>/jobs.md (its JSON API needs a token, and the legacy unauthenticated endpoint 404s), so this adds a small fetchText helper and main() dispatches workable -> fetchText. Strictly additive — existing parsers untouched; a config with none of these providers sees no behaviour change. Refs #651
1 parent d692647 commit f88f2a2

2 files changed

Lines changed: 106 additions & 3 deletions

File tree

scan.mjs

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,33 @@ function detectApi(company) {
6060
};
6161
}
6262

63+
// Workable (public markdown feed at /<slug>/jobs.md — no auth required)
64+
const workableMatch = url.match(/apply\.workable\.com\/([^/?#]+)/);
65+
if (workableMatch) {
66+
return {
67+
type: 'workable',
68+
url: `https://apply.workable.com/${workableMatch[1]}/jobs.md`,
69+
};
70+
}
71+
72+
// SmartRecruiters
73+
const smartRecruitersMatch = url.match(/(?:careers|jobs)\.smartrecruiters\.com\/([^/?#]+)/);
74+
if (smartRecruitersMatch) {
75+
return {
76+
type: 'smartrecruiters',
77+
url: `https://api.smartrecruiters.com/v1/companies/${smartRecruitersMatch[1]}/postings?limit=100&offset=0&status=PUBLIC`,
78+
};
79+
}
80+
81+
// Recruitee
82+
const recruiteeMatch = url.match(/([a-z0-9-]+)\.recruitee\.com/);
83+
if (recruiteeMatch) {
84+
return {
85+
type: 'recruitee',
86+
url: `https://${recruiteeMatch[1]}.recruitee.com/api/offers/`,
87+
};
88+
}
89+
6390
// Greenhouse EU boards
6491
const ghEuMatch = url.match(/job-boards(?:\.eu)?\.greenhouse\.io\/([^/?#]+)/);
6592
if (ghEuMatch && !company.api) {
@@ -104,7 +131,63 @@ function parseLever(json, companyName) {
104131
}));
105132
}
106133

107-
const PARSERS = { greenhouse: parseGreenhouse, ashby: parseAshby, lever: parseLever };
134+
/**
 * Parse Workable's public markdown job feed (`/<slug>/jobs.md`) into job records.
 *
 * Expected row format:
 *   | Title | Department | Location | Type | Salary | Posted | Details |
 * where Details holds a markdown link `[View](https://apply.workable.com/<slug>/jobs/view/<id>.md)`.
 *
 * @param {string} text - Raw markdown body of the feed.
 * @param {string} companyName - Company name copied onto every returned job.
 * @returns {Array<{title: string, url: string, location: string, company: string}>}
 *   One entry per data row; an empty array when `text` is not a string.
 */
function parseWorkable(text, companyName) {
  if (typeof text !== 'string') return [];
  const jobs = [];
  // Split on LF or CRLF so a stray '\r' never ends up inside the last cell.
  for (const rawLine of text.split(/\r?\n/)) {
    // Tolerate indentation in front of the table row.
    const line = rawLine.trim();
    // Skip non-data lines: must be a table row with a `[View](...)` link.
    if (!line.startsWith('|') || !line.includes('[View]')) continue;
    // Split on unescaped pipes only: markdown escapes a literal pipe inside a
    // cell as `\|`, and splitting on it would shift every column index.
    const cols = line
      .split(/(?<!\\)\|/)
      .map(c => c.trim().replaceAll('\\|', '|'));
    // Cols: [empty, title, dept, location, type, salary, posted, details, empty]
    if (cols.length < 8) continue;
    const title = cols[1];
    if (!title || title === 'Title') continue;
    const location = cols[3];
    // Take the target of the [View] link itself rather than the first
    // parenthesised group, and ignore an optional markdown link title.
    const linkMatch = line.match(/\[View\]\(\s*([^)\s]+)[^)]*\)/);
    let url = linkMatch ? linkMatch[1] : '';
    // Strip the .md suffix to get the human-readable URL.
    if (url.endsWith('.md')) url = url.slice(0, -3);
    jobs.push({ title, url, location, company: companyName });
  }
  return jobs;
}
158+
159+
/**
 * Convert a SmartRecruiters postings response into job records.
 *
 * @param {{content?: Array<object>}|null|undefined} json - Parsed response body;
 *   postings are read from `json.content`.
 * @param {string} companyName - Company display name copied onto every job; also
 *   the last-resort source for the company segment of the job URL.
 * @returns {Array<{title: string, url: string, location: string, company: string}>}
 *   One entry per posting; an empty array when `content` is missing or not an array.
 */
function parseSmartRecruiters(json, companyName) {
  const postings = Array.isArray(json?.content) ? json.content : [];
  return postings.map(j => {
    const loc = j.location || {};
    const fullLocation = loc.fullLocation || [loc.city, loc.region, loc.country].filter(Boolean).join(', ');
    const remote = loc.remote ? 'Remote' : '';
    const location = [fullLocation, remote].filter(Boolean).join(', ');
    const slugified = (j.name || '').toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '');

    // `ref` is the API resource URL (.../v1/companies/<company>/postings/<id>).
    // Swapping only the host prefix would leave the API-only `/postings/`
    // segment in the link, so pull the company and id out of it instead.
    // NOTE(review): assumes public job pages live at /<company>/<id>-<slug>,
    // matching the shape the original fallback built — confirm against the
    // SmartRecruiters posting API docs.
    const refMatch = typeof j.ref === 'string'
      ? j.ref.match(/\/companies\/([^/?#]+)\/postings\/([^/?#]+)/)
      : null;
    // Prefer the identifier the API reports; the display name is only a last
    // resort and must be URL-encoded (it may contain spaces) and may be absent.
    const companySlug = j.company?.identifier
      || refMatch?.[1]
      || encodeURIComponent(String(companyName ?? '').toLowerCase());
    const postingId = j.id || refMatch?.[2] || '';
    const url = companySlug && postingId
      ? `https://jobs.smartrecruiters.com/${companySlug}/${postingId}${slugified ? `-${slugified}` : ''}`
      : '';

    return {
      title: j.name || '',
      url,
      location,
      company: companyName,
    };
  });
}
174+
175+
/**
 * Convert a Recruitee offers response into job records.
 *
 * @param {{offers?: Array<object>}|null|undefined} json - Parsed response body;
 *   offers are read from `json.offers`.
 * @param {string} companyName - Company name copied onto every returned job.
 * @returns {Array<{title: string, url: string, location: string, company: string}>}
 *   One entry per offer; an empty array when `offers` is missing or not an array.
 */
function parseRecruitee(json, companyName) {
  const offers = Array.isArray(json?.offers) ? json.offers : [];
  return offers.map(j => {
    // Prefer the ready-made location string; otherwise assemble city + country.
    const place = typeof j.location === 'string' && j.location
      ? j.location
      : [j.city, j.country].filter(Boolean).join(', ');
    // Surface the remote flag even when a location string is present (the
    // SmartRecruiters parser does the same), without repeating "Remote".
    const remote = j.remote && !/remote/i.test(place) ? 'Remote' : '';
    const location = [place, remote].filter(Boolean).join(', ');
    return {
      title: j.title || '',
      url: j.careers_url || j.url || '',
      location,
      company: companyName,
    };
  });
}
189+
190+
// Dispatch table: ATS type (as returned by detectApi) -> response parser.
const PARSERS = {
  greenhouse: parseGreenhouse,
  ashby: parseAshby,
  lever: parseLever,
  workable: parseWorkable,
  smartrecruiters: parseSmartRecruiters,
  recruitee: parseRecruitee,
};
108191

109192
// ── Fetch with timeout ──────────────────────────────────────────────
110193

@@ -120,6 +203,18 @@ async function fetchJson(url) {
120203
}
121204
}
122205

206+
/**
 * Fetch a URL and resolve with the response body as text.
 *
 * The request is aborted once FETCH_TIMEOUT_MS milliseconds have elapsed.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status>` when the response status is not ok; fetch
 *   and abort errors propagate unchanged.
 */
async function fetchText(url) {
  const abort = new AbortController();
  const timeoutId = setTimeout(() => {
    abort.abort();
  }, FETCH_TIMEOUT_MS);
  try {
    const response = await fetch(url, { signal: abort.signal });
    if (response.ok) {
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    // Always cancel the pending timeout, whether the request succeeded or failed.
    clearTimeout(timeoutId);
  }
}
217+
123218
// ── Title filter ────────────────────────────────────────────────────
124219

125220
function buildTitleFilter(titleFilter) {
@@ -319,8 +414,8 @@ async function main() {
319414
const tasks = targets.map(company => async () => {
320415
const { type, url } = company._api;
321416
try {
322-
const json = await fetchJson(url);
323-
const jobs = PARSERS[type](json, company.name);
417+
const data = type === 'workable' ? await fetchText(url) : await fetchJson(url);
418+
const jobs = PARSERS[type](data, company.name);
324419
totalFound += jobs.length;
325420

326421
for (const job of jobs) {

templates/portals.example.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,14 @@ search_queries:
315315
# Companies whose career pages are checked directly.
316316
# scan_method: playwright (default), websearch, greenhouse_api
317317
# For Greenhouse companies, add api: field for faster structured JSON access.
318+
#
319+
# The scan.mjs zero-token tier auto-detects the ATS from careers_url. Patterns:
320+
# Greenhouse job-boards(.eu).greenhouse.io/<slug> (or api: field)
321+
# Ashby jobs.ashbyhq.com/<slug>
322+
# Lever jobs.lever.co/<slug>
323+
# Workable apply.workable.com/<slug>
324+
# SmartRecruiters careers.smartrecruiters.com/<slug> or jobs.smartrecruiters.com/<slug>
325+
# Recruitee <slug>.recruitee.com
318326

319327
tracked_companies:
320328

0 commit comments

Comments
 (0)