Skip to content

Commit e2e04a3

Browse files
committed
feat(scan): add optional always_allow tier to location_filter
Adds an optional `always_allow` list checked BEFORE `block`. A location matching `always_allow` passes regardless of `block`. Fully backward compatible: a config without `always_allow:` behaves exactly as today.

Motivation: the current filter checks `block` first and absolutely, so a multi-location posting like "Remote, Belgium or France" is dropped the moment "france" is in `block` — even though Belgium is an acceptable location in the same string. `always_allow` is the home-region escape hatch.

Worked example with always_allow: ["belgium"], block: ["france"]:
- "Remote, Belgium" → pass (unchanged)
- "Remote, Belgium or France" → PASS (was REJECT)
- "Remote, France" → reject (unchanged)

Also:
- Adds `export` to buildLocationFilter and gates main() behind an import.meta.url check so the function is importable from tests without running scan.mjs as a script.
- Adds test-all.mjs §11 covering the 6 boundary cases (home-region match, always_allow beats block, block still rejects when no always_allow hit, empty location, case-insensitivity, backward compatibility when always_allow is omitted).
- templates/portals.example.yml documents the commented `always_allow:` example with an ordering note.

Refs #650
1 parent 5d1f3a3 commit e2e04a3

3 files changed

Lines changed: 119 additions & 13 deletions

File tree

scan.mjs

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,21 +117,34 @@ function buildTitleFilter(titleFilter) {
117117

118118
// ── Location filter ─────────────────────────────────────────────────
119119
// Optional. If `location_filter` is absent from portals.yml, all locations pass.
120-
// Semantics:
120+
// Semantics (case-insensitive substring, in this order):
121121
// - Empty location string → pass (don't penalize missing data)
122-
// - `block` matches → reject (takes precedence over allow)
122+
// - `always_allow` matches → pass (takes precedence over `block` — lets a
123+
// multi-location string like "Remote, Belgium or France" through because
124+
// the home region is an option, even though "france" is blocked)
125+
// - `block` matches → reject
123126
// - `allow` empty → pass (already cleared block)
124127
// - `allow` non-empty → must match at least one keyword
125-
// All matches are case-insensitive substring.
126128

127-
function buildLocationFilter(locationFilter) {
129+
// Normalize a keyword list from portals.yml: tolerates a bare string
130+
// (wrapped to a 1-item array), null/undefined (→ []), and non-string
131+
// entries (filtered out). All survivors are lowercased.
132+
/**
 * Normalize a keyword list read from portals.yml into lowercase strings.
 *
 * @param {unknown} value - Raw config value: an array, a bare string, or
 *   null/undefined.
 * @returns {string[]} Lowercased keywords. A non-array value is wrapped into a
 *   one-item array; non-string entries and empty strings are dropped.
 */
function normalizeKeywordList(value) {
  if (value == null) return [];
  const entries = Array.isArray(value) ? value : [value];
  return entries
    // Keywords are matched with `String.prototype.includes`, and every string
    // includes ''. An empty keyword (e.g. `- ""` in the YAML) would therefore
    // match every location, so it is discarded here. Keywords are NOT trimmed:
    // padded entries such as " us " may be intentional.
    .filter((keyword) => typeof keyword === 'string' && keyword !== '')
    .map((keyword) => keyword.toLowerCase());
}
137+
138+
export function buildLocationFilter(locationFilter) {
128139
if (!locationFilter) return () => true;
129-
const allow = (locationFilter.allow || []).map(k => k.toLowerCase());
130-
const block = (locationFilter.block || []).map(k => k.toLowerCase());
140+
const alwaysAllow = normalizeKeywordList(locationFilter.always_allow);
141+
const allow = normalizeKeywordList(locationFilter.allow);
142+
const block = normalizeKeywordList(locationFilter.block);
131143

132144
return (location) => {
133145
if (!location) return true;
134146
const lower = location.toLowerCase();
147+
if (alwaysAllow.length > 0 && alwaysAllow.some(k => lower.includes(k))) return true;
135148
if (block.length > 0 && block.some(k => lower.includes(k))) return false;
136149
if (allow.length === 0) return true;
137150
return allow.some(k => lower.includes(k));
@@ -394,7 +407,11 @@ async function main() {
394407
console.log('→ Share results and get help: https://discord.gg/8pRpHETxa4');
395408
}
396409

397-
main().catch(err => {
398-
console.error('Fatal:', err.message);
399-
process.exit(1);
400-
});
410+
// Only run main() when invoked directly (`node scan.mjs`), not when imported by tests.
411+
// `|| ''` guards the case where Node is invoked without a script arg (e.g. `node -e`).
412+
if (pathToFileURL(process.argv[1] || '').href === import.meta.url) {
  // Report the failure message and exit non-zero so callers of the script see the error.
  const reportFatal = (error) => {
    console.error('Fatal:', error.message);
    process.exit(1);
  };

  main().catch(reportFatal);
}

templates/portals.example.yml

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,26 @@
2424
# Filter scanned jobs by location. Applied AFTER title filter, BEFORE dedup.
2525
# If this entire block is absent, all locations pass (current default behavior).
2626
#
27-
# Semantics:
27+
# Semantics (case-insensitive substring, in this order):
2828
# - Empty location string on a job → pass (don't penalize missing data)
29-
# - Any `block` keyword present → reject (takes precedence over allow)
29+
# - Any `always_allow` keyword present → pass (takes precedence over `block`)
30+
# - Any `block` keyword present → reject
3031
# - `allow` empty → pass (already cleared block)
3132
# - `allow` non-empty → must match at least one keyword
32-
# All matches are case-insensitive substring.
33+
#
34+
# `always_allow` is optional. It rescues multi-location postings that name your
35+
# home region: with always_allow ["Belgium"] and block ["France"], a job listed
36+
# "Remote, Belgium or France" passes (Belgium wins), while "Remote, France" is
37+
# still rejected. Omit always_allow entirely and the filter behaves as before.
3338
#
3439
# Example below targets US-based remote + a couple of US metros, blocking
3540
# common foreign hubs. Customize to your geography.
3641

3742
# location_filter:
43+
# # always_allow is checked BEFORE block — keep this list to your home region only
44+
# always_allow:
45+
# - "Belgium" # (replace with your home region)
46+
# - "Brussels"
3847
# allow:
3948
# - "Remote"
4049
# - "United States"

test-all.mjs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,86 @@ if (fileExists('VERSION')) {
314314
fail('VERSION file missing');
315315
}
316316

317+
// ── 11. LOCATION FILTER — always_allow tier ───────────────────────
318+
319+
console.log('\n11. Location filter — always_allow tier');
320+
321+
// Section 11: dynamically imports buildLocationFilter from scan.mjs and checks
// the always_allow tier plus keyword-list normalization. Every case reports
// its outcome through pass()/fail(); anything thrown inside the section
// (including a failed import) is reported once by the catch at the bottom.
try {
322+
const { buildLocationFilter } = await import(pathToFileURL(join(ROOT, 'scan.mjs')).href);
323+
324+
// Shared fixture for cases 1–5: all three tiers (always_allow, allow, block) populated.
const filter = buildLocationFilter({
325+
always_allow: ['belgium', 'brussels'],
326+
allow: ['europe', 'emea', 'remote'],
327+
block: ['france', 'germany', 'united states'],
328+
});
329+
330+
// Case 1: home-region passes regardless of other text
331+
if (filter('Brussels, Belgium') === true) pass('Brussels, Belgium passes (always_allow hit)');
332+
else fail('Brussels, Belgium should pass');
333+
334+
// Case 2: always_allow wins over block (THE motivating case for this tier)
335+
if (filter('Remote, Belgium or France') === true) pass('Remote, Belgium or France passes (always_allow beats block)');
336+
else fail('Remote, Belgium or France should pass — always_allow must win over block');
337+
338+
// Case 3: no always_allow hit, block still rejects
339+
if (filter('Paris, France') === false) pass('Paris, France is rejected (block still applies)');
340+
else fail('Paris, France should be rejected');
341+
342+
// Case 4: empty location → pass (existing semantics, unchanged)
343+
if (filter('') === true) pass('empty location passes (unchanged semantics)');
344+
else fail('empty location should pass');
345+
346+
// Case 5: case-insensitivity
347+
if (filter('BRUSSELS, BELGIUM') === true) pass('case-insensitive match works');
348+
else fail('case-insensitive match failed');
349+
350+
// Case 6: backward compatibility — no always_allow key behaves like stock allow/block
351+
const stockFilter = buildLocationFilter({
352+
allow: ['europe', 'remote'],
353+
block: ['france'],
354+
});
355+
if (stockFilter('Remote, Belgium or France') === false) pass('without always_allow, block still wins (backward compatible)');
356+
else fail('without always_allow, behaviour must match stock allow/block (block wins)');
357+
358+
// Case 7: null/missing locationFilter → pass-all filter (early-return path)
359+
const nullFilter = buildLocationFilter(null);
360+
if (nullFilter('Anywhere on Earth') === true && nullFilter('') === true) {
361+
pass('null locationFilter returns a pass-all filter (early-return path)');
362+
} else {
363+
fail('null locationFilter should return a pass-all filter');
364+
}
365+
366+
// Case 8: string-instead-of-array → wrapped to a 1-item list
367+
const stringFilter = buildLocationFilter({ always_allow: 'belgium', block: ['france'] });
368+
if (stringFilter('Remote, Belgium or France') === true) {
369+
pass('always_allow as a bare string is wrapped to a single-item list');
370+
} else {
371+
fail('always_allow as a bare string should still work');
372+
}
373+
374+
// Case 9: null/non-string items are filtered out (no crash, no false matches)
375+
const messyFilter = buildLocationFilter({
376+
always_allow: [null, 'belgium', 42, undefined],
377+
block: ['france', null, 7],
378+
});
379+
if (messyFilter('Brussels, Belgium') === true && messyFilter('Paris, France') === false) {
380+
pass('non-string entries (null, numbers, undefined) are filtered out without crashing');
381+
} else {
382+
fail('mixed-type keyword lists should not crash and should still match string entries');
383+
}
384+
385+
// Case 10: all-null/non-string list → empty after normalization (no false rejects)
386+
const allBadFilter = buildLocationFilter({ block: [null, 42, undefined], allow: ['remote'] });
387+
if (allBadFilter('Remote') === true) {
388+
pass('a block list with only non-string entries normalizes to [] (no false rejects)');
389+
} else {
390+
fail('non-string-only block list should not cause rejection');
391+
}
392+
393+
// A failed import or a throw from any case above lands here and is reported
// as a single failure carrying the error message.
} catch (e) {
394+
fail(`always_allow tests crashed: ${e.message}`);
395+
}
396+
317397
// ── SUMMARY ─────────────────────────────────────────────────────
318398

319399
console.log('\n' + '='.repeat(50));

0 commit comments

Comments
 (0)