Skip to content

Commit 872766e

Browse files
authored
Merge pull request #165 from vincentmakes/claude/fix-robots-api-blocking-roXWK
Fix Googlebot robots.txt blocking of public read-only API endpoints
2 parents 0b3c60e + c33cd28 commit 872766e

6 files changed

Lines changed: 108 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ All notable changes to CV Manager will be documented in this file.
44

55
Format follows [Keep a Changelog](https://keepachangelog.com/), versioning follows [Semantic Versioning](https://semver.org/).
66

7+
## [1.49.5] - 2026-05-06
8+
9+
### Fixed
10+
- **Googlebot reported `Blocked by robots.txt` for public read-only API endpoints** (e.g. `/api/datasets/id/:id`, `/api/settings/language`). The public site hydrates client-side from `/api/*` JSON, so a JS-rendering crawler that can't fetch those endpoints sees the SSR shell only and skips re-render — degrading indexing. The public server's `robots.txt` previously had a blanket `Disallow: /api/`, which blocked all of it. The public server only ever exposes a curated set of GET-only, rate-limited, sensitive-field-filtered endpoints, so it's safe to expose those to crawlers. `robots.txt` now emits explicit `Allow:` rules for each public read-only API path (`/api/profile`, `/api/sections`, `/api/settings`, `/api/experiences`, `/api/certifications`, `/api/education`, `/api/skills`, `/api/projects`, `/api/timeline`, `/api/custom-sections`, `/api/layout-types`, `/api/social-platforms`, `/api/cv`, `/api/datasets/slug/`, `/api/datasets/id/`) before the trailing `Disallow: /api/`, so the longest-prefix-wins rule keeps anything not on the allow-list blocked by default. Both `robots.txt` handlers (the dual-server and PUBLIC_ONLY paths) now share a single `buildRobotsTxt(req)` helper in `src/server.js` so they can't drift, with regression tests covering both the indexable and `noindex` branches.
11+
712
## [1.49.4] - 2026-05-06
813

914
### Added

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "cv-manager",
3-
"version": "1.49.4",
3+
"version": "1.49.5",
44
"description": "Professional CV Management System",
55
"main": "src/server.js",
66
"scripts": {

src/server.js

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,42 @@ function buildCanonicalTag(req) {
307307
return ` <link rel="canonical" href="${escapeHtmlServer(url)}">`;
308308
}
309309

310+
// Public read-only API paths that the public site fetches client-side.
// They are listed explicitly (instead of simply dropping `Disallow: /api/`)
// so that JS-rendering crawlers such as Googlebot can hydrate the page, while
// any future `/api/*` route that is not on this list stays blocked by the
// trailing `Disallow: /api/` — the most specific (longest) matching rule wins
// under Google's robots.txt rules. Keep in sync with the
// `publicApp.get('/api/...')` routes below.
//
// Frozen so that no code path can widen the crawler allow-list at runtime.
const PUBLIC_API_ALLOW_PATHS = Object.freeze([
    '/api/profile',
    '/api/sections',
    '/api/settings',
    '/api/experiences',
    '/api/certifications',
    '/api/education',
    '/api/skills',
    '/api/projects',
    '/api/timeline',
    '/api/custom-sections',
    '/api/layout-types',
    '/api/social-platforms',
    '/api/cv',
    '/api/datasets/slug/',
    '/api/datasets/id/',
]);
333+
334+
/**
 * Build the robots.txt body served by the public server.
 *
 * Reads the `robotsMeta` row from the `settings` table. When that value
 * contains `noindex` (in any letter case) the whole site is disallowed;
 * otherwise every entry of `PUBLIC_API_ALLOW_PATHS` is emitted as an `Allow:`
 * rule ahead of the catch-all `Disallow: /api/`, followed by the sitemap URL
 * derived from the request.
 *
 * @param {object} req - Incoming request; `req.headers` and `req.protocol` are read.
 * @returns {string} The robots.txt text (no trailing newline).
 */
function buildRobotsTxt(req) {
    // Chained proxies append to X-Forwarded-* headers ("https, http"); only
    // the first entry describes the client-facing hop, so use that one.
    const firstForwarded = (value) =>
        (typeof value === 'string' ? value.split(',')[0].trim() : '');

    const protocol = firstForwarded(req.headers['x-forwarded-proto']) || req.protocol || 'https';
    const host = firstForwarded(req.headers['x-forwarded-host']) || req.headers.host || 'localhost';

    const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta');
    const metaValue = robotsMeta?.value || 'index, follow';

    // Robots meta directives are case-insensitive, so "NOINDEX" must also
    // switch the site to the blanket Disallow.
    if (String(metaValue).toLowerCase().includes('noindex')) {
        return `User-agent: *\nDisallow: /`;
    }

    const allows = PUBLIC_API_ALLOW_PATHS.map((p) => `Allow: ${p}`).join('\n');
    return `User-agent: *\nAllow: /\n${allows}\nDisallow: /api/\nSitemap: ${protocol}://${host}/sitemap.xml`;
}
345+
310346
// Pull the current live CV into the same shape as a saved-dataset blob so
311347
// the SSR helper has one input format to deal with.
312348
function gatherLiveCvData() {
@@ -1978,17 +2014,8 @@ if (PUBLIC_ONLY) {
19782014
});
19792015

19802016
publicApp.get('/robots.txt', (req, res) => {
1981-
const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https';
1982-
const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost';
1983-
const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta');
1984-
const metaValue = robotsMeta?.value || 'index, follow';
1985-
const isNoIndex = metaValue.includes('noindex');
19862017
res.setHeader('Content-Type', 'text/plain');
1987-
if (isNoIndex) {
1988-
res.send(`User-agent: *\nDisallow: /`);
1989-
} else {
1990-
res.send(`User-agent: *\nAllow: /\nSitemap: ${protocol}://${host}/sitemap.xml\nDisallow: /api/`);
1991-
}
2018+
res.send(buildRobotsTxt(req));
19922019
});
19932020

19942021
publicApp.use('/shared', express.static(path.join(__dirname, '../public/shared')));
@@ -4400,7 +4427,7 @@ if (PUBLIC_ONLY) {
44004427
next();
44014428
});
44024429
publicApp.get('/sitemap.xml', (req, res) => { const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https'; const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost'; res.setHeader('Content-Type', 'application/xml'); res.send(`<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>${protocol}://${host}/</loc><lastmod>${new Date().toISOString().split('T')[0]}</lastmod><changefreq>weekly</changefreq><priority>1.0</priority></url></urlset>`); });
4403-
publicApp.get('/robots.txt', (req, res) => { const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https'; const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost'; const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta'); const metaValue = robotsMeta?.value || 'index, follow'; const isNoIndex = metaValue.includes('noindex'); res.setHeader('Content-Type', 'text/plain'); if (isNoIndex) { res.send(`User-agent: *\nDisallow: /`); } else { res.send(`User-agent: *\nAllow: /\nSitemap: ${protocol}://${host}/sitemap.xml\nDisallow: /api/`); } });
4430+
publicApp.get('/robots.txt', (req, res) => { res.setHeader('Content-Type', 'text/plain'); res.send(buildRobotsTxt(req)); });
44044431
publicApp.use('/shared', express.static(path.join(__dirname, '../public/shared')));
44054432
// Favicon and icons (public uses icon-public.png with eye badge)
44064433
const publicIconPathB = path.join(__dirname, '../icon-public.png');

tests/backend.test.js

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2634,6 +2634,67 @@ describe('Backend API', () => {
26342634
});
26352635
});
26362636

2637+
describe('robots.txt API allow-list', () => {
    // PUTs `value` as the `robotsMeta` setting via BASE_URL so each test can
    // select the robots.txt branch it wants to exercise.
    const setRobotsMeta = (value) => fetch(`${BASE_URL}/api/settings/robotsMeta`, {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ value }),
    });

    it('does not block public read-only API paths from JS-rendering crawlers', async () => {
        // Make sure the indexable branch is exercised.
        await setRobotsMeta('index, follow');

        const res = await fetch(`${PUBLIC_URL}/robots.txt`);
        assert.strictEqual(res.status, 200);
        const text = await res.text();

        // The catch-all Disallow must still be present as the fallback, and
        // every explicit Allow rule for the read-only endpoints must come
        // before it. Mirrors the server-side allow-list in src/server.js.
        const requiredAllows = [
            '/api/profile',
            '/api/sections',
            '/api/settings',
            '/api/experiences',
            '/api/certifications',
            '/api/education',
            '/api/skills',
            '/api/projects',
            '/api/timeline',
            '/api/custom-sections',
            '/api/layout-types',
            '/api/social-platforms',
            '/api/cv',
            '/api/datasets/slug/',
            '/api/datasets/id/',
        ];

        const disallowIndex = text.indexOf('Disallow: /api/');
        assert.ok(
            disallowIndex !== -1,
            `robots.txt is missing the catch-all Disallow: /api/; full body:\n${text}`,
        );

        // Named `apiPath` rather than `path` to avoid shadowing the module name.
        for (const apiPath of requiredAllows) {
            const allowIndex = text.indexOf(`Allow: ${apiPath}`);
            assert.ok(
                allowIndex !== -1,
                `robots.txt is missing Allow rule for ${apiPath}; full body:\n${text}`,
            );
            assert.ok(
                allowIndex < disallowIndex,
                `Allow rule for ${apiPath} must precede Disallow: /api/; full body:\n${text}`,
            );
        }
    });

    it('still emits a single global Disallow when robotsMeta is noindex', async () => {
        await setRobotsMeta('noindex, nofollow');

        try {
            const res = await fetch(`${PUBLIC_URL}/robots.txt`);
            assert.strictEqual(res.status, 200);
            const text = await res.text();
            assert.match(text, /^User-agent: \*\nDisallow: \/$/);
        } finally {
            // Restore the default even when an assertion above fails, so
            // subsequent tests see the indexable branch.
            await setRobotsMeta('index, follow');
        }
    });
});
2697+
26372698
describe('Canonical link injection', () => {
26382699
it('emits canonical from request host on public root', async () => {
26392700
// Node's fetch reserves the Host header, so simulate the deployed-host

version.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"version": "1.49.4",
2+
"version": "1.49.5",
33
"changelog": "https://github.com/vincentmakes/cv-manager/blob/main/CHANGELOG.md"
44
}

0 commit comments

Comments
 (0)