From 0b6f306f4d02eae56d668d11f9cf48e6c749f446 Mon Sep 17 00:00:00 2001 From: Joost de Valk Date: Fri, 22 May 2026 12:55:37 +0200 Subject: [PATCH] Stamp sitemap index entries with per-file lastmod The sitemap index gave every `<sitemap>` entry the same global `lastmod`, so crawlers could not tell which child sitemaps actually changed. Each index entry is now stamped with the newest `lastmod` of the URLs in the child sitemap it points to, falling back to the configured `lastmod`. Co-Authored-By: Claude Opus 4.7 (1M context) --- .changeset/sitemap-index-lastmod.md | 5 + .../integrations/sitemap/src/utils/lastmod.ts | 22 ++++ .../sitemap/src/write-sitemap-chunk.ts | 9 +- .../integrations/sitemap/src/write-sitemap.ts | 7 +- .../sitemap/test/index-lastmod.test.ts | 105 ++++++++++++++++++ 5 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 .changeset/sitemap-index-lastmod.md create mode 100644 packages/integrations/sitemap/src/utils/lastmod.ts create mode 100644 packages/integrations/sitemap/test/index-lastmod.test.ts diff --git a/.changeset/sitemap-index-lastmod.md b/.changeset/sitemap-index-lastmod.md new file mode 100644 index 000000000000..3e75bc45a7b3 --- /dev/null +++ b/.changeset/sitemap-index-lastmod.md @@ -0,0 +1,5 @@ +--- +'@astrojs/sitemap': patch +--- + +Improves `<lastmod>` accuracy in the sitemap index. Each `<sitemap>` entry in `sitemap-index.xml` is now stamped with the most recent `lastmod` of the URLs in the child sitemap it points to, instead of repeating a single global date on every entry. When a child sitemap has no per-URL `lastmod`, the entry falls back to the `lastmod` option as before. This gives search engines a per-file freshness signal, so they can tell which child sitemaps actually changed without refetching all of them. 
diff --git a/packages/integrations/sitemap/src/utils/lastmod.ts b/packages/integrations/sitemap/src/utils/lastmod.ts new file mode 100644 index 000000000000..8fe6c0e8b806 --- /dev/null +++ b/packages/integrations/sitemap/src/utils/lastmod.ts @@ -0,0 +1,22 @@ +import type { SitemapItem } from '../index.js'; + +/** + * Returns the most recent `lastmod` among the given sitemap items as an + * ISO 8601 string, or `undefined` when none of them carry a valid `lastmod`. + * + * Used to stamp each `<sitemap>` entry in the sitemap index with the freshest + * date present in the child sitemap it points to, so search engines can tell + * which child sitemaps actually changed without refetching all of them. + */ +export function getLatestLastmod(items: SitemapItem[]): string | undefined { + let latest: number | undefined; + for (const item of items) { + if (!item.lastmod) continue; + const time = new Date(item.lastmod).getTime(); + if (Number.isNaN(time)) continue; + if (latest === undefined || time > latest) { + latest = time; + } + } + return latest === undefined ? 
undefined : new Date(latest).toISOString(); +} diff --git a/packages/integrations/sitemap/src/write-sitemap-chunk.ts b/packages/integrations/sitemap/src/write-sitemap-chunk.ts index 738e83c6949b..4baafdf379d0 100644 --- a/packages/integrations/sitemap/src/write-sitemap-chunk.ts +++ b/packages/integrations/sitemap/src/write-sitemap-chunk.ts @@ -7,6 +7,7 @@ import type { AstroConfig } from 'astro'; import { SitemapAndIndexStream, SitemapIndexStream, SitemapStream } from 'sitemap'; import replace from 'stream-replace-string'; import type { SitemapItem } from './index.js'; +import { getLatestLastmod } from './utils/lastmod.js'; type WriteSitemapChunkConfig = { filenameBase: string; @@ -91,11 +92,15 @@ export async function writeSitemapChunk( } const url = new URL(publicPath, sitemapHostname).toString(); + // Stamp this index entry with the freshest lastmod among the + // URLs that land in this file (items are written in order, + // `limit` per file), falling back to the global `lastmod`. + const fileLastmod = getLatestLastmod(items.slice(i * limit, (i + 1) * limit)) ?? 
lastmod; // Collect this sitemap URL for the index - sitemapUrls.push({ url, lastmod }); + sitemapUrls.push({ url, lastmod: fileLastmod }); - return [{ url, lastmod }, sitemapStream, stream]; + return [{ url, lastmod: fileLastmod }, sitemapStream, stream]; }, }); diff --git a/packages/integrations/sitemap/src/write-sitemap.ts b/packages/integrations/sitemap/src/write-sitemap.ts index 00ed48eac28f..cd190e913365 100644 --- a/packages/integrations/sitemap/src/write-sitemap.ts +++ b/packages/integrations/sitemap/src/write-sitemap.ts @@ -7,6 +7,7 @@ import type { AstroConfig } from 'astro'; import { SitemapAndIndexStream, SitemapIndexStream, SitemapStream } from 'sitemap'; import replace from 'stream-replace-string'; import type { SitemapItem } from './index.js'; +import { getLatestLastmod } from './utils/lastmod.js'; type WriteSitemapConfig = { filenameBase: string; @@ -82,7 +83,11 @@ export async function writeSitemap( } const url = new URL(publicPath, sitemapHostname).toString(); - return [{ url, lastmod }, sitemapStream, stream]; + // Stamp this index entry with the freshest lastmod among the URLs + // that land in this file (items are written in order, `limit` per + // file), falling back to the configured global `lastmod`. + const fileLastmod = getLatestLastmod(sourceData.slice(i * limit, (i + 1) * limit)) ?? 
lastmod; + return [{ url, lastmod: fileLastmod }, sitemapStream, stream]; }, }); diff --git a/packages/integrations/sitemap/test/index-lastmod.test.ts b/packages/integrations/sitemap/test/index-lastmod.test.ts new file mode 100644 index 000000000000..1490d5334e39 --- /dev/null +++ b/packages/integrations/sitemap/test/index-lastmod.test.ts @@ -0,0 +1,105 @@ +import assert from 'node:assert/strict'; +import { before, describe, it } from 'node:test'; +import { sitemap } from './fixtures/static/deps.mjs'; +import { type Fixture, loadFixture, readXML } from './test-utils.ts'; + +type IndexEntry = { loc: string; lastmod?: string }; + +async function readIndex(fixture: Fixture): Promise<IndexEntry[]> { + const data = await readXML(fixture.readFile('/sitemap-index.xml')); + return data.sitemapindex.sitemap.map((s: { loc: string[]; lastmod?: string[] }) => ({ + loc: s.loc[0], + lastmod: s.lastmod?.[0], + })); +} + +describe('Sitemap index lastmod', () => { + describe('Chunked sitemaps', () => { + let entries: IndexEntry[]; + + const BLOG_OLDER = '2024-02-01T00:00:00.000Z'; + const BLOG_NEWEST = '2024-09-15T00:00:00.000Z'; + const GLOSSARY_DATE = '2023-03-01T00:00:00.000Z'; + const FALLBACK = '2020-01-01T00:00:00.000Z'; + + before(async () => { + const fixture = await loadFixture({ + root: './fixtures/chunks/', + integrations: [ + sitemap({ + lastmod: new Date(FALLBACK), + chunks: { + blog: (item) => { + if (item.url.includes('blog')) { + // Different blog URLs get different dates; the + // index entry must surface the newest of them. + item.lastmod = item.url.includes('two') ? 
BLOG_NEWEST : BLOG_OLDER; + return item; + } + }, + glossary: (item) => { + if (item.url.includes('glossary')) { + item.lastmod = GLOSSARY_DATE; + return item; + } + }, + }, + }), + ], + }); + await fixture.build(); + entries = await readIndex(fixture); + }); + + const entryFor = (name: string) => entries.find((e) => e.loc.endsWith(name)); + + it('stamps each entry with the newest lastmod in its child sitemap', () => { + assert.equal(entryFor('sitemap-blog-0.xml')?.lastmod, BLOG_NEWEST); + assert.equal(entryFor('sitemap-glossary-0.xml')?.lastmod, GLOSSARY_DATE); + }); + + it('falls back to the configured lastmod when a child has no per-URL lastmod', () => { + assert.equal(entryFor('sitemap-pages-0.xml')?.lastmod, FALLBACK); + }); + }); + + describe('Non-chunked sitemaps split across multiple files', () => { + let fixture: Fixture; + let entries: IndexEntry[]; + + before(async () => { + fixture = await loadFixture({ + root: './fixtures/static/', + integrations: [ + sitemap({ + // One URL per file, so each index entry maps to exactly + // one child sitemap and the per-file slicing is exercised. + entryLimit: 1, + serialize(item) { + const day = (item.url.length % 27) + 1; + item.lastmod = new Date(Date.UTC(2024, 0, day)).toISOString(); + return item; + }, + }), + ], + }); + await fixture.build(); + entries = await readIndex(fixture); + }); + + it('gives each entry the lastmod of the child sitemap it points to', async () => { + assert.ok(entries.length > 1, 'expected the sitemap to span multiple files'); + for (const entry of entries) { + const childFile = `/${entry.loc.split('/').pop()}`; + const child = await readXML(fixture.readFile(childFile)); + const childDates = (child.urlset.url ?? []) + .map((u: { lastmod?: string[] }) => u.lastmod?.[0]) + .filter((d: string | undefined): d is string => Boolean(d)) + .map((d: string) => new Date(d).getTime()); + const expected = + childDates.length > 0 ? 
new Date(Math.max(...childDates)).toISOString() : undefined; + assert.equal(entry.lastmod, expected, `mismatch for ${entry.loc}`); + } + }); + }); +});