Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/sitemap-index-lastmod.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@astrojs/sitemap': patch
---

Improves `<lastmod>` accuracy in the sitemap index. Each `<sitemap>` entry in `sitemap-index.xml` is now stamped with the most recent `lastmod` of the URLs in the child sitemap it points to, instead of repeating a single global date on every entry. When a child sitemap has no per-URL `lastmod`, the entry falls back to the `lastmod` option as before. This gives search engines a per-file freshness signal, so they can tell which child sitemaps actually changed without refetching all of them.
22 changes: 22 additions & 0 deletions packages/integrations/sitemap/src/utils/lastmod.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import type { SitemapItem } from '../index.js';

/**
 * Finds the newest `lastmod` carried by a list of sitemap items.
 *
 * Items with a missing/empty `lastmod`, or one that does not parse to a valid
 * date, are ignored. The winning timestamp is re-serialized with
 * `Date.prototype.toISOString()`, so the result is always a full UTC ISO 8601
 * string regardless of how the source value was written.
 *
 * The sitemap index writers use this to give each `<sitemap>` entry the
 * freshest date found in the child sitemap it references.
 *
 * @param items - Sitemap items that end up in a single child sitemap file.
 * @returns The most recent `lastmod` as an ISO 8601 string, or `undefined`
 * when no item has a usable `lastmod` (including when `items` is empty).
 */
export function getLatestLastmod(items: SitemapItem[]): string | undefined {
	const newest = items.reduce<number | undefined>((best, { lastmod }) => {
		// No date on this item: keep whatever we have so far.
		if (!lastmod) return best;

		const timestamp = new Date(lastmod).getTime();
		// Unparseable dates yield NaN and must not poison the comparison.
		if (Number.isNaN(timestamp)) return best;

		return best === undefined || timestamp > best ? timestamp : best;
	}, undefined);

	if (newest === undefined) return undefined;
	return new Date(newest).toISOString();
}
9 changes: 7 additions & 2 deletions packages/integrations/sitemap/src/write-sitemap-chunk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import type { AstroConfig } from 'astro';
import { SitemapAndIndexStream, SitemapIndexStream, SitemapStream } from 'sitemap';
import replace from 'stream-replace-string';
import type { SitemapItem } from './index.js';
import { getLatestLastmod } from './utils/lastmod.js';

type WriteSitemapChunkConfig = {
filenameBase: string;
Expand Down Expand Up @@ -91,11 +92,15 @@ export async function writeSitemapChunk(
}

const url = new URL(publicPath, sitemapHostname).toString();
// Stamp this index entry with the freshest lastmod among the
// URLs that land in this file (items are written in order,
// `limit` per file), falling back to the global `lastmod`.
const fileLastmod = getLatestLastmod(items.slice(i * limit, (i + 1) * limit)) ?? lastmod;

// Collect this sitemap URL for the index
sitemapUrls.push({ url, lastmod });
sitemapUrls.push({ url, lastmod: fileLastmod });

return [{ url, lastmod }, sitemapStream, stream];
return [{ url, lastmod: fileLastmod }, sitemapStream, stream];
},
});

Expand Down
7 changes: 6 additions & 1 deletion packages/integrations/sitemap/src/write-sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import type { AstroConfig } from 'astro';
import { SitemapAndIndexStream, SitemapIndexStream, SitemapStream } from 'sitemap';
import replace from 'stream-replace-string';
import type { SitemapItem } from './index.js';
import { getLatestLastmod } from './utils/lastmod.js';

type WriteSitemapConfig = {
filenameBase: string;
Expand Down Expand Up @@ -82,7 +83,11 @@ export async function writeSitemap(
}

const url = new URL(publicPath, sitemapHostname).toString();
return [{ url, lastmod }, sitemapStream, stream];
// Stamp this index entry with the freshest lastmod among the URLs
// that land in this file (items are written in order, `limit` per
// file), falling back to the configured global `lastmod`.
const fileLastmod = getLatestLastmod(sourceData.slice(i * limit, (i + 1) * limit)) ?? lastmod;
return [{ url, lastmod: fileLastmod }, sitemapStream, stream];
},
});

Expand Down
105 changes: 105 additions & 0 deletions packages/integrations/sitemap/test/index-lastmod.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import assert from 'node:assert/strict';
import { before, describe, it } from 'node:test';
import { sitemap } from './fixtures/static/deps.mjs';
import { type Fixture, loadFixture, readXML } from './test-utils.ts';

type IndexEntry = { loc: string; lastmod?: string };

/**
 * Loads `/sitemap-index.xml` from a built fixture and flattens every
 * `<sitemap>` element into a plain `{ loc, lastmod }` record.
 *
 * The parsed XML exposes each child element as an array of strings, so the
 * first value of `loc` (and of `lastmod`, when present) is taken.
 */
async function readIndex(fixture: Fixture): Promise<IndexEntry[]> {
	type RawSitemapNode = { loc: string[]; lastmod?: string[] };

	const xml = await readXML(fixture.readFile('/sitemap-index.xml'));
	const nodes: RawSitemapNode[] = xml.sitemapindex.sitemap;

	return nodes.map(({ loc, lastmod }) => {
		return { loc: loc[0], lastmod: lastmod?.[0] };
	});
}

describe('Sitemap index lastmod', () => {
// Verifies the per-entry `lastmod` written to the sitemap index when the
// `chunks` option routes URLs into named child sitemaps.
describe('Chunked sitemaps', () => {
// Index entries parsed from the built `sitemap-index.xml`; set in `before`.
let entries: IndexEntry[];

// Dates assigned by the chunk callbacks below, plus the global fallback
// passed as the integration's `lastmod` option.
const BLOG_OLDER = '2024-02-01T00:00:00.000Z';
const BLOG_NEWEST = '2024-09-15T00:00:00.000Z';
const GLOSSARY_DATE = '2023-03-01T00:00:00.000Z';
const FALLBACK = '2020-01-01T00:00:00.000Z';

// Build the fixture once and read the index; both tests share the result.
before(async () => {
const fixture = await loadFixture({
root: './fixtures/chunks/',
integrations: [
sitemap({
lastmod: new Date(FALLBACK),
chunks: {
// Claims URLs containing "blog"; other URLs fall through
// (the callback returns nothing for them).
blog: (item) => {
if (item.url.includes('blog')) {
// Different blog URLs get different dates; the
// index entry must surface the newest of them.
item.lastmod = item.url.includes('two') ? BLOG_NEWEST : BLOG_OLDER;
return item;
}
},
// Claims URLs containing "glossary" and gives them all the same date.
glossary: (item) => {
if (item.url.includes('glossary')) {
item.lastmod = GLOSSARY_DATE;
return item;
}
},
},
}),
],
});
await fixture.build();
entries = await readIndex(fixture);
});

// Looks up an index entry by the file name at the end of its `loc` URL.
const entryFor = (name: string) => entries.find((e) => e.loc.endsWith(name));

it('stamps each entry with the newest lastmod in its child sitemap', () => {
assert.equal(entryFor('sitemap-blog-0.xml')?.lastmod, BLOG_NEWEST);
assert.equal(entryFor('sitemap-glossary-0.xml')?.lastmod, GLOSSARY_DATE);
});

it('falls back to the configured lastmod when a child has no per-URL lastmod', () => {
// NOTE(review): presumably URLs claimed by neither chunk callback land in
// `sitemap-pages-0.xml` without a per-URL lastmod — confirm against the
// chunking implementation.
assert.equal(entryFor('sitemap-pages-0.xml')?.lastmod, FALLBACK);
});
});

// Verifies the per-entry `lastmod` in the sitemap index when a plain
// (non-chunked) sitemap is split across several files via `entryLimit`.
describe('Non-chunked sitemaps split across multiple files', () => {
// The fixture is kept so the test can read each child sitemap file.
let fixture: Fixture;
// Index entries parsed from the built `sitemap-index.xml`; set in `before`.
let entries: IndexEntry[];

before(async () => {
fixture = await loadFixture({
root: './fixtures/static/',
integrations: [
sitemap({
// One URL per file, so each index entry maps to exactly
// one child sitemap and the per-file slicing is exercised.
entryLimit: 1,
serialize(item) {
// Derive a deterministic date from the URL length: a day of
// January 2024 (UTC) in the range 1..27.
const day = (item.url.length % 27) + 1;
item.lastmod = new Date(Date.UTC(2024, 0, day)).toISOString();
return item;
},
}),
],
});
await fixture.build();
entries = await readIndex(fixture);
});

it('gives each entry the lastmod of the child sitemap it points to', async () => {
assert.ok(entries.length > 1, 'expected the sitemap to span multiple files');
for (const entry of entries) {
// The child file name is the last path segment of the entry's `loc` URL.
const childFile = `/${entry.loc.split('/').pop()}`;
const child = await readXML(fixture.readFile(childFile));
// Collect every `<lastmod>` found in the child sitemap as epoch milliseconds.
const childDates = (child.urlset.url ?? [])
.map((u: { lastmod?: string[] }) => u.lastmod?.[0])
.filter((d: string | undefined): d is string => Boolean(d))
.map((d: string) => new Date(d).getTime());
// Expected value is recomputed independently from the child file: the newest
// date it contains, or `undefined` when it has none (no global `lastmod`
// option is configured for this fixture).
const expected =
childDates.length > 0 ? new Date(Math.max(...childDates)).toISOString() : undefined;
assert.equal(entry.lastmod, expected, `mismatch for ${entry.loc}`);
}
});
});
});
Loading