Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/publish-production.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ jobs:
name: Build
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ENABLE_LAST_MOD_IN_SITEMAP: true
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed anymore, as it's fast

Copy link
Collaborator

@kodster28 kodster28 Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should wait to remove this until we've confirmed that across a couple of builds. Making assumptions about what does / doesn't affect the build time is sorta how we got into the sitemap generation (and its build-time regression) situation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I did benchmark a few scenarios locally though, and removing it here means it runs in this PR, which it wouldn't otherwise.

- uses: actions/upload-artifact@v4
continue-on-error: true
with:
Expand Down
90 changes: 77 additions & 13 deletions astro.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,84 @@ const customCss = await autogenStyles();

const RUN_LINK_CHECK =
process.env.RUN_LINK_CHECK?.toLowerCase() === "true" || false;
const ENABLE_LAST_MOD_IN_SITEMAP =
process.env.ENABLE_LAST_MOD_IN_SITEMAP?.toLowerCase() === "true";

/**
* Get the last Git modification date for a file
* @param filePath - Absolute path to the file
* @returns ISO date string or null if not available
* Build a cache of all git last-modified dates in one batch
*/
function getGitLastModified(filePath: string): string | null {
function buildGitDateCache(): Map<string, string> | null {
try {
const result = execSync(`git log -1 --format=%cI -- "${filePath}"`, {
encoding: "utf-8",
}).trim();
return result || null;
} catch (_error) {
console.time("[sitemap] Building git date cache");

// Use git log with --name-only and --diff-filter to get all files with their last commit
// The format outputs the commit date followed by the list of files changed in that commit
// e.g.
// 2025-10-01T12:34:56-07:00
// src/content/docs/file1.mdx
// src/content/docs/file2.mdx
//
// 2025-09-25T09:15:30-07:00
// src/content/docs/file3.mdx

const result = execSync(
'git log --pretty=format:"%cI" --name-only --diff-filter=AMR src/content/docs',
{
encoding: "utf-8",
maxBuffer: 100 * 1024 * 1024,
},
);

const cache = new Map<string, string>();
const lines = result.split("\n");

let currentDate: string | null = null;
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed) {
continue;
}
// Lines are either dates or file paths
// Date lines match ISO format
if (/^\d{4}-\d{2}-\d{2}T/.test(trimmed)) {
currentDate = trimmed;
} else if (currentDate) {
const filePath = `./${trimmed}`; // fileURLToPath includes leading ./, so we do the same here
if (!cache.has(filePath)) {
cache.set(filePath, currentDate); // e.g., "src/content/docs/file.mdx"
}
}
}

console.timeEnd("[sitemap] Building git date cache");
console.log(`[sitemap] Loaded git dates for ${cache.size} files`);
return cache;
} catch (error) {
console.warn("[sitemap] Failed to build git date cache:", error);
return null;
}
}

const gitDateCache = buildGitDateCache();

/**
 * Look up the last Git modification date for a file in the prebuilt
 * `gitDateCache` (no git process is spawned per call).
 *
 * @param filePath - Path to the file; used verbatim as the cache key, so it
 *   must match the key format stored in the cache (repo-relative path with a
 *   leading `./`).
 * @returns The cached commit date string, or `undefined` when the cache could
 *   not be built or has no entry for `filePath`.
 */
function getGitLastModified(filePath: string): string | undefined {
  // The cache is null when the batch `git log` failed at startup.
  if (!gitDateCache) {
    console.warn("[sitemap] Git date cache is not initialized");
    return undefined;
  }

  const result = gitDateCache.get(filePath);

  if (!result) {
    console.log(`[sitemap] Last modified not found in git for: "${filePath}"`);
  }

  // Map#get already yields `undefined` for a missing key; no coalescing needed.
  return result;
}

/**
* Convert a sitemap URL to the corresponding source file path
* @param url - The full URL from the sitemap
Expand Down Expand Up @@ -116,10 +175,15 @@ function addLastModDate(item: SitemapItem) {
const gitDate = getGitLastModified(filePath);
if (gitDate) {
item.lastmod = gitDate;
} else {
console.warn(
`[sitemap] No git last mod date found for ${filePath} (${item.url}) - setting to now`,
);
item.lastmod = new Date().toISOString();
}
} else {
console.warn(
`[sitemap] Could not find last modified for ${item.url} - setting to now`,
`[sitemap] Could not find source file for ${item.url} - setting last modified to now`,
);
item.lastmod = new Date().toISOString();
}
Expand Down Expand Up @@ -261,7 +325,7 @@ export default defineConfig({
return true;
},
serialize(item) {
return ENABLE_LAST_MOD_IN_SITEMAP ? addLastModDate(item) : item;
return addLastModDate(item);
},
}),
react(),
Expand Down