Skip to content

Commit 4e36868

Browse files
wow
1 parent 3635251 commit 4e36868

File tree

7 files changed

+260
-21
lines changed

7 files changed

+260
-21
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ node_modules/
77
out/
88
.vercel/
99

10+
# Generated content
11+
app/papers/papers.generated.json
12+
1013
# Logs
1114
npm-debug.log*
1215
yarn-debug.log*

app/page.tsx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,9 +210,9 @@ export default function HomePage() {
210210
<Button variant="contained" href="#downloads">
211211
Downloads
212212
</Button>
213-
<Button variant="outlined" href="/papers/" target="_blank" rel="noreferrer">
214-
Papers folder
215-
</Button>
213+
<Button variant="outlined" href="/papers" target="_blank" rel="noreferrer">
214+
Papers
215+
</Button>
216216
<Button variant="outlined" href="/presentations/" target="_blank" rel="noreferrer">
217217
Presentations folder
218218
</Button>

app/papers/page.tsx

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"use client";
2+
3+
import {
4+
AppBar,
5+
Box,
6+
Container,
7+
Divider,
8+
Link,
9+
Stack,
10+
Toolbar,
11+
Typography,
12+
} from "@mui/material";
13+
14+
import papersIndex from "./papers.generated.json";
15+
16+
/** One entry of the generated papers index, describing a single PDF. */
interface PaperItem {
  /** Name of the PDF file; also used as the React list key. */
  fileName: string;
  /** Link target used to open the PDF. */
  href: string;
  /** Display title shown as the link text. */
  title: string;
  /** Author line, or `null` when none is available. */
  authors: string | null;
  /** File size in bytes, or `null` when it is not recorded. */
  bytes: number | null;
  /** Present when metadata extraction failed for this file. */
  error?: string;
}

/** Shape of `papers.generated.json`. */
interface PapersIndex {
  /** Timestamp string recorded when the index was generated. */
  generatedAt: string;
  /** All indexed PDFs, in display order. */
  items: PaperItem[];
}

// The JSON import is typed structurally by the compiler; narrow it to the
// documented index shape once so the component below can rely on it.
const data = papersIndex as PapersIndex;
31+
32+
/**
 * Locale used for number formatting. It is pinned so the pre-rendered HTML and
 * the browser render produce the same text; the runtime default locale can
 * differ between the build machine and the visitor's browser, which would
 * trigger a hydration mismatch.
 */
const SIZE_LOCALE = "en-US";

/**
 * Builds the " · N KB" caption suffix for a paper.
 *
 * @param bytes File size in bytes, or `null` when unknown.
 * @returns The suffix, or an empty string when no size is available.
 */
function formatSizeSuffix(bytes: number | null): string {
  if (!bytes) return "";
  return ` · ${Math.round(bytes / 1024).toLocaleString(SIZE_LOCALE)} KB`;
}

/**
 * Papers listing page: a sticky header with a link back to the home page,
 * followed by one entry per indexed PDF (title link, optional author line,
 * and a caption with the file name, size and a parse-failure marker).
 */
export default function PapersPage() {
  return (
    <Box>
      <AppBar position="sticky" color="default" elevation={0}>
        <Toolbar>
          <Container maxWidth="lg" sx={{ px: { xs: 0, sm: 2 } }}>
            <Stack direction="row" alignItems="baseline" justifyContent="space-between">
              <Typography variant="h5" component="h1">
                Papers
              </Typography>
              <Link href="/" underline="hover" color="inherit">
                Home
              </Link>
            </Stack>
          </Container>
        </Toolbar>
      </AppBar>

      <Container maxWidth="lg" sx={{ py: { xs: 4, md: 6 } }}>
        <Typography color="text.secondary" sx={{ mb: 3 }}>
          {data.items.length} PDFs indexed (generated {data.generatedAt}).
        </Typography>

        <Stack divider={<Divider flexItem />} spacing={2}>
          {data.items.map((paper) => (
            <Box key={paper.fileName} sx={{ py: 1 }}>
              <Typography variant="h6" sx={{ mb: 0.25 }}>
                <Link href={paper.href} target="_blank" rel="noreferrer" underline="hover">
                  {paper.title}
                </Link>
              </Typography>
              {paper.authors ? (
                <Typography variant="body2" color="text.secondary">
                  {paper.authors}
                </Typography>
              ) : null}
              <Typography variant="caption" color="text.secondary">
                {paper.fileName}
                {formatSizeSuffix(paper.bytes)}
                {paper.error ? " · (metadata parse failed)" : ""}
              </Typography>
            </Box>
          ))}
        </Stack>
      </Container>
    </Box>
  );
}

package-lock.json

Lines changed: 26 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
"private": true,
44
"version": "0.1.0",
55
"scripts": {
6+
"predev": "node scripts/generate-papers-index.mjs",
67
"dev": "next dev",
8+
"prebuild": "node scripts/generate-papers-index.mjs",
79
"build": "next build",
810
"start": "npx serve@latest out",
911
"preview": "npx serve@latest out"
@@ -21,6 +23,7 @@
2123
"@types/node": "^22.0.0",
2224
"@types/react": "^19.0.0",
2325
"@types/react-dom": "^19.0.0",
26+
"pdf-parse": "^1.1.1",
2427
"typescript": "^5.7.0"
2528
},
2629
"engines": {

public/papers/index.html

Lines changed: 0 additions & 17 deletions
This file was deleted.

scripts/generate-papers-index.mjs

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import fs from "node:fs/promises";
2+
import path from "node:path";
3+
import pdf from "pdf-parse";
4+
5+
// All paths are anchored at the directory the script is launched from.
const repoRoot = process.cwd();

// Joins path segments onto the launch directory.
const fromRoot = (...segments) => path.join(repoRoot, ...segments);

// Folder that is scanned for PDF files.
const papersDir = fromRoot("public", "papers");
// Folder and file that receive the generated JSON index.
const outDir = fromRoot("app", "papers");
const outFile = path.join(outDir, "papers.generated.json");
9+
10+
// True when the file name ends with ".pdf", ignoring letter case.
const isPdf = (fileName) => fileName.slice(-4).toLowerCase() === ".pdf";
11+
12+
/**
 * Cleans up a metadata string.
 *
 * NUL characters are removed and surrounding whitespace is trimmed. Returns
 * null for non-strings, for values that end up empty, and for the placeholder
 * "untitled" (any letter case).
 */
const normalizeString = (value) => {
  if (typeof value !== "string") return null;

  const cleaned = value.split("\0").join("").trim();
  const isPlaceholder = cleaned.toLowerCase() === "untitled";

  return cleaned.length === 0 || isPlaceholder ? null : cleaned;
};
19+
20+
/**
 * Derives a human-readable title from a PDF file name.
 *
 * The ".pdf" extension (any letter case) is dropped, runs of underscores and
 * hyphens become single spaces, and whitespace is collapsed and trimmed.
 *
 * When that leaves nothing (for example "___.pdf"), the extension-less name is
 * returned instead, and failing that the original file name, so the result is
 * never empty for a non-empty input and the listing never shows a blank link.
 */
const titleFromFilename = (fileName) => {
  const withoutExt = fileName.replace(/\.pdf$/i, "");
  const cleaned = withoutExt.replace(/[_-]+/g, " ").replace(/\s+/g, " ").trim();
  return cleaned || withoutExt.trim() || fileName;
};
24+
25+
/**
 * Heuristically picks an author line out of extracted PDF text.
 *
 * Only the first 30 non-blank lines are considered. The first line that is
 * 6-140 characters long (after collapsing whitespace), contains a letter, has
 * at least two space-separated words, and contains a comma, the word "and",
 * or "et al" is returned. Returns null when the input is not a non-empty
 * string or no line qualifies.
 */
const guessAuthorsFromText = (text) => {
  if (typeof text !== "string" || text === "") return null;

  const authorHints = [/,/, /\band\b/i, /\bet\s+al\b/i];

  const headLines = text
    .split(/\r?\n/)
    .map((raw) => raw.trim())
    .filter((raw) => raw.length > 0)
    .slice(0, 30);

  const match = headLines
    .map((raw) => raw.replace(/\s+/g, " ").trim())
    .find((candidate) => {
      const lengthOk = candidate.length >= 6 && candidate.length <= 140;
      if (!lengthOk) return false;

      const hinted = authorHints.some((pattern) => pattern.test(candidate));
      const wordCount = candidate.split(" ").length;
      return hinted && /[A-Za-z]/.test(candidate) && wordCount >= 2;
    });

  return match ?? null;
};
51+
52+
/**
 * Reads a PDF and extracts a title and an author line from it.
 *
 * The title comes from the document info "Title" field, falling back to the
 * "dc:title" metadata entry. Authors come from the info "Author" field, then
 * "dc:creator", and finally a guess based on the extracted text. Either field
 * may be null. Errors from reading or parsing the file propagate to the caller.
 */
const tryParsePdf = async (filePath) => {
  // `max: 1` is intended to keep parsing light (metadata + first page text only).
  const parsed = await pdf(await fs.readFile(filePath), { max: 1 });

  // Looks up a metadata entry, tolerating a missing metadata object or getter.
  const fromMetadata = (key) => normalizeString(parsed?.metadata?.get?.(key));

  const title = normalizeString(parsed?.info?.Title) ?? fromMetadata("dc:title");
  const declaredAuthor = normalizeString(parsed?.info?.Author) ?? fromMetadata("dc:creator");

  return {
    title,
    authors: declaredAuthor ?? guessAuthorsFromText(parsed?.text),
  };
};
68+
69+
/**
 * Lists the names of the PDF files directly inside the papers directory.
 * Entries that are not regular files are ignored; sub-directories are not
 * descended into. Rejects when the directory cannot be read.
 */
const listPdfFiles = async () => {
  const names = [];
  for (const entry of await fs.readdir(papersDir, { withFileTypes: true })) {
    if (entry.isFile() && isPdf(entry.name)) {
      names.push(entry.name);
    }
  }
  return names;
};
76+
77+
/**
 * Decides whether the existing index file can be reused as-is.
 *
 * The index is current only when it lists exactly the given file names and
 * its modification time is not older than any of those PDFs. Comparing names
 * (not just timestamps) catches PDFs that were deleted, renamed, or added with
 * an old preserved mtime. Any failure (missing or unreadable index, invalid
 * JSON, stat error) is treated as "not current" so the index is regenerated.
 */
const isIndexCurrent = async (pdfFiles) => {
  try {
    const outStat = await fs.stat(outFile);
    const indexed = JSON.parse(await fs.readFile(outFile, "utf8"))?.items;

    if (!Array.isArray(indexed) || indexed.length !== pdfFiles.length) return false;

    const indexedNames = new Set(indexed.map((item) => item?.fileName));
    if (!pdfFiles.every((name) => indexedNames.has(name))) return false;

    for (const fileName of pdfFiles) {
      const stat = await fs.stat(path.join(papersDir, fileName));
      if (stat.mtimeMs > outStat.mtimeMs) return false;
    }
    return true;
  } catch {
    return false;
  }
};

/**
 * Generates the papers index JSON.
 *
 * Steps: list the PDFs in the papers directory (on failure, log, set a
 * non-zero exit code and stop); skip all work when the existing index is
 * still current; otherwise parse each PDF, fall back to a filename-derived
 * entry carrying an `error` message when parsing fails, sort the entries by
 * title and write the index file.
 */
const main = async () => {
  let pdfFiles;
  try {
    pdfFiles = await listPdfFiles();
  } catch (err) {
    console.error(`Failed to read papers directory: ${papersDir}`);
    console.error(err);
    process.exitCode = 1;
    return;
  }

  // Fast path: nothing was added, removed or modified since the last run.
  if (await isIndexCurrent(pdfFiles)) {
    console.log(`Papers index already up to date -> ${path.relative(repoRoot, outFile)}`);
    return;
  }

  const items = [];
  // PDFs are processed one at a time so only one file buffer is held at once.
  for (const fileName of pdfFiles) {
    const filePath = path.join(papersDir, fileName);
    const href = `/papers/${encodeURIComponent(fileName)}`;

    try {
      const stat = await fs.stat(filePath);
      const meta = await tryParsePdf(filePath);

      items.push({
        fileName,
        href,
        title: meta.title ?? titleFromFilename(fileName),
        authors: meta.authors,
        bytes: stat.size,
      });
    } catch (err) {
      // Best effort: keep the file listed even when it cannot be parsed.
      items.push({
        fileName,
        href,
        title: titleFromFilename(fileName),
        authors: null,
        bytes: null,
        error: String(err?.message ?? err),
      });
    }
  }

  // Order by lower-cased title; ties are broken by file name.
  items.sort((a, b) => {
    const at = (a.title ?? a.fileName).toLowerCase();
    const bt = (b.title ?? b.fileName).toLowerCase();
    if (at < bt) return -1;
    if (at > bt) return 1;
    return a.fileName.toLowerCase().localeCompare(b.fileName.toLowerCase());
  });

  await fs.mkdir(outDir, { recursive: true });
  await fs.writeFile(outFile, JSON.stringify({ generatedAt: new Date().toISOString(), items }, null, 2) + "\n", "utf8");

  console.log(`Generated ${items.length} paper entries -> ${path.relative(repoRoot, outFile)}`);
};
145+
146+
await main();

0 commit comments

Comments
 (0)