Skip to content

Commit 62b916d

Browse files
authored
Merge pull request #23 from words/fix/add-missing-build-script
fix: add missing build script that was gitignored
2 parents 2026e60 + 4e10753 commit 62b916d

File tree

2 files changed

+161
-1
lines changed

2 files changed

+161
-1
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
node_modules
22
dist/
3-
scripts/build.js

scripts/build.js

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env node
2+
3+
const fs = require('fs')
4+
const path = require('path')
5+
const http = require('http')
6+
const puppeteer = require('puppeteer')
7+
8+
const app = require('../server.js')
9+
10+
const BUILD_DIR = path.join(__dirname, '../dist')
11+
const PORT = process.env.BUILD_PORT || 3456
12+
13+
// Get all words from the thesaurus data
14+
// Collect the unique headwords from the thesaurus data file.
// Each non-blank line of words.txt is expected to start with
// "headword," — the token before the first comma is the headword.
function getAllWords () {
  const raw = fs.readFileSync(path.join(__dirname, '../words.txt'), 'utf8')
  const unique = new Set()

  for (const line of raw.split('\n')) {
    if (!line.trim()) continue

    const match = line.match(/^([\w-]+),/)
    if (match) {
      unique.add(match[1])
    }
  }

  return Array.from(unique)
}
29+
30+
// Create output directory
31+
// Recreate the output directory from scratch, then seed it with the
// static assets from ../public.
function setupBuildDir () {
  // force: true makes removal a no-op when the directory is absent,
  // avoiding the exists-then-remove race of a separate existsSync check.
  fs.rmSync(BUILD_DIR, { recursive: true, force: true })
  fs.mkdirSync(BUILD_DIR, { recursive: true })

  // Recursively copy a directory tree (files and subdirectories).
  function copyDir (src, dest) {
    // recursive: true also makes this a no-op when dest already exists.
    fs.mkdirSync(dest, { recursive: true })

    for (const item of fs.readdirSync(src)) {
      const srcPath = path.join(src, item)
      const destPath = path.join(dest, item)

      if (fs.statSync(srcPath).isDirectory()) {
        copyDir(srcPath, destPath)
      } else {
        fs.copyFileSync(srcPath, destPath)
      }
    }
  }

  copyDir(path.join(__dirname, '../public'), BUILD_DIR)
}
61+
62+
// Scrape a single page
63+
// Render one server route in the headless browser and write the
// resulting HTML to outputPath, rewriting absolute URLs so the page
// works when served as plain static files.
//
// Errors are logged and swallowed on purpose: one bad page must not
// abort the whole build.
async function scrapePage (page, url, outputPath) {
  try {
    await page.goto(url, {
      waitUntil: 'domcontentloaded',
      timeout: 10000
    })
    const html = await page.content()

    // Ensure the destination directory exists (recursive => no-op if it does).
    const dir = path.dirname(outputPath)
    fs.mkdirSync(dir, { recursive: true })

    // How many directories deep the page sits below BUILD_DIR. Computed
    // with path.relative/path.sep so it also works with Windows
    // separators, which the old '/'-splitting did not.
    const relDir = path.relative(BUILD_DIR, dir)
    const depth = relDir === '' ? 0 : relDir.split(path.sep).length
    const prefix = depth > 0 ? '../'.repeat(depth) : './'

    // Order matters: the specific rules must run before the generic
    // "strip leading slash" rules, because each rule consumes the
    // leading "/" the later ones match on. (Previously the generic
    // href rule ran first, so the index.html rewrite never fired.)
    // The index.html rewrite only applies to extensionless paths
    // (word pages); asset links like /styles.css keep a plain prefix.
    const fixedHtml = html
      .replace(/action="\/search"/g, `action="${prefix}search.html"`)
      .replace(/href="\/([^".]+)"/g, `href="${prefix}$1/index.html"`)
      .replace(/href="\//g, `href="${prefix}`)
      .replace(/src="\//g, `src="${prefix}`)

    fs.writeFileSync(outputPath, fixedHtml)
  } catch (error) {
    // Best-effort: log and keep going so one bad page doesn't kill the build.
    console.error(`Error scraping ${url}:`, error.message)
  }
}
93+
94+
// Generate the static site: serve the app on a local HTTP server, then
// crawl the homepage and every word page with puppeteer, writing the
// rendered HTML into BUILD_DIR.
//
// On failure this sets process.exitCode = 1 (instead of calling
// process.exit), so the finally block can still release the browser
// and the server before the process terminates non-zero.
async function build () {
  console.log('Starting static site generation...')

  // Fresh output directory with static assets copied in.
  setupBuildDir()

  // Serve the app so puppeteer has something to crawl.
  const server = http.createServer(app)
  server.listen(PORT)
  console.log(`Server running on port ${PORT}`)

  let browser
  try {
    // Launch puppeteer with flags that keep it working in CI containers.
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
    })
    const page = await browser.newPage()

    // Block stylesheet/image/font requests — only the HTML is needed,
    // and skipping them speeds up every page load.
    await page.setRequestInterception(true)
    page.on('request', (req) => {
      const type = req.resourceType()
      if (type === 'stylesheet' || type === 'image' || type === 'font') {
        req.abort()
      } else {
        req.continue()
      }
    })

    const baseUrl = `http://localhost:${PORT}`

    // Homepage first, then one page per word.
    await scrapePage(page, baseUrl, path.join(BUILD_DIR, 'index.html'))

    const words = getAllWords()
    console.log(`Found ${words.length} words to generate pages for`)

    let processed = 0
    for (const word of words) {
      const url = `${baseUrl}/${encodeURIComponent(word)}`
      const outputPath = path.join(BUILD_DIR, word, 'index.html')

      await scrapePage(page, url, outputPath)

      processed++
      if (processed % 500 === 0) {
        console.log(`Progress: ${processed}/${words.length} (${Math.round(processed / words.length * 100)}%)`)
      }
    }

    console.log('Static site generation complete!')
  } catch (error) {
    console.error('Build failed:', error)
    process.exitCode = 1
  } finally {
    // Always release the browser and the HTTP server, even on failure
    // (the browser was previously leaked when an error occurred).
    await browser?.close()
    server.close()
  }
}
156+
157+
// Run the build when invoked directly (`node scripts/build.js`).
// The .catch handles rejections thrown before build()'s own try block
// (e.g. from setupBuildDir), which would otherwise be an unhandled
// promise rejection.
if (require.main === module) {
  build().catch((error) => {
    console.error('Build failed:', error)
    process.exitCode = 1
  })
}

module.exports = { build }

0 commit comments

Comments
 (0)