Skip to content

Commit 6cbcd8a

Browse files
committed
feat: handle metas changing case
1 parent 928d39c commit 6cbcd8a

File tree

3 files changed

+122
-20
lines changed

3 files changed

+122
-20
lines changed

README.md

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,31 @@ When a URL is submitted via the `/generate` endpoint:
104104
git clone <repository-url>
105105
cd prerender-url-shortener
106106
```
107-
2. **Create a `.env` file** in the project root with your configuration. See `.env.example` for a template (if one exists, otherwise define the following):
107+
2. **Create a `.env` file** in the project root with your configuration. See `.env.example` for a template (if one exists). Current configuration options:
108108

109109
```env
110+
# Required
110111
DATABASE_URL="postgres://user:password@host:port/dbname?sslmode=disable"
111-
SERVER_PORT=":8080" # Optional, defaults to :8080
112-
ALLOWED_DOMAINS="example.com,another.org" # Optional, comma-separated, empty means allow all
113-
ROD_BIN_PATH="" # Optional, path to Chrome/Chromium binary if not in system PATH or for specific version
114-
RENDER_WORKER_COUNT="3" # Optional, number of background rendering workers, defaults to 3
112+
113+
# Server
114+
SERVER_PORT=":8080" # Optional, default :8080
115+
ALLOWED_DOMAINS="example.com,another.org" # Optional, comma-separated; empty means allow all
116+
117+
# Renderer / Rod
118+
ROD_BIN_PATH="" # Optional, path to Chrome/Chromium binary
119+
RENDER_WORKER_COUNT="3" # Optional, default 3 workers
120+
RENDER_TIMEOUT_SECONDS="90" # Optional, default 90s overall per-page timeout
121+
122+
# Prerender stabilization
123+
META_WAIT_TIMEOUT_SECONDS="20" # Optional, default 20s max wait for meta stabilization
124+
META_STABLE_CONSECUTIVE_CHECKS="3" # Optional, default 3 equal reads for stability
125+
PRERENDER_READY_MARKER="data-prerender-ready=\"true\"" # Optional marker that, if present in HTML, ends waiting early
115126
```
116127

128+
Notes:
129+
- PRERENDER_READY_MARKER: keep the default to use marker-based early exit, or set it to an empty value to disable it. Even with the marker disabled, the renderer still returns once og:image stabilizes, or after a brief loop if no og:image is present.
130+
- Without a ready marker, behavior is generic: the renderer waits for page load and network idle, then loops for og:image stabilization only if an og:image tag exists.
131+
117132
3. **Install dependencies:**
118133
```bash
119134
go mod tidy
@@ -167,4 +182,4 @@ docker buildx build --platform linux/amd64,linux/arm64 -t <your_username>/preren
167182

168183
# For a local build (current platform only)
169184
docker build -t prerender-url-shortener .
170-
```
185+
```

internal/config/config.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,16 @@ import (
1111
// Config holds the application configuration.
1212
// We'll use struct tags for environment variable loading.
1313
type Config struct {
14-
ServerPort string `env:"SERVER_PORT,default=:8080"`
15-
DatabaseURL string `env:"DATABASE_URL,required"`
16-
RodBinPath string `env:"ROD_BIN_PATH"` // Optional, if not in default PATH
17-
AllowedDomains string `env:"ALLOWED_DOMAINS"` // Comma-separated list of allowed domains
18-
RenderWorkerCount int `env:"RENDER_WORKER_COUNT,default=3"` // Number of render workers
19-
RenderTimeoutSeconds int `env:"RENDER_TIMEOUT_SECONDS,default=90"` // Timeout for Rod rendering in seconds
14+
ServerPort string `env:"SERVER_PORT,default=:8080"`
15+
DatabaseURL string `env:"DATABASE_URL,required"`
16+
RodBinPath string `env:"ROD_BIN_PATH"` // Optional, if not in default PATH
17+
AllowedDomains string `env:"ALLOWED_DOMAINS"` // Comma-separated list of allowed domains
18+
RenderWorkerCount int `env:"RENDER_WORKER_COUNT,default=3"` // Number of render workers
19+
RenderTimeoutSeconds int `env:"RENDER_TIMEOUT_SECONDS,default=90"` // Timeout for Rod rendering in seconds
20+
// Prerender stabilization settings
21+
MetaWaitTimeoutSeconds int `env:"META_WAIT_TIMEOUT_SECONDS,default=20"` // Max time to wait for metas to stabilize
22+
MetaStableConsecutiveChecks int `env:"META_STABLE_CONSECUTIVE_CHECKS,default=3"` // Number of consecutive equal reads to consider stable
23+
PrerenderReadyMarker string `env:"PRERENDER_READY_MARKER,default=data-prerender-ready=\"true\""` // String to search in HTML as a ready signal
2024
}
2125

2226
var AppConfig *Config
@@ -35,7 +39,10 @@ func LoadConfig() error {
3539
AppConfig.RodBinPath = getEnv("ROD_BIN_PATH", "")
3640
AppConfig.AllowedDomains = getEnv("ALLOWED_DOMAINS", "") // Empty means allow all
3741
AppConfig.RenderWorkerCount = getEnvInt("RENDER_WORKER_COUNT", 3)
38-
AppConfig.RenderTimeoutSeconds = getEnvInt("RENDER_TIMEOUT_SECONDS", 90)
42+
AppConfig.RenderTimeoutSeconds = getEnvInt("RENDER_TIMEOUT_SECONDS", 90)
43+
AppConfig.MetaWaitTimeoutSeconds = getEnvInt("META_WAIT_TIMEOUT_SECONDS", 20)
44+
AppConfig.MetaStableConsecutiveChecks = getEnvInt("META_STABLE_CONSECUTIVE_CHECKS", 3)
45+
AppConfig.PrerenderReadyMarker = getEnv("PRERENDER_READY_MARKER", "data-prerender-ready=\"true\"")
3946

4047
if AppConfig.DatabaseURL == "" {
4148
log.Fatal("DATABASE_URL environment variable is required")

internal/renderer/renderer.go

Lines changed: 87 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"fmt"
66
"log"
77
"prerender-url-shortener/internal/config"
8+
"regexp"
9+
"strings"
810
"time"
911

1012
"github.com/go-rod/rod"
@@ -102,7 +104,7 @@ func renderWithRod(url string) (string, error) {
102104
return "", fmt.Errorf("failed to create page for %s: %w", url, err)
103105
}
104106
log.Printf("Rod: Page created successfully for URL: %s", url)
105-
//nolint:errcheck
107+
106108
defer func() {
107109
log.Printf("Rod: Closing page for URL: %s", url)
108110
page.MustClose() // MustClose panics on error, no return to check.
@@ -121,7 +123,6 @@ func renderWithRod(url string) (string, error) {
121123
// Wait for network to be almost idle, this is a good indicator for SPAs
122124
// Using a timeout to prevent indefinite blocking
123125
log.Printf("Rod: Waiting for network to be almost idle for URL: %s (timeout: 30s)", url)
124-
//nolint:errcheck
125126
page.Timeout(30 * time.Second).WaitNavigation(proto.PageLifecycleEventNameNetworkAlmostIdle)()
126127
log.Printf("Rod: Network almost idle wait completed for URL: %s", url)
127128

@@ -130,12 +131,91 @@ func renderWithRod(url string) (string, error) {
130131
time.Sleep(2 * time.Second)
131132
log.Printf("Rod: Additional wait completed for URL: %s", url)
132133

133-
log.Printf("Rod: Extracting HTML content for URL: %s", url)
134-
html, err := page.HTML()
134+
// Wait for metas (e.g., og:image) to reach their final state or a ready marker
135+
log.Printf("Rod: Waiting for meta stabilization for URL: %s", url)
136+
finalHTML, err := waitForMetaFinalization(page)
135137
if err != nil {
136-
return "", fmt.Errorf("failed to get HTML content for %s: %w", url, err)
138+
log.Printf("Rod: Meta stabilization wait ended with error for URL: %s: %v. Returning current HTML.", url, err)
139+
// Best-effort fallback to current HTML
140+
html, hErr := page.HTML()
141+
if hErr != nil {
142+
return "", fmt.Errorf("failed to get HTML content after meta wait for %s: %w", url, hErr)
143+
}
144+
return html, nil
137145
}
138-
log.Printf("Rod: Successfully extracted HTML content for URL: %s (length: %d characters)", url, len(html))
146+
log.Printf("Rod: Meta stabilization complete for URL: %s (length: %d characters)", url, len(finalHTML))
147+
return finalHTML, nil
148+
}
149+
150+
// waitForMetaFinalization polls the page HTML until either:
151+
// - The configured ready marker is present in the HTML, or
152+
// - The og:image meta content is stable for N consecutive checks (only if an og:image exists),
153+
// or times out based on config.
154+
func waitForMetaFinalization(page *rod.Page) (string, error) {
155+
timeout := time.Duration(config.AppConfig.MetaWaitTimeoutSeconds) * time.Second
156+
stableTarget := config.AppConfig.MetaStableConsecutiveChecks
157+
if stableTarget < 1 {
158+
stableTarget = 1
159+
}
160+
161+
// Regex to extract meta content attributes
162+
// Matches: <meta property="og:image" ... content="...">
163+
// Note: We intentionally ignore twitter:image for stabilization. The renderer remains generic
164+
// and will only loop for stability when an og:image is present.
165+
ogRe := regexp.MustCompile(`(?i)<meta[^>]+property=["']og:image["'][^>]*content=["']([^"']+)["'][^>]*>`) //nolint:lll
166+
167+
deadline := time.Now().Add(timeout)
168+
ticker := time.NewTicker(500 * time.Millisecond)
169+
defer ticker.Stop()
170+
171+
var lastOG string
172+
stableCount := 0
173+
emptyCount := 0
174+
175+
readyMarker := strings.TrimSpace(config.AppConfig.PrerenderReadyMarker)
176+
177+
for {
178+
// Fetch current HTML
179+
html, err := page.HTML()
180+
if err != nil {
181+
return "", fmt.Errorf("failed to get HTML during meta wait: %w", err)
182+
}
183+
// html captured for checks below
184+
185+
// If a custom ready marker is present, we're done
186+
if readyMarker != "" && strings.Contains(html, readyMarker) {
187+
return html, nil
188+
}
139189

140-
return html, nil
190+
// Extract og:image
191+
og := ""
192+
if m := ogRe.FindStringSubmatch(html); len(m) > 1 {
193+
og = m[1]
194+
}
195+
196+
// Only loop for stability when og:image exists
197+
if og != "" {
198+
if og == lastOG {
199+
stableCount++
200+
} else {
201+
stableCount = 1
202+
}
203+
lastOG = og
204+
if stableCount >= stableTarget {
205+
return html, nil
206+
}
207+
} else {
208+
// If no og:image is present repeatedly, don't wait the full timeout.
209+
// This keeps the renderer generic while avoiding long waits for pages without OG metadata.
210+
emptyCount++
211+
if emptyCount >= 3 { // ~1.5s given 500ms tick
212+
return html, nil
213+
}
214+
}
215+
216+
if time.Now().After(deadline) {
217+
return html, fmt.Errorf("meta wait timeout after %v", timeout)
218+
}
219+
<-ticker.C
220+
}
141221
}

0 commit comments

Comments
 (0)