-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbrowser.js
More file actions
194 lines (179 loc) · 5.56 KB
/
browser.js
File metadata and controls
194 lines (179 loc) · 5.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
// Browser abstraction layer using browser-commander for all browser operations
// See: https://github.com/link-foundation/browser-commander
import { launchBrowser } from 'browser-commander';
import os from 'os';
import path from 'path';
/**
* Additional Chrome args needed for headless server environments
* Passed as the `args` launch option by createBrowser().
* NOTE(review): the original comment states these are appended to
* browser-commander's default CHROME_ARGS; that merging happens inside
* browser-commander and is not visible in this file - confirm.
*/
const SERVER_CHROME_ARGS = [
// Run Chrome without its sandbox
'--no-sandbox',
// Also skip the setuid sandbox helper
'--disable-setuid-sandbox',
// Avoid relying on /dev/shm for shared memory
'--disable-dev-shm-usage',
];
/**
* Unified browser interface that works with both Puppeteer and Playwright
* @typedef {Object} BrowserAdapter
* @property {Function} newPage - Create a new page (resolves to a PageAdapter)
* @property {Function} close - Close the browser
* @property {string} type - Browser type ('puppeteer' or 'playwright')
* @property {Object} _browser - Original browser object
*/
/**
* Unified page interface
* @typedef {Object} PageAdapter
* @property {Function} setExtraHTTPHeaders - Set HTTP headers
* @property {Function} setUserAgent - Set user agent
* @property {Function} setViewport - Set viewport size
* @property {Function} goto - Navigate to URL
* @property {Function} content - Get page HTML content
* @property {Function} screenshot - Take screenshot
* @property {Function} close - Close the page
* @property {Object} _page - Original page object
* @property {string} _type - Browser type
*/
/**
 * Create a browser instance using the specified engine
 * Uses browser-commander's launchBrowser for both Puppeteer and Playwright
 *
 * Launch defaults (headless, SERVER_CHROME_ARGS, a per-session userDataDir,
 * slowMo disabled) are applied first and then overridden by anything in
 * `options`. Note that a caller-supplied `options.args` therefore REPLACES
 * SERVER_CHROME_ARGS rather than extending it.
 *
 * The generated userDataDir lives under os.tmpdir() and is not removed by
 * this module.
 *
 * @param {string} engine - 'puppeteer' or 'playwright' (alias 'pw'); any
 *   other value, including null, falls back to puppeteer
 * @param {Object} options - Browser launch options forwarded to launchBrowser
 * @returns {Promise<BrowserAdapter>}
 * @throws Rethrows any error raised while launching the browser or while
 *   closing its initial page (the browser is closed first in the latter case)
 */
export async function createBrowser(engine = 'puppeteer', options = {}) {
  // The default parameter only covers `undefined`; coerce so that null or a
  // non-string engine falls back to puppeteer instead of throwing.
  const normalizedEngine = String(engine == null ? '' : engine)
    .trim()
    .toLowerCase();
  const engineType =
    normalizedEngine === 'playwright' || normalizedEngine === 'pw'
      ? 'playwright'
      : 'puppeteer';

  // Generate unique userDataDir for this session to avoid conflicts.
  // Date.now() alone collides when two browsers are launched within the same
  // millisecond, so the process id and a random suffix are added as well.
  const uniqueSuffix = [
    Date.now(),
    process.pid,
    Math.random().toString(36).slice(2, 10),
  ].join('-');
  const userDataDir = path.join(
    os.tmpdir(),
    `web-capture-${engineType}-${uniqueSuffix}`
  );

  // Use browser-commander's launchBrowser with server-specific args
  // Default to headless for server environments
  const { browser, page } = await launchBrowser({
    engine: engineType,
    args: SERVER_CHROME_ARGS,
    headless: true,
    userDataDir,
    slowMo: 0, // Disable slowMo for server operations
    ...options,
  });

  // Close the initial page since we'll create new ones via newPage()
  try {
    await page.close();
  } catch (error) {
    // The caller never receives the adapter on this path, so nobody else
    // could close the browser: shut it down here to avoid leaking a process.
    try {
      await browser.close();
    } catch (closeError) {
      // Best-effort cleanup; the original failure below is the one to report.
    }
    throw error;
  }

  const pageAdapter =
    engineType === 'playwright'
      ? createPlaywrightPageAdapter
      : createPuppeteerPageAdapter;

  return {
    async newPage() {
      const newPage = await browser.newPage();
      return pageAdapter(newPage);
    },
    async close() {
      await browser.close();
    },
    type: engineType,
    _browser: browser,
  };
}
/**
 * Wrap a Puppeteer page in the unified PageAdapter interface
 *
 * Every adapter method forwards to the Puppeteer method of the same name.
 * Only content() and screenshot() resolve with the underlying result; the
 * remaining methods resolve with undefined.
 *
 * @param {Object} page - Puppeteer page object
 * @returns {PageAdapter}
 */
function createPuppeteerPageAdapter(page) {
  const setExtraHTTPHeaders = async (headers) => {
    await page.setExtraHTTPHeaders(headers);
  };

  const setUserAgent = async (userAgent) => {
    await page.setUserAgent(userAgent);
  };

  const setViewport = async (viewport) => {
    await page.setViewport(viewport);
  };

  // The navigation result is intentionally not returned to the caller
  const goto = async (url, options = {}) => {
    await page.goto(url, options);
  };

  const content = async () => {
    const html = await page.content();
    return html;
  };

  const screenshot = async (options = {}) => {
    const image = await page.screenshot(options);
    return image;
  };

  const close = async () => {
    await page.close();
  };

  return {
    setExtraHTTPHeaders,
    setUserAgent,
    setViewport,
    goto,
    content,
    screenshot,
    close,
    _page: page,
    _type: 'puppeteer',
  };
}
/**
 * Create a page adapter for Playwright
 *
 * Translates the Puppeteer-flavoured PageAdapter calls into their Playwright
 * equivalents:
 *  - setUserAgent is emulated with a 'User-Agent' extra HTTP header, because
 *    the Playwright page has no setUserAgent method. This affects only the
 *    request header.
 *  - setViewport maps to setViewportSize.
 *  - goto rewrites Puppeteer's 'networkidle0' / 'networkidle2' waitUntil
 *    values to Playwright's 'networkidle'.
 *
 * Because the user agent and the extra headers share one underlying
 * setExtraHTTPHeaders call (which replaces the whole header set), the adapter
 * remembers both and always sends the merged result, so neither call
 * discards what the other one configured.
 *
 * @param {Object} page - Playwright page object
 * @returns {PageAdapter}
 */
function createPlaywrightPageAdapter(page) {
  // Last values requested through the adapter; re-sent together on each change
  let extraHeaders = {};
  let userAgentOverride; // undefined means "no override requested"

  // Push the combined header set to the page
  const applyHeaders = async () => {
    const merged = { ...extraHeaders };
    if (userAgentOverride !== undefined) {
      // An explicit setUserAgent() wins over any user-agent header (in any
      // letter case) supplied through setExtraHTTPHeaders()
      for (const key of Object.keys(merged)) {
        if (key.toLowerCase() === 'user-agent') {
          delete merged[key];
        }
      }
      merged['User-Agent'] = userAgentOverride;
    }
    await page.setExtraHTTPHeaders(merged);
  };

  return {
    async setExtraHTTPHeaders(headers) {
      // Copy so later mutation of the caller's object cannot leak in
      extraHeaders = { ...headers };
      await applyHeaders();
    },
    async setUserAgent(userAgent) {
      // Playwright doesn't have page.setUserAgent, use extraHTTPHeaders instead
      userAgentOverride = userAgent;
      await applyHeaders();
    },
    async setViewport(viewport) {
      // Playwright uses setViewportSize instead of setViewport
      await page.setViewportSize(viewport);
    },
    async goto(url, options = {}) {
      // Convert Puppeteer waitUntil options to Playwright equivalents.
      // Playwright has a single 'networkidle' state, so both Puppeteer
      // variants map onto it. The caller's options object is not mutated.
      const playwrightOptions = { ...options };
      if (
        playwrightOptions.waitUntil === 'networkidle0' ||
        playwrightOptions.waitUntil === 'networkidle2'
      ) {
        playwrightOptions.waitUntil = 'networkidle';
      }
      await page.goto(url, playwrightOptions);
    },
    async content() {
      return await page.content();
    },
    async screenshot(options = {}) {
      return await page.screenshot(options);
    },
    async close() {
      await page.close();
    },
    _page: page,
    _type: 'playwright',
  };
}
/**
 * Map a user-supplied engine name onto a canonical engine identifier
 *
 * Accepts 'playwright' / 'pw' and 'puppeteer' / 'pptr', ignoring letter case
 * and surrounding whitespace. When given an array (as Express produces for a
 * repeated query parameter) the first element is used.
 *
 * @param {*} value - Raw value from a query string or environment variable
 * @returns {string|null} 'playwright', 'puppeteer', or null if unrecognized
 */
function normalizeEngineName(value) {
  const candidate = Array.isArray(value) ? value[0] : value;
  // Nested objects, numbers, undefined, etc. are simply "not an engine name"
  if (typeof candidate !== 'string') {
    return null;
  }
  switch (candidate.trim().toLowerCase()) {
    case 'playwright':
    case 'pw':
      return 'playwright';
    case 'puppeteer':
    case 'pptr':
      return 'puppeteer';
    default:
      return null;
  }
}

/**
 * Get the browser engine from query parameters or environment variable
 *
 * Resolution order:
 *  1. `engine` query parameter, or `browser` when `engine` is absent/empty
 *  2. `BROWSER_ENGINE` environment variable
 *  3. 'puppeteer' (default, for backward compatibility)
 *
 * An unrecognized value at one step falls through to the next step. Query
 * values come from the client, so non-string shapes (arrays, objects) are
 * tolerated rather than allowed to throw.
 *
 * @param {Object} req - Express request object
 * @returns {string} - 'puppeteer' or 'playwright'
 */
export function getBrowserEngine(req) {
  // Check query parameter first
  const query = (req && req.query) || {};
  const fromQuery = normalizeEngineName(query.engine || query.browser);
  if (fromQuery) {
    return fromQuery;
  }

  // Check environment variable (same aliases as the query parameter)
  const fromEnv = normalizeEngineName(process.env.BROWSER_ENGINE);
  if (fromEnv) {
    return fromEnv;
  }

  // Default to puppeteer for backward compatibility
  return 'puppeteer';
}