-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape.js
More file actions
98 lines (84 loc) · 3.05 KB
/
Copy pathscrape.js
File metadata and controls
98 lines (84 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env node
const { chromium } = require('playwright-extra');
const stealth = require('puppeteer-extra-plugin-stealth')();
chromium.use(stealth);
// Register number to look up: first CLI argument, or a built-in sample value.
const registernummer = process.argv[2] || 'D-21RP-R1O5O-37';

// Upper bounds, in milliseconds, for each waiting phase of the run.
const NAVIGATION_TIMEOUT_MS = 30000;
const CAPTCHA_TIMEOUT_MS = 30000;
const RESULT_TIMEOUT_MS = 15000;
const RESULT_FALLBACK_WAIT_MS = 5000;
const CAPTURE_DRAIN_TIMEOUT_MS = 5000;

/**
 * Records the JSON body of a successful `/suche/` response in `store`, keyed
 * by the URL path that follows `/suche/` (query string removed).
 *
 * Responses whose URL contains `ip-test` or `puzzle`, non-200 responses and
 * non-JSON responses are ignored. Capturing is best-effort: a body that
 * cannot be read or parsed is reported on stderr and otherwise skipped.
 *
 * @param {object} response - Playwright response passed to the `response` event.
 * @param {Object<string, any>} store - Mutable map receiving the parsed bodies.
 * @returns {Promise<void>} Settles once the body has been handled; never rejects
 *   because of a body read/parse failure.
 */
async function captureApiResponse(response, store) {
  const url = response.url();
  const isSearchCall =
    url.includes('/suche/') && !url.includes('ip-test') && !url.includes('puzzle');
  if (!isSearchCall || response.status() !== 200) {
    return;
  }
  try {
    const contentType = response.headers()['content-type'] || '';
    if (!contentType.includes('json')) {
      return;
    }
    const data = await response.json();
    const endpoint = url.split('/suche/')[1].split('?')[0];
    store[endpoint] = data;
    console.error(`Captured: ${endpoint}`);
  } catch (err) {
    // Best-effort: a failed capture must not abort the run, but leave a trace.
    console.error(`Capture failed for ${url}: ${err.message}`);
  }
}

/**
 * Waits until every promise in `promises` has settled, or until `ms`
 * milliseconds have elapsed, whichever comes first.
 *
 * @param {Promise<unknown>[]} promises - Promises to wait for.
 * @param {number} ms - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
async function settleWithin(promises, ms) {
  let timer;
  const deadline = new Promise((resolve) => {
    timer = setTimeout(resolve, ms);
  });
  try {
    await Promise.race([Promise.allSettled(promises), deadline]);
  } finally {
    // Clear the timer so it cannot keep the process alive after an early finish.
    clearTimeout(timer);
  }
}

/**
 * Entry point: opens the search page, submits `registernummer`, and prints a
 * JSON document to stdout with the captured `/suche/` API responses (`api`)
 * and, when present, the key/value pairs scraped from the result markup (`dom`).
 *
 * Progress and errors go to stderr. On failure the exit code is set to 1 via
 * `process.exitCode` rather than `process.exit()`, so the `finally` block still
 * runs and the browser is always closed.
 */
(async () => {
  let browser;
  try {
    browser = await chromium.launch({
      headless: true,
      args: ['--disable-blink-features=AutomationControlled', '--no-sandbox'],
    });
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      viewport: { width: 1920, height: 1080 },
    });
    const page = await context.newPage();

    const apiResponses = {};
    // Track in-flight captures so they can be awaited before output is written.
    const pendingCaptures = [];
    page.on('response', (response) => {
      pendingCaptures.push(captureApiResponse(response, apiResponses));
    });

    await page.goto('https://www.vermittlerregister.info/recherche', {
      waitUntil: 'networkidle',
      timeout: NAVIGATION_TIMEOUT_MS,
    });

    // Remove the `#klaro` element (presumably a consent overlay - confirm) and
    // un-hide the `#frc-widget` captcha container so it can be interacted with.
    await page.evaluate(() => {
      const k = document.getElementById('klaro');
      if (k) k.remove();
      const w = document.getElementById('frc-widget');
      if (w) w.classList.remove('hidden');
    });

    await page.fill('#registernummer', registernummer);

    // Start the captcha by clicking the widget's own button, if it exists.
    await page.evaluate(() => {
      const btn = document.querySelector('#frc-widget .frc-button');
      if (btn) btn.click();
    });

    const start = Date.now();
    try {
      // Treat the captcha as solved once the solution field holds a value longer
      // than 20 characters that does not start with '.' (presumably '.'-prefixed
      // values are placeholders/status markers - confirm against the widget).
      await page.waitForFunction(() => {
        const el = document.querySelector('[name="frc-captcha-solution"]');
        return el && el.value && el.value.length > 20 && !el.value.startsWith('.');
      }, { timeout: CAPTCHA_TIMEOUT_MS });
      console.error(`Captcha: ${((Date.now() - start) / 1000).toFixed(1)}s`);
    } catch (e) {
      console.error('Captcha failed');
      process.exitCode = 1;
      return; // `finally` below still closes the browser.
    }

    await page.click('button[type="submit"]', { force: true });

    // Wait for results; if the result element never shows up, give the page a
    // fixed grace period instead so API responses can still be captured.
    try {
      await page.waitForSelector('.result-single', { timeout: RESULT_TIMEOUT_MS });
    } catch (e) {
      await page.waitForTimeout(RESULT_FALLBACK_WAIT_MS);
    }

    // Also extract from DOM as backup
    const domData = await page.evaluate(() => {
      const result = {};
      document.querySelectorAll('.result-single dl > div').forEach((div) => {
        const dt = div.querySelector('dt');
        const dd = div.querySelector('dd');
        if (dt && dd) result[dt.textContent.trim()] = dd.textContent.trim();
      });
      return result;
    });

    // Let response bodies that are still being read finish before serialising,
    // otherwise `api` can be missing entries that were already on the wire.
    await settleWithin(pendingCaptures, CAPTURE_DRAIN_TIMEOUT_MS);

    const output = {
      registernummer,
      api: apiResponses,
      // `undefined` makes JSON.stringify omit the key when nothing was scraped.
      dom: Object.keys(domData).length > 0 ? domData : undefined,
    };
    console.log(JSON.stringify(output, null, 2));
  } catch (err) {
    console.error('Error:', err.message);
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
})();