Skip to content

add solveRecaptchaV3 #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# CF Clearance Scraper

### 请我喝杯咖啡
ETH 0x72691a36ed1fac3b197fb42612dc15a8958bf9f2


This library was created for testing and training purposes to retrieve the page source of websites, create Cloudflare Turnstile tokens and create Cloudflare WAF sessions.

Cloudflare protection not only checks cookies in the request. It also checks variables in the header. For this reason, it is recommended to use it with the sample code in this readme file.
Expand Down
14 changes: 14 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Downloads the scraper sources, builds the Docker image and starts the service
# container listening on port 3000.

# Stop at the first failing step so a failed download/unzip/cd never leads to
# building or running from the wrong directory.
set -e

curl -L https://github.com/854771076/cf-clearance-scraper/archive/refs/heads/master.zip -o cf-clearance-scraper-main.zip
unzip cf-clearance-scraper-main.zip
# GitHub branch archives unpack into "<repo>-<branch>"; the archive above is the
# master branch, so the extracted directory is cf-clearance-scraper-master.
cd cf-clearance-scraper-master

# Build the image
docker build -t captcha_cracker .

# Run the container
# NOTE(review): authToken below is a placeholder value — replace it before deploying.
docker run -d --restart unless-stopped -p 3000:3000 \
-e PORT=3000 \
-e browserLimit=20 \
-e timeOut=60000 \
-e authToken=authToken123456 \
captcha_cracker
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions src/data/axios.min.js

Large diffs are not rendered by default.

13 changes: 9 additions & 4 deletions src/data/fakePage.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title></title>
<input type="hidden" id="cf-response" name="cf-response">
<input type="hidden" id="cf-status" name="cf-status" value="0">
</head>

<body>
Expand All @@ -14,15 +16,18 @@
window.onloadTurnstileCallback = function () {
turnstile.render('.turnstile', {
sitekey: '<site-key>',

callback: function (token) {
var c = document.createElement('input');
c.type = 'hidden';
c.name = 'cf-response';
var c=document.querySelector('#cf-response');
var status=document.querySelector('#cf-status');
c.value = token;
document.body.appendChild(c);
status.value="1"


},
});
};


</script>
</body>
Expand Down
34 changes: 34 additions & 0 deletions src/data/reCAPTCHAV3.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title></title>
</head>

<body>
<script src="https://www.google.com/recaptcha/api.js?render=<reCAPTCHA_site_key>" defer></script>
<script>
// Once the page has loaded, request a reCAPTCHA v3 token for the configured site key
// and expose it in the DOM as <input id="my_token"> so it can be read via that selector.
// '<reCAPTCHA_site_key>' is a placeholder that is substituted before the page is served.
window.onload = function () {
    console.log('begin')
    grecaptcha.ready(function () {
        console.log('in')
        grecaptcha.execute('<reCAPTCHA_site_key>', { action: 'submit' }).then(function (token) {
            console.log("token:", token)
            let input = document.createElement('input');
            input.id = 'my_token';
            input.name = 'my_token';
            // `token` is the value the execute() promise resolved with.
            input.value = token;

            // Attach the input to <body> only. Appending it to `document` as well is
            // invalid (a Document may have just one element child) and throws
            // HierarchyRequestError inside this promise callback.
            document.body.appendChild(input);
        });
    });
};
</script>
</body>

</html>
34 changes: 34 additions & 0 deletions src/data/reCAPTCHAV3_enterprise.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title></title>
</head>

<body>
<script src="https://www.google.com/recaptcha/enterprise.js?render=<reCAPTCHA_site_key>" defer></script>
<script>
// Once the page has loaded, request a reCAPTCHA Enterprise token for the configured
// site key and expose it in the DOM as <input id="my_token"> so it can be read via
// that selector. '<reCAPTCHA_site_key>' is a placeholder that is substituted before
// the page is served.
window.onload = function () {
    console.log('begin')
    grecaptcha.enterprise.ready(function () {
        console.log('in')
        grecaptcha.enterprise.execute('<reCAPTCHA_site_key>', { action: 'submit' }).then(function (token) {
            console.log("token:", token)
            let input = document.createElement('input');
            input.id = 'my_token';
            input.name = 'my_token';
            // `token` is the value the execute() promise resolved with.
            input.value = token;

            // Attach the input to <body> only. Appending it to `document` as well is
            // invalid (a Document may have just one element child) and throws
            // HierarchyRequestError inside this promise callback.
            document.body.appendChild(input);
        });
    });
};
</script>
</body>

</html>
36 changes: 30 additions & 6 deletions src/endpoints/getSource.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function getSource({ url, proxy }) {
function getSource({ url, proxy,callback,axios }) {
return new Promise(async (resolve, reject) => {

if (!url) return reject('Missing url parameter')
Expand Down Expand Up @@ -34,13 +34,37 @@ function getSource({ url, proxy }) {
});
page.on('response', async (res) => {
try {
if ([200, 302].includes(res.status()) && [url, url + '/'].includes(res.url())) {
// if ([200, 302].includes(res.status()) && [url, url + '/'].includes(res.url())) {
if (true) {
await page.waitForNavigation({ waitUntil: 'load', timeout: 5000 }).catch(() => { });
if (axios){
//读取src\data\axios.min.js
const axios = await fs.readFileSync(path.join(__dirname, '../src/data/axios.min.js'), 'utf8')
//在页面中执行axios.min.js
await page.evaluate(axios)
}
const html = await page.content();
await context.close()
isResolved = true
clearInterval(cl)
resolve(html)
if(callback){
//在页面中执行callback()
await page.evaluate(eval(callback))
//等待id为status的元素加载
await page.waitForSelector('#status', { timeout: 60000 })
//获取id为status的元素的文本
const text = await page.$eval('#status', el => el.innerText)
//返回执行结果
await context.close()
isResolved = true
clearInterval(cl)
resolve(text)
}else{
await context.close()
isResolved = true
clearInterval(cl)
//控制台执行callback

resolve(html)
}

}
} catch (e) { }
})
Expand Down
81 changes: 81 additions & 0 deletions src/endpoints/getVercel.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/**
 * Reads the Accept-Language header the page's browser sends, by fetching
 * https://httpbin.org/get from inside the page and inspecting the echoed headers.
 *
 * @param {object} page - Page-like object exposing `setBypassCSP` and `evaluate`.
 * @returns {Promise<string|null|undefined>} The header value, `null` when the
 *   request or JSON parsing fails, `undefined` when the header is not echoed back.
 */
async function findAcceptLanguage(page) {
    // CSP is bypassed first so the in-page fetch to httpbin is not blocked.
    await page.setBypassCSP(true)

    // This function is handed to page.evaluate, so it must not reference
    // anything from the enclosing scope.
    const readEchoedHeader = async () => {
        const pending = fetch('https://httpbin.org/get')
        try {
            const response = await pending
            const payload = await response.json()
            const echoed = payload.headers
            return echoed['Accept-Language'] || echoed['accept-language']
        } catch (err) {
            return null
        }
    }

    return page.evaluate(readEchoedHeader)
}

/**
 * Loads `url` in a fresh browser context and, once the element with id
 * `footer-text` is no longer present in the page, returns the page cookies together
 * with the request headers of the response that completed the wait.
 *
 * @param {object} params
 * @param {string} params.url - Page to open.
 * @param {{host: string, port: (string|number), username?: string, password?: string}} [params.proxy]
 *   Optional upstream HTTP proxy applied to every request.
 * @returns {Promise<{cookies: Array, headers: object}>} Rejects with a string message
 *   ('Missing url parameter', 'Failed to create browser context', 'Timeout Error',
 *   or the message of the underlying error).
 */
function getVercel({ url, proxy }) {
    return new Promise(async (resolve, reject) => {

        if (!url) return reject('Missing url parameter')
        // Promise.resolve().then(...) also turns a synchronous throw (e.g. no browser
        // has been started yet) into the same "no context" outcome.
        const context = await Promise.resolve()
            .then(() => global.browser.createBrowserContext())
            .catch(() => null);
        if (!context) return reject('Failed to create browser context')

        let isResolved = false
        let cl = null

        // Single exit point: settles the promise once, stops the watchdog timer and
        // releases the browser context no matter which path finishes first.
        const finish = async (settle, value) => {
            if (isResolved) return
            isResolved = true
            clearTimeout(cl)
            try {
                await context.close()
            } catch (e) {
                // Best effort: a context that is already gone must not block settling.
            }
            settle(value)
        }

        cl = setTimeout(() => finish(reject, "Timeout Error"), (global.timeOut || 60000))

        try {
            // Imported inside the try so a missing/broken module rejects the promise
            // and closes the context instead of surfacing as an unhandled rejection.
            const { proxyRequest } = await import('puppeteer-proxy')

            const page = await context.newPage();
            await page.setRequestInterception(true);
            page.on('request', async (request) => {
                try {
                    if (proxy) {
                        await proxyRequest({
                            page,
                            proxyUrl: `http://${proxy.username ? `${proxy.username}:${proxy.password}@` : ""}${proxy.host}:${proxy.port}`,
                            request,
                        });
                    } else {
                        request.continue()
                    }
                } catch (e) {
                    // Deliberately ignored: a failure on one intercepted request must
                    // not abort the whole session.
                }
            });
            page.on('response', async (res) => {
                // Every response triggers this handler; skip the work once settled.
                if (isResolved) return
                try {
                    await page.waitForNavigation({ waitUntil: 'load', timeout: 5000 }).catch(() => { });
                    await page.waitForFunction(() => {
                        return document.getElementById('footer-text')==null
                    })
                    const cookies = await page.cookies()
                    // Copy before deleting keys so the object returned by the request
                    // is not modified.
                    const headers = { ...(await res.request().headers()) }
                    delete headers['content-type']
                    delete headers['accept-encoding']
                    delete headers['accept']
                    delete headers['content-length']
                    headers["accept-language"] = await findAcceptLanguage(page)
                    await finish(resolve, { cookies, headers })
                } catch (e) {
                    // Deliberately ignored: handlers still running when the context
                    // closes fail here; the watchdog timer covers the no-result case.
                }
            })


            await page.goto(url, {
                waitUntil: 'domcontentloaded'
            })
        } catch (e) {
            await finish(reject, e.message)
        }

    })
}
module.exports = getVercel
98 changes: 98 additions & 0 deletions src/endpoints/solveRecaptcha.v2.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
 * Obtains a reCAPTCHA v3 token for `siteKey` as if requested from `url`.
 *
 * The document response for `url` is replaced with the local template
 * ./src/data/reCAPTCHAV3.html (site key and, when given, action substituted in);
 * the template writes the token into an `#my_token` input which is then read back.
 *
 * @param {object} params
 * @param {string} params.url - Page URL the token should be issued for.
 * @param {{host: string, port: (string|number), username?: string, password?: string}} [params.proxy]
 *   Optional upstream HTTP proxy applied to every non-document request.
 * @param {string} params.siteKey - reCAPTCHA site key.
 * @param {string} [params.action] - reCAPTCHA action name; the template default
 *   ('submit') is used when omitted.
 * @returns {Promise<{token: string}>} Rejects with a string message
 *   ('Missing url parameter', 'Missing siteKey parameter',
 *   'Failed to create browser context', 'Timeout Error', 'Failed to get token',
 *   or the message of the underlying error).
 */
function solveRecaptcha({ url, proxy, siteKey,action }) {
    return new Promise(async (resolve, reject) => {

        if (!url) return reject('Missing url parameter')
        if (!siteKey) return reject('Missing siteKey parameter')

        // Promise.resolve().then(...) also turns a synchronous throw (e.g. no browser
        // has been started yet) into the same "no context" outcome.
        const context = await Promise.resolve()
            .then(() => global.browser.createBrowserContext())
            .catch(() => null);
        if (!context) return reject('Failed to create browser context')

        let isResolved = false
        let cl = null

        // Single exit point: settles the promise once, stops the watchdog timer and
        // releases the browser context no matter which path finishes first.
        const finish = async (settle, value) => {
            if (isResolved) return
            isResolved = true
            clearTimeout(cl)
            try {
                await context.close()
            } catch (e) {
                // Best effort: a context that is already gone must not block settling.
            }
            settle(value)
        }

        cl = setTimeout(() => finish(reject, "Timeout Error"), (global.timeOut || 60000))

        try {
            // Imported inside the try so a missing/broken module rejects the promise
            // and closes the context instead of surfacing as an unhandled rejection.
            const { proxyRequest } = await import('puppeteer-proxy')
            const { RequestInterceptionManager } = await import('puppeteer-intercept-and-modify-requests')

            // Build the replacement document once. Function replacers keep `$`
            // sequences in the inserted values from being treated as patterns.
            let fakePage = String(require('fs').readFileSync('./src/data/reCAPTCHAV3.html'))
                .replace(/<reCAPTCHA_site_key>/g, () => siteKey)
            if (action) {
                // The template hard-codes { action: 'submit' } and has no action
                // placeholder, so the literal itself is swapped for the requested
                // action. JSON.stringify yields a safe JS string literal; '<' is
                // escaped so the value cannot close the surrounding <script>.
                const actionLiteral = JSON.stringify(String(action)).replace(/</g, '\\u003c')
                fakePage = fakePage.replace(/action:\s*'submit'/g, () => `action: ${actionLiteral}`)
            }

            const page = await context.newPage();
            const client = await page.target().createCDPSession()
            const interceptManager = new RequestInterceptionManager(client)

            await page.setRequestInterception(true);
            page.on('request', async (request) => {
                try {
                    if ([url, url + '/'].includes(request.url())) return request.abort()

                    if (proxy) {
                        await proxyRequest({
                            page,
                            proxyUrl: `http://${proxy.username ? `${proxy.username}:${proxy.password}@` : ""}${proxy.host}:${proxy.port}`,
                            request,
                        });
                    } else {
                        request.continue()
                    }
                } catch (e) {
                    // Deliberately ignored: a failure on one intercepted request must
                    // not abort the whole session.
                }
            });

            await interceptManager.intercept(
                {
                    urlPattern: url,
                    resourceType: 'Document',
                    modifyResponse({ body }) {
                        return {
                            body: fakePage,
                            status: 200
                        }
                    },
                }
            )

            await page.goto(url, {
                waitUntil: 'domcontentloaded'
            })
            await page.waitForSelector('.grecaptcha-badge', {
                timeout: 60000
            })

            await page.waitForSelector('iframe', {
                timeout: 60000
            });
            const iframes = await page.$$('iframe');
            // NOTE(review): assumes the first iframe on the page is the reCAPTCHA frame.
            const iframe1 = await iframes[0].contentFrame();
            await iframe1.waitForSelector('#recaptcha-token', {
                timeout: 60000
            });
            await page.waitForSelector('#my_token', {
                timeout: 60000
            });
            const token = await page.evaluate(() => {
                try {
                    return document.querySelector('#my_token').value
                } catch (e) {
                    return null
                }
            })
            if (!token || token.length < 10) return finish(reject, 'Failed to get token')
            return finish(resolve, {token:token})
        } catch (e) {
            await finish(reject, e.message)
        }

    })
}
module.exports = solveRecaptcha
Loading