Skip to content

Commit 942fab8

Browse files
committed
feat: Enhance browser setup and HTML retrieval with user agent support and cache bypass option
1 parent 2781202 commit 942fab8

2 files changed

Lines changed: 50 additions & 8 deletions

File tree

cf_bypasser/core/bypasser.py

Lines changed: 47 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -52,18 +52,37 @@ def parse_proxy(self, proxy: str) -> Optional[Dict[str, str]]:
5252
self.log_message(f"Error parsing proxy {proxy}: {e}")
5353
return None
5454

55-
async def setup_browser(self, proxy: Optional[str] = None, lang: str = "en") -> tuple:
55+
async def setup_browser(self, proxy: Optional[str] = None, lang: str = "en", user_agent: Optional[str] = None) -> tuple:
5656
"""Setup Camoufox browser with random OS and configuration. Returns (browser, context, page)."""
5757
# Clear expired cache entries
5858
self.cookie_cache.clear_expired()
5959

60-
# Randomly choose an OS
61-
selected_os = random.choice(OPERATING_SYSTEMS)
60+
# Determine OS from user_agent if provided, otherwise random
61+
selected_os = None
62+
if user_agent:
63+
ua_lower = user_agent.lower()
64+
if "windows" in ua_lower:
65+
selected_os = "windows"
66+
elif "macintosh" in ua_lower or "mac os" in ua_lower:
67+
selected_os = "macos"
68+
elif "linux" in ua_lower or "x11" in ua_lower:
69+
selected_os = "linux"
70+
71+
if not selected_os:
72+
selected_os = random.choice(OPERATING_SYSTEMS)
73+
6274
self.log_message(f"Using OS: {selected_os}")
6375

6476
# Generate random config for the selected OS
6577
random_config = BrowserConfig.generate_random_config(selected_os, lang=lang)
66-
self.log_message(f"Generated config with UA: {random_config.get('navigator.userAgent', 'N/A')}")
78+
79+
# Override user agent if provided
80+
if user_agent:
81+
random_config['navigator.userAgent'] = user_agent
82+
self.log_message(f"Using provided User-Agent: {user_agent}")
83+
else:
84+
self.log_message(f"Generated config with UA: {random_config.get('navigator.userAgent', 'N/A')}")
85+
6786
self.log_message(f"Screen resolution: {random_config['window.outerWidth']}x{random_config['window.outerHeight']}")
6887

6988
# Setup proxy configuration if provided
@@ -261,7 +280,7 @@ async def get_or_generate_cookies(self, url: str, proxy: Optional[str] = None) -
261280
finally:
262281
await self.cleanup_browser(browser, context, page)
263282

264-
async def get_or_generate_html(self, url: str, proxy: Optional[str] = None) -> Optional[Dict[str, Any]]:
283+
async def get_or_generate_html(self, url: str, proxy: Optional[str] = None, bypass_cache: bool = False) -> Optional[Dict[str, Any]]:
265284
"""Get HTML content along with cookies (cached or fresh)."""
266285
hostname = urlparse(url).netloc
267286
cache_key = md5_hash(hostname + proxy if proxy else "")
@@ -270,14 +289,36 @@ async def get_or_generate_html(self, url: str, proxy: Optional[str] = None) -> O
270289
# even if we have cached cookies, as HTML content may change
271290
self.log_message(f"Getting HTML content for {url}...")
272291

292+
cached_cookies = None
293+
cached_ua = None
294+
295+
if not bypass_cache:
296+
cached = self.cookie_cache.get(cache_key)
297+
if cached:
298+
cached_cookies = cached.cookies
299+
cached_ua = cached.user_agent
300+
self.log_message(f"Found cached cookies for {url}")
301+
273302
# Create isolated browser instance
274303
browser = None
275304
context = None
276305
page = None
277306

278307
try:
279308
# Setup browser and solve challenge
280-
browser, context, page = await self.setup_browser(proxy)
309+
browser, context, page = await self.setup_browser(proxy, user_agent=cached_ua)
310+
311+
if cached_cookies:
312+
self.log_message("Restoring cached cookies...")
313+
# Convert dict to list of cookie objects
314+
cookie_list = []
315+
for name, value in cached_cookies.items():
316+
cookie_list.append({
317+
'name': name,
318+
'value': value,
319+
'url': url # Use the target URL for the cookie
320+
})
321+
await context.add_cookies(cookie_list)
281322

282323
if await self.solve_cloudflare_challenge(url, page):
283324
data = await self.get_html_content_and_cookies(context, page)

cf_bypasser/server/routes.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,8 @@ async def get_cookies(
134134
async def get_html(
135135
url: str = Query(..., description="Target URL to get HTML content for"),
136136
retries: int = Query(5, ge=1, le=10, description="Number of retry attempts"),
137-
proxy: Optional[str] = Query(None, description="Proxy URL (optional)")
137+
proxy: Optional[str] = Query(None, description="Proxy URL (optional)"),
138+
bypassCookieCache: bool = Query(False, description="Force fresh cookie generation")
138139
):
139140
"""
140141
Get HTML content from a URL after bypassing Cloudflare protection.
@@ -162,7 +163,7 @@ async def get_html(
162163
bypasser = global_bypasser or CamoufoxBypasser(max_retries=retries, log=True)
163164

164165
# Get HTML content using the new method
165-
data = await bypasser.get_or_generate_html(url, proxy)
166+
data = await bypasser.get_or_generate_html(url, proxy, bypass_cache=bypassCookieCache)
166167

167168
if not data:
168169
raise HTTPException(status_code=500, detail="Failed to bypass Cloudflare protection")

0 commit comments

Comments
 (0)