Skip to content

Enable custom startup logic for browser context and pages #1235

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ src
.env
/.idea/
/TikTok-Api.iml
venv/**
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,11 @@ python -m examples.trending_example
```

You can access the full data dictionary the object was created from with `.as_dict`. On a video this may look like
[this](https://gist.github.com/davidteather/7c30780bbc30772ba11ec9e0b909e99d). TikTok changes their structure from time to time so it's worth investigating the structure of the dictionary when you use this package.
[this](https://gist.github.com/davidteather/7c30780bbc30772ba11ec9e0b909e99d). TikTok changes their structure from time to time so it's worth investigating the structure of the dictionary when you use this package.

## Custom launchers
When you call `TikTokApi.create_sessions()`, the program creates a browser context and a page, visits TikTok and extracts the cookies from the page.
To run custom logic while a session is being created, such as logging in or solving a captcha, pass the keyword arguments `browser_context_factory` and `page_factory`.
Both are async callables: `browser_context_factory` receives the Playwright instance and returns the browser context to use, and `page_factory` receives that browser context and returns the page to use. TikTok-Api uses the objects they return, so you can perform custom actions on the page before the session is created.
You can find examples [here](https://github.com/davidteather/TikTok-Api/blob/main/tests/test_custom_launchers.py).

65 changes: 42 additions & 23 deletions TikTokApi/tiktok.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import asyncio
import logging
import dataclasses
from typing import Any
from typing import Any, Awaitable, Callable
import random
import time
import json

from playwright.async_api import async_playwright, TimeoutError
from playwright.async_api import Browser, BrowserContext, Page, Playwright, ProxySettings, async_playwright, TimeoutError
from urllib.parse import urlencode, quote, urlparse
from .stealth import stealth_async
from .helpers import random_choice
Expand Down Expand Up @@ -81,6 +81,8 @@ def __init__(self, logging_level: int = logging.WARN, logger_name: str = None):
Search.parent = self
Playlist.parent = self

self.browser: Browser

def __create_logger(self, name: str, level: int = logging.DEBUG):
"""Create a logger for the class."""
self.logger: logging.Logger = logging.getLogger(name)
Expand Down Expand Up @@ -140,13 +142,15 @@ async def __set_session_params(self, session: TikTokPlaywrightSession):
async def __create_session(
self,
url: str = "https://www.tiktok.com",
ms_token: str = None,
proxy: str = None,
context_options: dict = {},
ms_token: str | None = None,
proxy: dict[str, Any] | ProxySettings | None = None,
context_options: dict[str, Any] = {},
sleep_after: int = 1,
cookies: dict = None,
cookies: dict[str, Any] | None = None,
suppress_resource_load_types: list[str] = None,
timeout: int = 30000,
page_factory: Callable[[BrowserContext], Awaitable[Page]] | None = None,
browser_context_factory: Callable[[Playwright], Awaitable[BrowserContext]] | None = None,
):
try:
"""Create a TikTokPlaywrightSession"""
Expand All @@ -155,16 +159,27 @@ async def __create_session(
cookies = {}
cookies["msToken"] = ms_token

context = await self.browser.new_context(proxy=proxy, **context_options)
if browser_context_factory is not None:
context = self.browser
else:
context = await self.browser.new_context(proxy=proxy, **context_options)
if cookies is not None:
formatted_cookies = [
{"name": k, "value": v, "domain": urlparse(url).netloc, "path": "/"}
for k, v in cookies.items()
if v is not None
]
await context.add_cookies(formatted_cookies)
page = await context.new_page()
await stealth_async(page)

if page_factory:
page = await page_factory(context)
else:
page = await context.new_page()
await stealth_async(page)
_ = await page.goto(url)

if "tiktok" not in page.url:
_ = await page.goto("https://www.tiktok.com")

# Get the request headers to the url
request_headers = None
Expand All @@ -185,9 +200,6 @@ def handle_request(request):

# Set the navigation timeout
page.set_default_navigation_timeout(timeout)

await page.goto(url)
await page.goto(url) # hack: tiktok blocks first request not sure why, likely bot detection

# by doing this, we are simulate scroll event using mouse to `avoid` bot detection
x, y = random.randint(0, 50), random.randint(0, 50)
Expand Down Expand Up @@ -229,18 +241,20 @@ def handle_request(request):

async def create_sessions(
self,
num_sessions=5,
headless=True,
ms_tokens: list[str] = None,
proxies: list = None,
sleep_after=1,
starting_url="https://www.tiktok.com",
context_options: dict = {},
override_browser_args: list[dict] = None,
cookies: list[dict] = None,
suppress_resource_load_types: list[str] = None,
num_sessions: int =5,
headless: bool =True,
ms_tokens: list[str] | None = None,
proxies: list[dict[str, Any] | ProxySettings] | None = None,
sleep_after: int = 1,
starting_url: str ="https://www.tiktok.com",
context_options: dict[str, Any] = {},
override_browser_args: list[str] | None = None,
cookies: list[dict[str, Any]] | None = None,
suppress_resource_load_types: list[str] | None = None,
browser: str = "chromium",
executable_path: str = None,
executable_path: str | None = None,
page_factory: Callable[[BrowserContext], Awaitable[Page]] | None = None,
browser_context_factory: Callable[[Playwright], Awaitable[BrowserContext]] | None = None,
timeout: int = 30000,
):
"""
Expand All @@ -262,6 +276,7 @@ async def create_sessions(
suppress_resource_load_types (list[str]): Types of resources to suppress playwright from loading, excluding more types will make playwright faster.. Types: document, stylesheet, image, media, font, script, textrack, xhr, fetch, eventsource, websocket, manifest, other.
browser (str): firefox, chromium, or webkit; default is chromium
executable_path (str): Path to the browser executable
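page_factory (Callable[[BrowserContext], Awaitable[Page]] | None): Optional async function that receives the browser context and returns the page to use for the session.
browser_context_factory (Callable[[Playwright], Awaitable[BrowserContext]] | None): Optional async function that receives the Playwright instance and returns the browser context to use.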
timeout (int): The timeout in milliseconds for page navigation

Example Usage:
Expand All @@ -272,6 +287,8 @@ async def create_sessions(
await api.create_sessions(num_sessions=5, ms_tokens=['msToken1', 'msToken2'])
"""
self.playwright = await async_playwright().start()
if browser_context_factory is not None:
self.browser = await browser_context_factory(self.playwright)
if browser == "chromium":
if headless and override_browser_args is None:
override_browser_args = ["--headless=new"]
Expand Down Expand Up @@ -301,6 +318,8 @@ async def create_sessions(
cookies=random_choice(cookies),
suppress_resource_load_types=suppress_resource_load_types,
timeout=timeout,
page_factory=page_factory,
browser_context_factory=browser_context_factory
)
for _ in range(num_sessions)
)
Expand Down
89 changes: 89 additions & 0 deletions tests/test_custom_launchers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import tempfile
import asyncio
from playwright.async_api import BrowserContext, Page, Playwright
from TikTokApi import TikTokApi
import os
import pytest

from TikTokApi.stealth.stealth import stealth_async



@pytest.mark.asyncio
async def test_browser_context_factory():
    """Check that ``create_sessions`` calls a user-supplied ``browser_context_factory``.

    The factory launches a persistent Chromium context backed by a temporary
    profile directory and records that it ran; the test then asserts on that
    record once the session has been created.
    """
    context_created_in_factory = False

    # The profile directory is owned by the test, not by the factory: a
    # TemporaryDirectory object that is local to the factory is finalized (and
    # its directory deleted) as soon as the factory returns, i.e. while the
    # browser is still using it. ignore_cleanup_errors (Python 3.10+) keeps
    # teardown from failing if the browser still holds files in the profile.
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as user_data_dir:

        async def browser_context_factory(p: Playwright) -> BrowserContext:
            """Launch a persistent Chromium context and record the call."""
            nonlocal context_created_in_factory
            ctx = await p.chromium.launch_persistent_context(
                user_data_dir, headless=False
            )
            context_created_in_factory = True
            return ctx

        async with TikTokApi() as api:
            await api.create_sessions(
                num_sessions=1,
                sleep_after=3,
                browser=os.getenv("TIKTOK_BROWSER", "chromium"),
                headless=False,
                browser_context_factory=browser_context_factory,
            )
            assert context_created_in_factory

@pytest.mark.asyncio
async def test_page_factory():
    """Check that ``create_sessions`` calls a user-supplied ``page_factory``.

    The factory opens a page on the given browser context, navigates it to
    TikTok and records that it ran.
    """
    factory_ran = False

    async def make_page(ctx: BrowserContext) -> Page:
        """Open a page on ``ctx``, load TikTok and record the call."""
        nonlocal factory_ran
        new_page = await ctx.new_page()
        _ = await new_page.goto("https://tiktok.com")
        factory_ran = True
        return new_page

    session_kwargs = {
        "num_sessions": 1,
        "sleep_after": 3,
        "browser": os.getenv("TIKTOK_BROWSER", "chromium"),
        "headless": False,
        "page_factory": make_page,
    }

    async with TikTokApi() as api:
        await api.create_sessions(**session_kwargs)
        assert factory_ran

@pytest.mark.asyncio
async def test_custom_login_flow_with_captcha_solve():
    """Create a session using a captcha-solving context and a login page factory.

    Requires the optional ``tiktok_captcha_solver`` package and the environment
    variables ``API_KEY`` (SadCaptcha key), ``TIKTOK_USERNAME`` and
    ``TIKTOK_PASSWORD``. The test is skipped, rather than failing with an
    ImportError or KeyError, when any of them is unavailable.
    """
    solver = pytest.importorskip("tiktok_captcha_solver")

    required_env = ("API_KEY", "TIKTOK_USERNAME", "TIKTOK_PASSWORD")
    missing = [name for name in required_env if not os.environ.get(name)]
    if missing:
        pytest.skip("missing required environment variables: " + ", ".join(missing))

    api_key = os.environ["API_KEY"]  # SadCaptcha key
    username = os.environ["TIKTOK_USERNAME"]
    password = os.environ["TIKTOK_PASSWORD"]

    async def captcha_solver_context_factory(p: Playwright) -> BrowserContext:
        """Build the browser context through the captcha solver package."""
        ctx = await solver.make_async_playwright_solver_context(
            p,
            api_key,
            headless=False,
        )
        return ctx

    async def login_page_factory(ctx: BrowserContext) -> Page:
        """Open the email login page, type the credentials and submit the form."""
        page = await ctx.new_page()
        await stealth_async(page)
        _ = await page.goto("https://www.tiktok.com/login/phone-or-email/email")
        await asyncio.sleep(5)
        await page.locator('xpath=//input[contains(@name,"username")]').type(username)
        await asyncio.sleep(2)
        await page.get_by_placeholder('Password').type(password)
        await asyncio.sleep(2)
        await page.locator('//button[contains(@data-e2e,"login-button")]').click()
        await asyncio.sleep(5)  # wait for captcha to be solved
        return page

    async with TikTokApi() as api:
        await api.create_sessions(
            num_sessions=1,
            sleep_after=3,
            browser=os.getenv("TIKTOK_BROWSER", "chromium"),
            headless=False,
            page_factory=login_page_factory,
            browser_context_factory=captcha_solver_context_factory,
        )

Loading