diff --git a/.gitignore b/.gitignore index 9dad7395..235d1c1a 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ src .env /.idea/ /TikTok-Api.iml +venv/** diff --git a/README.md b/README.md index 9c513fb5..72feb59e 100644 --- a/README.md +++ b/README.md @@ -139,4 +139,11 @@ python -m examples.trending_example ``` You can access the full data dictionary the object was created from with `.as_dict`. On a video this may look like -[this](https://gist.github.com/davidteather/7c30780bbc30772ba11ec9e0b909e99d). TikTok changes their structure from time to time so it's worth investigating the structure of the dictionary when you use this package. \ No newline at end of file +[this](https://gist.github.com/davidteather/7c30780bbc30772ba11ec9e0b909e99d). TikTok changes their structure from time to time so it's worth investigating the structure of the dictionary when you use this package. + +## Custom launchers +When you call `TikTokApi.create_sessions()`, the program creates a browser context and a page, visits TikTok, and extracts the cookies from the page. +To run custom logic while a session is being created, such as logging in or solving a captcha, pass the keyword arguments `browser_context_factory` and `page_factory` (either one or both). +Both are async callables: `browser_context_factory` receives the Playwright instance and returns the `BrowserContext` to use, and `page_factory` receives the browser context and returns the `Page` to use, so you can perform custom actions on the page before the session is created. +See [tests/test_custom_launchers.py](https://github.com/davidteather/TikTok-Api/blob/main/tests/test_custom_launchers.py) for working examples. 
+ diff --git a/TikTokApi/tiktok.py b/TikTokApi/tiktok.py index 63d87e57..d2741db7 100644 --- a/TikTokApi/tiktok.py +++ b/TikTokApi/tiktok.py @@ -1,12 +1,12 @@ import asyncio import logging import dataclasses -from typing import Any +from typing import Any, Awaitable, Callable import random import time import json -from playwright.async_api import async_playwright, TimeoutError +from playwright.async_api import Browser, BrowserContext, Page, Playwright, ProxySettings, async_playwright, TimeoutError from urllib.parse import urlencode, quote, urlparse from .stealth import stealth_async from .helpers import random_choice @@ -81,6 +81,8 @@ def __init__(self, logging_level: int = logging.WARN, logger_name: str = None): Search.parent = self Playlist.parent = self + self.browser: Browser + def __create_logger(self, name: str, level: int = logging.DEBUG): """Create a logger for the class.""" self.logger: logging.Logger = logging.getLogger(name) @@ -140,13 +142,15 @@ async def __set_session_params(self, session: TikTokPlaywrightSession): async def __create_session( self, url: str = "https://www.tiktok.com", - ms_token: str = None, - proxy: str = None, - context_options: dict = {}, + ms_token: str | None = None, + proxy: dict[str, Any] | ProxySettings | None = None, + context_options: dict[str, Any] = {}, sleep_after: int = 1, - cookies: dict = None, + cookies: dict[str, Any] | None = None, suppress_resource_load_types: list[str] = None, timeout: int = 30000, + page_factory: Callable[[BrowserContext], Awaitable[Page]] | None = None, + browser_context_factory: Callable[[Playwright], Awaitable[BrowserContext]] | None = None, ): try: """Create a TikTokPlaywrightSession""" @@ -155,7 +159,10 @@ async def __create_session( cookies = {} cookies["msToken"] = ms_token - context = await self.browser.new_context(proxy=proxy, **context_options) + if browser_context_factory is not None: + context = self.browser + else: + context = await self.browser.new_context(proxy=proxy, 
**context_options) if cookies is not None: formatted_cookies = [ {"name": k, "value": v, "domain": urlparse(url).netloc, "path": "/"} @@ -163,8 +170,16 @@ async def __create_session( if v is not None ] await context.add_cookies(formatted_cookies) - page = await context.new_page() - await stealth_async(page) + + if page_factory: + page = await page_factory(context) + else: + page = await context.new_page() + await stealth_async(page) + _ = await page.goto(url) + + if "tiktok" not in page.url: + _ = await page.goto("https://www.tiktok.com") # Get the request headers to the url request_headers = None @@ -185,9 +200,6 @@ def handle_request(request): # Set the navigation timeout page.set_default_navigation_timeout(timeout) - - await page.goto(url) - await page.goto(url) # hack: tiktok blocks first request not sure why, likely bot detection # by doing this, we are simulate scroll event using mouse to `avoid` bot detection x, y = random.randint(0, 50), random.randint(0, 50) @@ -229,18 +241,20 @@ def handle_request(request): async def create_sessions( self, - num_sessions=5, - headless=True, - ms_tokens: list[str] = None, - proxies: list = None, - sleep_after=1, - starting_url="https://www.tiktok.com", - context_options: dict = {}, - override_browser_args: list[dict] = None, - cookies: list[dict] = None, - suppress_resource_load_types: list[str] = None, + num_sessions: int =5, + headless: bool =True, + ms_tokens: list[str] | None = None, + proxies: list[dict[str, Any] | ProxySettings] | None = None, + sleep_after: int = 1, + starting_url: str ="https://www.tiktok.com", + context_options: dict[str, Any] = {}, + override_browser_args: list[str] | None = None, + cookies: list[dict[str, Any]] | None = None, + suppress_resource_load_types: list[str] | None = None, browser: str = "chromium", - executable_path: str = None, + executable_path: str | None = None, + page_factory: Callable[[BrowserContext], Awaitable[Page]] | None = None, + browser_context_factory: 
Callable[[Playwright], Awaitable[BrowserContext]] | None = None, timeout: int = 30000, ): """ @@ -262,6 +276,7 @@ async def create_sessions( suppress_resource_load_types (list[str]): Types of resources to suppress playwright from loading, excluding more types will make playwright faster.. Types: document, stylesheet, image, media, font, script, textrack, xhr, fetch, eventsource, websocket, manifest, other. browser (str): firefox, chromium, or webkit; default is chromium executable_path (str): Path to the browser executable + page_factory (Callable[[], Awaitable[Page]]) | None: Optional async function for instantiating pages. timeout (int): The timeout in milliseconds for page navigation Example Usage: @@ -272,6 +287,8 @@ async def create_sessions( await api.create_sessions(num_sessions=5, ms_tokens=['msToken1', 'msToken2']) """ self.playwright = await async_playwright().start() + if browser_context_factory is not None: + self.browser = await browser_context_factory(self.playwright) if browser == "chromium": if headless and override_browser_args is None: override_browser_args = ["--headless=new"] @@ -301,6 +318,8 @@ async def create_sessions( cookies=random_choice(cookies), suppress_resource_load_types=suppress_resource_load_types, timeout=timeout, + page_factory=page_factory, + browser_context_factory=browser_context_factory ) for _ in range(num_sessions) ) diff --git a/tests/test_custom_launchers.py b/tests/test_custom_launchers.py new file mode 100644 index 00000000..36de3d47 --- /dev/null +++ b/tests/test_custom_launchers.py @@ -0,0 +1,89 @@ +import tempfile +import asyncio +from playwright.async_api import BrowserContext, Page, Playwright +from TikTokApi import TikTokApi +import os +import pytest + +from TikTokApi.stealth.stealth import stealth_async + + + +@pytest.mark.asyncio +async def test_browser_context_factory(): + + context_created_in_factory = [False] + + async def browser_context_factory(p: Playwright) -> BrowserContext: + user_data_dir_tempdir = 
tempfile.TemporaryDirectory() + user_data_dir = user_data_dir_tempdir.name + ctx = await p.chromium.launch_persistent_context(user_data_dir, headless=False) + context_created_in_factory[0] = True + return ctx + + async with TikTokApi() as api: + await api.create_sessions( + num_sessions=1, + sleep_after=3, + browser=os.getenv("TIKTOK_BROWSER", "chromium"), + headless=False, + browser_context_factory=browser_context_factory + ) + assert context_created_in_factory[0] == True + +@pytest.mark.asyncio +async def test_page_factory(): + + page_created_in_factory = [False] + + async def page_factory(ctx: BrowserContext) -> Page: + page = await ctx.new_page() + _ = await page.goto("https://tiktok.com") + page_created_in_factory[0] = True + return page + + async with TikTokApi() as api: + await api.create_sessions( + num_sessions=1, + sleep_after=3, + browser=os.getenv("TIKTOK_BROWSER", "chromium"), + headless=False, + page_factory=page_factory + ) + assert page_created_in_factory[0] == True + +@pytest.mark.asyncio +async def test_custom_login_flow_with_captcha_solve(): + from tiktok_captcha_solver import make_async_playwright_solver_context + + async def captcha_solver_context_factory(p: Playwright) -> BrowserContext: + ctx = await make_async_playwright_solver_context( + p, + os.environ["API_KEY"], # SadCaptcha key + headless=False + ) + return ctx + + async def login_page_factory(ctx: BrowserContext) -> Page: + page = await ctx.new_page() + await stealth_async(page) + _ = await page.goto("https://www.tiktok.com/login/phone-or-email/email") + await asyncio.sleep(5) + await page.locator('xpath=//input[contains(@name,"username")]').type(os.environ["TIKTOK_USERNAME"]) + await asyncio.sleep(2) + await page.get_by_placeholder('Password').type(os.environ["TIKTOK_PASSWORD"]); + await asyncio.sleep(2) + await page.locator('//button[contains(@data-e2e,"login-button")]').click(); + await asyncio.sleep(5) # wait for captcha to be solved + return page + + async with TikTokApi() as api: 
+ await api.create_sessions( + num_sessions=1, + sleep_after=3, + browser=os.getenv("TIKTOK_BROWSER", "chromium"), + headless=False, + page_factory=login_page_factory, + browser_context_factory=captcha_solver_context_factory + ) +