diff --git a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts
index 17b097f026f4..1fbc849febcb 100644
--- a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts
+++ b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts
@@ -725,6 +725,78 @@ export interface SaveSnapshotOptions {
     config?: Configuration;
 }
 
+export interface LoginLocators {
+    /**
+     * Function that returns the username input element.
+     * @default First match among `input[type="email"]`, `input[name*="username"]`, `input[name*="email"]`,
+     * `input[id*="username"]`, `input[id*="email"]` and `input[type="text"]`.
+     */
+    getUsernameInput?: (page: Page) => Promise<ElementHandle<Element> | null>;
+
+    /**
+     * Function that returns the password input element.
+     * @default `page.$('input[type="password"]')`
+     */
+    getPasswordInput?: (page: Page) => Promise<ElementHandle<Element> | null>;
+
+    /**
+     * Function that returns the submit button element.
+     * @default First match among `button[type="submit"]`, `input[type="submit"]`, an `input` whose `value`
+     * contains "Sign in", "Login" or "Log in", and finally a `button` whose text contains one of those labels.
+     */
+    getSubmitButton?: (page: Page) => Promise<ElementHandle<Element> | null>;
+
+    /**
+     * Function that returns the next button element (for two-step login).
+     * @default First match among an `input` whose `value` contains "Next" or "Continue", and finally a
+     * `button` whose text contains one of those labels.
+     */
+    getNextButton?: (page: Page) => Promise<ElementHandle<Element> | null>;
+}
+
+export interface LoginOptions {
+    /**
+     * Username or email to use for login.
+     */
+    username: string;
+
+    /**
+     * Password to use for login.
+     */
+    password: string;
+
+    /**
+     * Custom locators for login form elements.
+     */
+    locators?: LoginLocators;
+
+    /**
+     * Function to detect if login was successful.
+     * @default Detects success by checking if URL changed from login page or if error indicators are not present
+     */
+    detectLoginSuccess?: (page: Page) => Promise<boolean>;
+
+    /**
+     * Upper bound, in milliseconds, for the waits performed during login: how long to poll for the password
+     * field after clicking the next button in a two-step flow, and a cap on the settle delay after submitting.
+     * @default 10000
+     */
+    timeoutMs?: number;
+
+    /**
+     * Optional function to handle captchas that may appear during login.
+     * Called twice once the password field is available: right before the password is typed and again
+     * right after it, before the form is submitted.
+     */
+    handleCaptcha?: (page: Page) => Promise<void>;
+
+    /**
+     * Timeout for each `handleCaptcha` call in milliseconds.
+     * @default 30000
+     */
+    captchaTimeoutMs?: number;
+}
+
 /**
  * Saves a full screenshot and HTML of the current page into a Key-Value store.
  * @param page Puppeteer [`Page`](https://pptr.dev/api/puppeteer.page) object.
@@ -782,6 +854,284 @@ export async function closeCookieModals(page: Page): Promise<void> {
     await page.evaluate(getInjectableScript());
 }
 
+/**
+ * Interval, in milliseconds, between polls for the password field in a two-step login.
+ */
+const LOGIN_POLL_INTERVAL_MS = 100;
+
+/**
+ * Delay, in milliseconds, granted to the page to react after the login form is submitted.
+ */
+const LOGIN_SETTLE_DELAY_MS = 1_000;
+
+/**
+ * Returns the first element matched by any of the CSS selectors, trying them in the given order.
+ */
+async function findFirstBySelector(page: Page, selectors: string[]): Promise<ElementHandle<Element> | null> {
+    for (const selector of selectors) {
+        const element = await page.$(selector);
+        if (element) return element;
+    }
+    return null;
+}
+
+/**
+ * Returns the first `<button>` whose text content contains one of the labels.
+ * CSS has no text-matching pseudo-class (`:contains()` is a jQuery extension and makes `page.$()` throw),
+ * so the search runs inside the page instead.
+ */
+async function findButtonByText(page: Page, labels: string[]): Promise<ElementHandle<Element> | null> {
+    const handle = await page.evaluateHandle((texts: string[]) => {
+        const buttons = Array.from(document.querySelectorAll('button'));
+        return buttons.find((button) => texts.some((text) => button.textContent?.includes(text))) ?? null;
+    }, labels);
+
+    const element = handle.asElement() as ElementHandle<Element> | null;
+    if (!element) {
+        // Nothing matched: the handle wraps `null` and has to be released explicitly.
+        await handle.dispose();
+    }
+    return element;
+}
+
+/**
+ * Automatically handles login forms on a page.
+ * Supports both single-step (username & password together) and two-step (username, then password) login flows.
+ * Resolves without touching the page when no username input is found.
+ *
+ * @param page Puppeteer Page object
+ * @param options Login configuration options
+ * @throws If a form element is missing, filling or clicking fails, captcha handling fails or times out,
+ *   or success detection reports a failed login.
+ */
+export async function login(page: Page, options: LoginOptions): Promise<void> {
+    const {
+        username,
+        password,
+        locators = {},
+        detectLoginSuccess,
+        timeoutMs = 10_000,
+        handleCaptcha,
+        captchaTimeoutMs = 30_000,
+    } = options;
+
+    // Default locators: CSS selectors are tried first, matching a button by its text is the last resort.
+    const defaultLocators: Required<LoginLocators> = {
+        getUsernameInput: async (target) =>
+            findFirstBySelector(target, [
+                'input[type="email"]',
+                'input[name*="username"]',
+                'input[name*="email"]',
+                'input[id*="username"]',
+                'input[id*="email"]',
+                'input[type="text"]',
+            ]),
+        getPasswordInput: async (target) => target.$('input[type="password"]'),
+        getSubmitButton: async (target) =>
+            (await findFirstBySelector(target, [
+                'button[type="submit"]',
+                'input[type="submit"]',
+                'input[value*="Sign in"]',
+                'input[value*="Login"]',
+                'input[value*="Log in"]',
+            ])) ?? findButtonByText(target, ['Sign in', 'Login', 'Log in']),
+        getNextButton: async (target) =>
+            (await findFirstBySelector(target, ['input[value*="Next"]', 'input[value*="Continue"]'])) ??
+            findButtonByText(target, ['Next', 'Continue']),
+    };
+
+    const finalLocators = {
+        getUsernameInput: locators.getUsernameInput ?? defaultLocators.getUsernameInput,
+        getPasswordInput: locators.getPasswordInput ?? defaultLocators.getPasswordInput,
+        getSubmitButton: locators.getSubmitButton ?? defaultLocators.getSubmitButton,
+        getNextButton: locators.getNextButton ?? defaultLocators.getNextButton,
+    };
+
+    const sleep = async (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));
+
+    // Helper function to fill a field with error handling
+    const waitAndFill = async (
+        getElement: () => Promise<ElementHandle<Element> | null>,
+        value: string,
+        fieldName: string,
+    ) => {
+        const element = await getElement();
+        if (!element) {
+            throw new Error(`Could not find ${fieldName} field`);
+        }
+        try {
+            await element.click({ clickCount: 3 }); // Select all text
+            await element.type(value);
+        } catch (error) {
+            throw new Error(`Failed to fill ${fieldName} field: ${error instanceof Error ? error.message : String(error)}`);
+        }
+    };
+
+    // Helper function to handle captcha if provided
+    const maybeHandleCaptcha = async () => {
+        if (!handleCaptcha) return;
+
+        let timer: ReturnType<typeof setTimeout> | undefined;
+        try {
+            await Promise.race([
+                handleCaptcha(page),
+                new Promise<never>((_, reject) => {
+                    timer = setTimeout(() => reject(new Error('Captcha handling timed out')), captchaTimeoutMs);
+                }),
+            ]);
+        } catch (error) {
+            throw new Error(`Captcha handling failed: ${error instanceof Error ? error.message : String(error)}`);
+        } finally {
+            // Without this the pending timer keeps the event loop alive after the captcha is handled.
+            clearTimeout(timer);
+        }
+    };
+
+    // Polls the password locator until it yields an element or `timeoutMs` has elapsed.
+    const waitForPasswordInput = async () => {
+        const deadline = Date.now() + timeoutMs;
+        for (;;) {
+            let input: ElementHandle<Element> | null = null;
+            try {
+                input = await finalLocators.getPasswordInput(page);
+            } catch {
+                // The page may be navigating right after the click; retry until the deadline.
+            }
+            if (input || Date.now() >= deadline) return input;
+            await sleep(LOGIN_POLL_INTERVAL_MS);
+        }
+    };
+
+    // Check if login form exists
+    const usernameInput = await finalLocators.getUsernameInput(page);
+    if (!usernameInput) {
+        // No login form found, resolve successfully
+        return;
+    }
+
+    const originalUrl = page.url();
+
+    try {
+        // Fill username
+        await waitAndFill(() => finalLocators.getUsernameInput(page), username, 'username');
+
+        // Check if this is a two-step login (password field not present initially)
+        let passwordInput = await finalLocators.getPasswordInput(page);
+
+        if (!passwordInput) {
+            // Two-step login: click next/continue button
+            const nextButton = await finalLocators.getNextButton(page);
+            if (!nextButton) {
+                throw new Error('Could not find next/continue button for two-step login');
+            }
+
+            try {
+                await nextButton.click();
+            } catch (error) {
+                throw new Error(`Failed to click next button: ${error instanceof Error ? error.message : String(error)}`);
+            }
+
+            // Poll instead of sleeping for a fixed second, so slow pages get up to `timeoutMs`.
+            passwordInput = await waitForPasswordInput();
+            if (!passwordInput) {
+                throw new Error('Password field not found after clicking next button');
+            }
+        }
+
+        // Handle captcha before filling password
+        await maybeHandleCaptcha();
+
+        // Fill password
+        await waitAndFill(() => finalLocators.getPasswordInput(page), password, 'password');
+
+        // Handle captcha after filling password
+        await maybeHandleCaptcha();
+
+        // Submit the form
+        const submitButton = await finalLocators.getSubmitButton(page);
+        if (!submitButton) {
+            throw new Error('Could not find submit button');
+        }
+
+        try {
+            await submitButton.click();
+        } catch (error) {
+            throw new Error(`Failed to click submit button: ${error instanceof Error ? error.message : String(error)}`);
+        }
+
+        // Give the page time to navigate or re-render, but never longer than `timeoutMs`.
+        await sleep(Math.min(LOGIN_SETTLE_DELAY_MS, timeoutMs));
+
+        // Check if login was successful
+        const isSuccessful = detectLoginSuccess
+            ? await detectLoginSuccess(page)
+            : await defaultDetectLoginSuccess(page, originalUrl);
+
+        if (!isSuccessful) {
+            throw new Error('Login failed - success detection returned false');
+        }
+    } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        // Errors that already carry the prefix are not wrapped a second time.
+        throw new Error(message.startsWith('Login failed') ? message : `Login failed: ${message}`);
+    }
+}
+
+/**
+ * Default login success detection logic.
+ * Checks if URL changed from login page or if error indicators are not present.
+ */
+async function defaultDetectLoginSuccess(page: Page, originalUrl: string): Promise<boolean> {
+    const currentUrl = page.url();
+
+    // If URL changed, it's likely successful
+    if (currentUrl !== originalUrl) {
+        return true;
+    }
+
+    // Check for common error indicators
+    const errorSelectors = [
+        '.error',
+        '.alert-error',
+        '.alert-danger',
+        '.login-error',
+        '[data-testid="error"]',
+        '.error-message',
+        '.invalid-feedback',
+    ];
+
+    for (const selector of errorSelectors) {
+        const errorElement = await page.$(selector);
+        if (errorElement) {
+            const isVisible = await errorElement.evaluate((el) => {
+                const style = window.getComputedStyle(el);
+                return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
+            });
+            if (isVisible) {
+                return false;
+            }
+        }
+    }
+
+    // Check if we're still on a login page
+    const loginIndicators = [
+        'input[type="password"]',
+        'form[action*="login"]',
+        'form[action*="signin"]',
+        '.login-form',
+        '.signin-form',
+    ];
+
+    for (const selector of loginIndicators) {
+        const element = await page.$(selector);
+        if (element) {
+            return false; // Still on login page
+        }
+    }
+
+    return true;
+}
+
 /** @internal */
 export interface PuppeteerContextUtils {
     /**
@@ -1059,6 +1409,25 @@ export interface PuppeteerContextUtils {
      * Tries to close cookie consent modals on the page. Based on the I Don't Care About Cookies browser extension.
      */
     closeCookieModals(): Promise<void>;
+
+    /**
+     * Automatically handles login forms on a page.
+     * Supports both single-step (username & password together) and two-step (username, then password) login flows.
+     *
+     * **Example usage:**
+     * ```javascript
+     * async requestHandler({ login }) {
+     *     await login({
+     *         username: 'your-username',
+     *         password: 'your-password',
+     *         handleCaptcha: async (page) => {
+     *             // Solve captcha here
+     *         },
+     *     });
+     * },
+     * ```
+     */
+    login(options: LoginOptions): Promise<void>;
 }
 
 /** @internal */
@@ -1113,6 +1482,7 @@ export function registerUtilsToContext(
     context.saveSnapshot = async (options?: SaveSnapshotOptions) =>
         saveSnapshot(context.page, { ...options, config: context.crawler.config });
     context.closeCookieModals = async () => closeCookieModals(context.page);
+    context.login = async (options: LoginOptions) => login(context.page, options);
 }
 
 export { enqueueLinksByClickingElements, addInterceptRequestHandler, removeInterceptRequestHandler };
@@ -1133,4 +1503,5 @@ export const puppeteerUtils = {
     saveSnapshot,
     parseWithCheerio,
     closeCookieModals,
+    login,
 };
diff --git a/test/core/puppeteer_utils.test.ts b/test/core/puppeteer_utils.test.ts
index 50c157827a27..a5ea3370db24 100644
--- a/test/core/puppeteer_utils.test.ts
+++ b/test/core/puppeteer_utils.test.ts
@@ -8,6 +8,8 @@ import { runExampleComServer } from 'test/shared/_helper';
 import { MemoryStorageEmulator } from 'test/shared/MemoryStorageEmulator';
 
 import log from '@apify/log';
+import { describe, test, expect, beforeEach, vi } from 'vitest';
+import { login } from '../../packages/puppeteer-crawler/src/internals/utils/puppeteer_utils';
 
 const launchContext = { launchOptions: { headless: true } };
 
@@ -500,4 +502,270 @@ describe('puppeteerUtils', () => {
             }
         });
     });
+
+    describe('login()', () => {
+        let page: Page;
+        let usernameInputMock: any;
+        let passwordInputMock: any;
+        let submitButtonMock: any;
+        let nextButtonMock: any;
+
+        beforeEach(() => {
+            // Mock page methods
+            page = {
+                url: vi.fn().mockReturnValue('https://example.com/login'),
+                $: vi.fn(),
+                waitForSelector: vi.fn(),
+            } as any;
+
+            // Mock element handles
+            usernameInputMock = {
+                click: vi.fn(),
+                type: vi.fn(),
+            };
+
+            passwordInputMock = {
+                click: vi.fn(),
+                type: vi.fn(),
+            };
+
+            submitButtonMock = {
+                click: vi.fn(),
+            };
+
+            nextButtonMock = {
+                click: vi.fn(),
+            };
+        });
+
+        test('single-step login success', async () => {
+            // Mock page.$ to return elements
+            (page.$ as any).mockImplementation((selector: string) => {
+                if (selector.includes('email') || selector.includes('username')) {
+                    return Promise.resolve(usernameInputMock);
+                }
+                if (selector.includes('password')) {
+                    return Promise.resolve(passwordInputMock);
+                }
+                if (selector.includes('submit') || selector.includes('Sign in')) {
+                    return Promise.resolve(submitButtonMock);
+                }
+                return Promise.resolve(null);
+            });
+
+            // Mock successful login detection
+            const detectLoginSuccessMock = vi.fn().mockResolvedValue(true);
+
+            await login(page, {
+                username: 'testuser',
+                password: 'testpass',
+                detectLoginSuccess: detectLoginSuccessMock,
+            });
+
+            expect(usernameInputMock.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(usernameInputMock.type).toHaveBeenCalledWith('testuser');
+            expect(passwordInputMock.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(passwordInputMock.type).toHaveBeenCalledWith('testpass');
+            expect(submitButtonMock.click).toHaveBeenCalledTimes(1);
+            expect(detectLoginSuccessMock).toHaveBeenCalledWith(page);
+        });
+
+        test('single-step login failure', async () => {
+            // Mock page.$ to return elements
+            (page.$ as any).mockImplementation((selector: string) => {
+                if (selector.includes('email') || selector.includes('username')) {
+                    return Promise.resolve(usernameInputMock);
+                }
+                if (selector.includes('password')) {
+                    return Promise.resolve(passwordInputMock);
+                }
+                if (selector.includes('submit') || selector.includes('Sign in')) {
+                    return Promise.resolve(submitButtonMock);
+                }
+                return Promise.resolve(null);
+            });
+
+            // Mock failed login detection
+            const detectLoginSuccessMock = vi.fn().mockResolvedValue(false);
+
+            await expect(login(page, {
+                username: 'testuser',
+                password: 'testpass',
+                detectLoginSuccess: detectLoginSuccessMock,
+            })).rejects.toThrow('Login failed - success detection returned false');
+        });
+
+        test('two-step login success', async () => {
+            let passwordCallCount = 0;
+
+            // Mock page.$ to return elements for two-step flow
+            (page.$ as any).mockImplementation((selector: string) => {
+                if (selector.includes('email') || selector.includes('username')) {
+                    return Promise.resolve(usernameInputMock);
+                }
+                if (selector.includes('password')) {
+                    passwordCallCount++;
+                    // First call returns null (password not visible), second call returns element
+                    return Promise.resolve(passwordCallCount === 1 ? null : passwordInputMock);
+                }
+                if (selector.includes('Next') || selector.includes('Continue')) {
+                    return Promise.resolve(nextButtonMock);
+                }
+                if (selector.includes('submit') || selector.includes('Sign in')) {
+                    return Promise.resolve(submitButtonMock);
+                }
+                return Promise.resolve(null);
+            });
+
+            // Mock successful login detection
+            const detectLoginSuccessMock = vi.fn().mockResolvedValue(true);
+
+            await login(page, {
+                username: 'testuser',
+                password: 'testpass',
+                detectLoginSuccess: detectLoginSuccessMock,
+            });
+
+            expect(usernameInputMock.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(usernameInputMock.type).toHaveBeenCalledWith('testuser');
+            expect(nextButtonMock.click).toHaveBeenCalledTimes(1);
+            expect(passwordInputMock.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(passwordInputMock.type).toHaveBeenCalledWith('testpass');
+            expect(submitButtonMock.click).toHaveBeenCalledTimes(1);
+            expect(detectLoginSuccessMock).toHaveBeenCalledWith(page);
+        });
+
+        test('two-step login failure', async () => {
+            let passwordCallCount = 0;
+
+            // Mock page.$ to return elements for two-step flow
+            (page.$ as any).mockImplementation((selector: string) => {
+                if (selector.includes('email') || selector.includes('username')) {
+                    return Promise.resolve(usernameInputMock);
+                }
+                if (selector.includes('password')) {
+                    passwordCallCount++;
+                    // First call returns null (password not visible), second call returns element
+                    return Promise.resolve(passwordCallCount === 1 ? null : passwordInputMock);
+                }
+                if (selector.includes('Next') || selector.includes('Continue')) {
+                    return Promise.resolve(nextButtonMock);
+                }
+                if (selector.includes('submit') || selector.includes('Sign in')) {
+                    return Promise.resolve(submitButtonMock);
+                }
+                return Promise.resolve(null);
+            });
+
+            // Mock failed login detection
+            const detectLoginSuccessMock = vi.fn().mockResolvedValue(false);
+
+            await expect(login(page, {
+                username: 'testuser',
+                password: 'testpass',
+                detectLoginSuccess: detectLoginSuccessMock,
+            })).rejects.toThrow('Login failed - success detection returned false');
+        });
+
+        test('default locators usage', async () => {
+            // Mock page.$ to return elements
+            (page.$ as any).mockImplementation((selector: string) => {
+                if (selector.includes('email') || selector.includes('username')) {
+                    return Promise.resolve(usernameInputMock);
+                }
+                if (selector.includes('password')) {
+                    return Promise.resolve(passwordInputMock);
+                }
+                if (selector.includes('submit') || selector.includes('Sign in')) {
+                    return Promise.resolve(submitButtonMock);
+                }
+                return Promise.resolve(null);
+            });
+
+            const detectLoginSuccessMock = vi.fn().mockResolvedValue(true);
+
+            await login(page, {
+                username: 'testuser',
+                password: 'testpass',
+                detectLoginSuccess: detectLoginSuccessMock,
+            });
+
+            expect(usernameInputMock.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(usernameInputMock.type).toHaveBeenCalledWith('testuser');
+            expect(passwordInputMock.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(passwordInputMock.type).toHaveBeenCalledWith('testpass');
+            expect(submitButtonMock.click).toHaveBeenCalledTimes(1);
+            expect(detectLoginSuccessMock).toHaveBeenCalledWith(page);
+        });
+
+        test('login() calls handleCaptcha if provided (single-step)', async () => {
+            // Mock page.$ to return elements
+            (page.$ as any).mockImplementation((selector: string) => {
+                if (selector.includes('email') || selector.includes('username')) {
+                    return Promise.resolve(usernameInputMock);
+                }
+                if (selector.includes('password')) {
+                    return Promise.resolve(passwordInputMock);
+                }
+                if (selector.includes('submit') || selector.includes('Sign in')) {
+                    return Promise.resolve(submitButtonMock);
+                }
+                return Promise.resolve(null);
+            });
+
+            const detectLoginSuccessMock = vi.fn().mockResolvedValue(true);
+            const handleCaptcha = vi.fn().mockResolvedValue(undefined);
+
+            await login(page, {
+                username: 'testuser',
+                password: 'testpass',
+                detectLoginSuccess: detectLoginSuccessMock,
+                handleCaptcha,
+            });
+
+            expect(handleCaptcha).toHaveBeenCalledTimes(2); // Called before and after password fill
+            expect(handleCaptcha).toHaveBeenCalledWith(page);
+            expect(detectLoginSuccessMock).toHaveBeenCalledWith(page);
+        });
+
+        test('no login form found', async () => {
+            // Mock page.$ to return null (no login form)
+            (page.$ as any).mockResolvedValue(null);
+
+            await expect(login(page, {
+                username: 'testuser',
+                password: 'testpass',
+            })).resolves.toBeUndefined();
+        });
+
+        test('custom locators usage', async () => {
+            const customUsernameInput = { click: vi.fn(), type: vi.fn() };
+            const customPasswordInput = { click: vi.fn(), type: vi.fn() };
+            const customSubmitButton = { click: vi.fn() };
+
+            const customLocators = {
+                getUsernameInput: vi.fn().mockResolvedValue(customUsernameInput),
+                getPasswordInput: vi.fn().mockResolvedValue(customPasswordInput),
+                getSubmitButton: vi.fn().mockResolvedValue(customSubmitButton),
+            };
+
+            const detectLoginSuccessMock = vi.fn().mockResolvedValue(true);
+
+            await login(page, {
+                username: 'testuser',
+                password: 'testpass',
+                locators: customLocators,
+                detectLoginSuccess: detectLoginSuccessMock,
+            });
+
+            expect(customLocators.getUsernameInput).toHaveBeenCalledWith(page);
+            expect(customLocators.getPasswordInput).toHaveBeenCalledWith(page);
+            expect(customLocators.getSubmitButton).toHaveBeenCalledWith(page);
+            expect(customUsernameInput.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(customUsernameInput.type).toHaveBeenCalledWith('testuser');
+            expect(customPasswordInput.click).toHaveBeenCalledWith({ clickCount: 3 });
+            expect(customPasswordInput.type).toHaveBeenCalledWith('testpass');
+            expect(customSubmitButton.click).toHaveBeenCalledTimes(1);
+        });
+    });
 });