Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions src/notebooklm/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ async def open(self) -> None:
"""Open the HTTP client connection.

Called automatically by NotebookLMClient.__aenter__.
Uses httpx.Cookies jar to properly handle cross-domain redirects
(e.g., to accounts.google.com for auth token refresh).
"""
if self._http_client is None:
# Use granular timeouts: shorter connect timeout helps detect network issues
Expand All @@ -138,11 +140,13 @@ async def open(self) -> None:
write=self._timeout,
pool=self._timeout,
)
# Build cookies jar for cross-domain redirect support
cookies = self._build_cookies_jar()
self._http_client = httpx.AsyncClient(
headers={
"Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
"Cookie": self.auth.cookie_header,
},
cookies=cookies,
timeout=timeout,
)

Expand All @@ -161,17 +165,43 @@ def is_open(self) -> bool:
return self._http_client is not None

def update_auth_headers(self) -> None:
"""Update HTTP client headers with current auth tokens.
"""Update HTTP client cookies with current auth tokens.

Call this after modifying auth tokens (e.g., after refresh_auth())
to ensure the HTTP client uses the updated credentials.
Uses httpx.Cookies jar to properly handle cross-domain redirects.

Note: This method MERGES new cookies into the existing jar rather
than replacing it, so that live cookies received from Google during
redirects (e.g., refreshed SID tokens) are preserved.

Raises:
RuntimeError: If client is not initialized.
"""
if not self._http_client:
raise RuntimeError("Client not initialized. Use 'async with' context.")
self._http_client.headers["Cookie"] = self.auth.cookie_header

# Merge new cookies into existing jar to preserve any live cookies
# received during redirects (e.g., refreshed tokens from accounts.google.com)
for name, value in self.auth.cookies.items():
self._http_client.cookies.set(name, value, domain=".google.com")

def _build_cookies_jar(self) -> httpx.Cookies:
"""Build an httpx.Cookies jar from auth tokens.

Uses .google.com as the domain to ensure cookies are sent
across all Google subdomains including accounts.google.com
for cross-domain auth refresh redirects.

Returns:
httpx.Cookies jar populated with auth cookies.
"""
cookies = httpx.Cookies()
for name, value in self.auth.cookies.items():
# Use .google.com domain to cover all subdomains including
# accounts.google.com (used for token refresh redirects)
cookies.set(name, value, domain=".google.com")
Comment on lines +200 to +203
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

Forcing the ".google.com" domain on all cookies is a problematic side effect of using a flat dictionary (self.auth.cookies) to populate the cookie jar. While this ensures cookies are sent to accounts.google.com during redirects, it also means that cookies originally intended for other domains—such as .googleusercontent.com (which is used for media downloads and is explicitly allowed in auth.py)—will now be sent to .google.com and not to their intended destination. This will likely break authenticated downloads from Google's content servers. Additionally, ensure that when making requests to external URLs using these cookies, the domain is validated against an allowlist of trusted domains to prevent credential leakage.

References
  1. When making requests to external URLs using authentication cookies, always validate that the URL's domain is on an allowlist of trusted domains to prevent credential leakage.

return cookies

def _build_url(self, rpc_method: RPCMethod, source_path: str = "/") -> str:
"""Build the batchexecute URL for an RPC call.
Expand Down
138 changes: 134 additions & 4 deletions src/notebooklm/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
".google.com",
"notebooklm.google.com",
".googleusercontent.com",
"accounts.google.com", # Required for token refresh redirects
}

# Regional Google ccTLDs where Google may set auth cookies
Expand Down Expand Up @@ -642,11 +643,88 @@ def load_httpx_cookies(path: Path | None = None) -> "httpx.Cookies":
return cookies


def extract_cookies_with_domains(
    storage_state: dict[str, Any],
) -> dict[tuple[str, str], str]:
    """Extract Google cookies from storage state preserving original domains.

    Unlike extract_cookies_from_storage() which returns a simple dict of
    name->value, this function returns a dict keyed by (name, domain) tuples
    to preserve the original cookie domains. This is required for building
    proper httpx.Cookies jars that handle cross-domain redirects correctly.

    Args:
        storage_state: Parsed JSON from Playwright's storage state file.

    Returns:
        Dict mapping (cookie_name, domain) tuples to values.
        Example: {("SID", ".google.com"): "abc123", ("HSID", ".google.com"): "def456"}

    Raises:
        ValueError: If required cookies (SID) are missing from storage state.
    """
    cookie_map: dict[tuple[str, str], str] = {}

    for cookie in storage_state.get("cookies", []):
        domain = cookie.get("domain", "")
        name = cookie.get("name")
        value = cookie.get("value", "")

        # Skip cookies from untrusted domains and malformed entries.
        if not _is_allowed_auth_domain(domain) or not name or not value:
            continue

        # The domain is part of the key, so entries for *different* domains
        # never collide — there is no cross-domain precedence to resolve here.
        # For duplicate (name, domain) pairs the first value wins, except on
        # .google.com where the last one seen overwrites earlier values.
        key = (name, domain)
        if key not in cookie_map or domain == ".google.com":
            cookie_map[key] = value

    # Validate required cookies exist (any domain)
    cookie_names = {name for name, _ in cookie_map}
    missing = MINIMUM_REQUIRED_COOKIES - cookie_names
    if missing:
        raise ValueError(
            f"Missing required cookies: {missing}\nRun 'notebooklm login' to authenticate."
        )

    return cookie_map


def build_httpx_cookies_from_storage(path: Path | None = None) -> "httpx.Cookies":
    """Build an httpx.Cookies jar with original domains preserved.

    Loads cookies from storage and fills an httpx.Cookies jar, registering
    each cookie under the domain it was originally set for. Keeping the
    original domains intact is critical for cross-domain redirects (e.g.,
    to accounts.google.com for token refresh) to work correctly.

    Args:
        path: Path to storage_state.json. If provided, takes precedence over env vars.

    Returns:
        httpx.Cookies jar with all cookies set to their original domains.

    Raises:
        FileNotFoundError: If storage file doesn't exist.
        ValueError: If required cookies are missing or JSON is malformed.
    """
    cookie_map = extract_cookies_with_domains(_load_storage_state(path))

    jar = httpx.Cookies()
    for (cookie_name, cookie_domain), cookie_value in cookie_map.items():
        jar.set(cookie_name, cookie_value, domain=cookie_domain)

    return jar


async def fetch_tokens(cookies: dict[str, str]) -> tuple[str, str]:
"""Fetch CSRF token and session ID from NotebookLM homepage.

Makes an authenticated request to NotebookLM and extracts the required
tokens from the page HTML.
tokens from the page HTML. Uses httpx.Cookies() jar to properly handle
cross-domain redirects (e.g., to accounts.google.com for token refresh).

Args:
cookies: Dict of Google auth cookies
Expand All @@ -659,12 +737,16 @@ async def fetch_tokens(cookies: dict[str, str]) -> tuple[str, str]:
ValueError: If tokens cannot be extracted from response
"""
logger.debug("Fetching CSRF and session tokens from NotebookLM")
cookie_header = "; ".join(f"{k}={v}" for k, v in cookies.items())

async with httpx.AsyncClient() as client:
# Build httpx.Cookies jar instead of raw header to properly handle
# cross-domain redirects (e.g., to accounts.google.com for refresh)
cookie_jar = httpx.Cookies()
for name, value in cookies.items():
cookie_jar.set(name, value, domain=".google.com")
Comment on lines +743 to +745
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This logic for building a cookie jar is duplicated in ClientCore._build_cookies_jar. It should be centralized (e.g., as a method on AuthTokens) to ensure consistent behavior. Additionally, as noted in the core client review, forcing the .google.com domain for all cookies may break functionality for other domains like googleusercontent.com which are used for media downloads.

Comment on lines +741 to +745
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fetch_tokens() builds a cookie jar from the filtered cookies dict, but extract_cookies_from_storage() currently excludes cookies whose domain is accounts.google.com (because _is_allowed_auth_domain() only allows ALLOWED_COOKIE_DOMAINS and regional .google.*). If the cross-host refresh flow depends on accounts-scoped cookies (as described in #273), they will never make it into this jar, so redirects to accounts.google.com can still fail. Consider expanding the auth-domain allowlist to include accounts.google.com (and possibly .accounts.google.com) or switching token fetching to use a domain-preserving httpx.Cookies source (similar to load_httpx_cookies()).

Suggested change
# Build httpx.Cookies jar instead of raw header to properly handle
# cross-domain redirects (e.g., to accounts.google.com for refresh)
cookie_jar = httpx.Cookies()
for name, value in cookies.items():
cookie_jar.set(name, value, domain=".google.com")
# Build an httpx.Cookies jar instead of a raw Cookie header so cookies
# participate in redirect handling. The input is a flattened name/value
# mapping, so domain specificity has already been lost by this point.
# To preserve current behavior while allowing redirects through the Google
# Accounts host, register each cookie for both the general Google domain
# and the concrete accounts.google.com host.
cookie_jar = httpx.Cookies()
for name, value in cookies.items():
cookie_jar.set(name, value, domain=".google.com")
cookie_jar.set(name, value, domain="accounts.google.com")

Copilot uses AI. Check for mistakes.

async with httpx.AsyncClient(cookies=cookie_jar) as client:
response = await client.get(
"https://notebooklm.google.com/",
headers={"Cookie": cookie_header},
follow_redirects=True,
timeout=30.0,
)
Comment on lines +747 to 752
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new redirect-handling behavior in fetch_tokens() is not directly asserted in tests. There are existing pytest_httpx patterns in this repo to inspect outgoing requests; adding an assertion that the redirected request to accounts.google.com still includes a Cookie header (and/or that the request chain completes when cookies are valid) would prevent regressions of #273.

Copilot uses AI. Check for mistakes.
Expand All @@ -685,3 +767,51 @@ async def fetch_tokens(cookies: dict[str, str]) -> tuple[str, str]:

logger.debug("Authentication tokens obtained successfully")
return csrf, session_id


async def fetch_tokens_with_domains(path: Path | None = None) -> tuple[str, str]:
    """Fetch CSRF token and session ID using storage with original domains.

    Loads cookies from storage preserving their original domains and makes
    an authenticated request to the NotebookLM homepage. Preferred when
    proper cross-domain redirect handling with the original cookie domains
    intact is required.

    Args:
        path: Path to storage_state.json. If provided, takes precedence over env vars.

    Returns:
        Tuple of (csrf_token, session_id)

    Raises:
        FileNotFoundError: If storage file doesn't exist.
        httpx.HTTPError: If request fails.
        ValueError: If tokens cannot be extracted from response.
    """
    logger.debug("Fetching CSRF and session tokens with original cookie domains")

    # The jar keeps every cookie on the domain it was captured from, so
    # redirects through other Google hosts carry the right credentials.
    jar = build_httpx_cookies_from_storage(path)

    async with httpx.AsyncClient(cookies=jar) as client:
        response = await client.get(
            "https://notebooklm.google.com/",
            follow_redirects=True,
            timeout=30.0,
        )
        response.raise_for_status()

    final_url = str(response.url)

    # Landing on a Google sign-in page after redirects means the session is stale.
    if is_google_auth_redirect(final_url):
        raise ValueError(
            f"Authentication expired or invalid. Redirected to: {final_url}\n"
            "Run 'notebooklm login' to re-authenticate."
        )

    csrf = extract_csrf_from_html(response.text, final_url)
    session_id = extract_session_id_from_html(response.text, final_url)

    logger.debug("Authentication tokens obtained successfully")
    return csrf, session_id
4 changes: 1 addition & 3 deletions src/notebooklm/cli/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,7 @@ def _login_with_browser_cookies(storage_path: Path, browser_name: str) -> None:
storage_path.chmod(0o600)
except OSError as e:
logger.error("Failed to save authentication to %s: %s", storage_path, e)
console.print(
f"[red]Failed to save authentication to {storage_path}.[/red]\n" f"Details: {e}"
)
console.print(f"[red]Failed to save authentication to {storage_path}.[/red]\nDetails: {e}")
raise SystemExit(1) from None

console.print(f"\n[green]Authentication saved to:[/green] {storage_path}")
Expand Down
46 changes: 46 additions & 0 deletions tests/integration/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,3 +446,49 @@ async def test_returns_empty_list_when_notebook_info_missing_sources(self, auth_
ids = await core.get_source_ids("nb_123")

assert ids == []


class TestCrossDomainCookiePreservation:
    """Tests for cookie preservation during cross-domain redirects."""

    @pytest.mark.asyncio
    async def test_cookies_preserved_on_cross_domain_redirect(self, auth_tokens):
        """Verify cookies persist when redirecting from notebooklm to accounts.google.com."""
        async with NotebookLMClient(auth_tokens) as client:
            jar = client._core._http_client.cookies

            # Plant a sentinel cookie that a refresh must not wipe.
            jar.set("REDIRECT_SENTINEL", "survives_refresh", domain=".google.com")

            # Simulate what happens during a redirect: update_auth_headers
            # merges new cookies without wiping existing ones (like a
            # refreshed SID from accounts.google.com).
            client._core.update_auth_headers()

            # httpx.Cookies.get() returns None when the cookie is absent,
            # so an exact match proves the sentinel survived the merge.
            sentinel = jar.get("REDIRECT_SENTINEL", domain=".google.com")
            assert sentinel == "survives_refresh"

    @pytest.mark.asyncio
    async def test_update_auth_headers_merges_not_replaces(self, auth_tokens):
        """Verify update_auth_headers merges new cookies, preserving live redirect cookies."""
        async with NotebookLMClient(auth_tokens) as client:
            jar = client._core._http_client.cookies

            # Simulate a live cookie received from an accounts.google.com redirect.
            live_name, live_value = "__Secure-1PSIDRTS", "redirect_refreshed_value"
            jar.set(live_name, live_value, domain=".google.com")

            # A token refresh should merge into the jar, not rebuild it.
            client._core.update_auth_headers()

            # The exact live value must still be present (merged, not replaced).
            assert jar.get(live_name, domain=".google.com") == live_value
Loading
Loading