Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions fast_flights/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
create_query as create_filter, # alias
)
from .fetcher import get_flights, fetch_flights_html
from .fetch_result import FetchResult

__all__ = [
"FlightQuery",
Expand All @@ -17,5 +18,6 @@
"create_filter",
"get_flights",
"fetch_flights_html",
"FetchResult",
"integrations",
]
12 changes: 12 additions & 0 deletions fast_flights/fetch_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class FetchResult:
    """Container for what an integration captured during a single fetch.

    Used by integrations that record both the rendered page HTML and the
    bodies of XHR responses observed while the page was loading.

    Attributes:
        html: The page HTML. Defaults to an empty string.
        xhr_bodies: Captured XHR response bodies, each either text or raw
            bytes. Every instance gets its own fresh list by default.
        url: The URL that was fetched, or ``None`` when not recorded.
    """

    html: str = ""
    xhr_bodies: list[str | bytes] = field(default_factory=list)
    url: str | None = None
7 changes: 4 additions & 3 deletions fast_flights/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from primp import Client

from .fetch_result import FetchResult
from .integrations.base import Integration
from .parser import MetaList, parse
from .querying import Query
Expand Down Expand Up @@ -57,8 +58,8 @@ def get_flights(
q: The query.
proxy (str, optional): Proxy.
"""
html = fetch_flights_html(q, proxy=proxy, integration=integration)
return parse(html)
fetched = fetch_flights_html(q, proxy=proxy, integration=integration)
return parse(fetched)


def fetch_flights_html(
Expand All @@ -67,7 +68,7 @@ def fetch_flights_html(
*,
proxy: str | None = None,
integration: Integration | None = None,
) -> str:
) -> str | FetchResult:
"""Fetch flights and get the **HTML**.

Args:
Expand Down
3 changes: 2 additions & 1 deletion fast_flights/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .base import Integration
from .bright_data import BrightData
from .playwright import Playwright

__all__ = ["Integration", "BrightData"]
__all__ = ["Integration", "BrightData", "Playwright"]
3 changes: 2 additions & 1 deletion fast_flights/integrations/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from abc import ABC

from ..fetch_result import FetchResult
from ..querying import Query

try:
Expand All @@ -15,7 +16,7 @@
class Integration(ABC):
"""Represents an integration."""

def fetch_html(self, q: Query | str, /) -> str:
def fetch_html(self, q: Query | str, /) -> str | FetchResult:
"""Fetch the flights page HTML from a query.

Args:
Expand Down
73 changes: 73 additions & 0 deletions fast_flights/integrations/playwright.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from __future__ import annotations

from typing import Any

from ..fetch_result import FetchResult
from ..querying import Query
from .base import Integration

# Base Google Flights page. It is navigated to once before the actual query
# URL, and is also the prefix used when the query is given as a plain string.
GOOGLE_FLIGHTS_URL = "https://www.google.com/travel/flights"
# Substring looked for in each response URL to pick out the shopping-results
# XHR responses whose bodies are captured.
XHR_MARKER = "FlightsFrontendService/GetShoppingResults"


class Playwright(Integration):
    """Local Chromium integration that captures Google Flights shopping XHRs.

    Drives a headless Chromium through Playwright's sync API, loads the
    flights page for a query, and returns the final page HTML together with
    the bodies of every response whose URL contains ``XHR_MARKER``.
    """

    def fetch_html(self, q: Query | str, /) -> FetchResult:
        """Load the flights page for *q* and capture HTML plus XHR bodies.

        Args:
            q: Either a ``Query`` (its ``url()`` is used as-is) or a raw
                string, which is URL-encoded and sent as the ``q`` parameter
                of ``GOOGLE_FLIGHTS_URL``.

        Returns:
            A ``FetchResult`` holding the page HTML, the captured XHR response
            bodies (as bytes), and the URL that was loaded.

        Raises:
            Exception: Whatever Playwright raises if the browser cannot be
                launched or the query URL cannot be navigated to. Failures of
                the initial landing-page visit, the network-idle wait, and
                individual body reads are deliberately ignored.
        """
        # Function-scope imports: Playwright is only needed when this
        # integration is actually used.
        from urllib.parse import quote_plus

        from playwright.sync_api import sync_playwright

        if isinstance(q, Query):
            url = q.url()
        else:
            # Encode the free-text query so spaces, '&', '#', '+', '%' and
            # non-ASCII characters cannot corrupt the URL or be parsed as
            # extra query parameters.
            url = f"{GOOGLE_FLIGHTS_URL}?q={quote_plus(q)}"

        response_objects: list[Any] = []

        def on_response(response: Any) -> None:
            # Keep only the shopping-results responses; bodies are read later.
            if XHR_MARKER in response.url:
                response_objects.append(response)

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    user_agent=(
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                        "AppleWebKit/537.36 (KHTML, like Gecko) "
                        "Chrome/145.0.0.0 Safari/537.36"
                    ),
                    locale="en-US",
                    viewport={"width": 1280, "height": 900},
                )
                page = context.new_page()
                # Overrides navigator.webdriver / plugins / languages and
                # defines window.chrome before any page script runs
                # (presumably to look less like an automated browser).
                page.add_init_script("""
                    Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
                    window.chrome = {runtime: {}};
                    Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
                    Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
                """)

                page.on("response", on_response)

                # Best-effort visit to the landing page before the real query;
                # a failure here must not abort the fetch.
                try:
                    page.goto(
                        GOOGLE_FLIGHTS_URL,
                        timeout=25_000,
                        wait_until="domcontentloaded",
                    )
                    page.wait_for_timeout(1_000)
                except Exception:
                    pass

                page.goto(url, timeout=25_000, wait_until="domcontentloaded")
                # The page may never become fully idle; continue with
                # whatever has loaded once the wait times out.
                try:
                    page.wait_for_load_state("networkidle", timeout=12_000)
                except Exception:
                    pass
                page.wait_for_timeout(2_000)

                # Annotated to match FetchResult.xhr_bodies (lists are
                # invariant for type checkers).
                xhr_bodies: list[str | bytes] = []
                for response in response_objects:
                    # A body that cannot be read is skipped rather than
                    # failing the whole fetch.
                    try:
                        xhr_bodies.append(response.body())
                    except Exception:
                        pass

                html = page.content()
            finally:
                # Always release the browser, even when navigation fails.
                browser.close()

        return FetchResult(html=html, xhr_bodies=xhr_bodies, url=url)
1 change: 1 addition & 0 deletions fast_flights/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Alliance:
class JsMetadata:
airlines: list[Airline]
alliances: list[Alliance]
diagnostics: dict[str, object] | None = None


@dataclass
Expand Down
Loading