Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.0
rev: v0.15.1
hooks:
- id: ruff
- id: ruff-check
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs
Expand Down
6 changes: 5 additions & 1 deletion docs/_ext/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@


def http_api_reference_role(
name, rawtext, text, lineno, inliner, options={}, content=[]
name, rawtext, text, lineno, inliner, options=None, content=None
):
if options is None:
options = {}
if content is None:
content = []
match = re.search(
r"(?s)^(.+?)\s*<\s*((?:request|response):[a-zA-Z.]+)\s*>\s*$", text
)
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path

project = "scrapy-zyte-api"
copyright = "2023, Zyte Group Ltd"
project_copyright = "2023, Zyte Group Ltd"
author = "Zyte Group Ltd"
release = "0.32.0"

Expand Down
138 changes: 138 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,141 @@ filterwarnings = [
"ignore:RetryMiddleware\\.process_spider_exception\\(\\):scrapy.exceptions.ScrapyDeprecationWarning",
"ignore::scrapy.exceptions.ScrapyDeprecationWarning:scrapy_poet",
]

[tool.ruff.lint]
extend-select = [
# flake8-builtins
"A",
# flake8-async
"ASYNC",
# flake8-bugbear
"B",
# flake8-comprehensions
"C4",
# flake8-commas
"COM",
# pydocstyle
"D",
# flake8-future-annotations
"FA",
# flynt
"FLY",
# refurb
"FURB",
# isort
"I",
# flake8-implicit-str-concat
"ISC",
# flake8-logging
"LOG",
# Perflint
"PERF",
# pygrep-hooks
"PGH",
# flake8-pie
"PIE",
# pylint
"PL",
# flake8-pytest-style
"PT",
# flake8-use-pathlib
"PTH",
# flake8-pyi
"PYI",
# flake8-quotes
"Q",
# flake8-return
"RET",
# flake8-raise
"RSE",
# Ruff-specific rules
"RUF",
# flake8-bandit
"S",
# flake8-simplify
"SIM",
# flake8-slots
"SLOT",
# flake8-debugger
"T10",
# flake8-type-checking
"TC",
# pyupgrade
"UP",
# pycodestyle warnings
"W",
# flake8-2020
"YTT",
]
ignore = [
# Trailing comma missing
"COM812",
# Missing docstring in public module
"D100",
# Missing docstring in public class
"D101",
# Missing docstring in public method
"D102",
# Missing docstring in public function
"D103",
# Missing docstring in public package
"D104",
# Missing docstring in magic method
"D105",
# Missing docstring in public nested class
"D106",
# Missing docstring in __init__
"D107",
# One-line docstring should fit on one line with quotes
"D200",
# No blank lines allowed after function docstring
"D202",
# 1 blank line required between summary line and description
"D205",
# Multi-line docstring closing quotes should be on a separate line
"D209",
# First line should end with a period
"D400",
# First line should be in imperative mood; try rephrasing
"D401",
# First line should not be the function's "signature"
"D402",
# First word of the first line should be properly capitalized
"D403",
# No blank lines allowed between a section header and its content
"D412",
# `try`-`except` within a loop incurs performance overhead
"PERF203",
# Too many return statements
"PLR0911",
# Too many branches
"PLR0912",
# Too many arguments in function definition
"PLR0913",
# Too many statements
"PLR0915",
# Magic value used in comparison
"PLR2004",
# String contains ambiguous {}.
"RUF001",
# Docstring contains ambiguous {}.
"RUF002",
# Comment contains ambiguous {}.
"RUF003",
# Mutable class attributes should be annotated with `typing.ClassVar`
"RUF012",
# Use of `assert` detected
"S101",
]

[tool.ruff.lint.isort]
split-on-trailing-comma = false

[tool.ruff.lint.per-file-ignores]
# we need to use typing.Set[] over modern alternatives with web-poet<0.19.0 && Python<3.11
# see https://github.com/scrapinghub/web-poet/pull/219
"scrapy_zyte_api/providers.py" = ["UP006", "UP035"]
"tests/**" = ["S"]

[tool.ruff.lint.pydocstyle]
convention = "pep257"
19 changes: 9 additions & 10 deletions scrapy_zyte_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

# Register web-poet serializers
from . import _serialization # noqa: F401

from ._annotations import ExtractFrom, actions, custom_attrs
from ._middlewares import (
ScrapyZyteAPIDownloaderMiddleware,
Expand Down Expand Up @@ -51,25 +50,25 @@
session_config_registry = _session_config_registry

# Public names exported by the package. Every name appears exactly once,
# grouped as constants, then classes, then lowercase helpers, each group in
# alphabetical order.
__all__ = [
    "SESSION_AGGRESSIVE_RETRY_POLICY",
    "SESSION_DEFAULT_RETRY_POLICY",
    "Actions",
    "Addon",
    "ExtractFrom",
    "Geolocation",
    "LocationSessionConfig",
    "ScrapyZyteAPIDownloadHandler",
    "ScrapyZyteAPIDownloaderMiddleware",
    "ScrapyZyteAPIRefererSpiderMiddleware",
    "ScrapyZyteAPIRequestFingerprinter",
    "ScrapyZyteAPISessionDownloaderMiddleware",
    "ScrapyZyteAPISpiderMiddleware",
    "Screenshot",
    "SessionConfig",
    "actions",
    "custom_attrs",
    "get_request_session_id",
    "is_session_init_request",
    "session_config",
    "session_config_registry",
]
70 changes: 36 additions & 34 deletions scrapy_zyte_api/_annotations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections.abc import Iterable
from enum import Enum
from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, TypedDict
from typing import Any, TypedDict


class ExtractFrom(str, Enum):
Expand All @@ -18,48 +19,48 @@ class ExtractFrom(str, Enum):
class _Selector(TypedDict, total=False):
type: str
value: str
state: Optional[str]
state: str | None


class Action(TypedDict, total=False):
action: str
address: Optional[dict]
args: Optional[dict]
button: Optional[str]
delay: Optional[float]
id: Optional[str]
key: Optional[str]
keyword: Optional[str]
left: Optional[int]
maxPageHeight: Optional[int]
maxScrollCount: Optional[int]
maxScrollDelay: Optional[float]
onError: Optional[str]
options: Optional[dict]
selector: Optional[_Selector]
source: Optional[str]
text: Optional[str]
timeout: Optional[float]
top: Optional[int]
url: Optional[str]
urlMatchingOptions: Optional[str]
urlPattern: Optional[str]
values: Optional[List[str]]
waitForNavigationTimeout: Optional[float]
waitUntil: Optional[str]


class _ActionResult(TypedDict, total=False):
address: dict | None
args: dict | None
button: str | None
delay: float | None
id: str | None
key: str | None
keyword: str | None
left: int | None
maxPageHeight: int | None
maxScrollCount: int | None
maxScrollDelay: float | None
onError: str | None
options: dict | None
selector: _Selector | None
source: str | None
text: str | None
timeout: float | None
top: int | None
url: str | None
urlMatchingOptions: str | None
urlPattern: str | None
values: list[str] | None
waitForNavigationTimeout: float | None
waitUntil: str | None


class _ActionResult(TypedDict, total=False): # noqa: PYI049
action: str
elapsedTime: float
status: str
error: Optional[str]
error: str | None


def make_hashable(obj: Any) -> Any:
"""Converts input into hashable form, to use in ``Annotated``."""
if isinstance(obj, (tuple, list)):
return tuple((make_hashable(e) for e in obj))
return tuple(make_hashable(e) for e in obj)

if isinstance(obj, dict):
return frozenset((make_hashable(k), make_hashable(v)) for k, v in obj.items())
Expand All @@ -78,15 +79,16 @@ def _from_hashable(obj: Any) -> Any:
return obj


def actions(value: Iterable[Action]) -> tuple[Any, ...]:
    """Convert an iterable of :class:`~scrapy_zyte_api.Action` dicts into a hashable value.

    Each item of *value* is passed through :func:`make_hashable` and the
    results are collected, in iteration order, into a tuple.
    """
    # both lists and dicts are not hashable and we need dep types to be hashable
    return tuple(make_hashable(action) for action in value)


def custom_attrs(
    input: dict[str, Any],  # noqa: A002
    options: dict[str, Any] | None = None,
) -> tuple[frozenset[Any], frozenset[Any] | None]:
    """Convert custom-attribute *input* and *options* into hashable values.

    *input* is always passed through :func:`make_hashable`. *options* is
    converted the same way only when it is truthy; ``None`` and an empty
    dict both yield ``None``.

    Returns a ``(input, options)`` pair of the converted values.
    """
    input_wrapped = make_hashable(input)
    options_wrapped = make_hashable(options) if options else None
    return input_wrapped, options_wrapped
12 changes: 6 additions & 6 deletions scrapy_zyte_api/_cookies.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from http.cookiejar import Cookie
from typing import Any, Dict, List, Optional
from typing import Any
from urllib.parse import urlparse

from scrapy.http import Request
from scrapy.http.cookies import CookieJar


def _get_cookie_jar(request: Request, cookie_jars: dict[Any, CookieJar]) -> CookieJar:
    """Return the cookie jar that *request* selects from *cookie_jars*.

    The jar is looked up by the value stored under the ``cookiejar`` key of
    ``request.meta``; when that key is absent the lookup key is ``None``.
    The lookup is a plain subscription, so a missing key raises whatever
    *cookie_jars* raises for it (``KeyError`` for a plain ``dict``).
    """
    jar_id = request.meta.get("cookiejar")
    return cookie_jars[jar_id]

Expand All @@ -24,9 +24,9 @@ def _get_cookie_domain(cookie, url):


def _process_cookies(
api_response: Dict[str, Any],
api_response: dict[str, Any],
request: Request,
cookie_jars: Optional[Dict[Any, CookieJar]],
cookie_jars: dict[Any, CookieJar] | None,
):
if not cookie_jars:
return
Expand Down Expand Up @@ -64,7 +64,7 @@ def _process_cookies(


def _get_all_cookies(
    request: Request, cookie_jars: dict[Any, CookieJar]
) -> list[Cookie]:
    """Return a list of the cookies in the jar that *request* selects.

    The jar is resolved with :func:`_get_cookie_jar`; the list is built by
    iterating that jar's ``jar`` attribute.
    """
    cookie_jar = _get_cookie_jar(request, cookie_jars)
    return list(cookie_jar.jar)
4 changes: 2 additions & 2 deletions scrapy_zyte_api/_middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@ def _get_spm_mw(self):
spm_mw_classes = []

try:
from scrapy_crawlera import CrawleraMiddleware
from scrapy_crawlera import CrawleraMiddleware # noqa: PLC0415
except ImportError:
pass
else:
spm_mw_classes.append(CrawleraMiddleware)

try:
from scrapy_zyte_smartproxy import ZyteSmartProxyMiddleware
from scrapy_zyte_smartproxy import ZyteSmartProxyMiddleware # noqa: PLC0415
except ImportError:
pass
else:
Expand Down
Loading
Loading