Skip to content

Add tests for PDF generation #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 99 additions & 60 deletions maykin_common/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,21 @@
Depends on ``weasyprint``.
"""

import functools
import logging
import mimetypes
from collections.abc import Mapping
from io import BytesIO
from pathlib import PurePosixPath
from typing import NotRequired, TypedDict
from urllib.parse import ParseResult, urljoin, urlparse
from urllib.parse import ParseResult, urlparse

from django.conf import settings
from django.contrib.staticfiles import finders
from django.contrib.staticfiles.storage import staticfiles_storage
from django.core.files.storage import FileSystemStorage, default_storage
from django.core.files.storage.base import Storage
from django.core.signals import setting_changed
from django.dispatch import receiver
from django.template.loader import render_to_string
from django.utils.module_loading import import_string

Expand All @@ -37,17 +40,68 @@
__all__ = ["render_to_pdf"]


def get_base_url(*args, **kwargs) -> str:
def get_base_url() -> str:
"""
Get the base URL where the project is served.
"""

if pdf_base_url_function := get_setting("PDF_BASE_URL_FUNCTION"):
return import_string(pdf_base_url_function)(*args, **kwargs)

return import_string(pdf_base_url_function)()
raise NotImplementedError("You must implement 'get_base_url'.")


def _ensure_fully_qualified_url(url: str, base: ParseResult) -> ParseResult:
"""
Ensure the passed in URL is fully qualified.

If the URL does not have a network location, we take the protocol and netloc from
the provided base URL to make it fully qualified. This assumes no netloc implies
no protocol.
"""
parsed_url = urlparse(url)
match parsed_url:
case ParseResult(scheme=scheme, netloc=netloc) if scheme and netloc:
return parsed_url
case _:
# it is documented as public API!
return parsed_url._replace(scheme=base.scheme, netloc=base.netloc)


@functools.cache
def _get_candidate_storages() -> Mapping[ParseResult, FileSystemStorage]:
    """
    Map fully qualified URL prefixes to the local storages that back them.

    The staticfiles storage (prefix from ``STATIC_URL``) and the default storage
    (prefix from ``MEDIA_URL``) are inspected, in that order. A storage is only
    included when it is a :class:`FileSystemStorage` instance, since only those can
    be read from disk instead of being fetched over the network.

    The result is memoized by :func:`functools.cache`.
    """
    project_url = urlparse(get_base_url())

    # (name of the setting holding the URL prefix, storage serving that prefix)
    # static files are listed before media so they end up first in the mapping
    sources = (
        ("STATIC_URL", staticfiles_storage),
        ("MEDIA_URL", default_storage),
    )

    storages_by_prefix: dict[ParseResult, FileSystemStorage] = {}
    for setting_name, storage in sources:
        if not isinstance(storage, FileSystemStorage):
            continue
        prefix = _ensure_fully_qualified_url(
            getattr(settings, setting_name), base=project_url
        )
        storages_by_prefix[prefix] = storage

    return storages_by_prefix


@receiver(setting_changed, dispatch_uid="maykin_common.pdf._reset_storages")
def _reset_storages(sender, setting: str, **kwargs):
# mostly for tests, settings *should* not change in production code
match setting:
case "STATIC_ROOT" | "MEDIA_ROOT" | "STORAGES" | "PDF_BASE_URL_FUNCTION":
_get_candidate_storages.cache_clear()
case _: # pragma: no cover
pass


class UrlFetcherResult(TypedDict):
mime_type: str | None
encoding: str | None
Expand All @@ -59,84 +113,69 @@ class UrlFetcherResult(TypedDict):

class UrlFetcher:
"""
URL fetcher that skips the network for /static/* files.
URL fetcher that skips the network for /static/* and /media/* files.
"""

def __init__(self):
self.static_url = self._get_fully_qualified_url(settings.STATIC_URL)
is_static_local_storage = issubclass(
staticfiles_storage.__class__, FileSystemStorage
)

self.media_url = self._get_fully_qualified_url(settings.MEDIA_URL)
is_media_local_storage = issubclass(
default_storage.__class__, FileSystemStorage
)

self.candidates = (
(self.static_url, staticfiles_storage, is_static_local_storage),
(self.media_url, default_storage, is_media_local_storage),
)

@staticmethod
def _get_fully_qualified_url(setting: str):
fully_qualified_url = setting
if not urlparse(setting).netloc:
fully_qualified_url = urljoin(get_base_url(), setting)
return urlparse(fully_qualified_url)

def __call__(self, url: str) -> UrlFetcherResult:
orig_url = url
"""
Check if the URL matches one of our candidates and use it if there's a match.

Matching is done on the URLs of the storages and the requested asset. If the
prefix matches, look up the relative asset path in the storage and serve it
if it's found. If not, defer to the default URL fetcher of WeasyPrint.
"""
parsed_url = urlparse(url)
assert parsed_url.netloc and parsed_url.netloc, "Expected fully qualified URL"

candidate = self.get_match_candidate(parsed_url)
if candidate is not None:
base_url, storage = candidate
path = PurePosixPath(parsed_url.path).relative_to(base_url.path)
# Try candidates, respecting the order of the candidate configuration.
for base, storage in _get_candidate_storages().items():
base_url = base.geturl()
# Skip to the next candidate if the URLs don't share a prefix.
if not url.startswith(base_url):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some normalisation going on? Should we consider:

Suggested change
if not url.startswith(base_url):
if not parsed_url.geturl().startswith(base_url):

I don't think so. Unsure.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had the same thought. A URL with a trailing `?` gets normalized to a string without the `?` because there are no query params at all, but otherwise I don't think it's relevant: in all realistic cases you configure the static URL as `[{scheme}{netloc}]{path}`, which always acts as a prefix, so whatever trails after the base URL is irrelevant anyway. The previous check verified that scheme and netloc match, and then did a prefix match on the path.

I picked the less complex option because of this context.

continue

# get the relative path to lookup in the storage to obtain an absolute path
rel_path = PurePosixPath(parsed_url.path).relative_to(base.path)
rel_path_str = str(rel_path)

absolute_path = None
if storage.exists(str(path)):
absolute_path = storage.path(str(path))
absolute_path: str | None = None
if storage.exists(rel_path_str):
absolute_path = storage.path(rel_path_str)
elif settings.DEBUG and storage is staticfiles_storage:
# use finders so that it works in dev too, we already check that it's
# using filesystem storage earlier
absolute_path = finders.find(str(path))
absolute_path = finders.find(rel_path_str)

# we bail out, since we hit a storage that matches the URL prefix. Other
# candidates will not have match either due to their different URL prefixes.
if absolute_path is None:
logger.error("Could not resolve path '%s'", path)
return weasyprint.default_url_fetcher(orig_url) # pyright:ignore[reportReturnType]
logger.error(
"path_resolution_failed",
extra={
"path": rel_path_str,
"storage": storage,
},
)
return weasyprint.default_url_fetcher(url) # pyright:ignore[reportReturnType]

content_type, encoding = mimetypes.guess_type(absolute_path)
result: UrlFetcherResult = {
"mime_type": content_type,
"encoding": encoding,
"redirected_url": orig_url,
"filename": path.parts[-1],
"redirected_url": url,
"filename": rel_path.parts[-1],
}
with open(absolute_path, "rb") as f:
result["file_obj"] = BytesIO(f.read())
return result
return weasyprint.default_url_fetcher(orig_url) # pyright:ignore[reportReturnType]

def get_match_candidate(
self, url: ParseResult
) -> tuple[ParseResult, Storage] | None:
for parsed_base_url, storage, is_local_storage in self.candidates:
if not is_local_storage:
continue
same_base = (parsed_base_url.scheme, parsed_base_url.netloc) == (
url.scheme,
url.netloc,
)
if not same_base:
continue
if not url.path.startswith(parsed_base_url.path):
continue
return (parsed_base_url, storage)
return None
else:
# all candidates were tried, none were a match -> defer to the weasyprint
# default
return weasyprint.default_url_fetcher(url) # pyright:ignore[reportReturnType]


def render_to_pdf(template_name: str, context: dict) -> tuple[str, bytes]:
def render_to_pdf(template_name: str, context: dict[str, object]) -> tuple[str, bytes]:
"""
Render a (HTML) template to PDF with the given context.
"""
Expand Down
1 change: 1 addition & 0 deletions testapp/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
]

STATIC_URL = "/static/"
STATIC_ROOT = BASE_DIR / "static_root"

ROOT_URLCONF = "testapp.urls"

Expand Down
3 changes: 3 additions & 0 deletions testapp/static/testapp/some.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:root {
--color-primary: purple;
}
12 changes: 12 additions & 0 deletions testapp/templates/testapp/pdf/external_url.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>External URL</title>
<link href="https://example.com/index.css" rel="stylesheet">
</head>
<body>
External URL.
</body>
</html>
11 changes: 11 additions & 0 deletions testapp/templates/testapp/pdf/hello_world.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Hello world</title>
</head>
<body>
Hello {{ world }}.
</body>
</html>
14 changes: 14 additions & 0 deletions testapp/templates/testapp/pdf/local_url.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{% load static %}
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Local URL</title>
<link href="http://testserver/static/testapp/some.css" rel="stylesheet">
<link href="{% static 'testapp/some.css' %}" rel="stylesheet">
</head>
<body>
Local URL.
</body>
</html>
13 changes: 13 additions & 0 deletions testapp/templates/testapp/pdf/missing_asset.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% load static %}
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Missing asset</title>
<link href="/static/non_existent.css" rel="stylesheet">
</head>
<body>
Missing asset.
</body>
</html>
96 changes: 96 additions & 0 deletions tests/pdf/test_pdf_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from unittest.mock import patch

from django.core.management import call_command

import pytest

from maykin_common.pdf import render_to_pdf


def get_base_url() -> str:
    """Return the fixed base URL used by the PDF tests."""
    base_url = "http://testserver"
    return base_url


@pytest.fixture(autouse=True)
def _collectstatic(settings, tmp_path):
    """
    Collect static files into a ``static_root`` directory under ``tmp_path``.

    ``STATIC_ROOT`` is pointed at that directory before ``collectstatic`` runs
    (non-interactive, symlinking instead of copying).
    """
    settings.STATIC_ROOT = str(tmp_path / "static_root")
    call_command(
        "collectstatic",
        interactive=False,
        link=True,
        verbosity=0,
    )
    yield
Comment on lines +14 to +19
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will the contents of static_root change in tests or could we scope="module" this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried it, but the `settings` fixture is function-scoped, so I admitted defeat :(



@pytest.fixture(autouse=True)
def _settings(settings):
    """Point ``PDF_BASE_URL_FUNCTION`` at this module's ``get_base_url``."""
    dotted_path = f"{__name__}.get_base_url"
    settings.PDF_BASE_URL_FUNCTION = dotted_path


def test_raises_if_setting_not_configured_properly(settings):
    """Rendering must raise ``NotImplementedError`` without a base URL function."""
    settings.PDF_BASE_URL_FUNCTION = None
    template_name = "testapp/pdf/hello_world.html"

    with pytest.raises(NotImplementedError):
        render_to_pdf(template_name, {})


def test_render_template_returns_html():
    """``render_to_pdf`` returns the rendered HTML string and the PDF bytes."""
    context = {"world": "pytest"}

    rendered_html, pdf_bytes = render_to_pdf("testapp/pdf/hello_world.html", context)

    assert isinstance(rendered_html, str)
    assert "Hello pytest" in rendered_html
    assert isinstance(pdf_bytes, bytes)


def test_external_url_uses_default_resolver():
    """An asset on an external host is handed to WeasyPrint's default fetcher."""
    fetcher_target = "maykin_common.pdf.weasyprint.default_url_fetcher"

    with patch(fetcher_target) as default_fetcher:
        render_to_pdf("testapp/pdf/external_url.html", {})

    default_fetcher.assert_called_once_with("https://example.com/index.css")


def test_local_asset_does_not_use_default_resolver():
    """Rendering the local-URL template must not call the default fetcher."""
    fetcher_target = "maykin_common.pdf.weasyprint.default_url_fetcher"

    with patch(fetcher_target) as default_fetcher:
        render_to_pdf("testapp/pdf/local_url.html", {})

    default_fetcher.assert_not_called()


def test_render_with_missing_asset():
    """A static asset that cannot be resolved is passed to the default fetcher."""
    fetcher_target = "maykin_common.pdf.weasyprint.default_url_fetcher"

    with patch(fetcher_target) as default_fetcher:
        render_to_pdf("testapp/pdf/missing_asset.html", {})

    default_fetcher.assert_called_once_with(
        "http://testserver/static/non_existent.css"
    )


def test_resolves_assets_in_debug_mode(settings):
    """
    With ``DEBUG`` on and ``STATIC_ROOT`` set to a bad path, rendering the
    local-URL template must still not call the default fetcher.
    """
    settings.STATIC_ROOT = "/bad/path"
    settings.DEBUG = True
    fetcher_target = "maykin_common.pdf.weasyprint.default_url_fetcher"

    with patch(fetcher_target) as default_fetcher:
        render_to_pdf("testapp/pdf/local_url.html", {})

    default_fetcher.assert_not_called()


def test_fully_qualified_static_url(settings):
    """A fully qualified ``STATIC_URL`` must not trigger the default fetcher."""
    settings.STATIC_URL = "http://testserver/static/"
    fetcher_target = "maykin_common.pdf.weasyprint.default_url_fetcher"

    with patch(fetcher_target) as default_fetcher:
        render_to_pdf("testapp/pdf/local_url.html", {})

    default_fetcher.assert_not_called()


def test_other_storages_than_file_system_storage(settings):
    """With in-memory storages configured, the default fetcher is used."""
    in_memory_backend = "django.core.files.storage.InMemoryStorage"
    settings.STORAGES = {
        "default": {"BACKEND": in_memory_backend},
        # this causes the /static/ prefix to be absent (!)
        "staticfiles": {"BACKEND": in_memory_backend},
    }
    fetcher_target = "maykin_common.pdf.weasyprint.default_url_fetcher"

    with patch(fetcher_target) as default_fetcher:
        render_to_pdf("testapp/pdf/local_url.html", {})

    default_fetcher.assert_called_with("http://testserver/testapp/some.css")
Loading