Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 53 additions & 13 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from readthedocs.notifications.models import Notification
from readthedocs.projects.constants import BITBUCKET_COMMIT_URL
from readthedocs.projects.constants import DOCTYPE_CHOICES
from readthedocs.projects.constants import DOWNLOADABLE_MEDIA_TYPES
from readthedocs.projects.constants import GITHUB_COMMIT_URL
from readthedocs.projects.constants import GITHUB_PULL_REQUEST_COMMIT_URL
from readthedocs.projects.constants import GITLAB_COMMIT_URL
Expand All @@ -73,6 +74,7 @@
from readthedocs.projects.ordering import ProjectItemPositionManager
from readthedocs.projects.validators import validate_build_config_file
from readthedocs.projects.version_handling import determine_stable_version
from readthedocs.storage import build_media_storage


log = structlog.get_logger(__name__)
Expand Down Expand Up @@ -542,20 +544,58 @@ def get_storage_paths(self, version_slug=None):
sometimes to clean old resources.
:rtype: list
"""
paths = []

slug = version_slug or self.slug
for type_ in MEDIA_TYPES:
paths.append(
self.project.get_storage_path(
type_=type_,
version_slug=slug,
include_file=False,
version_type=self.type,
)
)
return [
self.get_storage_path(media_type=media_type, version_slug=version_slug)
for media_type in MEDIA_TYPES
]

def get_storage_path(self, media_type, filename=None, version_slug=None):
    """
    Build the storage path of this version for a media type, optionally down to a file.

    :param media_type: The type of media (e.g. "pdf", "epub", "htmlzip"),
     it must be one of ``MEDIA_TYPES``.
    :param filename: Optional file name to append to the path, leading slashes are dropped.
     When omitted or empty, the directory path for the media type is returned.
    :param version_slug: Slug to use in the path instead of ``self.slug``.
     This is useful when the version slug has changed but we need to access old resources.
    :raises ValueError: If ``media_type`` is not in ``MEDIA_TYPES``.
    """
    if media_type not in MEDIA_TYPES:
        raise ValueError("Invalid type.")

    slug = version_slug or self.slug

    # External versions are stored under their own prefix.
    type_dir = f"{EXTERNAL}/{media_type}" if self.is_external else media_type

    # The version slug may come from an untrusted input, so it goes through
    # the storage join to avoid any path traversal.
    # All other values are already validated.
    version_dir = build_media_storage.join(f"{type_dir}/{self.project.slug}", slug)

    # A filename starting with `/` makes the join fail,
    # so leading slashes are stripped before joining.
    relative_name = filename.lstrip("/") if filename else ""
    if relative_name:
        return build_media_storage.join(version_dir, relative_name)
    return version_dir

def get_download_storage_path(self, media_type):
    """
    Get the storage path for a downloadable artifact of this version.

    This is basically a shortcut to `get_storage_path` that also adds the
    filename, which is built from the project slug and the media type
    (``<project slug>.<extension>``).

    :param media_type: The type of media (e.g. "pdf", "epub", "htmlzip").
    :raises ValueError: If ``media_type`` is not in ``DOWNLOADABLE_MEDIA_TYPES``.
    """
    if media_type not in DOWNLOADABLE_MEDIA_TYPES:
        raise ValueError("Invalid type for downloadable file.")

    # "htmlzip" artifacts use a plain "zip" file extension.
    extension = media_type.replace("htmlzip", "zip")
    filename = f"{self.project.slug}.{extension}"
    return self.get_storage_path(media_type=media_type, filename=filename)


class APIVersion(Version):
Expand Down
29 changes: 10 additions & 19 deletions readthedocs/embed/v3/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from readthedocs.api.v3.permissions import HasEmbedAPIAccess
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.embed.utils import clean_references
from readthedocs.projects.constants import MEDIA_TYPE_HTML
from readthedocs.storage import build_media_storage


Expand Down Expand Up @@ -92,26 +93,17 @@ def _download_page_content(self, url):
)
return response.content

def _get_page_content_from_storage(self, project, version, filename):
storage_path = project.get_storage_path(
"html",
version_slug=version.slug,
include_file=False,
version_type=version.type,
)

def _get_page_content_from_storage(self, version, filename):
# Decode encoded URLs (e.g. convert %20 into a whitespace)
filename = urllib.parse.unquote(filename)

# If the filename starts with `/`, the join will fail,
# so we strip it before joining it.
relative_filename = filename.lstrip("/")
file_path = build_media_storage.join(
storage_path,
relative_filename,
file_path = version.get_storage_path(
media_type=MEDIA_TYPE_HTML,
filename=filename,
)

tryfiles = [file_path, build_media_storage.join(file_path, "index.html")]
index_file_path = version.get_storage_path(
media_type=MEDIA_TYPE_HTML, filename=build_media_storage.join(filename, "index.html")
)
tryfiles = [file_path, index_file_path]
for tryfile in tryfiles:
try:
with build_media_storage.open(tryfile) as fd:
Expand All @@ -132,10 +124,9 @@ def _get_content_by_fragment(
if self.external:
page_content = self._download_page_content(url)
else:
project = self.unresolved_url.project
version = self.unresolved_url.version
filename = self.unresolved_url.filename
page_content = self._get_page_content_from_storage(project, version, filename)
page_content = self._get_page_content_from_storage(version, filename)

return self._parse_based_on_doctool(
page_content,
Expand Down
18 changes: 6 additions & 12 deletions readthedocs/filetreediff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,10 @@ def get_manifest(version: Version) -> FileTreeDiffManifest | None:

If the manifest file does not exist, return None.
"""
storage_path = version.project.get_storage_path(
type_=MEDIA_TYPE_DIFF,
version_slug=version.slug,
include_file=False,
version_type=version.type,
manifest_path = version.get_storage_path(
media_type=MEDIA_TYPE_DIFF,
filename=MANIFEST_FILE_NAME,
)
manifest_path = build_media_storage.join(storage_path, MANIFEST_FILE_NAME)
try:
with build_media_storage.open(manifest_path) as manifest_file:
manifest = json.load(manifest_file)
Expand All @@ -113,12 +110,9 @@ def get_manifest(version: Version) -> FileTreeDiffManifest | None:


def write_manifest(version: Version, manifest: FileTreeDiffManifest):
    """
    Write the file tree diff manifest of a version to storage as JSON.

    The manifest is stored as ``MANIFEST_FILE_NAME`` under the version's
    ``MEDIA_TYPE_DIFF`` storage path.

    :param version: Version the manifest belongs to.
    :param manifest: Manifest to serialize (via ``manifest.as_dict()``).
    """
    manifest_path = version.get_storage_path(
        media_type=MEDIA_TYPE_DIFF,
        filename=MANIFEST_FILE_NAME,
    )
    with build_media_storage.open(manifest_path, "w") as f:
        json.dump(manifest.as_dict(), f)
42 changes: 2 additions & 40 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,11 @@
from readthedocs.projects.validators import validate_repository_url
from readthedocs.projects.version_handling import determine_stable_version
from readthedocs.search.parsers import GenericParser
from readthedocs.storage import build_media_storage
from readthedocs.vcs_support.backends import backend_cls

from .constants import ADDONS_FLYOUT_POSITION_CHOICES
from .constants import ADDONS_FLYOUT_SORTING_CHOICES
from .constants import ADDONS_FLYOUT_SORTING_SEMVER_READTHEDOCS_COMPATIBLE
from .constants import DOWNLOADABLE_MEDIA_TYPES
from .constants import MEDIA_TYPES
from .constants import MULTIPLE_VERSIONS_WITH_TRANSLATIONS
from .constants import MULTIPLE_VERSIONS_WITHOUT_TRANSLATIONS
Expand Down Expand Up @@ -784,48 +782,12 @@ def get_storage_paths(self):
"""
Get the paths of all artifacts used by the project.

:return: the path to an item in storage
(can be used with ``storage.url`` to get the URL).
:return: A list of paths where the project's artifacts are stored.
"""
storage_paths = [f"{type_}/{self.slug}" for type_ in MEDIA_TYPES]
storage_paths.extend(f"{EXTERNAL}/{type_}/{self.slug}" for type_ in MEDIA_TYPES)
return storage_paths

def get_storage_path(self, type_, version_slug=LATEST, include_file=True, version_type=None):
"""
Get a path to a build artifact for use with Django's storage system.

:param type_: Media content type, e.g. 'pdf', 'htmlzip'
:param version_slug: Project version slug for lookup
:param include_file: Include file name in return
:param version_type: Project version type
:return: the path to an item in storage
(can be used with ``storage.url`` to get the URL)
:raises ValueError: if ``type_`` is not in ``MEDIA_TYPES``, or if
``include_file`` is true and ``type_`` is not in ``DOWNLOADABLE_MEDIA_TYPES``
"""
if type_ not in MEDIA_TYPES:
raise ValueError("Invalid content type.")

if include_file and type_ not in DOWNLOADABLE_MEDIA_TYPES:
raise ValueError("Invalid content type for downloadable file.")

type_dir = type_
# Add `external/` prefix for external versions
if version_type == EXTERNAL:
type_dir = f"{EXTERNAL}/{type_}"

# Version slug may come from an untrusted input,
# so we use join to avoid any path traversal.
# All other values are already validated.
folder_path = build_media_storage.join(f"{type_dir}/{self.slug}", version_slug)

if include_file:
# The file is named after the project slug; "htmlzip" uses a "zip" extension.
extension = type_.replace("htmlzip", "zip")
return "{}/{}.{}".format(
folder_path,
self.slug,
extension,
)
return folder_path

def get_production_media_url(self, type_, version_slug, resolver=None):
"""Get the URL for downloading a specific media file."""
# Use project domain for full path --same domain as docs
Expand Down
15 changes: 2 additions & 13 deletions readthedocs/projects/tasks/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -981,13 +981,7 @@ def store_build_artifacts(self):
version=self.data.version.slug,
type_=media_type,
)
to_path = self.data.project.get_storage_path(
type_=media_type,
version_slug=self.data.version.slug,
include_file=False,
version_type=self.data.version.type,
)

to_path = self.data.version.get_storage_path(media_type=media_type)
self._log_directory_size(from_path, media_type)

try:
Expand All @@ -1013,12 +1007,7 @@ def store_build_artifacts(self):

# Delete formats
for media_type in types_to_delete:
media_path = self.data.version.project.get_storage_path(
type_=media_type,
version_slug=self.data.version.slug,
include_file=False,
version_type=self.data.version.type,
)
media_path = self.data.version.get_storage_path(media_type=media_type)
try:
build_media_storage.delete_directory(media_path)
except Exception as exc:
Expand Down
8 changes: 2 additions & 6 deletions readthedocs/projects/tasks/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from readthedocs.filetreediff import write_manifest
from readthedocs.filetreediff.dataclasses import FileTreeDiffManifest
from readthedocs.filetreediff.dataclasses import FileTreeDiffManifestFile
from readthedocs.projects.constants import MEDIA_TYPE_HTML
from readthedocs.projects.models import HTMLFile
from readthedocs.projects.models import Project
from readthedocs.projects.signals import files_changed
Expand Down Expand Up @@ -214,12 +215,7 @@ def _get_indexers(


def _process_files(*, version: Version, indexers: list[Indexer]):
storage_path = version.project.get_storage_path(
type_="html",
version_slug=version.slug,
include_file=False,
version_type=version.type,
)
storage_path = version.get_storage_path(media_type=MEDIA_TYPE_HTML)
# A sync ID is a number different than the current `build` attribute (pending rename),
# it's used to differentiate the files from the current sync from the previous one.
# This is useful to easily delete the previous files from the DB and ES.
Expand Down
23 changes: 2 additions & 21 deletions readthedocs/proxito/views/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from slugify import slugify as unicode_slugify

from readthedocs.audit.models import AuditLog
from readthedocs.builds.constants import INTERNAL
from readthedocs.core.resolver import Resolver
from readthedocs.projects.constants import MEDIA_TYPE_HTML
from readthedocs.proxito.constants import RedirectType
Expand All @@ -40,10 +39,6 @@ class StorageFileNotFound(Exception):
class ServeDocsMixin:
"""Class implementing all the logic to serve a document."""

# We force all storage calls to use internal versions
# unless explicitly set to external.
version_type = INTERNAL

def _serve_docs(self, request, project, version, filename, check_if_exists=False):
"""
Serve a documentation file.
Expand All @@ -53,14 +48,7 @@ def _serve_docs(self, request, project, version, filename, check_if_exists=False
Useful to make sure we are serving a file that exists in storage,
checking if the file exists will make one additional request to the storage.
"""
base_storage_path = project.get_storage_path(
type_=MEDIA_TYPE_HTML,
version_slug=version.slug,
include_file=False,
# Force to always read from the internal or external storage,
# according to the current request.
version_type=self.version_type,
)
base_storage_path = version.get_storage_path(media_type=MEDIA_TYPE_HTML)

# Handle our backend storage not supporting directory indexes,
# so we need to append index.html when appropriate.
Expand Down Expand Up @@ -100,14 +88,7 @@ def _serve_dowload(self, request, project, version, type_):
filename (e.g. "pip-pypa-io-en-latest.pdf" or "pip-pypi-io-en-v2.0.pdf"
or "docs-celeryproject-org-kombu-en-stable.pdf").
"""
storage_path = project.get_storage_path(
type_=type_,
version_slug=version.slug,
# Force to always read from the internal or external storage,
# according to the current request.
version_type=self.version_type,
include_file=True,
)
storage_path = version.get_download_storage_path(media_type=type_)
self._track_pageview(
project=project,
path=storage_path,
Expand Down
Loading
Loading