Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,26 @@

## v0.3.*

### v0.3.1 - 2025-08-04

[#8](https://github.com/p2p-ld/torrent-models/pull/8)

**Features**
By the time piece ranges have been constructed,
the torrent's `info.name` field is no longer available to them, so webseed URLs could not be constructed accurately.

That responsibility now belongs to the relevant piece range classes:
each has a `webseed_url` method that, given a base URL used as a webseed, returns the full URL to request.

For example, for a multi-file torrent named `my_torrent` with a file `a.exe`,
a webseed given as `https://example.com/data/` should have the file stored at `https://example.com/data/my_torrent/a.exe`.

**Bugfix**

This also fixes v1-only single-file torrents
(a rare and discouraged case), which improperly added the file metadata to the `files` list
rather than just having `name` and `length`.

### v0.3.0 - 2025-07-28

[#6](https://github.com/p2p-ld/torrent-models/pull/6)
Expand Down
6 changes: 5 additions & 1 deletion src/torrent_models/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,11 @@ def _generate_v1(self, n_processes: int, progress: bool = False, **kwargs: Any)
file_items = self._get_v1_file_items(paths)

if not self.info.files:
dumped["info"]["files"] = file_items
if len(file_items) == 1:
dumped["info"]["name"] = file_items[0].path[-1]
dumped["info"]["length"] = file_items[0].length
else:
dumped["info"]["files"] = file_items

if "pieces" not in dumped["info"]:
hasher = V1Hasher(
Expand Down
7 changes: 7 additions & 0 deletions src/torrent_models/torrent.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ def v1_piece_range(self, piece_idx: int) -> V1PieceRange:
length=self.info.length,
range_start=start_range,
range_end=min(self.info.length, end_range),
full_path=self.info.name,
)
],
)
Expand All @@ -281,6 +282,7 @@ def v1_piece_range(self, piece_idx: int) -> V1PieceRange:
length=file.length,
range_start=file_range_start,
range_end=file_range_end,
full_path="/".join([self.info.name, *file.path]),
)
)

Expand All @@ -303,6 +305,7 @@ def v1_piece_range(self, piece_idx: int) -> V1PieceRange:
length=file.length,
range_start=file_range_start,
range_end=file_range_end,
full_path="/".join([self.info.name, *file.path]),
)
)
found_len += file_range_end - file_range_start
Expand Down Expand Up @@ -338,6 +341,8 @@ def v2_piece_range(self, file: str, piece_idx: int = 0) -> V2PieceRange:

root = flat_files[file]["pieces root"]

full_path = file if len(flat_files) == 1 else "/".join([self.info.name, file])

if root not in self.piece_layers:
# smaller then piece_length, piece range is whole file
return V2PieceRange(
Expand All @@ -348,6 +353,7 @@ def v2_piece_range(self, file: str, piece_idx: int = 0) -> V2PieceRange:
piece_length=self.info.piece_length,
file_size=flat_files[file]["length"],
root_hash=root,
full_path=full_path,
)
else:
if piece_idx >= len(self.piece_layers[root]):
Expand All @@ -364,6 +370,7 @@ def v2_piece_range(self, file: str, piece_idx: int = 0) -> V2PieceRange:
file_size=flat_files[file]["length"],
piece_hash=self.piece_layers[root][piece_idx],
root_hash=root,
full_path=full_path,
)

@model_validator(mode="after")
Expand Down
19 changes: 19 additions & 0 deletions src/torrent_models/types/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from enum import StrEnum
from pathlib import Path
from typing import Annotated, NotRequired, TypeAlias
from urllib.parse import quote

from annotated_types import Ge, Len
from pydantic import AfterValidator, AnyUrl, BaseModel, Field
Expand Down Expand Up @@ -100,3 +101,21 @@ class PieceRange(BaseModel):
@abstractmethod
def validate_data(self, data: list[bytes]) -> bool:
"""Check that the provided data matches the piece or root hash"""


def webseed_url(base_url: str, path: str) -> str:
    """
    Build the URL to request from a webseed server for a path within a torrent.

    Args:
        base_url: The webseed URL, either a directory that contains the torrent's
            files or a direct link to the file itself.
        path: ``/``-separated path of the file within the torrent.

    Returns:
        ``base_url`` unchanged when it already ends with ``path`` (raw or
        percent-encoded) on a path-segment boundary, or when ``path`` has no
        non-empty segments; otherwise ``base_url`` with the percent-encoded
        segments of ``path`` appended.

    Notes:
        - Trailing slashes on ``base_url`` and leading/duplicate slashes in
          ``path`` are collapsed, so the join never produces ``//``.
        - The direct-link check only matches whole segments:
          ``.../xa.exe`` is not treated as a link to ``a.exe``.
    """
    # Drop empty segments so "/a.exe" or "dir//a.exe" cannot inject "//" into the URL
    segments = [segment for segment in path.split("/") if segment]
    if not segments:
        return base_url

    raw_path = "/".join(segments)
    quoted_path = "/".join(quote(segment) for segment in segments)

    # A direct link must end with the path on a segment boundary, not merely
    # share a string suffix with it
    is_direct_link = base_url in (raw_path, quoted_path) or base_url.endswith(
        ("/" + raw_path, "/" + quoted_path)
    )
    if is_direct_link:
        return base_url

    # Otherwise the webseed url is treated as a directory: append the quoted segments
    return "/".join([base_url.rstrip("/"), quoted_path])
14 changes: 11 additions & 3 deletions src/torrent_models/types/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pydantic_core.core_schema import SerializationInfo

from torrent_models.base import ConfiguredBase
from torrent_models.types.common import FilePart, PieceRange, SHA1Hash, _power_of_two
from torrent_models.types.common import FilePart, PieceRange, SHA1Hash, _power_of_two, webseed_url

V1PieceLength = Annotated[int, AfterValidator(_power_of_two)]
"""
Expand Down Expand Up @@ -86,6 +86,14 @@ class FileItemRange(FileItem):

range_start: int
range_end: int
full_path: str
"""
Path to be used with webseeds, includes `info.name` in the case of multifile torrents,
so the webseed base can be directly joined with `full_path`
"""

def webseed_url(self, base_url: str) -> str:
    """
    Get the URL to request from a webseed for the file this range belongs to.

    Passes ``base_url`` and this range's ``full_path`` to the module-level
    ``webseed_url`` helper and returns its result.
    """
    target_path = self.full_path
    return webseed_url(base_url, target_path)


class V1PieceRange(PieceRange):
Expand All @@ -106,8 +114,8 @@ def validate_data(self, data: list[bytes]) -> bool:
assert len(data) == len(
self.ranges
), "Need to provide data chunks that correspond to each of the indicated file ranges"
for range, d in zip(self.ranges, data):
assert (range.range_end - range.range_start) == len(d), (
for range_, d in zip(self.ranges, data):
assert (range_.range_end - range_.range_start) == len(d), (
"Provided data chunks must match the sizes indicated by the "
"start and end ranges of each file range"
)
Expand Down
9 changes: 9 additions & 0 deletions src/torrent_models/types/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
SHA256Hash,
_divisible_by_16kib,
_power_of_two,
webseed_url,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -438,6 +439,11 @@ class V2PieceRange(PieceRange):
file_size: int
piece_hash: SHA256Hash | None = None
root_hash: SHA256Hash
full_path: str
"""
Path to be used with webseeds, includes `info.name` in the case of multifile torrents,
so the webseed base can be directly joined with `full_path`
"""

@property
def tree_shape(self) -> MerkleTreeShape:
Expand Down Expand Up @@ -474,3 +480,6 @@ def validate_data(self, data: list[bytes]) -> bool:
return hash == self.root_hash
else:
return hash == self.piece_hash

def webseed_url(self, base_url: str) -> str:
    """
    Get the URL to request from a webseed for the file this piece range covers.

    Passes ``base_url`` and this piece range's ``full_path`` to the module-level
    ``webseed_url`` helper and returns its result.
    """
    target_path = self.full_path
    return webseed_url(base_url, target_path)
63 changes: 63 additions & 0 deletions tests/test_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import random
from pathlib import Path

import pytest

from torrent_models import KiB, TorrentCreate


@pytest.mark.parametrize("version", ["v1", "v2"])
def test_webseed_url_singlefile(version: str, tmp_path: Path):
    """
    For a single-file torrent, a piece range's webseed url is a direct link to the file,
    whether the webseed is given as that direct link or as its parent directory.
    """
    piece_length = 32 * KiB
    file_name = "my_cool_file.exe"
    expected_url = "https://example.com/data/my_cool_file.exe"

    # four pieces worth of random bytes
    source_file = tmp_path / file_name
    source_file.write_bytes(random.randbytes(piece_length * 4))

    creator = TorrentCreate(paths=[source_file], path_root=tmp_path, piece_length=piece_length)
    torrent = creator.generate(version=version)

    if version == "v1":
        # single-file v1 torrents carry no `files` list
        assert torrent.info.files is None
        piece_range = torrent.v1_piece_range(2).ranges[0]
    else:
        assert len(torrent.flat_files) == 1
        piece_range = torrent.v2_piece_range(file_name, 2)

    webseed_bases = (
        expected_url,  # direct links should be unchanged
        "https://example.com/data/",  # file should be appended if directory given
        "https://example.com/data",
    )
    for base in webseed_bases:
        assert piece_range.webseed_url(base) == expected_url


@pytest.mark.parametrize("version", ["v1", "v2"])
def test_webseed_url_multifile(version: str, tmp_path: Path):
    """
    For a multi-file torrent, the webseed url of a piece range includes `info.name`
    ahead of the file's path within the torrent.
    """
    torrent_name = "my_torrent"
    piece_length = 32 * KiB

    root = tmp_path / torrent_name
    root.mkdir(exist_ok=True)

    # three files, each four pieces long
    relative_paths = [Path(name) for name in ("a.exe", "b.exe", "c.png")]
    for relative in relative_paths:
        (root / relative).write_bytes(random.randbytes(piece_length * 4))

    creator = TorrentCreate(paths=relative_paths, path_root=root, piece_length=piece_length)
    torrent = creator.generate(version=version)
    assert torrent.info.name == torrent_name

    if version == "v1":
        # presumably piece 6 falls inside b.exe (four pieces per file) - the
        # expected url below depends on it
        piece_range = torrent.v1_piece_range(6).ranges[0]
    else:
        piece_range = torrent.v2_piece_range("b.exe", 2)

    expected_url = "https://example.com/data/my_torrent/b.exe"
    webseed_bases = (
        expected_url,  # direct paths are unchanged
        "https://example.com/data/",  # ensure name is prepended to directories
        "https://example.com/data",
    )
    for base in webseed_bases:
        assert piece_range.webseed_url(base) == expected_url
Loading