Skip to content

feat(handler): add par2 directory handler #1166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/unblob/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
cab,
cpio,
dmg,
par2,
rar,
sevenzip,
stuffit,
Expand Down Expand Up @@ -124,4 +125,5 @@
BUILTIN_DIR_HANDLERS: DirectoryHandlers = (
sevenzip.MultiVolumeSevenZipHandler,
gzip.MultiVolumeGzipHandler,
par2.MultiVolumePAR2Handler,
)
63 changes: 63 additions & 0 deletions python/unblob/handlers/archive/par2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import glob
import hashlib
import io
from pathlib import Path
from typing import Optional

from unblob.file_utils import Endian, StructParser
from unblob.models import (
    DirectoryHandler,
    Glob,
    MultiFile,
)

# C-style description of the fixed-size header found at the start of a PAR2
# packet. It is handed to StructParser below so the header can be parsed
# by struct name. The field widths are 8 + 8 + 16 + 16 + 16 bytes.
C_DEFINITIONS = r"""
typedef struct par2_header{
char magic[8];
uint64 packet_length;
char md5_hash[16];
char recovery_set_id[16];
char type[16];
} par2_header_t;
"""

# Expected content of the 8-byte ``magic`` field of a packet header.
PAR2_MAGIC = b"PAR2\x00PKT"
# Name of the struct (declared in C_DEFINITIONS) that HEADER_PARSER is asked to parse.
HEADER_STRUCT = "par2_header_t"
# Parser built once at import time from the definitions above.
HEADER_PARSER = StructParser(C_DEFINITIONS)


class MultiVolumePAR2Handler(DirectoryHandler):
    """Directory handler that groups the files of one multi-volume PAR2 set.

    The handler is triggered by ``*.par2`` files. It collects the sibling
    files sharing the same stem and accepts them only when the first packet
    of every file carries the PAR2 magic and a matching MD5 checksum.
    No extractor is attached (``EXTRACTOR`` is ``None``).
    """

    NAME = "multi-par2"
    PATTERN = Glob("*.par2")
    EXTRACTOR = None

    # Offset of the recovery set ID inside a packet (magic + packet_length +
    # md5_hash = 8 + 8 + 16 bytes). The header's MD5 field is compared against
    # the digest of the packet bytes from this offset onwards.
    _RECOVERY_SET_ID_OFFSET = 32
    # Upper bound for a single read while hashing, so an arbitrarily large
    # packet_length never forces the whole packet into memory at once.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def is_valid_header(self, file_paths: list[Path]) -> bool:
        """Check the first PAR2 packet of every file in *file_paths*.

        Args:
            file_paths: Paths of the candidate files; each one is opened in
                binary mode and its leading packet header is parsed.

        Returns:
            ``True`` when every file starts with the PAR2 magic, declares a
            packet length that at least covers the header, and the MD5 of
            the packet body matches the hash stored in the header.
            ``False`` as soon as one file fails any of these checks.
        """
        for path in file_paths:
            with path.open("rb") as f:
                header = HEADER_PARSER.parse(HEADER_STRUCT, f, Endian.LITTLE)
                if header.magic != PAR2_MAGIC:
                    return False

                # packet_length is untrusted input. A value smaller than the
                # header itself cannot describe a real packet, and would make
                # the byte count below negative (read() with a negative size
                # reads the entire file).
                if header.packet_length < len(header):
                    return False

                # Hash everything from the recovery set ID to the end of the
                # packet, in bounded chunks.
                f.seek(self._RECOVERY_SET_ID_OFFSET, io.SEEK_SET)
                remaining = header.packet_length - self._RECOVERY_SET_ID_OFFSET
                digest = hashlib.md5()  # noqa: S324
                while remaining > 0:
                    chunk = f.read(min(remaining, self._HASH_CHUNK_SIZE))
                    if not chunk:
                        # The file ends before the declared end of the packet.
                        return False
                    digest.update(chunk)
                    remaining -= len(chunk)

                if digest.digest() != header.md5_hash:
                    return False
        return True

    def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
        """Collect the files that belong to the same set as *file*.

        Args:
            file: A file matched by ``PATTERN``.

        Returns:
            A ``MultiFile`` named after the stem of *file* and containing the
            sorted sibling files named ``<stem>.*``, or ``None`` when fewer
            than two such files exist or any of them fails
            ``is_valid_header``.
        """
        # Escape the stem so that glob metacharacters in the file name
        # ("[", "*", "?") are matched literally.
        pattern = f"{glob.escape(file.stem)}.*"
        # is_file() follows symlinks and is False for broken links,
        # directories and special files, none of which can be safely opened
        # and read as a PAR2 volume.
        paths = sorted(p for p in file.parent.glob(pattern) if p.is_file())

        if len(paths) <= 1 or not self.is_valid_header(paths):
            return None

        return MultiFile(
            name=file.stem,
            paths=paths,
        )
10 changes: 5 additions & 5 deletions python/unblob/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,12 +408,15 @@ def get_files(self, directory: Path) -> Iterable[Path]:
return [path] if path.exists() else []


class DirectoryHandler(abc.ABC):
TExtractor = TypeVar("TExtractor", bound=Union[None, Extractor])


class DirectoryHandler(abc.ABC, Generic[TExtractor]):
"""A directory type handler is responsible for searching, validating and "unblobbing" files from multiple files in a directory."""

NAME: str

EXTRACTOR: DirectoryExtractor
EXTRACTOR: TExtractor

PATTERN: DirectoryPattern

Expand All @@ -439,9 +442,6 @@ def extract(self, paths: list[Path], outdir: Path) -> Optional[ExtractResult]:
return self.EXTRACTOR.extract(paths, outdir)


TExtractor = TypeVar("TExtractor", bound=Union[None, Extractor])


class Handler(abc.ABC, Generic[TExtractor]):
"""A file type handler is responsible for searching, validating and "unblobbing" files from Blobs."""

Expand Down
3 changes: 3 additions & 0 deletions tests/integration/archive/par2/__input__/foo.erofs.img.par2
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Empty file.
Loading