"""Directory handler that groups the volumes of a PAR2 recovery set."""

import glob
import hashlib
import io
from pathlib import Path
from typing import BinaryIO, Optional

from unblob.file_utils import Endian, StructParser
from unblob.models import (
    DirectoryHandler,
    Glob,
    MultiFile,
)

C_DEFINITIONS = r"""
    typedef struct par2_header{
        char magic[8];
        uint64 packet_length;
        char md5_hash[16];
        char recovery_set_id[16];
        char type[16];
    } par2_header_t;
"""

PAR2_MAGIC = b"PAR2\x00PKT"
HEADER_STRUCT = "par2_header_t"
HEADER_PARSER = StructParser(C_DEFINITIONS)

# Offset of `recovery_set_id` inside the header: magic (8) + packet_length (8)
# + md5_hash (16). The packet checksum is computed from this offset onwards.
RECOVERY_SET_ID_OFFSET = 32

# Upper bound on how much of a packet is held in memory while hashing.
HASH_CHUNK_SIZE = 1024 * 1024


def _md5_of_next_bytes(f: BinaryIO, length: int) -> bytes:
    """Return the MD5 digest of up to *length* bytes read from *f*.

    The data is consumed in chunks of at most ``HASH_CHUNK_SIZE`` bytes so a
    large (or maliciously large) ``packet_length`` never forces the whole
    packet into memory. If the file ends early, only the bytes actually
    available are hashed.
    """
    digest = hashlib.md5()  # noqa: S324
    remaining = length
    while remaining > 0:
        chunk = f.read(min(remaining, HASH_CHUNK_SIZE))
        if not chunk:
            break
        digest.update(chunk)
        remaining -= len(chunk)
    return digest.digest()


class MultiVolumePAR2Handler(DirectoryHandler):
    """Recognise a set of sibling ``*.par2`` files as one multi-file object.

    No extractor is attached (``EXTRACTOR = None``); the handler only
    identifies and validates the set.
    """

    NAME = "multi-par2"
    PATTERN = Glob("*.par2")
    EXTRACTOR = None

    def is_valid_header(self, file_paths: list[Path]) -> bool:
        """Check that every file starts with a well-formed PAR2 packet.

        For each path the first packet header is parsed, its magic is compared
        with ``PAR2_MAGIC``, and the MD5 stored in the header is compared with
        the MD5 of the packet bytes from ``recovery_set_id`` to the end of the
        packet.

        Returns ``True`` only if all files pass; ``False`` as soon as one file
        is too short, has a wrong magic, an impossible length, or a checksum
        mismatch.
        """
        for path in file_paths:
            with path.open("rb") as f:
                try:
                    header = HEADER_PARSER.parse(HEADER_STRUCT, f, Endian.LITTLE)
                except EOFError:
                    # Assumes the struct parser signals a short read with
                    # EOFError; a file smaller than one header is not PAR2.
                    return False

                if header.magic != PAR2_MAGIC:
                    return False

                # packet_length counts the header itself, so anything smaller
                # is corrupt and would otherwise yield a negative read size.
                if header.packet_length < len(header):
                    return False

                f.seek(RECOVERY_SET_ID_OFFSET, io.SEEK_SET)
                packet_checksum = _md5_of_next_bytes(
                    f, header.packet_length - RECOVERY_SET_ID_OFFSET
                )

                if packet_checksum != header.md5_hash:
                    return False
        return True

    def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
        """Collect the siblings of *file* that share its stem into a MultiFile.

        Siblings are the entries in ``file.parent`` named ``<stem>.<anything>``.
        Returns ``None`` when at most one such file exists or when any of them
        fails ``is_valid_header``.
        """
        # Escape the stem so glob metacharacters in a file name are literal.
        pattern = f"{glob.escape(file.stem)}.*"
        # is_file() follows symlinks: it drops dangling links (as the previous
        # resolve().exists() check did) and also directories, which cannot be
        # opened for header validation.
        paths = sorted(p for p in file.parent.glob(pattern) if p.is_file())

        if len(paths) <= 1 or not self.is_valid_header(paths):
            return None

        return MultiFile(
            name=file.stem,
            paths=paths,
        )
sha256:84ded5e3d5a6676b65f735637532d7d2aa408215603b7044bdf9ee2b8deb3a1a +size 20964 diff --git a/tests/integration/archive/par2/__input__/foo.erofs.img.vol01+02.par2 b/tests/integration/archive/par2/__input__/foo.erofs.img.vol01+02.par2 new file mode 100644 index 0000000000..0d8c54719c --- /dev/null +++ b/tests/integration/archive/par2/__input__/foo.erofs.img.vol01+02.par2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456d8113f08c9a6e3a98eb0df3fe0f069461435acc727527b932ee8e9e1e9c69 +size 41824 diff --git a/tests/integration/archive/par2/__input__/foo.erofs.img.vol03+04.par2 b/tests/integration/archive/par2/__input__/foo.erofs.img.vol03+04.par2 new file mode 100644 index 0000000000..94f16e28a1 --- /dev/null +++ b/tests/integration/archive/par2/__input__/foo.erofs.img.vol03+04.par2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b64b4c8eb00f579ad3370661307073f685bbc3ce2a8ebed413e530f87690090 +size 62756 diff --git a/tests/integration/archive/par2/__input__/foo.erofs.img.vol07+08.par2 b/tests/integration/archive/par2/__input__/foo.erofs.img.vol07+08.par2 new file mode 100644 index 0000000000..5d26291c54 --- /dev/null +++ b/tests/integration/archive/par2/__input__/foo.erofs.img.vol07+08.par2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acee373d6b04ccacd7414361d3e19a6c42fa4cf6d9cf6bc867060b8777de5b32 +size 83832 diff --git a/tests/integration/archive/par2/__input__/foo.erofs.img.vol15+16.par2 b/tests/integration/archive/par2/__input__/foo.erofs.img.vol15+16.par2 new file mode 100644 index 0000000000..e03db741a8 --- /dev/null +++ b/tests/integration/archive/par2/__input__/foo.erofs.img.vol15+16.par2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:162a4b25d4dba77bf4b059c79c2c361a620dd1cc2e4e46696fb1e97e3009fa48 +size 105196 diff --git a/tests/integration/archive/par2/__input__/foo.erofs.img.vol31+20.par2 
b/tests/integration/archive/par2/__input__/foo.erofs.img.vol31+20.par2 new file mode 100644 index 0000000000..b1fcfdb851 --- /dev/null +++ b/tests/integration/archive/par2/__input__/foo.erofs.img.vol31+20.par2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f44677b7f3f4b3b811efaf8a4d8646107d1f0c4e8ab35aea854f8090c124e921 +size 105484 diff --git a/tests/integration/archive/par2/__output__/.gitkeep b/tests/integration/archive/par2/__output__/.gitkeep new file mode 100644 index 0000000000..e69de29bb2