diff --git a/docs/formats.md b/docs/formats.md index 2ba74e63ed..7ef61e4d4e 100644 --- a/docs/formats.md +++ b/docs/formats.md @@ -22,6 +22,7 @@ unblob supports more than 30 formats. You can see their code in | CAB | ❌ | ❌ | ❌ | [archive/cab.py][cab-handler] | [`7z`][cab-extractor] | | CPIO | ✅ | ✅ | ✅ | [archive/cpio.py][cpio-handler] | unblob extractor | | DMG | ❌ | ❌ | ❌ | [archive/dmg.py][dmg-handler] | [`7z`][dmg-extractor] | +| PARTCLONE | ✅ | ❌ | ❌ | [archive/partclone.py][partclone-handler] | [`partclone`][partclone-extractor] | | RAR | ❌ | ❌ | ❌ | [archive/rar.py][rar-handler] | [`unar`][rar-extractor] | | 7ZIP | ❌ | ❌ | ❌ | [archive/sevenzip.py][7zip-handler] | [`7z`][7zip-extractor] | | StuffIt | ❌ | ❌ | ❌ | [archive/stuffit.py][stuffit-handler] | [`unar`][stuffit-extractor] | @@ -39,6 +40,8 @@ unblob supports more than 30 formats. You can see their code in [cpio-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/cpio.py [dmg-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/dmg.py [dmg-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/dmg.py#L67-L69 +[partclone-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/partclone.py +[partclone-extractor]: https://github.com/onekey-sec/unblob/blob/b21b6dc291583af6b7ec9b7c3d63ee8302328841/python/unblob/handlers/archive/partclone.py#L44 [rar-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/rar.py [rar-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/rar.py#L32 [7zip-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/sevenzip.py diff --git a/install-deps.sh b/install-deps.sh index 49defea806..125d7ed777 100755 --- a/install-deps.sh +++ b/install-deps.sh @@ -9,6 +9,7 @@ apt-get install --no-install-recommends -y \ lziprecover \ 
lzop \ p7zip-full \ + partclone \ unar \ xz-utils \ libmagic1 \ diff --git a/overlay.nix b/overlay.nix index 9c5051e03e..ed35b8cc57 100644 --- a/overlay.nix +++ b/overlay.nix @@ -8,40 +8,5 @@ final: prev: nativeCheckInputs = (super.nativeCheckInputs or [ ]) ++ [ final.which ]; }); - unblob = - let - pyproject_toml = (builtins.fromTOML (builtins.readFile ./pyproject.toml)); - version = pyproject_toml.project.version; - in - (prev.unblob.override { e2fsprogs = final.e2fsprogs-nofortify; }).overridePythonAttrs (super: rec { - inherit version; - - src = final.nix-filter { - root = ./.; - include = [ - "Cargo.lock" - "Cargo.toml" - "pyproject.toml" - "python" - "rust" - "tests" - "README.md" - ]; - }; - - # remove this when packaging changes are upstreamed - cargoDeps = final.rustPlatform.importCargoLock { - lockFile = ./Cargo.lock; - }; - - nativeBuildInputs = with final.rustPlatform; [ - cargoSetupHook - maturinBuildHook - ]; - - # override disabling of 'test_all_handlers[filesystem.extfs]' from upstream - pytestFlagsArray = [ - "--no-cov" - ]; - }); + unblob = final.callPackage ./package.nix { }; } diff --git a/package.nix b/package.nix new file mode 100644 index 0000000000..3b6472ff6a --- /dev/null +++ b/package.nix @@ -0,0 +1,131 @@ +{ + lib, + python3, + fetchFromGitHub, + makeWrapper, + e2fsprogs-nofortify, + jefferson, + lz4, + lziprecover, + lzop, + p7zip, + partclone, + nix-filter, + sasquatch, + sasquatch-v4be, + simg2img, + ubi_reader, + unar, + zstd, + versionCheckHook, + rustPlatform, +}: + +let + # These dependencies are only added to PATH + runtimeDeps = [ + e2fsprogs-nofortify + jefferson + lziprecover + lzop + p7zip + partclone + sasquatch + sasquatch-v4be + ubi_reader + simg2img + unar + zstd + lz4 + ]; + pyproject_toml = (builtins.fromTOML (builtins.readFile ./pyproject.toml)); + version = pyproject_toml.project.version; +in +python3.pkgs.buildPythonApplication rec { + pname = "unblob"; + pyproject = true; + disabled = python3.pkgs.pythonOlder "3.9"; 
def get_endian(file: File, big_endian_magic: int, endian_len: int = 4) -> Endian:
    """Read an ``endian_len`` bytes magic and derive endianness from it.

    The bytes at the current file position are decoded as a big endian
    integer and compared with ``big_endian_magic``. After reading, the file
    position is moved back by the number of bytes that were actually read.

    :param file: file positioned at the first byte of the magic.
    :param big_endian_magic: value the magic has when decoded as big endian.
    :param endian_len: width of the magic in bytes; only 2 and 4 are supported.
    :return: ``Endian.BIG`` if the decoded magic equals ``big_endian_magic``,
        ``Endian.LITTLE`` otherwise.
    :raises ValueError: if ``endian_len`` is not 2 or 4, or if
        ``big_endian_magic`` does not fit in ``endian_len`` bytes.
    """
    # Only 16 and 32 bit magics have a matching converter. Reject anything
    # else explicitly instead of silently decoding it as a 16 bit value.
    converters = {2: convert_int16, 4: convert_int32}
    converter = converters.get(endian_len)
    if converter is None:
        raise ValueError(f"Unsupported endian_len {endian_len}, expected 2 or 4.")

    if big_endian_magic > (1 << (endian_len * 8)) - 1:
        raise ValueError(
            f"big_endian_magic is larger than a {endian_len * 8} bit integer."
        )

    magic_bytes = file.read(endian_len)
    # Seek back by what was actually read so a short read does not move the
    # position before the original offset.
    file.seek(-len(magic_bytes), io.SEEK_CUR)
    magic = converter(magic_bytes, Endian.BIG)
    return Endian.BIG if magic == big_endian_magic else Endian.LITTLE
class PartcloneHandler(StructHandler):
    """Handler that detects partclone images and restores them.

    The chunk size is derived from the fields of ``partclone_header_t``;
    extraction is delegated to the ``partclone.restore`` command.
    """

    NAME = "partclone"
    # Matches the image magic, a dotted version string, the "0002" image
    # version text and the two byte endian marker in either byte order.
    PATTERNS = [Regex(r"partclone-image\x00\d+\.\d+\.\d+.*?0002(\xde\xc0|\xc0\xde)")]
    HEADER_STRUCT = HEADER_STRUCT
    C_DEFINITIONS = C_DEFINITIONS
    EXTRACTOR = Command(
        "partclone.restore",
        "-W",
        "-s",
        "{inpath}",
        "-o",
        "{outdir}/partclone.restored",
        "-L",
        "/dev/stdout",
    )

    def is_valid_header(self, header) -> bool:
        """Return True if the CRC32 stored in the header matches its content.

        The checksum is computed over the serialized header without its
        trailing four bytes, which hold the ``crc32`` field itself.
        """
        calculated_crc = binascii.crc32(header.dumps()[0:-4])
        return (
            header.crc32 ^ 0xFFFFFFFF
        ) == calculated_crc  # partclone does not final XOR

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Compute the extent of a partclone image starting at ``start_offset``.

        :raises InvalidInputFormat: if the header CRC does not match, or if
            checksums are enabled while ``blocks_per_checksum`` is zero.
        """
        file.seek(start_offset + ENDIAN_OFFSET, io.SEEK_SET)  # go to endian
        endian = get_endian(file, BIG_ENDIAN_MAGIC, endian_len=2)
        file.seek(start_offset, io.SEEK_SET)  # go to beginning of file
        header = self.parse_header(file, endian)

        if not self.is_valid_header(header):
            raise InvalidInputFormat("Invalid partclone header.")

        end_offset = start_offset + len(header)  # header
        end_offset += header.checksum_size  # checksum size
        end_offset += ceil(header.fs_total_block_count / 8)  # bitmap, as bytes

        if header.checksum_mode != 0:
            # A CRC-valid header can still carry a zero divisor. Reject it as
            # malformed input rather than failing with ZeroDivisionError.
            # NOTE(review): assumes partclone never writes this combination
            # for a valid image - confirm against the image format spec.
            if header.blocks_per_checksum == 0:
                raise InvalidInputFormat(
                    "Invalid partclone header: blocks_per_checksum is zero."
                )
            checksum_blocks = ceil(
                header.fs_used_block_count_bitmap / header.blocks_per_checksum
            )
            end_offset += checksum_blocks * header.checksum_size

        end_offset += header.fs_used_block_count_bitmap * header.fs_block_size  # Data
        return ValidChunk(start_offset=start_offset, end_offset=end_offset)
a/tests/integration/archive/partclone/__input__/floppy-144m.img b/tests/integration/archive/partclone/__input__/floppy-144m.img new file mode 100755 index 0000000000..4a01bc782e --- /dev/null +++ b/tests/integration/archive/partclone/__input__/floppy-144m.img @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e63b5b8ec0ab6dfc4a4254d72e26b8c1b7ee8b6ceb61fe67bea1105b0d60156 +size 69930 diff --git a/tests/integration/archive/partclone/__input__/fs_dev0.partclone.img b/tests/integration/archive/partclone/__input__/fs_dev0.partclone.img new file mode 100755 index 0000000000..657491385b --- /dev/null +++ b/tests/integration/archive/partclone/__input__/fs_dev0.partclone.img @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8fb4fbc359454b017521504eddf0e2955c5808280337b73ad9f897a5f501285 +size 40123 diff --git a/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored new file mode 100644 index 0000000000..e21579b62c --- /dev/null +++ b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be413ccd078c706d4f7dd64d4e29fe917fd188f22202becf906b0b79aa9d645 +size 1474560 diff --git a/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored_extract/lost+found/.gitkeep b/tests/integration/archive/partclone/__output__/floppy-144m.img_extract/partclone.restored_extract/lost+found/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored new file mode 100644 index 0000000000..c149e9f285 --- /dev/null +++ 
b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557ad6d9db9ea8ed1f749d8da063d661c78951e318f3d5f23e517b8b93a205d6 +size 565248 diff --git a/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored_extract/lost+found/.gitkeep b/tests/integration/archive/partclone/__output__/fs_dev0.partclone.img_extract/partclone.restored_extract/lost+found/.gitkeep new file mode 100644 index 0000000000..e69de29bb2