Commit ae2ce97
fix(processing): adapt is_padding to fix potential MemoryError
If an unknown chunk is larger than the RAM available on the system where unblob runs, the previous is_padding implementation could raise MemoryError, because it loaded the entire chunk into memory at once. Fixed by iterating over the unknown chunk with iterate_file and using all(), so we stop as soon as we encounter a differing byte.
1 parent c428feb commit ae2ce97

File tree

1 file changed: +8 −1 lines changed

python/unblob/processing.py

+8 −1
```diff
@@ -462,7 +462,14 @@ def _iterate_directory(self, extract_dirs, processed_paths):
 
 
 def is_padding(file: File, chunk: UnknownChunk):
-    return len(set(file[chunk.start_offset : chunk.end_offset])) == 1
+    first_byte = file[chunk.start_offset]
+    return all(
+        current_byte == first_byte
+        for chunk in iterate_file(
+            file, chunk.start_offset, chunk.end_offset - chunk.start_offset
+        )
+        for current_byte in chunk
+    )
 
 
 def process_patterns(
```
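The pattern the commit applies, reading the range in fixed-size buffers and short-circuiting with all(), can be sketched as a standalone example. The iterate_file helper below is a simplified stand-in assumed for illustration; unblob's real iterate_file, File, and UnknownChunk types differ:

```python
import io

def iterate_file(file, start_offset, size, buffer_size=64 * 1024):
    """Yield bytes in [start_offset, start_offset + size) as fixed-size chunks.

    Simplified stand-in for unblob's helper: only buffer_size bytes are
    resident at a time, so memory use is bounded regardless of chunk size.
    """
    file.seek(start_offset)
    remaining = size
    while remaining > 0:
        data = file.read(min(buffer_size, remaining))
        if not data:  # unexpected EOF
            break
        remaining -= len(data)
        yield data

def is_padding(file, start_offset, end_offset):
    """True if every byte in the range equals the first byte.

    all() stops consuming the generator at the first mismatching byte,
    so a non-padding chunk is rejected without scanning to its end.
    """
    file.seek(start_offset)
    first = file.read(1)
    if not first:
        return False
    first_byte = first[0]
    return all(
        b == first_byte
        for chunk in iterate_file(file, start_offset, end_offset - start_offset)
        for b in chunk
    )

padding = io.BytesIO(b"\xff" * 1_000_000)
print(is_padding(padding, 0, 1_000_000))      # → True
mixed = io.BytesIO(b"\xff" * 500 + b"\x00")
print(is_padding(mixed, 0, 501))              # → False
```

Compared with the old len(set(...)) == 1 check, this never materializes the whole range, and the nested generator lets all() bail out on the first non-matching byte instead of deduplicating every byte.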
