-
-
Notifications
You must be signed in to change notification settings - Fork 585
Machofile analyzer #3268
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Machofile analyzer #3268
Changes from 18 commits
6f01a28
a185d08
40a9ebc
b4cd43b
3e6bc3d
a66781c
946e43a
933989b
3da2d8e
8c7f00f
520cf62
94a3c65
395a2a3
964400c
6bce4ff
e1b3822
11d1bab
3907083
c6526c5
5a9b7db
6185f41
f65ca9f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,139 @@ | ||||||
| # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl | ||||||
| # See the file 'LICENSE' for copying permission. | ||||||
|
|
||||||
| import logging | ||||||
| from typing import Any, Dict, Optional | ||||||
|
|
||||||
| import machofile | ||||||
|
|
||||||
| from api_app.analyzers_manager.classes import FileAnalyzer | ||||||
| from api_app.analyzers_manager.exceptions import AnalyzerRunException | ||||||
|
|
||||||
| logger = logging.getLogger(__name__) | ||||||
|
|
||||||
|
|
||||||
| def _safe_decode(value: Any) -> str: | ||||||
| """Helper to safely decode bytes to string.""" | ||||||
| if isinstance(value, bytes): | ||||||
| return value.decode("utf-8", "ignore") | ||||||
| return str(value) | ||||||
|
|
||||||
|
|
||||||
| class MachoInfo(FileAnalyzer): | ||||||
| """ | ||||||
| Analyzer for Mach-O binary files (macOS/iOS executables). | ||||||
| Uses the machofile library to parse and extract information. | ||||||
| """ | ||||||
|
|
||||||
| @classmethod | ||||||
| def update(cls) -> bool: | ||||||
| return False | ||||||
|
|
||||||
| @staticmethod | ||||||
| def _get_attr(macho, getter: str, fallback: Optional[str], formatted=True): | ||||||
| """Try getter method first, then fall back to direct attribute.""" | ||||||
| if getter and hasattr(macho, getter): | ||||||
| try: | ||||||
| return getattr(macho, getter)(formatted=formatted) | ||||||
| except TypeError: | ||||||
| return getattr(macho, getter)() | ||||||
| if fallback and hasattr(macho, fallback): | ||||||
| return getattr(macho, fallback) | ||||||
| return None | ||||||
|
|
||||||
def _parse_macho(self):
    """Parse ``self.filepath`` as a single Mach-O, then as a universal one.

    Returns:
        The ``machofile.MachO`` object when the single-binary attempt
        succeeds, otherwise the ``machofile.UniversalMachO`` object.

    Raises:
        AnalyzerRunException: if both attempts fail. The message carries both
            error texts and the universal-parser error is chained as the
            explicit cause (the single-parser error remains reachable through
            the implicit exception context).
    """
    try:
        macho = machofile.MachO(self.filepath)
        # parse() is only invoked when the object exposes it.
        if hasattr(macho, "parse"):
            macho.parse()
        return macho
    except Exception as single_error:
        try:
            macho = machofile.UniversalMachO(self.filepath)
            if hasattr(macho, "parse"):
                macho.parse()
            return macho
        except Exception as universal_error:
            raise AnalyzerRunException(
                "Failed to parse as both single and universal binary. "
                f"Single: {single_error}, Universal: {universal_error}"
            ) from universal_error
|
|
||||||
| def _extract_basic_info(self, macho, results: Dict[str, Any]): | ||||||
| """Extract basic information like headers and hashes.""" | ||||||
| if val := self._get_attr(macho, "get_general_info", "general_info"): | ||||||
| results["general_info"] = val | ||||||
| if val := self._get_attr(macho, "get_macho_header", "header"): | ||||||
| results["header"] = val | ||||||
| if val := self._get_attr(macho, "get_similarity_hashes", None): | ||||||
| results["hashes"] = val | ||||||
| if val := self._get_attr(macho, None, "code_signature_info"): | ||||||
| results["code_signature"] = val | ||||||
|
|
||||||
| if hasattr(macho, "get_architectures"): | ||||||
| results["architectures"] = macho.get_architectures() | ||||||
| elif isinstance(results.get("header"), dict): | ||||||
| results["architectures"] = list(results["header"].keys()) | ||||||
| else: | ||||||
| results["architectures"] = [] | ||||||
|
|
||||||
| for key, attr in [ | ||||||
| ("uuid", "uuid"), | ||||||
| ("entrypoint", "entry_point"), | ||||||
| ("version_info", "version_info"), | ||||||
| ]: | ||||||
| if hasattr(macho, attr): | ||||||
| val = getattr(macho, attr) | ||||||
| results[key] = val | ||||||
|
|
||||||
| def _extract_lists(self, macho, results: Dict[str, Any]): | ||||||
| """Extract list-based structures like load commands and segments.""" | ||||||
| is_universal = hasattr(macho, "architectures") and isinstance(macho.architectures, dict) | ||||||
|
|
||||||
| def get_macho_lists(m): | ||||||
| return { | ||||||
| "load_commands": [str(lc) for lc in m.load_commands] if hasattr(m, "load_commands") else [], | ||||||
IshaanXCoder marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| "segments": [str(s) for s in m.segments] if hasattr(m, "segments") else [], | ||||||
| "dylib_names": [_safe_decode(d) for d in m.dylib_names] if hasattr(m, "dylib_names") else [], | ||||||
| } | ||||||
|
|
||||||
| if is_universal: | ||||||
| for k in ["load_commands", "segments", "dylib_names"]: | ||||||
| results[k] = {} | ||||||
| for arch, m in macho.architectures.items(): | ||||||
| sub_lists = get_macho_lists(m) | ||||||
| for k, v in sub_lists.items(): | ||||||
| results[k][arch] = v | ||||||
| else: | ||||||
| results.update(get_macho_lists(macho)) | ||||||
|
|
||||||
| def _extract_symbols(self, macho, results: Dict[str, Any]): | ||||||
| """Extract imported and exported symbols.""" | ||||||
| if hasattr(macho, "get_imported_functions"): | ||||||
| results["imports"] = macho.get_imported_functions() | ||||||
| elif hasattr(macho, "imported_functions") and macho.imported_functions: | ||||||
| results["imports"] = [_safe_decode(f) for f in macho.imported_functions] | ||||||
|
|
||||||
| if hasattr(macho, "get_exported_symbols"): | ||||||
| results["exports"] = macho.get_exported_symbols() | ||||||
| elif hasattr(macho, "exported_symbols") and macho.exported_symbols: | ||||||
| results["exports"] = [_safe_decode(s) for s in macho.exported_symbols] | ||||||
|
|
||||||
| def run(self) -> Dict[str, Any]: | ||||||
| results: Dict[str, Any] = {} | ||||||
|
|
||||||
| try: | ||||||
| macho = self._parse_macho() | ||||||
| self._extract_basic_info(macho, results) | ||||||
| self._extract_lists(macho, results) | ||||||
| self._extract_symbols(macho, results) | ||||||
|
|
||||||
| except AnalyzerRunException: | ||||||
IshaanXCoder marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| raise | ||||||
| except Exception as e: | ||||||
| error_msg = f"job_id:{self.job_id} analyzer:{self.analyzer_name} md5:{self.md5} filename:{self.filename} MachoFile parsing error: {e}" | ||||||
|
||||||
| error_msg = f"job_id:{self.job_id} analyzer:{self.analyzer_name} md5:{self.md5} filename:{self.filename} MachoFile parsing error: {e}" | |
| error_msg = f"job_id:{self.job_id} analyzer:{self.analyzer_name} md5:{self.md5} filename:{self.filename} Mach-O parsing error: {e}" |
IshaanXCoder marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,127 @@ | ||||||
| from django.db import migrations | ||||||
| from django.db.models.fields.related_descriptors import ( | ||||||
| ForwardManyToOneDescriptor, | ||||||
IshaanXCoder marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| ForwardOneToOneDescriptor, | ||||||
| ManyToManyDescriptor, | ||||||
| ReverseManyToOneDescriptor, | ||||||
| ReverseOneToOneDescriptor, | ||||||
| ) | ||||||
|
|
||||||
| plugin = { | ||||||
| "name": "MachoInfo", | ||||||
| "python_module": { | ||||||
| "module": "macho_info.MachoInfo", | ||||||
| "base_path": "api_app.analyzers_manager.file_analyzers", | ||||||
| }, | ||||||
| "description": "Parse Mach-O binary files (macOS/iOS executables) using [machofile](https://github.com/pstirparo/machofile) library. Extracts headers, segments, dylibs, imports, exports, hashes and code signatures.", | ||||||
| "disabled": False, | ||||||
| "soft_time_limit": 60, | ||||||
| "routing_key": "local", | ||||||
| "health_check_status": True, | ||||||
| "type": "file", | ||||||
| "docker_based": False, | ||||||
| "maximum_tlp": "RED", | ||||||
| "observable_supported": [], | ||||||
| "supported_filetypes": [ | ||||||
| "application/x-mach-binary", | ||||||
mlodic marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| "application/mac-binary", | ||||||
| "application/x-binary", | ||||||
| "application/x-executable", | ||||||
|
Comment on lines
+31
to
+32
|
||||||
| "application/x-binary", | |
| "application/x-executable", |
Copilot
AI
Feb 7, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The supported_filetypes list includes very broad MIME types like "application/x-executable" and "application/x-binary", which are commonly used for non–Mach-O binaries (e.g., ELF). This will cause MachoInfo to run (and likely fail) on unrelated files; restrict supported_filetypes to Mach-O-specific MIME types (and/or add explicit signature checks + early exit) to avoid noisy failures and unnecessary work.
Copilot
AI
Feb 7, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
supported_filetypes includes generic MIME types like application/x-binary and application/x-executable. Since this analyzer is Mach-O specific (macOS/iOS), this configuration will make MachoInfo run on many non–Mach-O samples (e.g., ELF binaries commonly labeled application/x-executable), producing avoidable failures/noise in default playbooks. Consider restricting supported_filetypes to Mach-O specific MIME types (e.g., application/x-mach-binary and/or application/mac-binary) and removing the generic ones, or moving generic types to a separate opt-in config.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl | ||
| # See the file 'LICENSE' for copying permission. | ||
|
|
||
|
|
||
| from django.db import migrations | ||
|
|
||
|
|
||
def migrate(apps, schema_editor):
    """Add the ``MachoInfo`` analyzer to the ``FREE_TO_USE_ANALYZERS`` playbook.

    Both models are resolved through ``apps.get_model``; the playbook is
    validated with ``full_clean()`` and saved after the analyzer id is added.
    """
    PlaybookConfig = apps.get_model("playbooks_manager", "PlaybookConfig")
    AnalyzerConfig = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = PlaybookConfig.objects.get(name="FREE_TO_USE_ANALYZERS")
    analyzer = AnalyzerConfig.objects.get(name="MachoInfo")
    playbook.analyzers.add(analyzer.id)

    playbook.full_clean()
    playbook.save()
|
|
||
|
|
||
def reverse_migrate(apps, schema_editor):
    """Remove the ``MachoInfo`` analyzer from the ``FREE_TO_USE_ANALYZERS`` playbook.

    Both models are resolved through ``apps.get_model``; the playbook is
    validated with ``full_clean()`` and saved after the analyzer id is removed.
    """
    PlaybookConfig = apps.get_model("playbooks_manager", "PlaybookConfig")
    AnalyzerConfig = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = PlaybookConfig.objects.get(name="FREE_TO_USE_ANALYZERS")
    analyzer = AnalyzerConfig.objects.get(name="MachoInfo")
    playbook.analyzers.remove(analyzer.id)

    playbook.full_clean()
    playbook.save()
|
|
||
|
|
||
class Migration(migrations.Migration):
    """Data migration that runs ``migrate`` forwards and ``reverse_migrate`` backwards."""

    # NOTE(review): the analyzers_manager dependency presumably creates the
    # MachoInfo AnalyzerConfig that migrate() looks up; confirm.
    dependencies = [
        ("playbooks_manager", "0062_add_cleanbrowsing_to_free_to_use"),
        ("analyzers_manager", "0176_analyzer_config_macho_info"),
    ]

    operations = [
        migrations.RunPython(code=migrate, reverse_code=reverse_migrate),
    ]
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,47 @@ | ||||||
| from api_app.analyzers_manager.file_analyzers.macho_info import MachoInfo | ||||||
mlodic marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| from tests import CustomTestCase | ||||||
|
|
||||||
IshaanXCoder marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
|
||||||
mlodic marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| class MachoInfoTestCase(CustomTestCase): | ||||||
| fixtures = [ | ||||||
| "api_app/fixtures/0001_user.json", | ||||||
| ] | ||||||
|
|
||||||
IshaanXCoder marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| def test_macho_analysis(self): | ||||||
| """Test MachoFile analyzer with a real sample""" | ||||||
| report = self._analyze_sample( | ||||||
| "macho_sample", | ||||||
| "e4292266cfed6154c231f566a4b96c48", | ||||||
| # not hardcoded, MD5 of tested data | ||||||
IshaanXCoder marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| "application/x-mach-binary", | ||||||
| "MachoFile", | ||||||
|
||||||
| "MachoFile", | |
| "MachoInfo", |
Uh oh!
There was an error while loading. Please reload this page.