Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 139 additions & 0 deletions api_app/analyzers_manager/file_analyzers/macho_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

import logging
from typing import Any, Dict, Optional

import machofile

from api_app.analyzers_manager.classes import FileAnalyzer
from api_app.analyzers_manager.exceptions import AnalyzerRunException

logger = logging.getLogger(__name__)


def _safe_decode(value: Any) -> str:
"""Helper to safely decode bytes to string."""
if isinstance(value, bytes):
return value.decode("utf-8", "ignore")
return str(value)


class MachoInfo(FileAnalyzer):
    """
    Analyzer for Mach-O binary files (macOS/iOS executables).
    Uses the machofile library to parse and extract information.

    The parsed object is probed with ``hasattr`` throughout, presumably
    because the single-architecture and universal parsers do not expose the
    same getters and attributes.
    """

    @classmethod
    def update(cls) -> bool:
        """Return False: this analyzer performs no update step."""
        return False

    @staticmethod
    def _get_attr(
        macho, getter: Optional[str], fallback: Optional[str], formatted=True
    ):
        """Try getter method first, then fall back to direct attribute.

        Args:
            macho: parsed machofile object to read from.
            getter: name of a getter method, or None to go straight to the
                attribute lookup.
            fallback: name of a plain attribute read when the getter is
                missing, or None for no fallback.
            formatted: forwarded to the getter as ``formatted=``.

        Returns:
            The getter's result when the getter exists, otherwise the
            fallback attribute's value, otherwise None.
        """
        if getter and hasattr(macho, getter):
            method = getattr(macho, getter)
            try:
                return method(formatted=formatted)
            except TypeError:
                # Presumably this getter does not take ``formatted``;
                # retry it without arguments.
                return method()
        if fallback and hasattr(macho, fallback):
            return getattr(macho, fallback)
        return None

    def _parse_macho(self):
        """Attempts to parse the file as Single or Universal Mach-O.

        Returns:
            The parsed ``machofile.MachO`` object, or the parsed
            ``machofile.UniversalMachO`` object when the first attempt fails.

        Raises:
            AnalyzerRunException: when both attempts fail; the message
                carries both underlying errors.
        """
        try:
            macho = machofile.MachO(self.filepath)
            if hasattr(macho, "parse"):
                macho.parse()
            return macho
        except Exception as e:
            try:
                macho = machofile.UniversalMachO(self.filepath)
                if hasattr(macho, "parse"):
                    macho.parse()
                return macho
            except Exception as universal_error:
                # Chain explicitly so the traceback shows the universal
                # failure as the direct cause.
                raise AnalyzerRunException(
                    f"Failed to parse as both single and universal binary. "
                    f"Single: {e}, Universal: {universal_error}"
                ) from universal_error

    def _extract_basic_info(self, macho, results: Dict[str, Any]):
        """Extract basic information like headers and hashes.

        Writes into ``results`` in place. Keys backed by ``_get_attr`` are
        only set when the value is truthy; ``architectures`` is always set.
        """
        if val := self._get_attr(macho, "get_general_info", "general_info"):
            results["general_info"] = val
        if val := self._get_attr(macho, "get_macho_header", "header"):
            results["header"] = val
        if val := self._get_attr(macho, "get_similarity_hashes", None):
            results["hashes"] = val
        if val := self._get_attr(macho, None, "code_signature_info"):
            results["code_signature"] = val

        if hasattr(macho, "get_architectures"):
            results["architectures"] = macho.get_architectures()
        elif isinstance(results.get("header"), dict):
            # No dedicated getter: use the keys of the header mapping.
            results["architectures"] = list(results["header"].keys())
        else:
            results["architectures"] = []

        for key, attr in [
            ("uuid", "uuid"),
            ("entrypoint", "entry_point"),
            ("version_info", "version_info"),
        ]:
            if hasattr(macho, attr):
                results[key] = getattr(macho, attr)

    def _extract_lists(self, macho, results: Dict[str, Any]):
        """Extract list-based structures like load commands and segments.

        For an object whose ``architectures`` attribute is a dict, each
        result key maps architecture -> list; otherwise each key maps
        directly to a list.
        """
        is_universal = hasattr(macho, "architectures") and isinstance(
            macho.architectures, dict
        )

        def get_macho_lists(m):
            # ``or []`` also covers an attribute that exists but is None,
            # which would otherwise fail when iterated.
            return {
                "load_commands": [
                    str(lc) for lc in getattr(m, "load_commands", None) or []
                ],
                "segments": [str(s) for s in getattr(m, "segments", None) or []],
                "dylib_names": [
                    _safe_decode(d) for d in getattr(m, "dylib_names", None) or []
                ],
            }

        if is_universal:
            per_arch = {
                arch: get_macho_lists(m) for arch, m in macho.architectures.items()
            }
            for k in ["load_commands", "segments", "dylib_names"]:
                results[k] = {arch: lists[k] for arch, lists in per_arch.items()}
        else:
            results.update(get_macho_lists(macho))

    def _extract_symbols(self, macho, results: Dict[str, Any]):
        """Extract imported and exported symbols.

        Getter methods are preferred; the raw attributes are used only when
        the getter is missing and the attribute is non-empty.
        """
        if hasattr(macho, "get_imported_functions"):
            results["imports"] = macho.get_imported_functions()
        elif hasattr(macho, "imported_functions") and macho.imported_functions:
            results["imports"] = [_safe_decode(f) for f in macho.imported_functions]

        if hasattr(macho, "get_exported_symbols"):
            results["exports"] = macho.get_exported_symbols()
        elif hasattr(macho, "exported_symbols") and macho.exported_symbols:
            results["exports"] = [_safe_decode(s) for s in macho.exported_symbols]

    def run(self) -> Dict[str, Any]:
        """Parse the sample and return the collected Mach-O information.

        Raises:
            AnalyzerRunException: re-raised unchanged when parsing fails, or
                raised for any other error after the message has been
                appended to ``self.report.errors``.
        """
        results: Dict[str, Any] = {}

        try:
            macho = self._parse_macho()
            self._extract_basic_info(macho, results)
            self._extract_lists(macho, results)
            self._extract_symbols(macho, results)

        except AnalyzerRunException:
            raise
        except Exception as e:
            error_msg = (
                f"job_id:{self.job_id} analyzer:{self.analyzer_name} "
                f"md5:{self.md5} filename:{self.filename} "
                f"Mach-O parsing error: {e}"
            )
            self.report.errors.append(error_msg)
            raise AnalyzerRunException(error_msg) from e

        return results
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from django.db import migrations
from django.db.models.fields.related_descriptors import (
ForwardManyToOneDescriptor,
ForwardOneToOneDescriptor,
ManyToManyDescriptor,
ReverseManyToOneDescriptor,
ReverseOneToOneDescriptor,
)

# Serialized AnalyzerConfig for the MachoInfo analyzer, created by migrate().
plugin = {
    "name": "MachoInfo",
    "python_module": {
        "module": "macho_info.MachoInfo",
        "base_path": "api_app.analyzers_manager.file_analyzers",
    },
    "description": "Parse Mach-O binary files (macOS/iOS executables) using [machofile](https://github.com/pstirparo/machofile) library. Extracts headers, segments, dylibs, imports, exports, hashes and code signatures.",
    "disabled": False,
    "soft_time_limit": 60,
    "routing_key": "local",
    "health_check_status": True,
    "type": "file",
    "docker_based": False,
    "maximum_tlp": "RED",
    "observable_supported": [],
    # Only Mach-O specific MIME types. Generic executable types such as
    # application/x-executable and application/x-binary are left out on
    # purpose: they also label non-Mach-O binaries (e.g. ELF), which this
    # analyzer cannot parse.
    "supported_filetypes": [
        "application/x-mach-binary",
        "application/mac-binary",
    ],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "health_check_task": None,
    # Dotted "app_label.ModelName" path; not a model field.
    "model": "analyzers_manager.AnalyzerConfig",
}

# This analyzer declares no parameters and no default plugin config values.
params = []
values = []


def _get_real_obj(Model, field, value):
    """Turn a serialized relation value into the object(s) it refers to.

    Non-relational fields, and falsy values, are returned unchanged.
    """

    def _resolve(owner, related_model, raw):
        # A dict is a nested serialized object: resolve each of its fields,
        # then fetch or create the matching row.
        if isinstance(raw, dict):
            lookup = {
                name: _get_real_obj(related_model, name, nested)
                for name, nested in raw.items()
            }
            return related_model.objects.get_or_create(**lookup)[0]
        # Anything that is not an int is looked up by name.
        if not isinstance(raw, int):
            return related_model.objects.get(name=raw)
        # it is just the primary key serialized; PluginConfig rows are
        # looked up by this plugin's name instead of that key.
        if owner.__name__ == "PluginConfig":
            return related_model.objects.get(name=plugin["name"])
        return related_model.objects.get(pk=raw)

    descriptor = getattr(Model, field)
    # Exact type comparison on purpose: an isinstance() test would also
    # match subclasses of these descriptor classes.
    descriptor_type = type(descriptor)
    single_valued = (
        ForwardManyToOneDescriptor,
        ReverseManyToOneDescriptor,
        ReverseOneToOneDescriptor,
        ForwardOneToOneDescriptor,
    )
    if descriptor_type in single_valued and value:
        related_model = descriptor.get_queryset().model
        value = _resolve(Model, related_model, value)
    elif descriptor_type is ManyToManyDescriptor and value:
        related_model = descriptor.rel.model
        value = [_resolve(Model, related_model, item) for item in value]
    return value


def _create_object(Model, data):
    """Create a ``Model`` row from serialized ``data`` unless one exists.

    Returns:
        True when a row matching the non many-to-many fields already exists,
        False when a new row was created.
    """
    plain_fields = {}
    m2m_fields = {}
    for field, raw in data.items():
        resolved = _get_real_obj(Model, field, raw)
        is_m2m = type(getattr(Model, field)) is ManyToManyDescriptor
        # Many-to-many values are applied after the row has been saved.
        (m2m_fields if is_m2m else plain_fields)[field] = resolved

    try:
        Model.objects.get(**plain_fields)
    except Model.DoesNotExist:
        instance = Model(**plain_fields)
        instance.full_clean()
        instance.save()
        for field, related in m2m_fields.items():
            manager = getattr(instance, field)
            if related is not None:
                manager.set(related)
        return False
    else:
        return True


def migrate(apps, schema_editor):
    """Create the plugin row, then its parameters and config values.

    Nothing is created when a row with the plugin's name already exists.
    """
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    # Work on a copy: popping "model" from the module-level dict would make a
    # later reverse_migrate() in the same process fail on the missing key.
    plugin_fields = dict(plugin)
    python_path = plugin_fields.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin_fields)
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """Delete the plugin row identified by the plugin's name."""
    # Read the key instead of popping it so the module-level dict stays
    # intact for any later forward or reverse run in the same process.
    python_path = plugin["model"]
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    """Data migration that runs ``migrate`` forwards and ``reverse_migrate`` backwards."""

    # Do not wrap this migration in a single transaction.
    atomic = False

    # Must be applied after these two migrations.
    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        (
            "analyzers_manager",
            "0175_analyzer_config_cleanbrowsing_malicious_detector",
        ),
    ]

    operations = [
        migrations.RunPython(migrate, reverse_migrate),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.


from django.db import migrations


def migrate(apps, schema_editor):
    """Add the MachoInfo analyzer to the FREE_TO_USE_ANALYZERS playbook."""
    playbook_model = apps.get_model("playbooks_manager", "PlaybookConfig")
    analyzer_model = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = playbook_model.objects.get(name="FREE_TO_USE_ANALYZERS")
    linked_analyzers = playbook.analyzers
    linked_analyzers.add(analyzer_model.objects.get(name="MachoInfo").id)

    # Validate before saving, as the migration did originally.
    playbook.full_clean()
    playbook.save()


def reverse_migrate(apps, schema_editor):
    """Remove the MachoInfo analyzer from the FREE_TO_USE_ANALYZERS playbook."""
    playbook_model = apps.get_model("playbooks_manager", "PlaybookConfig")
    analyzer_model = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = playbook_model.objects.get(name="FREE_TO_USE_ANALYZERS")
    linked_analyzers = playbook.analyzers
    linked_analyzers.remove(analyzer_model.objects.get(name="MachoInfo").id)

    # Validate before saving, as the migration did originally.
    playbook.full_clean()
    playbook.save()


class Migration(migrations.Migration):
    """Data migration that runs ``migrate`` forwards and ``reverse_migrate`` backwards."""

    # Must be applied after the playbook migration it extends and after the
    # migration that registers the MachoInfo analyzer config.
    dependencies = [
        ("playbooks_manager", "0062_add_cleanbrowsing_to_free_to_use"),
        ("analyzers_manager", "0176_analyzer_config_macho_info"),
    ]

    operations = [migrations.RunPython(migrate, reverse_migrate)]
9 changes: 7 additions & 2 deletions intel_owl/settings/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@

get_secret = os.environ.get

# Resolve the owner used for created directories. The lookup must happen only
# inside the try block: an unguarded lookup ahead of it raises KeyError on
# hosts without the account and the fallback is never reached.
try:
    uid = pwd.getpwnam("www-data").pw_uid
    gid = grp.getgrnam("www-data").gr_gid
except (KeyError, AttributeError):
    # fallback for environments without www-data (like local macOS or CI envs)
    uid = os.getuid()
    gid = os.getgid()


def set_permissions(directory: Path, force_create: bool = False):
Expand Down
1 change: 1 addition & 0 deletions requirements/project-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jbxapi==3.23.0
yara-x==1.10.0
flare-floss==3.1.1
flare-capa==9.3.1
machofile==2026.2.4

# httpx required for HTTP/2 support (Mullvad DNS rejects HTTP/1.1 with protocol errors)
httpx[http2]==0.28.1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from api_app.analyzers_manager.file_analyzers.macho_info import MachoInfo
from tests import CustomTestCase


class MachoInfoTestCase(CustomTestCase):
    """Integration test for the MachoInfo analyzer against a stored sample."""

    fixtures = [
        "api_app/fixtures/0001_user.json",
    ]

    def test_macho_analysis(self):
        """Test MachoInfo analyzer with a real sample"""
        report = self._analyze_sample(
            "macho_sample",
            # MD5 of the sample file above, not an arbitrary constant
            "e4292266cfed6154c231f566a4b96c48",
            "application/x-mach-binary",
            # Must match the AnalyzerConfig name registered by the migration.
            "MachoInfo",
            MachoInfo,
        )

        # Check header
        self.assertIn("header", report)
        self.assertIn("magic", report["header"])

        # Check architecture (our sample is likely x86_64 or arm64 depending on host build)
        self.assertIn("architectures", report)
        self.assertIsInstance(report["architectures"], list)

        # Check segments
        self.assertIn("segments", report)
        self.assertGreater(len(report["segments"]), 0)

        # Check dylibs (should have at least libSystem)
        self.assertIn("dylib_names", report)
        self.assertIsInstance(report["dylib_names"], list)

        # Check hashes
        self.assertIn("hashes", report)
        self.assertIn("dylib_hash", report["hashes"])

        # Check exports
        self.assertIn("exports", report)
        # exports may be a list or a dict depending on the result format;
        # the analyzed sample suggested a dict
        self.assertIsInstance(report["exports"], (list, dict))

        # Check code signature
        self.assertIn("code_signature", report)
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class BaseFileAnalyzerTest(TestCase):
"text/xml": "android.xml",
"application/zip": "test.zip",
"application/x-dex": "sample.dex",
"application/x-mach-binary": "macho.sample",
}

@classmethod
Expand Down Expand Up @@ -140,7 +141,7 @@ def test_analyzer_on_supported_filetypes(self):
with self._apply_patches(patches):
md5 = hashlib.md5(file_bytes).hexdigest()

analyzer = self.analyzer_class(config)
analyzer = self.analyzer_class(config) # pylint: disable=not-callable
analyzer.file_mimetype = mimetype
analyzer.filename = f"test_file_{mimetype}"
analyzer.md5 = md5
Expand Down
Loading
Loading