Skip to content

Commit d08e36c

Browse files
IshaanXCoderAbhishek
authored and committed
Machofile analyzer (intelowlproject#3268)
* feature/analyzer : machofile analyzer * fix/added unit tests, freeToUse, test_files.zip * fix/cicd and copilot recommendations * fix: precommit using ruff * fix: precommit using ruff(again) * fix: precommit using ruff(again) * fix/cicd * fix/cicd * fix/cicd * fix/migration-conflicts * fix/migration-conflicts * fix/cicd migrations * fix/cicd migrations due to zip * fix/add macho_sample * fix/copilot and maintainer recommendations * fix/cicd * fix/zip-pass * fix/cicd * minor fixes : copilot recommendations * fix/maintainer-feedback * fix/cicd failure due to BlintAnalyzer
1 parent 4e9a19a commit d08e36c

File tree

11 files changed

+442
-7
lines changed

11 files changed

+442
-7
lines changed

api_app/analyzers_manager/file_analyzers/blint_scan.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import os
33
import shutil
4+
from unittest.mock import patch
45

56
from blint.config import BlintOptions
67
from blint.lib.runners import AnalysisRunner
@@ -28,10 +29,12 @@ def run(self) -> dict:
2829
set_permissions(reports_dir)
2930

3031
analyzer = AnalysisRunner()
31-
findings, reviews, fuzzables = analyzer.start(
32-
blint_options=BlintOptions(reports_dir=reports_dir),
33-
exe_files=[self.filepath],
34-
)
32+
# Patch export_metadata to avoid a RecursionError on Mach-O files; the metadata report is not needed anyway.
33+
with patch("blint.lib.runners.export_metadata"):
34+
findings, reviews, fuzzables = analyzer.start(
35+
blint_options=BlintOptions(reports_dir=reports_dir),
36+
exe_files=[self.filepath],
37+
)
3538
response = {"findings": findings, "reviews": reviews, "fuzzables": fuzzables}
3639

3740
shutil.rmtree(reports_dir)
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
2+
# See the file 'LICENSE' for copying permission.
3+
4+
import logging
5+
from typing import Any, Dict
6+
7+
import machofile
8+
9+
from api_app.analyzers_manager.classes import FileAnalyzer
10+
from api_app.analyzers_manager.exceptions import AnalyzerRunException
11+
12+
logger = logging.getLogger(__name__)
13+
14+
15+
def _safe_decode(value: Any) -> str:
16+
"""Helper to safely decode bytes to string."""
17+
if isinstance(value, bytes):
18+
return value.decode("utf-8", "ignore")
19+
return str(value)
20+
21+
22+
class MachoInfo(FileAnalyzer):
    """
    Analyzer for Mach-O binary files (macOS/iOS executables).
    Uses the machofile library to parse and extract information.
    """

    @classmethod
    def update(cls) -> bool:
        """Return ``False``: this analyzer performs no update step."""
        return False

    def _load_macho(self, parser_cls, label: str):
        """Instantiate ``parser_cls`` on the analyzed file and parse it.

        An ``AttributeError`` raised by ``parse()`` is tolerated on a
        best-effort basis, but it is logged at debug level instead of being
        dropped silently. Any other exception propagates to the caller.
        """
        macho = parser_cls(self.filepath)
        try:
            macho.parse()
        except AttributeError:
            logger.debug(
                "%s parse() raised AttributeError, continuing with the "
                "unparsed object",
                label,
                exc_info=True,
            )
        return macho

    def _parse_macho(self):
        """Attempts to parse the file as Single or Universal Mach-O.

        Returns:
            The ``machofile.MachO`` object, or a ``machofile.UniversalMachO``
            object when the single-architecture attempt failed.

        Raises:
            AnalyzerRunException: if both attempts fail. The universal-binary
                error is attached as ``__cause__``.
        """
        try:
            return self._load_macho(machofile.MachO, "MachO")
        except Exception as e:
            try:
                return self._load_macho(machofile.UniversalMachO, "UniversalMachO")
            except Exception as universal_error:
                raise AnalyzerRunException(
                    f"Failed to parse as both single and universal binary. "
                    f"Single: {e}, Universal: {universal_error}"
                ) from universal_error

    def _extract_basic_info(self, macho, results: Dict[str, Any]):
        """Extract basic information like headers and hashes."""
        results["general_info"] = macho.get_general_info()
        results["header"] = macho.get_macho_header()
        results["hashes"] = macho.get_similarity_hashes()
        results["code_signature"] = macho.code_signature_info
        try:
            results["architectures"] = macho.get_architectures()
        except AttributeError:
            # The parsed object has no usable get_architectures(); report an
            # empty list rather than failing the whole analysis.
            results["architectures"] = []
        results["uuid"] = macho.uuid
        results["entrypoint"] = macho.entry_point
        results["version_info"] = macho.version_info

    def _extract_lists(self, macho, results: Dict[str, Any]):
        """Extract list-based structures like load commands and segments.

        For a universal binary each key maps architecture -> list; otherwise
        each key maps directly to the list.
        """
        is_universal = isinstance(macho, machofile.UniversalMachO)

        def get_macho_lists(m):
            return {
                "load_commands": [str(lc) for lc in m.load_commands],
                "segments": [str(s) for s in m.segments],
                "dylib_names": [_safe_decode(d) for d in m.dylib_names],
            }

        if is_universal:
            # Create the keys up front so they exist even when the binary
            # reports no architectures.
            for k in ["load_commands", "segments", "dylib_names"]:
                results[k] = {}
            for arch, m in macho.architectures.items():
                for k, v in get_macho_lists(m).items():
                    results[k][arch] = v
        else:
            results.update(get_macho_lists(macho))

    def _extract_symbols(self, macho, results: Dict[str, Any]):
        """Extract imported and exported symbols."""
        results["imports"] = macho.get_imported_functions()
        results["exports"] = macho.get_exported_symbols()

    def run(self) -> Dict[str, Any]:
        """Parse the file and return the collected Mach-O information.

        Raises:
            AnalyzerRunException: on any failure. The message is also logged
                and appended to ``self.report.errors``, and the original
                exception is chained as ``__cause__``.
        """
        results: Dict[str, Any] = {}

        try:
            macho = self._parse_macho()
            self._extract_basic_info(macho, results)
            self._extract_lists(macho, results)
            self._extract_symbols(macho, results)

        except Exception as e:
            error_msg = (
                f"job_id:{self.job_id} analyzer:{self.analyzer_name} "
                f"md5:{self.md5} filename:{self.filename} "
                f"MachoInfo parsing error: {e}"
            )
            logger.error(error_msg, exc_info=True)
            self.report.errors.append(error_msg)
            raise AnalyzerRunException(error_msg) from e

        return results
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
2+
# See the file 'LICENSE' for copying permission.
3+
4+
from django.db import migrations
5+
from django.db.models.fields.related_descriptors import (
6+
ForwardManyToOneDescriptor,
7+
ForwardOneToOneDescriptor,
8+
ManyToManyDescriptor,
9+
ReverseManyToOneDescriptor,
10+
ReverseOneToOneDescriptor,
11+
)
12+
13+
# Serialized configuration row for the MachoInfo analyzer.
# The "model" entry is not a model field: migrate()/reverse_migrate() read it
# as "<app_label>.<ModelName>" to look up the target model.
plugin = {
    "name": "MachoInfo",
    "python_module": {
        "module": "macho_info.MachoInfo",
        "base_path": "api_app.analyzers_manager.file_analyzers",
    },
    "description": "Parse Mach-O binary files (macOS/iOS executables) using [machofile](https://github.com/pstirparo/machofile) library. Extracts headers, segments, dylibs, imports, exports, hashes and code signatures.",
    "disabled": False,
    "soft_time_limit": 60,
    "routing_key": "local",
    "health_check_status": True,
    "type": "file",
    "docker_based": False,
    "maximum_tlp": "RED",
    "observable_supported": [],
    "supported_filetypes": [
        "application/x-mach-binary",
        "application/mac-binary",
        "application/x-binary",
        "application/x-executable",
    ],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "health_check_task": None,
    "model": "analyzers_manager.AnalyzerConfig",
}

# Rows that migrate() creates as Parameter objects; none for this plugin.
params = []
# Rows that migrate() creates as PluginConfig objects; none for this plugin.
values = []
43+
44+
45+
def _get_real_obj(Model, field, value):
    """Resolve the serialized ``value`` of ``field`` on ``Model``.

    When ``field`` is a relation (detected from the descriptor type found on
    ``Model``) and ``value`` is truthy, the value is replaced by the related
    model instance, or by a list of instances for many-to-many fields.
    Any other value is returned unchanged.
    """

    def _get_obj(Model, other_model, value):
        # A dict describes the related object field by field: resolve every
        # entry recursively, then fetch or create the matching row.
        if isinstance(value, dict):
            real_vals = {}
            for key, real_val in value.items():
                real_vals[key] = _get_real_obj(other_model, key, real_val)
            value = other_model.objects.get_or_create(**real_vals)[0]
        # it is just the primary key serialized
        else:
            if isinstance(value, int):
                # For PluginConfig the integer is ignored and the related
                # object is looked up by this plugin's name instead.
                if Model.__name__ == "PluginConfig":
                    value = other_model.objects.get(name=plugin["name"])
                else:
                    value = other_model.objects.get(pk=value)
            else:
                # Any non-int, non-dict value is treated as the related
                # object's name.
                value = other_model.objects.get(name=value)
        return value

    # Single-valued relations (forward/reverse FK and one-to-one).
    if (
        type(getattr(Model, field))
        in [
            ForwardManyToOneDescriptor,
            ReverseManyToOneDescriptor,
            ReverseOneToOneDescriptor,
            ForwardOneToOneDescriptor,
        ]
        and value
    ):
        other_model = getattr(Model, field).get_queryset().model
        value = _get_obj(Model, other_model, value)
    # Many-to-many relations: resolve each element of the iterable.
    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
        other_model = getattr(Model, field).rel.model
        value = [_get_obj(Model, other_model, val) for val in value]
    return value
79+
80+
81+
def _create_object(Model, data):
    """Create a ``Model`` row from ``data`` unless a matching one exists.

    Every value is first resolved through ``_get_real_obj``. Many-to-many
    fields are set after the row is saved; all other fields are used both
    for the existence lookup and for construction.

    Returns:
        ``True`` if a row matching the non many-to-many fields already
        existed, ``False`` if a new row was created.
    """
    relations, plain_fields = {}, {}
    for name, raw in data.items():
        resolved = _get_real_obj(Model, name, raw)
        is_m2m = type(getattr(Model, name)) is ManyToManyDescriptor
        target = relations if is_m2m else plain_fields
        target[name] = resolved

    try:
        Model.objects.get(**plain_fields)
    except Model.DoesNotExist:
        instance = Model(**plain_fields)
        instance.full_clean()
        instance.save()
        # Many-to-many values can only be attached once the row has been saved.
        for name, related in relations.items():
            manager = getattr(instance, name)
            if related is not None:
                manager.set(related)
        return False
    return True
101+
102+
103+
def migrate(apps, schema_editor):
    """Create the plugin row plus its parameters and values if it is missing.

    Nothing is done when a row with the plugin's name already exists.
    """
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    # Read "model" without popping it: mutating the module-level dict would
    # make any later call in the same process fail with KeyError.
    python_path = plugin["model"]
    plugin_fields = {
        key: value for key, value in plugin.items() if key != "model"
    }
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin_fields)
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)
115+
116+
117+
def reverse_migrate(apps, schema_editor):
    """Delete the plugin row identified by the plugin's name."""
    # Read "model" without popping it so the module-level dict is left
    # intact for any later call in the same process.
    python_path = plugin["model"]
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()
121+
122+
123+
class Migration(migrations.Migration):
    """Data migration that runs ``migrate`` / ``reverse_migrate``."""

    atomic = False

    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0175_analyzer_config_cleanbrowsing_malicious_detector"),
    ]

    operations = [
        migrations.RunPython(code=migrate, reverse_code=reverse_migrate),
    ]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
2+
# See the file 'LICENSE' for copying permission.
3+
4+
5+
from django.db import migrations
6+
7+
8+
def migrate(apps, schema_editor):
    """Add the MachoInfo analyzer to the FREE_TO_USE_ANALYZERS playbook."""
    playbook_model = apps.get_model("playbooks_manager", "PlaybookConfig")
    analyzer_model = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = playbook_model.objects.get(name="FREE_TO_USE_ANALYZERS")
    macho_info = analyzer_model.objects.get(name="MachoInfo")

    playbook.analyzers.add(macho_info.id)
    playbook.full_clean()
    playbook.save()
15+
16+
17+
def reverse_migrate(apps, schema_editor):
    """Remove the MachoInfo analyzer from the FREE_TO_USE_ANALYZERS playbook."""
    playbook_model = apps.get_model("playbooks_manager", "PlaybookConfig")
    analyzer_model = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = playbook_model.objects.get(name="FREE_TO_USE_ANALYZERS")
    macho_info = analyzer_model.objects.get(name="MachoInfo")

    playbook.analyzers.remove(macho_info.id)
    playbook.full_clean()
    playbook.save()
24+
25+
26+
class Migration(migrations.Migration):
    """Data migration that runs ``migrate`` / ``reverse_migrate``."""

    operations = [
        migrations.RunPython(code=migrate, reverse_code=reverse_migrate),
    ]

    dependencies = [
        ("playbooks_manager", "0062_add_cleanbrowsing_to_free_to_use"),
        ("analyzers_manager", "0176_analyzer_config_macho_info"),
    ]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
2+
# See the file 'LICENSE' for copying permission.
3+
4+
5+
from django.db import migrations
6+
7+
8+
def migrate(apps, schema_editor):
    """Add the MachoInfo analyzer to the Sample_Static_Analysis playbook."""
    playbook_model = apps.get_model("playbooks_manager", "PlaybookConfig")
    analyzer_model = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = playbook_model.objects.get(name="Sample_Static_Analysis")
    macho_info = analyzer_model.objects.get(name="MachoInfo")

    playbook.analyzers.add(macho_info.id)
    playbook.full_clean()
    playbook.save()
15+
16+
17+
def reverse_migrate(apps, schema_editor):
    """Remove the MachoInfo analyzer from the Sample_Static_Analysis playbook."""
    playbook_model = apps.get_model("playbooks_manager", "PlaybookConfig")
    analyzer_model = apps.get_model("analyzers_manager", "AnalyzerConfig")

    playbook = playbook_model.objects.get(name="Sample_Static_Analysis")
    macho_info = analyzer_model.objects.get(name="MachoInfo")

    playbook.analyzers.remove(macho_info.id)
    playbook.full_clean()
    playbook.save()
24+
25+
26+
class Migration(migrations.Migration):
    """Data migration that runs ``migrate`` / ``reverse_migrate``."""

    operations = [
        migrations.RunPython(code=migrate, reverse_code=reverse_migrate),
    ]

    dependencies = [
        ("playbooks_manager", "0063_add_machofile_to_free_to_use"),
        ("analyzers_manager", "0176_analyzer_config_macho_info"),
    ]

intel_owl/settings/_util.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,13 @@
1313

1414
get_secret = os.environ.get
1515

16-
uid = pwd.getpwnam("www-data").pw_uid
17-
gid = grp.getgrnam("www-data").gr_gid
16+
# Resolve the numeric user and group ids of the "www-data" account.
try:
    uid = pwd.getpwnam("www-data").pw_uid
    gid = grp.getgrnam("www-data").gr_gid
except (KeyError, AttributeError):
    # fallback for environments without www-data (like local macOS or CI envs)
    # getpwnam()/getgrnam() raise KeyError when the entry does not exist;
    # use the ids of the current process instead.
    uid = os.getuid()
    gid = os.getgid()
1823

1924

2025
def set_permissions(directory: Path, force_create: bool = False):

requirements/project-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ jbxapi==3.23.0
9696
yara-x==1.10.0
9797
flare-floss==3.1.1
9898
flare-capa==9.3.1
99+
machofile==2026.2.4
99100

100101
# httpx required for HTTP/2 support (Mullvad DNS rejects HTTP/1.1 with protocol errors)
101102
httpx[http2]==0.28.1

0 commit comments

Comments
 (0)