Skip to content

Commit 811e518

Browse files
lakshita10341 authored and Abhishek committed
feat: improve generic observable type detection in InQuest analyzer (intelowlproject#3283)
* feat: improve generic observable type detection in InQuest analyzer
* refactor: precompile regex patterns and improve test robustness
* refactored the XMPID_PATTERN regex and added a test for logging a warning
1 parent fa586a0 commit 811e518

File tree

2 files changed

+119
-13
lines changed
  • api_app/analyzers_manager/observable_analyzers
  • tests/api_app/analyzers_manager/unit_tests/observable_analyzers

2 files changed

+119
-13
lines changed

api_app/analyzers_manager/observable_analyzers/inquest.py

Lines changed: 47 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -8,14 +8,34 @@
88
import requests
99

1010
from api_app.analyzers_manager.classes import ObservableAnalyzer
11-
from api_app.analyzers_manager.exceptions import (
12-
AnalyzerConfigurationException,
13-
AnalyzerRunException,
14-
)
11+
from api_app.analyzers_manager.exceptions import AnalyzerConfigurationException, AnalyzerRunException
1512
from api_app.choices import Classification
1613

1714
logger = logging.getLogger(__name__)
1815

16+
# Precompiled regex patterns for generic observable type detection
17+
# Email pattern - comprehensive regex supporting TLDs of any length and subdomains
18+
EMAIL_PATTERN = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
19+
20+
# Windows Registry key pattern (specific hives like HKEY_LOCAL_MACHINE, HKLM, etc.)
21+
REGISTRY_PATTERN = re.compile(
22+
r"^(?:HKEY_(?:LOCAL_MACHINE|CURRENT_USER|CLASSES_ROOT|USERS|CURRENT_CONFIG)"
23+
r"|HK(?:LM|CU|CR|U|CC))(?:\\|$)",
24+
re.IGNORECASE,
25+
)
26+
27+
# XMP ID pattern (UUID format)
28+
XMPID_PATTERN = re.compile(
29+
r"^[a-fA-F0-9]{8}-"
30+
r"[a-fA-F0-9]{4}-"
31+
r"[a-fA-F0-9]{4}-"
32+
r"[a-fA-F0-9]{4}-"
33+
r"[a-fA-F0-9]{12}$"
34+
)
35+
36+
# Filename pattern - must have an extension, no path separators
37+
FILENAME_PATTERN = re.compile(r"^[\w\-. ]+\.[a-zA-Z0-9]{1,10}$")
38+
1939

2040
class InQuest(ObservableAnalyzer):
2141
url: str = "https://labs.inquest.net"
@@ -43,12 +63,29 @@ def hash_type(self):
4363
return hash_type
4464

4565
def type_of_generic(self):
46-
if re.match(r"^[\w\.\+\-]+\@[\w]+\.[a-z]{2,3}$", self.observable_name):
47-
type_ = "email"
48-
else:
49-
# TODO: This should be validated more thoroughly
50-
type_ = "filename"
51-
return type_
66+
"""
67+
Determine the type of a generic observable.
68+
69+
Supported types: email, filename, registry, xmpid
70+
"""
71+
if EMAIL_PATTERN.match(self.observable_name):
72+
return "email"
73+
74+
if REGISTRY_PATTERN.match(self.observable_name):
75+
return "registry"
76+
77+
if XMPID_PATTERN.match(self.observable_name):
78+
return "xmpid"
79+
80+
if FILENAME_PATTERN.match(self.observable_name):
81+
return "filename"
82+
83+
# Default to filename with warning for unrecognized patterns
84+
logger.warning(
85+
f"Could not determine type of generic observable: "
86+
f"'{self.observable_name}'. Defaulting to 'filename'."
87+
)
88+
return "filename"
5289

5390
def run(self):
5491
headers = {"Content-Type": "application/json"}

tests/api_app/analyzers_manager/unit_tests/observable_analyzers/test_inquest.py

Lines changed: 72 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,7 @@
11
from unittest.mock import patch
22

33
from api_app.analyzers_manager.observable_analyzers.inquest import InQuest
4-
from tests.api_app.analyzers_manager.unit_tests.observable_analyzers.base_test_class import (
5-
BaseAnalyzerTest,
6-
)
4+
from tests.api_app.analyzers_manager.unit_tests.observable_analyzers.base_test_class import BaseAnalyzerTest
75
from tests.mock_utils import MockUpResponse
86

97

@@ -22,3 +20,74 @@ def get_extra_config(cls) -> dict:
2220
"_api_key_name": "Bearer dummy_api_key",
2321
"generic_identifier_mode": "user-defined",
2422
}
23+
24+
25+
class TypeOfGenericTestCase(InQuestTestCase):
26+
"""Tests for the type_of_generic method."""
27+
28+
@classmethod
29+
def get_extra_config(cls) -> dict:
30+
config = super().get_extra_config()
31+
config["generic_identifier_mode"] = "auto"
32+
return config
33+
34+
def setUp(self):
35+
super().setUp()
36+
# Create a mock analyzer config
37+
from api_app.analyzers_manager.models import AnalyzerConfig
38+
39+
config = AnalyzerConfig.objects.filter(python_module=self.analyzer_class.python_module).first()
40+
if not config:
41+
self.skipTest(
42+
"AnalyzerConfig for InQuest is not available; skipping TypeOfGenericTestCase tests."
43+
)
44+
self.analyzer = self._setup_analyzer(config, "generic", "test")
45+
46+
def test_type_of_generic_email_simple(self):
47+
self.analyzer.observable_name = "user@example.com"
48+
self.assertEqual(self.analyzer.type_of_generic(), "email")
49+
50+
def test_type_of_generic_email_with_subdomain(self):
51+
self.analyzer.observable_name = "user.name+tag@sub.domain.info"
52+
self.assertEqual(self.analyzer.type_of_generic(), "email")
53+
54+
def test_type_of_generic_email_long_tld(self):
55+
self.analyzer.observable_name = "test@domain.museum"
56+
self.assertEqual(self.analyzer.type_of_generic(), "email")
57+
58+
def test_type_of_generic_registry_hkey(self):
59+
self.analyzer.observable_name = "HKEY_LOCAL_MACHINE\\Software\\Test"
60+
self.assertEqual(self.analyzer.type_of_generic(), "registry")
61+
62+
def test_type_of_generic_registry_hklm(self):
63+
self.analyzer.observable_name = "HKLM\\Software\\Microsoft"
64+
self.assertEqual(self.analyzer.type_of_generic(), "registry")
65+
66+
def test_type_of_generic_registry_hkcu(self):
67+
self.analyzer.observable_name = "HKCU\\Desktop"
68+
self.assertEqual(self.analyzer.type_of_generic(), "registry")
69+
70+
def test_type_of_generic_xmpid(self):
71+
self.analyzer.observable_name = "550e8400-e29b-41d4-a716-446655440000"
72+
self.assertEqual(self.analyzer.type_of_generic(), "xmpid")
73+
74+
def test_type_of_generic_filename_simple(self):
75+
self.analyzer.observable_name = "malware.exe"
76+
self.assertEqual(self.analyzer.type_of_generic(), "filename")
77+
78+
def test_type_of_generic_filename_with_spaces(self):
79+
self.analyzer.observable_name = "my document.pdf"
80+
self.assertEqual(self.analyzer.type_of_generic(), "filename")
81+
82+
def test_type_of_generic_unknown_defaults_to_filename(self):
83+
self.analyzer.observable_name = "random-text-no-extension"
84+
self.assertEqual(self.analyzer.type_of_generic(), "filename")
85+
86+
@patch("api_app.analyzers_manager.observable_analyzers.inquest.logger.warning")
87+
def test_type_of_generic_unknown_warning(self, mock_warning):
88+
self.analyzer.observable_name = "random-text-no-extension"
89+
self.analyzer.type_of_generic()
90+
mock_warning.assert_called_once_with(
91+
"Could not determine type of generic observable: "
92+
"'random-text-no-extension'. Defaulting to 'filename'."
93+
)

0 commit comments

Comments
 (0)