Skip to content

Commit c621fe8

Browse files
authored
Merge pull request #454 from target/ScanJNLP
Adding in ScanJNLP
2 parents e499d29 + a8d847a commit c621fe8

File tree

5 files changed

+159
-1
lines changed

5 files changed

+159
-1
lines changed

configs/python/backend/backend.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,11 @@ scanners:
273273
- 'application/json'
274274
- 'json_file'
275275
priority: 5
276+
'ScanJnlp':
277+
- positive:
278+
flavors:
279+
- "jnlp_file"
280+
priority: 5
276281
'ScanLibarchive':
277282
- positive:
278283
flavors:

docs/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -798,7 +798,8 @@ The table below describes each scanner and its options. Each scanner has the hid
798798
| ScanIso | Collects and extracts files from ISO files | `limit` -- maximum number of files to extract (defaults to `0`) |
799799
| ScanJarManifest | Collects metadata from JAR manifest files | N/A |
800800
| ScanJavascript | Collects metadata from Javascript files | `beautify` -- beautifies JavaScript before parsing (defaults to `True`) |
801-
| ScanJpeg | Extracts data embedded in JPEG files | N/A |
801+
| ScanJpeg | Extracts data embedded in JPEG files | N/A |
802+
| ScanJnlp | Identifies JNLP files that reference external HTTP resources, particularly those not associated with trusted domains | N/A | Ryan Borre, [Paul Hutelmyer](https://github.com/phutelmyer) |
802803
| ScanJson | Collects keys from JSON files | N/A |
803804
| ScanLibarchive | Extracts files from libarchive-compatible archives. | `limit` -- maximum number of files to extract (defaults to `1000`) |
804805
| ScanLnk | Collects metadata from lnk files. | N/A | Ryan Borre, [DerekT2](https://github.com/Derekt2), [Nathan Icart](https://github.com/nateicart)
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from io import BytesIO
2+
3+
from lxml import etree
4+
5+
from strelka import strelka
6+
7+
8+
class ScanJnlp(strelka.Scanner):
    """
    Analyzes Java Network Launch Protocol (JNLP) files.

    JNLP files, used by Java Web Start technology, can launch Java applications from a web browser. While facilitating
    legitimate applications, they can also be abused for malicious purposes such as distributing malware or executing
    phishing attacks.

    Scanner Type: Collection

    Attributes:
        event (dict): Stores extracted data during the scan for further analysis.

    Detection Use Cases:
        - **External Resource Reference**
            - Identify JNLP files that reference external HTTP resources, particularly those not associated with trusted
            domains.

    Known Limitations:
        - **Java Dependence**
            - Effectiveness is contingent on the presence and version of Java installed on the target system.
        - **Single Resource**
            - Only the last `jar` element encountered is reported.

    Todo:
        - Improve detection of obfuscated or sophisticated threats within JNLP files.
        - Extract any other potential JNLP content / headers.

    References:
        - **File Structure**
            - https://docs.oracle.com/javase/tutorial/deployment/deploymentInDepth/jnlpFileSyntax.html
        - **Malicious Usage**
            - https://www.forcepoint.com/blog/x-labs/java-network-launch-protocol
            - https://newtonpaul.com/analysing-fileless-malware-cobalt-strike-beacon
    """

    def scan(self, data, file, options, expire_at):
        """
        Scans the given data for JNLP-related information.

        Reads the 'codebase' attribute of the `jnlp` element and the 'href' attribute of `jar` elements, then stores
        the resulting resource URL in `self.event["resource"]`:

        - an absolute `href` (one that starts with `scheme://`) is reported unchanged, whether or not a codebase
          is present;
        - a relative `href` is appended to the codebase, and is only reported when a codebase is present.

        Args:
            data (bytes): Data of the file being scanned.
            file (File): File object being scanned.
            options (dict): Options for the scanner.
            expire_at (datetime): Expiration time of the scan result.
        """
        # Function-scope import so this block does not depend on the module's import list
        import re

        # Initialize variables for 'codebase' and 'href' attributes
        codebase = ""
        href = ""

        # Parse the XML to find 'jnlp' and 'jar' elements. Elements are yielded when they close, so every
        # 'jar' is seen before its enclosing 'jnlp'; a later 'jar' overwrites an earlier one.
        for elem, _ in iterate_xml_elements(data, tags=["jnlp", "jar"]):
            if elem.tag == "jnlp":
                codebase = elem.get("codebase", "").rstrip("/")
            elif elem.tag == "jar":
                href = elem.get("href", "").lstrip("/")

        if not href:
            return

        if re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", href):
            # JNLP hrefs may be absolute URLs; prefixing the codebase would produce a bogus URL and hide
            # the real download location.
            self.event["resource"] = href
        elif codebase:
            # Relative href: resolve it against the codebase
            self.event["resource"] = f"{codebase}/{href}"
68+
69+
70+
def iterate_xml_elements(data, tags=None):
    """
    Iterates over XML data, yielding elements with specified tags.

    The data is parsed incrementally with `etree.iterparse`. Each matching element is yielded when its end tag is
    reached, so its attributes and children are fully populated and nested elements are yielded before the
    elements that contain them.

    Args:
        data (bytes): The XML data to parse.
        tags (list): List of XML tags to filter elements by. If None, every element is yielded.

    Yields:
        tuple: A tuple containing the XML element and its depth in the XML tree (the root element has depth 0).
    """
    # Number of elements currently open; after an element closes this equals that element's depth
    depth = 0

    # Start/end events from the parser are always properly nested, so a single counter is sufficient to
    # track depth and no per-tag stack is required.
    for event, elem in etree.iterparse(BytesIO(data), events=("start", "end")):
        if event == "start":
            depth += 1
            continue

        # 'end' event: the element is complete
        depth -= 1
        if tags is None or elem.tag in tags:
            yield elem, depth
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<jnlp spec="1.0+" codebase="https://example.com/" href="file-1.jnlp">
3+
<information>
4+
<title>SECURE DOCUMENT VIEWER</title>
5+
<vendor>Microsoft</vendor>
6+
<homepage href="https://microsoft.com"/>
7+
<description>Secure document viewer app</description>
8+
</information>
9+
<security>
10+
<all-permissions/>
11+
</security>
12+
<resources>
13+
<j2se version="1.6+" />
14+
<jar href="uplib.jar" />
15+
</resources>
16+
<application-desc main-class="Viewer">
17+
</application-desc>
18+
19+
1234abcdeF56789
20+
</jnlp>
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from pathlib import Path
2+
from unittest import TestCase, mock
3+
4+
from strelka.scanners.scan_jnlp import ScanJnlp as ScanUnderTest
5+
from strelka.tests import run_test_scan
6+
7+
8+
def test_scan_jnlp(mocker):
    """
    Run ScanJnlp against the JNLP fixture and compare the resulting event.

    Pass: The scanner event equals the expected sample event.
    Failure: The fixture cannot be loaded or the events differ.
    """
    fixture = Path(__file__).parent / "fixtures/test.jnlp"

    # 'elapsed' varies between runs, so any value is accepted for it
    expected_event = {
        "elapsed": mock.ANY,
        "flags": [],
        "resource": "https://example.com/uplib.jar",
    }

    actual_event = run_test_scan(
        mocker=mocker,
        scan_class=ScanUnderTest,
        fixture_path=fixture,
    )

    # Show the full dictionary diff on failure instead of a truncated one
    TestCase.maxDiff = None
    TestCase().assertDictEqual(expected_event, actual_event)

0 commit comments

Comments
 (0)