
Commit c896fa9

Merge pull request #2 from honeynet/dev
0.0.1
2 parents eabc93f + 89ea8c1

19 files changed: +473 −225 lines

.github/dependabot.yml

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "tuesday"
+    target-branch: "dev"
+    open-pull-requests-limit: 1
+    ignore:
+      # ignore all patch updates since we are using ~=
+      # this does not work for security updates
+      - dependency-name: "*"
+        update-types: [ "version-update:semver-patch" ]
+
+  - package-ecosystem: "docker"
+    directory: "/docker"
+    schedule:
+      interval: "weekly"
+      day: "tuesday"
+    target-branch: "dev"
+    open-pull-requests-limit: 1
+    ignore:
+      # ignore all patch updates since we are using ~=
+      # this does not work for security updates
+      - dependency-name: "*"
+        update-types: [ "version-update:semver-patch" ]
+
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "tuesday"
+    target-branch: "dev"
+    open-pull-requests-limit: 1
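The ignore rule above leans on pip's compatible-release operator: a "~=" pin already satisfies any newer patch release at install time, so Dependabot PRs for patch bumps would be pure noise (and, as the config comment notes, the ignore rule does not suppress security updates). A minimal sketch of that semantics, using the packaging library with illustrative version numbers:

# Sketch of compatible-release ("~=") matching; version numbers are illustrative.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=3.2.0")  # equivalent to ">=3.2.0, ==3.2.*"

print("3.2.9" in spec)  # True: patch releases already match, no PR needed
print("3.3.0" in spec)  # False: a minor bump still gets a Dependabot PR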

README.md

Lines changed: 3 additions & 2 deletions

@@ -1,6 +1,6 @@
 # GreedyBear
 
-The project goal is to extract data of the attacks detected by a TPOT or a cluster of them and to generate some feeds that can be used to prevent and detect attacks.
+The project goal is to extract data of the attacks detected by a [TPOT](https://github.com/telekom-security/tpotce) or a cluster of them and to generate some feeds that can be used to prevent and detect attacks.
 
 ## Available Feeds
 The feeds are reachable through the following URL:
@@ -10,7 +10,8 @@ https://<greedybear_site>/api/feeds/<feed_type>/<attack_type>/<age>.<format>
 
 The available `feed_type` are:
 
-* `log4j`: attacks detected from the Log4pot.
+* `log4j`: attacks detected from the [Log4pot](https://github.com/thomaspatzke/Log4Pot).
+* `cowrie`: attacks detected from the [Cowrie Honeypot](https://github.com/cowrie/cowrie)
 
 The available `attack_type` are:
 
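A consumer would substitute concrete values into the URL template above. A hypothetical fetch in Python follows; the hostname is a placeholder, and "scanner", "recent", and "txt" are assumed to be valid attack_type, age, and format values:

# Hypothetical feed fetch; the hostname is a placeholder and the path
# segments are assumed valid values for the URL template in the README.
import requests

url = "https://greedybear.example.com/api/feeds/cowrie/scanner/recent.txt"
response = requests.get(url, timeout=30)
response.raise_for_status()

iocs = response.text.splitlines()  # assuming the txt format lists one IOC per line
print(f"fetched {len(iocs)} IOCs from the cowrie feed")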

api/views.py

Lines changed: 4 additions & 4 deletions

@@ -43,7 +43,7 @@ def feeds(request, feed_type, attack_type, age, format_):
         f" Age: {age}, format: {format_}"
     )
 
-    feed_choices = "log4j"
+    feed_choices = ["log4j", "cowrie"]
     if feed_type not in feed_choices:
         return _formatted_bad_request(format_)
 
@@ -61,7 +61,7 @@ def feeds(request, feed_type, attack_type, age, format_):
 
     query_dict = {}
 
-    if feed_type == "log4j":
+    if feed_type:
         query_dict[feed_type] = True
     else:
         logger.error("this is impossible. check the code")
@@ -74,7 +74,7 @@ def feeds(request, feed_type, attack_type, age, format_):
         # everything in the last 3 days
         three_days_ago = datetime.utcnow() - timedelta(days=3)
         query_dict["last_seen__gte"] = three_days_ago
-        iocs = IOC.objects.filter(**query_dict).order_by("-last_seen")[:1000]
+        iocs = IOC.objects.filter(**query_dict).order_by("-last_seen")[:5000]
     elif age == "persistent":
         # scanners detected in the last 14 days
         fourteen_days_ago = datetime.utcnow() - timedelta(days=14)
@@ -83,7 +83,7 @@ def feeds(request, feed_type, attack_type, age, format_):
         number_of_days_seen = 10
         query_dict["number_of_days_seen__gte"] = number_of_days_seen
         # order by the number of times seen
-        iocs = IOC.objects.filter(**query_dict).order_by("-times_seen")[:100]
+        iocs = IOC.objects.filter(**query_dict).order_by("-times_seen")[:1000]
     else:
         logger.error("this is impossible. check the code")
         return HttpResponseServerError()
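The switch from feed_type == "log4j" to a plain truthiness check is safe because feed_type was already validated against feed_choices a few lines earlier, so the remaining string can be used directly as the name of the boolean model field to filter on. A minimal sketch of the dict-expansion pattern the view relies on, with illustrative values:

# Sketch of the filter-building pattern used in the view; values are illustrative.
from datetime import datetime, timedelta

from greedybear.models import IOC

feed_type = "cowrie"            # already validated against feed_choices
query_dict = {feed_type: True}  # the feed name doubles as a boolean model field

three_days_ago = datetime.utcnow() - timedelta(days=3)
query_dict["last_seen__gte"] = three_days_ago

# ** expands the dict into keyword arguments, i.e.
# IOC.objects.filter(cowrie=True, last_seen__gte=three_days_ago)
iocs = IOC.objects.filter(**query_dict).order_by("-last_seen")[:5000]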

greedybear/celery.py

Lines changed: 10 additions & 0 deletions

@@ -4,6 +4,7 @@
 
 from celery import Celery
 from celery.schedules import crontab
+from celery.signals import setup_logging
 from django.conf import settings
 from kombu import Exchange, Queue
 
@@ -43,6 +44,15 @@
 )
 
 
+@setup_logging.connect
+def setup_loggers(*args, **kwargs):
+    from logging.config import dictConfig
+
+    from django.conf import settings
+
+    dictConfig(settings.LOGGING)
+
+
 app.conf.beat_schedule = {
     # every 10 minutes
     "extract_attacks": {

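Connecting to the setup_logging signal stops Celery from installing its own logging configuration, so workers and beat log through Django's LOGGING dict instead. The beat_schedule entry is truncated in the hunk above; a hedged sketch of what a complete entry typically looks like, where the dotted task path and the queue name are assumptions, not part of this commit:

# Hedged sketch of a full beat_schedule entry; only "extract_attacks" and the
# 10-minute cadence appear in the diff, the rest is assumed for illustration.
from celery import Celery
from celery.schedules import crontab

app = Celery("greedybear")  # stands in for the app defined earlier in celery.py

app.conf.beat_schedule = {
    # every 10 minutes
    "extract_attacks": {
        "task": "greedybear.tasks.extract_attacks",  # assumed dotted task path
        "schedule": crontab(minute="*/10"),
        "options": {"queue": "default"},  # assumed queue name
    },
}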
greedybear/cronjobs/attacks.py

Lines changed: 18 additions & 184 deletions

@@ -1,23 +1,19 @@
-import base64
-import re
+from abc import ABCMeta
 from datetime import datetime
 from ipaddress import IPv4Address
-from urllib.parse import urlparse
 
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
-from greedybear.cronjobs.base import ExtractDataFromElastic, Honeypot
+from greedybear.cronjobs.base import ExtractDataFromElastic
 from greedybear.cronjobs.sensors import ExtractSensors
 from greedybear.models import IOC, Sensors
-from greedybear.regex import REGEX_CVE_BASE64COMMAND, REGEX_CVE_LOG4J, REGEX_URL
 
 
-class ExtractAttacks(ExtractDataFromElastic):
+class ExtractAttacks(ExtractDataFromElastic, metaclass=ABCMeta):
     class IOCWhitelist(Exception):
         pass
 
     def __init__(self):
         super().__init__()
-        self.honeypot = Honeypot("Log4pot")
         self.first_time_run = False
 
     @property
@@ -28,147 +24,7 @@ def minutes_back_to_lookup(self):
         minutes = 11
         return minutes
 
-    def _log4pot_lookup(self):
-        search = self._base_search(self.honeypot)
-        # we want to get only probes that tried to exploit the specific log4j CVE
-        search = search.filter("term", reason="exploit")
-        search = search.source(["deobfuscated_payload", "correlation_id"])
-        hits = search[:10000].execute()
-
-        url = None
-        hostname = None
-        hidden_url = None
-        hidden_hostname = None
-        added_scanners = 0
-        added_payloads = 0
-        added_hidden_payloads = 0
-
-        for hit in hits:
-            scanner_ip = self._get_scanner_ip(hit.correlation_id)
-
-            match = re.search(REGEX_CVE_LOG4J, hit.deobfuscated_payload)
-            if match:
-                # we are losing the protocol but that's ok for now
-                url = match.group()
-                url_adjusted = "tcp:" + url
-                # removing double slash
-                url = url[2:]
-                self.log.info(f"found URL {url} in payload for CVE-2021-44228")
-                # protocol required or extraction won't work
-                hostname = urlparse(url_adjusted).hostname
-                self.log.info(f"extracted hostname {hostname} from {url}")
-
-                # it is possible to extract another payload from base64 encoded string.
-                # this is a behavior related to the attack that leverages LDAP
-                match_command = re.search(REGEX_CVE_BASE64COMMAND, hit.deobfuscated_payload)
-                if match_command:
-                    # we are losing the protocol but that's ok for now
-                    base64_encoded = match_command.group(1)
-                    self.log.info(
-                        f"found base64 encoded command {base64_encoded}"
-                        f" in payload from base64 code for CVE-2021-44228"
-                    )
-                    try:
-                        decoded_str = base64.b64decode(base64_encoded).decode()
-                        self.log.info(
-                            f"decoded base64 command to {decoded_str}"
-                            f" from payload from base64 code for CVE-2021-44228"
-                        )
-                    except Exception as e:
-                        self.log.warning(e, stack_info=True)
-                    else:
-                        match_url = re.search(REGEX_URL, decoded_str)
-                        if match_url:
-                            hidden_url = match_url.group()
-                            if "://" not in hidden_url:
-                                hidden_url = "tcp://" + hidden_url
-                            self.log.info(
-                                f"found hidden URL {hidden_url}"
-                                f" in payload for CVE-2021-44228"
-                            )
-
-                            hidden_hostname = urlparse(hidden_url).hostname
-                            self.log.info(
-                                f"extracted hostname {hidden_hostname} from {hidden_url}"
-                            )
-
-            # add scanner
-            if scanner_ip:
-                self._add_ioc(scanner_ip, SCANNER)
-                added_scanners += 1
-
-            # add first URL
-            if hostname:
-                related_urls = [url] if url else None
-                self._add_ioc(hostname, PAYLOAD_REQUEST, related_urls=related_urls)
-                added_payloads += 1
-
-            # add hidden URL
-            if hidden_hostname:
-                related_urls = [hidden_url] if hidden_url else None
-                self._add_ioc(
-                    hidden_hostname, PAYLOAD_REQUEST, related_urls=related_urls
-                )
-                added_hidden_payloads += 1
-
-            # once all have been added, we can add the foreign keys
-            self._add_fks(scanner_ip, hostname, hidden_hostname)
-
-        self.log.info(
-            f"added {added_scanners} scanners, {added_payloads} payloads"
-            f" and {added_hidden_payloads} hidden payloads"
-        )
-
-    def _add_fks(self, scanner_ip, hostname, hidden_hostname):
-        self.log.info(
-            f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}, {hidden_hostname}"
-        )
-        scanner_ip_instance = IOC.objects.filter(name=scanner_ip).first()
-        hostname_instance = IOC.objects.filter(name=hostname).first()
-        hidden_hostname_instance = IOC.objects.filter(name=hidden_hostname).first()
-
-        if scanner_ip_instance:
-            if (
-                hostname_instance
-                and hostname_instance not in scanner_ip_instance.related_ioc.all()
-            ):
-                scanner_ip_instance.related_ioc.add(hostname_instance)
-            if (
-                hidden_hostname_instance
-                and hidden_hostname_instance
-                not in scanner_ip_instance.related_ioc.all()
-            ):
-                scanner_ip_instance.related_ioc.add(hidden_hostname_instance)
-            scanner_ip_instance.save()
-
-        if hostname_instance:
-            if (
-                scanner_ip_instance
-                and scanner_ip_instance not in hostname_instance.related_ioc.all()
-            ):
-                hostname_instance.related_ioc.add(scanner_ip_instance)
-            if (
-                hidden_hostname_instance
-                and hidden_hostname_instance not in hostname_instance.related_ioc.all()
-            ):
-                hostname_instance.related_ioc.add(hidden_hostname_instance)
-            hostname_instance.save()
-
-        if hidden_hostname_instance:
-            if (
-                hostname_instance
-                and hostname_instance not in hidden_hostname_instance.related_ioc.all()
-            ):
-                hidden_hostname_instance.related_ioc.add(hostname_instance)
-            if (
-                scanner_ip_instance
-                and scanner_ip_instance
-                not in hidden_hostname_instance.related_ioc.all()
-            ):
-                hidden_hostname_instance.related_ioc.add(scanner_ip_instance)
-            hidden_hostname_instance.save()
-
-    def _add_ioc(self, ioc, attack_type, related_urls=None):
+    def _add_ioc(self, ioc, attack_type, related_urls=None, log4j=False, cowrie=False):
         self.log.info(
             f"saving ioc {ioc} for attack_type {attack_type} and related_urls {related_urls}"
         )
@@ -202,7 +58,11 @@ def _add_ioc(self, ioc, attack_type, related_urls=None):
         if attack_type == PAYLOAD_REQUEST:
             ioc_instance.payload_request = True
 
-        ioc_instance.log4j = True
+        if log4j:
+            ioc_instance.log4j = True
+
+        if cowrie:
+            ioc_instance.cowrie = True
 
         if ioc_instance:
             ioc_instance.save()
@@ -227,42 +87,16 @@ def _get_ioc_type(self, ioc):
             ioc_type = "ip"
         return ioc_type
 
-    def _get_scanner_ip(self, correlation_id):
-        self.log.info(f"extracting scanner IP from correlation_id {correlation_id}")
-        scanner_ip = None
-        search = self._base_search(self.honeypot)
-        search = search.filter(
-            "term", **{"correlation_id.keyword": str(correlation_id)}
-        )
-        search = search.filter("term", reason="request")
-        search = search.source(["src_ip"])
-        # only one should be available
-        hits = search[:10].execute()
-        for hit in hits:
-            scanner_ip = hit.src_ip
-            break
-
-        if scanner_ip:
-            self.log.info(
-                f"extracted scanner IP {scanner_ip} from correlation_id {correlation_id}"
-            )
-        else:
-            self.log.warning(
-                f"scanner IP was not extracted from correlation_id {correlation_id}"
-            )
-
-        return scanner_ip
-
-    def _check_first_time_run(self):
+    def _check_first_time_run(self, honeypot_flag):
         all_ioc = IOC.objects.all()
         if not all_ioc:
-            # first time we execute this project,
-            # so we increment the time range to get the data from the last 3 days
-            self.first_time_run = True
             # plus, we extract the sensors addresses so we can whitelist them
             ExtractSensors().execute()
-
-    def run(self):
-        self._healthcheck()
-        self._check_first_time_run()
-        self._log4pot_lookup()
+            self.first_time_run = True
+        else:
+            # if this is not the overall first time, it could be this honeypot's first time
+            honeypot_ioc = IOC.objects.filter(**{f"{honeypot_flag}": True})
+            if not honeypot_ioc:
+                # first time we extract data for this honeypot,
+                # so we increment the time range to get the data from the last 3 days
+                self.first_time_run = True
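With the Log4pot-specific lookup gone, ExtractAttacks is now an abstract base class: each honeypot gets its own extraction subclass that passes its flag to _add_ioc and _check_first_time_run. A hypothetical subclass sketch follows; the class name, the lookup method, and the IP address are illustrative, since the real per-honeypot modules are not part of this diff:

# Hypothetical subclass showing how the refactored base is meant to be consumed;
# ExtractCowrie, _cowrie_lookup and the IP address are illustrative only.
from greedybear.consts import SCANNER
from greedybear.cronjobs.attacks import ExtractAttacks


class ExtractCowrie(ExtractAttacks):
    def _cowrie_lookup(self):
        # query Elasticsearch for Cowrie hits (details omitted), then store
        # each attacker IP with the honeypot-specific flag:
        self._add_ioc("203.0.113.7", SCANNER, cowrie=True)

    def run(self):
        self._healthcheck()
        self._check_first_time_run("cowrie")
        self._cowrie_lookup()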

greedybear/cronjobs/base.py

Lines changed: 0 additions & 7 deletions

@@ -1,17 +1,10 @@
 import logging
 from abc import ABCMeta, abstractmethod
-from dataclasses import dataclass
 
 from django.conf import settings
 from elasticsearch_dsl import Search
 
 
-@dataclass
-class Honeypot:
-    name: str
-    description: str = ""
-
-
 class ExtractDataFromElastic(metaclass=ABCMeta):
     class ElasticServerDownException(Exception):
         pass
