Skip to content

Commit 880c351

Browse files
committed
chg: [update + show decoded items] add background update
1 parent 6b9ba9d commit 880c351

File tree

11 files changed

+288
-8
lines changed

11 files changed

+288
-8
lines changed

bin/lib/Domain.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import Correlation
1818
from Cryptocurrency import cryptocurrency
1919
from Pgp import pgp
20+
import Decoded
2021
import Item
2122
import Tag
2223

@@ -197,6 +198,14 @@ def get_domain_pgp(domain, currencies_type=None, get_nb=False):
197198
'''
198199
return pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb)
199200

201+
def get_domain_decoded(domain):
    '''
    Return all decoded items of a given domain.

    :param domain: crawled domain

    :return: the result of Decoded.get_domain_decoded_item for this domain
    '''
    decoded_items = Decoded.get_domain_decoded_item(domain)
    return decoded_items
208+
200209
def get_domain_all_correlation(domain, correlation_type=None, get_nb=False):
201210
'''
202211
Retun all correlation of a given domain.
@@ -214,6 +223,9 @@ def get_domain_all_correlation(domain, correlation_type=None, get_nb=False):
214223
res = get_domain_pgp(domain, get_nb=get_nb)
215224
if res:
216225
domain_correl['pgp'] = res
226+
res = get_domain_decoded(domain)
227+
if res:
228+
domain_correl['decoded'] = res
217229
return domain_correl
218230

219231
# TODO: handle port
@@ -271,6 +283,12 @@ def __init__(self, domain, port=None):
271283
if self.is_domain_up():
272284
self.current_port = sanathyse_port(port, self.domain, self.type)
273285

286+
def get_domain_name(self):
    '''
    Return the value of the ``domain`` attribute of this object.
    '''
    domain_name = self.domain
    return domain_name
288+
289+
def get_domain_type(self):
    '''
    Return the value of the ``type`` attribute of this object.
    '''
    domain_type = self.type
    return domain_type
291+
274292
def get_current_port(self):
275293
return self.current_port
276294

@@ -361,10 +379,16 @@ def get_domain_correlation(self):
361379
'''
362380
return get_domain_all_correlation(self.domain, get_nb=True)
363381

364-
def get_domain_history_with_status(self):
382+
def get_domain_history(self):
    '''
    Return the full history of this domain.

    Delegates to the module-level get_domain_history with this object's
    domain and type. The port is hard-coded to 80.
    '''
    history = get_domain_history(self.domain, self.type, 80)
    return history
387+
388+
def get_domain_history_with_status(self):
    '''
    Return the full history (with status) of this domain.

    Delegates to the module-level get_domain_history_with_status with this
    object's domain and type. The port is hard-coded to 80.
    '''
    history = get_domain_history_with_status(self.domain, self.type, 80)
    return history
369393

370394
def get_domain_items_crawled(self, port=None, epoch=None, items_link=False, item_screenshot=False, item_tag=False):

bin/packages/Correlation.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,50 @@ def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=Fals
147147
dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
148148
return dict_correlation
149149

150+
def _get_item_correlation_obj(self, item_id, correlation_type):
    '''
    Return the correlations of one type stored for a given item id.

    :param item_id: item id
    :type item_id: str
    :param correlation_type: correlation type
    :type correlation_type: str

    :return: members of the 'item_<name>_<type>:<item_id>' set,
             or an empty list when the set has no member
    :rtype: list
    '''
    key = 'item_{}_{}:{}'.format(self.correlation_name, correlation_type, item_id)
    members = r_serv_metadata.smembers(key)
    return list(members) if members else []
167+
168+
def get_item_correlation_dict(self, item_id, correlation_type=None, get_nb=False):
    '''
    Return all correlations of a given item id.

    :param item_id: item id
    :param correlation_type: list of correlation types
    :type correlation_type: list, optional
    :param get_nb: when True, add an 'nb' entry holding the total number
                   of correlation values found
    :type get_nb: bool, optional

    :return: a dictionary of all the requested correlations; types without
             any value are left out
    :rtype: dict
    '''
    requested_types = self.sanythise_correlation_types(correlation_type)
    correlations = {}
    for name in requested_types:
        values = self._get_item_correlation_obj(item_id, name)
        if not values:
            continue
        correlations[name] = values
        if get_nb:
            correlations['nb'] = correlations.get('nb', 0) + len(correlations[name])
    return correlations
188+
189+
190+
191+
def save_domain_correlation(self, domain, correlation_type, correlation_value):
    '''
    Record a correlation value for a domain, in both directions.

    Adds the value to the 'domain_<name>_<type>:<domain>' set and the domain
    to the 'set_domain_<name>_<type>:<value>' set.

    :param domain: crawled domain
    :param correlation_type: correlation type
    :param correlation_value: correlation value to link with the domain
    '''
    name = self.correlation_name
    by_domain_key = 'domain_{}_{}:{}'.format(name, correlation_type, domain)
    by_value_key = 'set_domain_{}_{}:{}'.format(name, correlation_type, correlation_value)
    r_serv_metadata.sadd(by_domain_key, correlation_value)
    r_serv_metadata.sadd(by_value_key, domain)
150194

151195
######## API EXPOSED ########
152196

bin/packages/Cryptocurrency.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def get_cryptocurrency(request_dict, cryptocurrency_type):
6161

6262
return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
6363

64+
# # TODO: refactor/move me in Correlation
6465
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
6566
# create basic medata
6667
if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
@@ -89,7 +90,7 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
8990
r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
9091

9192
# domain
92-
if Item.is_crawled(item_path):
93+
if Item.is_crawled(item_path): # # TODO: use save_domain_correlation
9394
domain = Item.get_item_domain(item_path)
9495
r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address)
9596
r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain)

bin/packages/Item.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,13 @@
88

99
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
1010
import ConfigLoader
11+
import Decoded
1112

1213
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
1314
import Date
1415
import Tag
16+
from Cryptocurrency import cryptocurrency
17+
from Pgp import pgp
1518

1619
config_loader = ConfigLoader.ConfigLoader()
1720
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
@@ -126,26 +129,85 @@ def get_item(request_dict):
126129
###
127130
### correlation
128131
###
132+
def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False):
    '''
    Return all cryptocurrencies of a given item.

    :param item_id: item id
    :param currencies_type: list of cryptocurrencies type
    :type currencies_type: list, optional
    :param get_nb: forwarded to cryptocurrency.get_item_correlation_dict
    :type get_nb: bool, optional
    '''
    correlations = cryptocurrency.get_item_correlation_dict(
        item_id,
        correlation_type=currencies_type,
        get_nb=get_nb,
    )
    return correlations
141+
142+
def get_item_pgp(item_id, currencies_type=None, get_nb=False):
    '''
    Return all pgp correlations of a given item.

    :param item_id: item id
    :param currencies_type: list of correlation types, passed to
                            pgp.get_item_correlation_dict as correlation_type
    :type currencies_type: list, optional
    :param get_nb: forwarded to pgp.get_item_correlation_dict
    :type get_nb: bool, optional
    '''
    correlations = pgp.get_item_correlation_dict(
        item_id,
        correlation_type=currencies_type,
        get_nb=get_nb,
    )
    return correlations
151+
152+
def get_item_decoded(item_id):
    '''
    Return all decoded of a given item.

    :param item_id: item id

    :return: the result of Decoded.get_item_decoded for this item
    '''
    decoded = Decoded.get_item_decoded(item_id)
    return decoded
161+
162+
def get_item_all_correlation(item_id, correlation_type=None, get_nb=False):
    '''
    Return all correlations of a given item id.

    :param item_id: item id
    :type item_id: str
    :param correlation_type: accepted but not used by this function
    :param get_nb: forwarded to the cryptocurrency and pgp lookups
    :type get_nb: bool, optional

    :return: a dict of all correlations for an item id, with the keys
             'cryptocurrency', 'pgp' and 'decoded'; empty results are left out
    :rtype: dict
    '''
    # The lookups run in this order, and the result keeps the same key order.
    found = {
        'cryptocurrency': get_item_cryptocurrency(item_id, get_nb=get_nb),
        'pgp': get_item_pgp(item_id, get_nb=get_nb),
        'decoded': get_item_decoded(item_id),
    }
    return {name: res for name, res in found.items() if res}
183+
184+
129185

186+
## TODO: REFACTOR
130187
def _get_item_correlation(correlation_name, correlation_type, item_id):
    '''
    Return the members of the 'item_<name>_<type>:<item_id>' set as a list.

    :param correlation_name: correlation name used in the key
    :param correlation_type: correlation type used in the key
    :param item_id: item id

    :return: list of members, empty list when the set has no member
    :rtype: list
    '''
    key = 'item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)
    members = r_serv_metadata.smembers(key)
    return list(members) if members else []
136193

194+
## TODO: REFACTOR
137195
def get_item_bitcoin(item_id):
    '''
    Return the 'cryptocurrency' / 'bitcoin' correlations of a given item id.
    '''
    return _get_item_correlation(
        correlation_name='cryptocurrency',
        correlation_type='bitcoin',
        item_id=item_id,
    )
139197

198+
## TODO: REFACTOR
140199
def get_item_pgp_key(item_id):
    '''
    Return the 'pgpdump' / 'key' correlations of a given item id.
    '''
    return _get_item_correlation(
        correlation_name='pgpdump',
        correlation_type='key',
        item_id=item_id,
    )
142201

202+
## TODO: REFACTOR
143203
def get_item_pgp_name(item_id):
    '''
    Return the 'pgpdump' / 'name' correlations of a given item id.
    '''
    return _get_item_correlation(
        correlation_name='pgpdump',
        correlation_type='name',
        item_id=item_id,
    )
145205

206+
## TODO: REFACTOR
146207
def get_item_pgp_mail(item_id):
    '''
    Return the 'pgpdump' / 'mail' correlations of a given item id.
    '''
    return _get_item_correlation(
        correlation_name='pgpdump',
        correlation_type='mail',
        item_id=item_id,
    )
148209

210+
## TODO: REFACTOR
149211
def get_item_pgp_correlation(item_id):
    '''
    Placeholder: not implemented yet, always returns None.
    '''
    return None
151213

bin/packages/Tag.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,11 @@ def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"):
172172
res_dict['type'] = item_type
173173
return (res_dict, 200)
174174

175+
def add_domain_tag(tag, domain, item_date):
    '''
    Attach a tag to a domain.

    Adds the tag to the 'tag:<domain>' set (r_serv_metadata) and the domain
    to the 'domain:<tag>:<item_date>' set (r_serv_tags).

    :param tag: tag to add
    :param domain: domain receiving the tag
    :param item_date: date used in the 'domain:<tag>:<item_date>' key
    '''
    domain_tags_key = 'tag:{}'.format(domain)
    tagged_domains_key = 'domain:{}:{}'.format(tag, item_date)
    r_serv_metadata.sadd(domain_tags_key, tag)
    r_serv_tags.sadd(tagged_domains_key, domain)
175178

176-
def add_item_tag(tag, item_path, item_type="paste"):
179+
def add_item_tag(tag, item_path, item_type="paste", tag_date=None):
177180

178181
if item_type=="paste":
179182
item_date = int(Item.get_item_date(item_path))
@@ -189,8 +192,7 @@ def add_item_tag(tag, item_path, item_type="paste"):
189192
# domain item
190193
else:
191194
item_date = int(Domain.get_domain_last_check(item_path, r_format="int"))
192-
r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
193-
r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), item_path)
195+
add_domain_tag(tag, item_path, item_date)
194196

195197
r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
196198

bin/update-background.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,22 @@
5454
r_serv.delete('ail:current_background_script_stat')
5555
r_serv.delete('ail:current_background_update')
5656

57-
if r_serv.scard('ail:update_v2.4') != 1:
58-
pass
57+
if r_serv.get('ail:current_background_update') == 'v2.4':
    # v2.4 background update: run update/v2.4/Update_domain.py and record
    # its outcome in the 'ail:*' status keys.
    r_serv.delete('ail:update_error')
    r_serv.set('ail:update_in_progress', 'v2.4')
    r_serv.set('ail:current_background_update', 'v2.4')
    r_serv.set('ail:current_background_script', 'domain update')

    update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v2.4', 'Update_domain.py')
    process = subprocess.run(['python', update_file])

    # The stat key is missing when the script stopped before reporting any
    # progress. Treat that as a failed update instead of crashing on int(None).
    script_stat = r_serv.get('ail:current_background_script_stat')
    if script_stat is None or int(script_stat) != 100:
        r_serv.set('ail:update_error', 'Update v2.4 Failed, please relaunch the bin/update-background.py script')
    else:
        # Update completed: clear every progress/status key.
        r_serv.delete('ail:update_in_progress')
        r_serv.delete('ail:current_background_script')
        r_serv.delete('ail:current_background_script_stat')
        r_serv.delete('ail:current_background_update')
        r_serv.delete('update:nb_elem_to_convert')
        r_serv.delete('update:nb_elem_converted')

update/v2.4/Update.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,17 @@
1919

2020
config_loader = ConfigLoader.ConfigLoader()
2121
r_serv = config_loader.get_redis_conn("ARDB_DB")
22+
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
2223
config_loader = None
2324

2425
#Set current update_in_progress
2526
r_serv.set('ail:update_in_progress', new_version)
2627
r_serv.set('ail:current_background_update', new_version)
2728

29+
r_serv_onion.sunionstore('domain_update_v2.4', 'full_onion_up', 'full_regular_up')
30+
r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v2.4'))
31+
r_serv.set('update:nb_elem_converted',0)
32+
2833
#Set current ail version
2934
r_serv.set('ail:version', new_version)
3035

update/v2.4/Update_domain.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/usr/bin/env python3
2+
# -*-coding:UTF-8 -*
3+
4+
import os
5+
import re
6+
import sys
7+
import time
8+
import redis
9+
import datetime
10+
11+
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
12+
import Item
13+
import Tag
14+
from Cryptocurrency import cryptocurrency
15+
from Pgp import pgp
16+
17+
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
18+
import ConfigLoader
19+
import Decoded
20+
import Domain
21+
22+
def update_update_stats():
    '''
    Publish the progress of the domain update.

    Reads the number of converted elements from 'update:nb_elem_converted',
    prints the progress and stores the percentage in
    'ail:current_background_script_stat'.
    '''
    # A missing counter key is read as 0 instead of raising on int(None).
    nb_updated = int(r_serv_db.get('update:nb_elem_converted') or 0)
    if nb_elem_to_update > 0:
        progress = int((nb_updated * 100) / nb_elem_to_update)
    else:
        # Nothing was scheduled: avoid a ZeroDivisionError and report completion.
        progress = 100
    print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress))
    r_serv_db.set('ail:current_background_script_stat', progress)
27+
28+
def update_domain_by_item(domain_obj, item_id):
    '''
    Propagate the tags and correlations of a crawled item to its domain.

    :param domain_obj: object exposing get_domain_name()
    :param item_id: id of an item crawled on this domain
    '''
    domain_name = domain_obj.get_domain_name()

    # update domain tags (the two submission tags are not copied to the domain)
    skipped_tags = ('infoleak:submission="crawler"', 'infoleak:submission="manual"')
    for tag in Tag.get_item_tags(item_id):
        if tag not in skipped_tags:
            Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id))

    # update domain correlation
    item_correlation = Item.get_item_all_correlation(item_id)

    # correlations stored as {correlation_type: values}
    typed_correlations = {'pgp': pgp, 'cryptocurrency': cryptocurrency}

    for correlation_name, correlation in item_correlation.items():
        if correlation_name in typed_correlations:
            handler = typed_correlations[correlation_name]
            for correlation_type in correlation:
                for correl_value in correlation[correlation_type]:
                    handler.save_domain_correlation(domain_name, correlation_type, correl_value)
        elif correlation_name == 'decoded':
            # Save each decoded item once. The previous version nested this
            # loop inside the per-type loop, saving every item once per element.
            for decoded_item in correlation:
                Decoded.save_domain_decoded(domain_name, decoded_item)
49+
50+
if __name__ == '__main__':
    # Background update v2.4: for every domain queued in 'domain_update_v2.4',
    # copy the tags and correlations of its crawled items onto the domain.

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    nb_elem_to_update = int(r_serv_db.get('update:nb_elem_to_convert'))

    while True:
        domain_name = r_serv_onion.spop('domain_update_v2.4')
        if domain_name is None:
            # Queue drained (or empty from the start): report completion so
            # the launcher sees a stat of 100 even when no domain was processed.
            r_serv_db.set('ail:current_background_script_stat', 100)
            sys.exit(0)

        print(domain_name)
        domain = Domain.Domain(domain_name)
        for domain_history in domain.get_domain_history():
            # domain_history[1] is passed as the epoch of the crawl
            domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
            if "items" in domain_item:
                for item_dict in domain_item['items']:
                    update_domain_by_item(domain, item_dict['id'])

        r_serv_db.incr('update:nb_elem_converted')
        update_update_stats()

var/www/blueprints/crawler_splash.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,7 @@ def showDomain():
6969
dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port
7070
dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items'])
7171

72+
print(dict_domain)
73+
7274
return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label,
7375
modal_add_tags=get_modal_add_tags(dict_domain['domain'], tag_type="domain"))

var/www/modules/Flask_config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@
8686
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
8787

8888
dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_message': 'An Update is running on the background. Some informations like Tags, screenshot can be',
89-
'update_warning_message_notice_me': 'missing from the UI.'}
89+
'update_warning_message_notice_me': 'missing from the UI.'},
90+
'v2.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be',
91+
'update_warning_message_notice_me': 'missing from the UI.'}
9092
}
9193

9294
UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')

0 commit comments

Comments
 (0)