Skip to content

Commit 688166d

Browse files
committed
Support meta-data for suspicion reasons
* Represent suspicion reasons with a new Reason class that, in addition to the string identifier, supports a label and source/origin of the reason * Continue to store string identifiers in the Changeset.suspicion_reasons field for backward compatibility * Introduce a new Changeset.detailed_reasons field that has the full Reason objects * Use the Reason.source to track when suspicion reasons originated as warnings generated by an editor, such as iD * When printing suspicion reasons in the CLI, include the source of the reason if it was not from osmcha * Update unit tests
1 parent cd39be8 commit 688166d

File tree

3 files changed

+44
-20
lines changed

3 files changed

+44
-20
lines changed

osmcha/changeset.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@ class InvalidChangesetError(Exception):
4242
pass
4343

4444

45+
class Reason:
46+
"""Details and meta-data for a suspicion reason."""
47+
def __init__(self, identifier, label=None, source="osmcha"):
48+
self.identifier = identifier
49+
self.label = label if label else identifier
50+
self.source = source
51+
52+
4553
def get_user_details(user_id):
4654
"""Get information about number of changesets, blocks and mapping days of a
4755
user, using both the OSM API and the Mapbox comments APIself.
@@ -56,7 +64,7 @@ def get_user_details(user_id):
5664
changesets = [i for i in xml_data if i.tag == 'changesets'][0]
5765
blocks = [i for i in xml_data if i.tag == 'blocks'][0]
5866
if int(changesets.get('count')) <= 5:
59-
reasons.append('New mapper')
67+
reasons.append(Reason('New mapper'))
6068
elif int(changesets.get('count')) <= 30:
6169
url = MAPBOX_USERS_API.format(
6270
user_id=requests.compat.quote(user_id)
@@ -67,9 +75,9 @@ def get_user_details(user_id):
6775
user_request.json().get('extra').get('mapping_days')
6876
)
6977
if mapping_days <= 5:
70-
reasons.append('New mapper')
78+
reasons.append(Reason('New mapper'))
7179
if int(blocks[0].get('count')) > 1:
72-
reasons.append('User has multiple blocks')
80+
reasons.append(Reason('User has multiple blocks'))
7381
except Exception as e:
7482
message = 'Could not verify user of the changeset: {}, {}'
7583
print(message.format(user_id, str(e)))
@@ -239,7 +247,6 @@ def filter(self):
239247
if get_bounds(ch).intersects(self.area)
240248
]
241249

242-
243250
class Analyse(object):
244251
"""Analyse a changeset and define if it is suspect."""
245252
def __init__(self, changeset, create_threshold=200, modify_threshold=200,
@@ -299,6 +306,7 @@ def set_fields(self, changeset):
299306
'%Y-%m-%dT%H:%M:%SZ'
300307
)
301308
self.suspicion_reasons = []
309+
self.detailed_reasons = {}
302310
self.is_suspect = False
303311
self.powerfull_editor = False
304312
self.warning_tags = [
@@ -307,7 +315,8 @@ def set_fields(self, changeset):
307315

308316
def label_suspicious(self, reason):
309317
"""Add suspicion reason and set the suspicious flag."""
310-
self.suspicion_reasons.append(reason)
318+
self.suspicion_reasons.append(reason.identifier)
319+
self.detailed_reasons[reason.identifier] = reason
311320
self.is_suspect = True
312321

313322
def full_analysis(self):
@@ -318,12 +327,12 @@ def full_analysis(self):
318327
self.verify_warning_tags()
319328

320329
if self.review_requested == 'yes':
321-
self.label_suspicious('Review requested')
330+
self.label_suspicious(Reason('Review requested'))
322331

323332
def verify_warning_tags(self):
324333
for tag in self.warning_tags:
325334
if tag in self.enabled_warnings.keys():
326-
self.label_suspicious(self.enabled_warnings.get(tag))
335+
self.label_suspicious(Reason(self.enabled_warnings.get(tag), None, self.editor))
327336

328337
def verify_user(self):
329338
"""Verify if the changeset was made by a inexperienced mapper (anyone
@@ -338,21 +347,21 @@ def verify_words(self):
338347
"""
339348
if self.comment:
340349
if find_words(self.comment, self.suspect_words, self.excluded_words):
341-
self.label_suspicious('suspect_word')
350+
self.label_suspicious(Reason('suspect_word'))
342351

343352
if self.source:
344353
for word in self.illegal_sources:
345354
if word in self.source.lower():
346355
if word == 'yandex' and 'yandex panorama' in self.source.lower():
347356
pass
348357
else:
349-
self.label_suspicious('suspect_word')
358+
self.label_suspicious(Reason('suspect_word'))
350359
break
351360

352361
if self.imagery_used:
353362
for word in self.illegal_sources:
354363
if word in self.imagery_used.lower():
355-
self.label_suspicious('suspect_word')
364+
self.label_suspicious(Reason('suspect_word'))
356365
break
357366

358367
self.suspicion_reasons = list(set(self.suspicion_reasons))
@@ -383,10 +392,10 @@ def verify_editor(self):
383392
'mapwith.ai'
384393
]
385394
if self.host.split('://')[-1].split('/')[0] not in trusted_hosts:
386-
self.label_suspicious('Unknown iD instance')
395+
self.label_suspicious(Reason('Unknown iD instance'))
387396
else:
388397
self.powerfull_editor = True
389-
self.label_suspicious('Software editor was not declared')
398+
self.label_suspicious(Reason('Software editor was not declared'))
390399

391400
def count(self):
392401
"""Count the number of elements created, modified and deleted by the
@@ -404,14 +413,14 @@ def count(self):
404413
if (self.create / len(actions) > self.percentage and
405414
self.create > self.create_threshold and
406415
(self.powerfull_editor or self.create > self.top_threshold)):
407-
self.label_suspicious('possible import')
416+
self.label_suspicious(Reason('possible import'))
408417
elif (self.modify / len(actions) > self.percentage and
409418
self.modify > self.modify_threshold):
410-
self.label_suspicious('mass modification')
419+
self.label_suspicious(Reason('mass modification'))
411420
elif ((self.delete / len(actions) > self.percentage and
412421
self.delete > self.delete_threshold) or
413422
self.delete > self.top_threshold):
414-
self.label_suspicious('mass deletion')
423+
self.label_suspicious(Reason('mass deletion'))
415424
except ZeroDivisionError:
416425
print('It seems this changeset was redacted')
417426

osmcha/scripts/cli.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,19 @@ def cli(id):
1414
'Created: %s. Modified: %s. Deleted: %s' % (ch.create, ch.modify, ch.delete)
1515
)
1616
if ch.is_suspect:
17+
reasonDescriptions = [formatted_reason(reason) for reason in ch.detailed_reasons.values()]
1718
click.echo('The changeset {} is suspect! Reasons: {}'.format(
1819
id,
19-
', '.join(ch.suspicion_reasons)
20+
', '.join(reasonDescriptions)
2021
))
2122
else:
2223
click.echo('The changeset %s is not suspect!' % id)
24+
25+
def formatted_reason(reason):
26+
"""
27+
Generate a formatted description of the reason using its label and appending
28+
the source of the reason if it was not from osmcha (e.g. if it originated
29+
from an editor warning)
30+
"""
31+
sourceLabel = f' ({reason.source})' if reason.source != 'osmcha' else ''
32+
return f'{reason.label}{sourceLabel}'

tests/test_mod.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from shapely.geometry import Polygon
66

77
from osmcha.changeset import ChangesetList
8+
from osmcha.changeset import Reason
89
from osmcha.changeset import Analyse
910
from osmcha.changeset import WORDS
1011
from osmcha.changeset import find_words
@@ -100,8 +101,9 @@ def test_analyse_label_suspicious():
100101
])
101102
}
102103
ch = Analyse(ch_dict)
103-
ch.label_suspicious('some reason')
104+
ch.label_suspicious(Reason('some reason'))
104105
assert 'some reason' in ch.suspicion_reasons
106+
assert ch.detailed_reasons['some reason'].identifier == 'some reason'
105107
assert ch.is_suspect
106108

107109

@@ -601,10 +603,11 @@ def test_get_dict():
601603
assert 'is_suspect' in ch.get_dict().keys()
602604
assert 'powerfull_editor' in ch.get_dict().keys()
603605
assert 'suspicion_reasons' in ch.get_dict().keys()
606+
assert 'detailed_reasons' in ch.get_dict().keys()
604607
assert 'create' in ch.get_dict().keys()
605608
assert 'modify' in ch.get_dict().keys()
606609
assert 'delete' in ch.get_dict().keys()
607-
assert len(ch.get_dict().keys()) == 15
610+
assert len(ch.get_dict().keys()) == 16
608611

609612
# An iD changeset with warnings:
610613
ch = Analyse(72783703)
@@ -621,10 +624,11 @@ def test_get_dict():
621624
assert 'is_suspect' in ch.get_dict().keys()
622625
assert 'powerfull_editor' in ch.get_dict().keys()
623626
assert 'suspicion_reasons' in ch.get_dict().keys()
627+
assert 'detailed_reasons' in ch.get_dict().keys()
624628
assert 'create' in ch.get_dict().keys()
625629
assert 'modify' in ch.get_dict().keys()
626630
assert 'delete' in ch.get_dict().keys()
627-
assert len(ch.get_dict().keys()) == 15
631+
assert len(ch.get_dict().keys()) == 16
628632

629633
# A JOSM changeset
630634
ch = Analyse(46315321)
@@ -641,10 +645,11 @@ def test_get_dict():
641645
assert 'is_suspect' in ch.get_dict().keys()
642646
assert 'powerfull_editor' in ch.get_dict().keys()
643647
assert 'suspicion_reasons' in ch.get_dict().keys()
648+
assert 'detailed_reasons' in ch.get_dict().keys()
644649
assert 'create' in ch.get_dict().keys()
645650
assert 'modify' in ch.get_dict().keys()
646651
assert 'delete' in ch.get_dict().keys()
647-
assert len(ch.get_dict().keys()) == 15
652+
assert len(ch.get_dict().keys()) == 16
648653

649654

650655
def test_changeset_without_tags():

0 commit comments

Comments
 (0)