Skip to content

Commit 190719e

Browse files
committed
search through all namespaces
The mail providers gmx.net and web.de send DMARC reports whose XML uses the DMARC namespace (https://datatracker.ietf.org/doc/draft-ietf-dmarc-aggregate-reporting/): <?xml version="1.0" encoding="UTF-8"?> <feedback xmlns="urn:ietf:params:xml:ns:dmarc-2.0"> <report_metadata> ... Such reports currently fail with a parsing error: Error while parsing files/gmx.net!example.org!1721865600!1721951999!0d5403a6-de18-473b-938e-2bc41d0d271e.xml: File "files/gmx.net!example.org!1721865600!1721951999!0d5403a6-de18-473b-938e-2bc41d0d271e.xml" has no metadata reporting. This commit uses the wildcard XPath syntax (`{*}tag`), supported since Python 3.8, to parse them correctly.
1 parent 7db680a commit 190719e

File tree

1 file changed

+14
-13
lines changed

1 file changed

+14
-13
lines changed

analysis.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def scan(path):
3333

3434
def parse(domain, filename):
3535
def get(node, name, default=None, expected=False):
36-
child = node.find(name)
36+
child = node.find('{*}' + name)
3737
if child is not None:
3838
if child.text is None:
3939
if expected:
@@ -52,14 +52,15 @@ def convert_timestamp(timestamp):
5252
return datetime.datetime.fromtimestamp(int(timestamp))
5353

5454
e = xml.etree.ElementTree.parse(filename).getroot()
55-
rm = e.find('report_metadata')
55+
rm = e.find('{*}report_metadata')
5656
if rm is None:
5757
raise Exception('File "{0}" has no metadata reporting'.format(filename))
5858
rm_org_name = get(rm, 'org_name', None)
59-
rm_dr = rm.find('date_range')
59+
rm_dr = rm.find('{*}date_range')
6060
rm_start = convert_timestamp(get(rm_dr, 'begin', expected=True))
6161
rm_end = convert_timestamp(get(rm_dr, 'end', expected=True))
62-
pp = e.find('policy_published')
62+
pp = e.find('{*}policy_published')
63+
print(pp, file=sys.stderr)
6364
if pp is None:
6465
raise Exception('File "{0}" has no published policy'.format(filename))
6566
pp_domain = get(pp, 'domain', expected=True)
@@ -69,30 +70,30 @@ def convert_timestamp(timestamp):
6970
pp_sp = get(pp, 'sp', 'none')
7071
pp_pct = int(get(pp, 'pct', '100'))
7172
data = []
72-
for i, r in enumerate(e.findall('record')):
73-
rr = r.find('row')
73+
for i, r in enumerate(e.findall('{*}record')):
74+
rr = r.find('{*}row')
7475
if rr is None:
7576
raise Exception('File "{0}" has no row data in record {1}'.format(filename, i + 1))
7677
rr_source_ip = get(rr, 'source_ip', expected=True)
7778
rr_count = int(get(rr, 'count', 0))
78-
rrpe = rr.find('policy_evaluated')
79+
rrpe = rr.find('{*}policy_evaluated')
7980
if rrpe is None:
8081
raise Exception('File "{0}" has no evaluated policy in record {1}'.format(filename, i + 1))
8182
rrpe_disposition = get(rrpe, 'disposition', expected=True)
8283
rrpe_dkim = get(rrpe, 'dkim', expected=True)
8384
rrpe_spf = get(rrpe, 'spf', expected=True)
84-
ri = r.find('identifiers')
85+
ri = r.find('{*}identifiers')
8586
if ri is None:
8687
raise Exception('File "{0}" has no identifier in record {1}'.format(filename, i + 1))
8788
ri_header_from = get(ri, 'header_from', expected=True)
88-
ra = r.find('auth_results')
89+
ra = r.find('{*}auth_results')
8990
if ra is None:
9091
raise Exception('File "{0}" has no authentication results in record {1}'.format(filename, i + 1))
9192
auth_results = {}
92-
rad = ra.find('dkim')
93+
rad = ra.find('{*}dkim')
9394
if rad is not None:
9495
auth_results['dkim'] = (get(rad, 'domain', None), get(rad, 'result', None))
95-
ras = ra.find('spf')
96+
ras = ra.find('{*}spf')
9697
if ras is not None:
9798
auth_results['spf'] = (get(ras, 'domain', None), get(ras, 'result', None))
9899
data.append((rr_source_ip, rr_count, {'disposition': rrpe_disposition, 'dkim': rrpe_dkim, 'spf': rrpe_spf}, ri_header_from, auth_results))
@@ -172,8 +173,8 @@ def format_result(result):
172173
(policy_evaluated['dkim'], 'green' if (policy_evaluated['dkim'] == 'pass') == is_own else 'red'),
173174
(policy_evaluated['spf'], 'green' if (policy_evaluated['spf'] == 'pass') == is_own else 'red'),
174175
header_from,
175-
format_result(auth_results.get('dkim', None)),
176-
format_result(auth_results.get('spf', None))])
176+
format_result(auth_results.get('{*}dkim', None)),
177+
format_result(auth_results.get('{*}spf', None))])
177178
field = None
178179
if field is not None:
179180
table.append([None, field])

0 commit comments

Comments
 (0)