Skip to content

Commit 9804cc2

Browse files
authored
Merge pull request #68 from NetherlandsForensicInstitute/bug/never_ending_loop
Bug/never ending loop
2 parents 0da2b72 + ed3e0da commit 9804cc2

File tree

4 files changed

+48
-13
lines changed

4 files changed

+48
-13
lines changed

bin/demeuk.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@
142142
check-replacement-character, check-empty-line
143143
"""
144144
from binascii import hexlify, unhexlify
145+
from collections import deque
145146
from glob import glob
146147
from html import unescape
147148
from inspect import cleandoc
@@ -171,7 +172,7 @@
171172
from unidecode import unidecode
172173

173174

174-
version = '4.5.0'
175+
version = '4.5.1'
175176

176177
# Search from start to finish for the string $HEX[], with block of a-f0-9 with even number
177178
# of hex chars. The first match group is repeated.
@@ -1016,8 +1017,16 @@ def clean_up(lines):
10161017
"""
10171018
results = []
10181019
log = []
1020+
processed_lines = set()
1021+
work_queue = deque(lines)
1022+
1023+
while work_queue:
1024+
line = work_queue.popleft()
1025+
1026+
if line in processed_lines:
1027+
continue
1028+
processed_lines.add(line)
10191029

1020-
for line in lines:
10211030
# Check if the limit is set; if so, subtract 1, and once 0 is reached, quit.
10221031
if type(config['limit']) is int:
10231032
if config['limit'] > 0:
@@ -1057,7 +1066,7 @@ def clean_up(lines):
10571066
if status:
10581067
# Line contains hex; this function returns a binary string, so add it back to
10591068
# our undecoded lines
1060-
lines.append(line_decoded)
1069+
work_queue.append(line_decoded)
10611070
if config['debug']:
10621071
log.append(f'Clean_hex; replaced $HEX[], added to queue and quiting; {line}{linesep}')
10631072
# Aborting future processing of this line.
@@ -1069,7 +1078,7 @@ def clean_up(lines):
10691078
if status:
10701079
# Line contains html string, because this can be binary data (linefeeds etc)
10711080
# convert back to binary string and add to queue again.
1072-
lines.append(line_decoded.encode())
1081+
work_queue.append(line_decoded.encode())
10731082
if config['debug']:
10741083
log.append(f'Clean_html; replaced html, added to queue and quiting; {line_decoded}{linesep}')
10751084
stop = True
@@ -1283,49 +1292,49 @@ def clean_up(lines):
12831292
for modified_line in modified_lines:
12841293
if config['debug']:
12851294
log.append(f'Add_split; new line because of split; {modified_line}{linesep}')
1286-
lines.append(modified_line.encode())
1295+
work_queue.append(modified_line.encode())
12871296

12881297
if config.get('add-lower'):
12891298
modified_line = add_lower(line_decoded)
12901299
if modified_line:
12911300
if config['debug']:
12921301
log.append(f'Add_lower; new line; {modified_line}{linesep}')
1293-
lines.append(modified_line.encode())
1302+
work_queue.append(modified_line.encode())
12941303

12951304
if config.get('add-first-upper'):
12961305
modified_line = add_first_upper(line_decoded)
12971306
if modified_line:
12981307
if config['debug']:
12991308
log.append(f'Add_first_upper; new line; {modified_line}{linesep}')
1300-
lines.append(modified_line.encode())
1309+
work_queue.append(modified_line.encode())
13011310

13021311
if config.get('add-title-case'):
13031312
modified_line = add_title_case(line_decoded)
13041313
if modified_line:
13051314
if config['debug']:
13061315
log.append(f'Add_title_case; new line; {modified_line}{linesep}')
1307-
lines.append(modified_line.encode())
1316+
work_queue.append(modified_line.encode())
13081317

13091318
if config.get('add-latin-ligatures'):
13101319
modified_line = add_latin_ligatures(line_decoded)
13111320
if modified_line:
13121321
if config['debug']:
13131322
log.append(f'Add_latin_ligatures; new line; {modified_line}{linesep}')
1314-
lines.append(modified_line.encode())
1323+
work_queue.append(modified_line.encode())
13151324

13161325
if config.get('add-umlaut'):
13171326
status, modified_line = clean_add_umlaut(line_decoded)
13181327
if status:
13191328
if config['debug']:
13201329
log.append(f'Add_umlaut; new line; {modified_line}{linesep}')
1321-
lines.append(modified_line.encode())
1330+
work_queue.append(modified_line.encode())
13221331

13231332
if config.get('add-without-punctuation'):
13241333
modified_line = add_without_punctuation(line_decoded, config.get('punctuation'))
13251334
if modified_line:
13261335
if config['debug']:
13271336
log.append(f'Add_without_punctuation; new line; {modified_line}{linesep}')
1328-
lines.append(modified_line.encode())
1337+
work_queue.append(modified_line.encode())
13291338

13301339
if config['debug']:
13311340
log.append(f'----End---- {line_decoded}{linesep}{linesep}')

tests/conftest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,7 @@
393393
file.write(f'_amsterdam {linesep}')
394394
file.write(f'ROTTERDAM_ {linesep}')
395395
file.write(f'Cookie Monster {linesep}')
396+
397+
with open('testdata/input54', 'w') as file:
398+
file.write(f'Golf Trip{linesep}')
399+
file.write(f'Sequences{linesep}')

tests/test_app.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from subprocess import PIPE, run
33
from unittest.mock import patch
44

5-
from pytest import raises
5+
from pytest import raises, mark
66

77
from bin.demeuk import main
88

@@ -188,7 +188,7 @@ def test_language_processing():
188188
with patch.object(sys, 'argv', testargs):
189189
main()
190190
line_num_output = calculate_line_numbers('testdata/output11')
191-
assert line_num_output == 29
191+
assert line_num_output == 21
192192
with open('testdata/output11') as f:
193193
filecontent = f.read()
194194
assert 'cijfer\n' in filecontent
@@ -982,3 +982,24 @@ def test_check_contains():
982982
assert '_amsterdam' not in filecontent
983983
assert 'ROTTERDAM_' not in filecontent
984984
assert 'Cookie Monster' in filecontent
985+
986+
987+
@mark.timeout(1)
def test_infinite_loop():
    """Regression test for the never-ending loop when lines are re-queued.

    Runs demeuk on testdata/input54 with --add-lower and --add-title-case
    under the timeout marker, then checks that the output holds 4 lines and
    contains both the original entries and their lower-cased variants.
    """
    output_path = 'testdata/output54'
    cli_args = ['demeuk', '-i', 'testdata/input54', '-o', output_path, '-l', 'testdata/log54']
    cli_args += ['--add-lower', '--add-title-case']

    # main() reads its options from sys.argv, so swap it out for this run.
    with patch.object(sys, 'argv', cli_args):
        main()

    with open(output_path) as handle:
        produced = handle.read()

    assert calculate_line_numbers(output_path) == 4
    for expected in ('Golf Trip', 'Sequences', 'golf trip', 'sequences'):
        assert expected in produced

tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ deps =
1313
-rrequirements.txt
1414
pytest
1515
flake8
16+
pytest-timeout
1617
commands =
1718
pytest
1819
flake8

0 commit comments

Comments
 (0)