Skip to content

Commit 74e95e7

Browse files
committed
fix: UTF16 bug
1 parent 878f332 commit 74e95e7

1 file changed

Lines changed: 10 additions & 5 deletions

File tree

yarGen.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -301,11 +301,15 @@ def extract_strings(fileData):
301301
strings_limited = re.findall(b"[\x1f-\x7e]{6,%d}" % args.s, fileData)
302302
strings_hex = extract_hex_strings(fileData)
303303
strings = list(set(strings_full) | set(strings_limited) | set(strings_hex))
304-
strings += [str("UTF16LE:%s" % ws.decode('utf-16-le')) for ws in re.findall(b"(?:[\x1f-\x7e][\x00]){6,}", fileData)]
304+
wide_strings = [ws for ws in re.findall(b"(?:[\x1f-\x7e][\x00]){6,}", fileData)]
305305

306-
# Escape strings
306+
# Post-process
307+
# WIDE
308+
for ws in wide_strings:
309+
# Decode UTF16 and prepend a marker (facilitates handling)
310+
strings.append(("UTF16LE:%s" % ws.decode('utf-16')).encode('utf-8'))
307311
for string in strings:
308-
# Check if last bytes have been string and not yet saved to list
312+
# Escape strings
309313
if len(string) > 0:
310314
string = string.replace(b'\\', b'\\\\')
311315
string = string.replace(b'"', b'\\"')
@@ -315,11 +319,12 @@ def extract_strings(fileData):
315319
else:
316320
cleaned_strings.append(string.decode('utf-8'))
317321
except AttributeError as e:
318-
traceback.print_exc()
319322
print(string)
323+
traceback.print_exc()
320324

321325
except Exception as e:
322326
if args.debug:
327+
print(string)
323328
traceback.print_exc()
324329
pass
325330

@@ -1690,7 +1695,7 @@ def write_strings(filePath, strings, output_dir, scores):
16901695
score = stringScores["UTF16LE:%s" % string]
16911696
else:
16921697
score = stringScores[string]
1693-
output_string.append("%d;%s" % score, string)
1698+
output_string.append("%d;%s" % (score, string))
16941699
else:
16951700
output_string.append(string)
16961701
# Empty line between sections

0 commit comments

Comments
 (0)