File tree 2 files changed +5
-1
lines changed
unstructured/partition/common
2 files changed +5
-1
lines changed Original file line number Diff line number Diff line change 22
22
psutil
23
23
python-oxmsg
24
24
html5lib
25
+ chardet
Original file line number Diff line number Diff line change 1
1
from __future__ import annotations
2
2
3
+ import chardet
3
4
import numbers
4
5
import subprocess
5
6
from io import BufferedReader , BytesIO , TextIOWrapper
@@ -296,7 +297,9 @@ def convert_office_doc(
296
297
wait_time = 0
297
298
sleep_time = 0.1
298
299
output = subprocess .run (command , capture_output = True )
299
- message = output .stdout .decode ().strip ()
300
+ detected_encoding = chardet .detect (output .stdout )
301
+ encoding = detected_encoding ['encoding' ] or 'utf-8' # Default to utf-8 if detection fails
302
+ message = output .stdout .decode (encoding ).strip ()
300
303
# we can't rely on returncode unfortunately because on macOS it would return 0 even when the
301
304
# command failed to run; instead we have to rely on the stdout being empty as a sign that the
302
305
# process failed
You can’t perform that action at this time.
0 commit comments