Skip to content

Commit 5a70364

Browse files
jpenney authored and FichteFoll committed
use codecs to check preferred encoding
This allows all Python-supported aliases for `utf_8_sig` (e.g. `utf-8-sig`) to pass the encoding check without issuing a warning.
1 parent 04f8320 commit 5a70364

File tree

2 files changed

+20
-5
lines changed

2 files changed

+20
-5
lines changed

ass/document.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import codecs
2+
13
from .section import ScriptInfoSection, FieldSection, StylesSection, EventsSection, LineSection
24
from ._util import CaseInsensitiveOrderedDict
35

@@ -39,6 +41,7 @@ class Document(object):
3941
STYLE_ASS_HEADER = "V4+ Styles"
4042
EVENTS_HEADER = "Events"
4143
AEGISUB_PROJECT_HEADER = "Aegisub Project Garbage"
44+
PREFERRED_ENCODING = codecs.lookup('utf_8_sig')
4245

4346
SECTIONS = CaseInsensitiveOrderedDict({
4447
SCRIPT_INFO_HEADER: ScriptInfoSection,
@@ -84,7 +87,7 @@ def parse_file(cls, f):
8487
bom_seqeunces = ("\xef\xbb\xbf", "\xff\xfe", "\ufeff")
8588
if any(line.startswith(seq) for seq in bom_seqeunces):
8689
raise ValueError("BOM detected. Please open the file with the proper encoding,"
87-
" usually 'utf_8_sig'")
90+
" usually '%s'" % cls.PREFERRED_ENCODING.name)
8891

8992
line = line.strip()
9093
if not line or line.startswith(';'):
@@ -127,14 +130,22 @@ def parse_string(cls, string):
127130
"""
128131
return cls.parse_file(string.splitlines())
129132

133+
@classmethod
134+
def is_preferred_encoding(cls, encoding):
135+
try:
136+
enc_codec = codecs.lookup(encoding)
137+
except (LookupError, TypeError):
138+
return False
139+
return enc_codec == cls.PREFERRED_ENCODING
140+
130141
def dump_file(self, f):
131142
""" Dump this ASS document to a file object.
132143
"""
133144
encoding = getattr(f, 'encoding')
134-
if encoding and encoding != 'utf_8_sig':
145+
if encoding and not self.is_preferred_encoding(encoding):
135146
import warnings
136147
warnings.warn("It is recommended to write UTF-8 with BOM"
137-
" using the 'utf_8_sig' encoding")
148+
" using the '%s' encoding" % self.PREFERRED_ENCODING.name)
138149

139150
for section in self.sections.values():
140151
f.write("\n".join(section.dump()))

tests/test_ass.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,12 @@ def test_parse_encoding(self):
3535
ass.parse(f)
3636

3737
def test_dump_encoding(self):
38-
with self.test_ass.open("r", encoding='utf_8_sig') as f:
39-
doc = ass.parse(f)
38+
for encoding in ('utf_8_sig', 'utf-8-sig'):
39+
with self.test_ass.open("r", encoding=encoding) as f:
40+
doc = ass.parse(f)
41+
42+
with self.test_ass.open("r", encoding=encoding.upper()) as f:
43+
doc = ass.parse(f)
4044

4145
import tempfile
4246
with tempfile.TemporaryFile(mode='w', encoding='utf_8') as f:

0 commit comments

Comments
 (0)