diff --git a/pycodestyle.py b/pycodestyle.py index ec6b894d..213eacf1 100755 --- a/pycodestyle.py +++ b/pycodestyle.py @@ -49,6 +49,7 @@ """ from __future__ import with_statement +import bisect import inspect import keyword import os @@ -56,8 +57,8 @@ import sys import time import tokenize +import unicodedata import warnings -import bisect try: from functools import lru_cache @@ -296,12 +297,15 @@ def maximum_line_length(physical_line, max_line_length, multiline, (len(chunks) == 2 and chunks[0] == '#')) and \ len(line) - len(chunks[-1]) < max_line_length - 7: return - if hasattr(line, 'decode'): # Python 2 - # The line could contain multi-byte characters + # Special case: multi-byte chars and combining diacritics + if sys.version_info >= (3,): + line_text = line + else: try: - length = len(line.decode('utf-8')) - except UnicodeError: - pass + line_text = line.decode('UTF-8') + except UnicodeDecodeError: + line_text = u'' + length = sum(not unicodedata.combining(c) for c in line_text) if length > max_line_length: return (max_line_length, "E501 line too long " "(%d > %d characters)" % (length, max_line_length)) diff --git a/testsuite/E50.py b/testsuite/E50.py index bcf3bdce..4b406006 100644 --- a/testsuite/E50.py +++ b/testsuite/E50.py @@ -59,6 +59,10 @@ (''' ''' + ' \ ') + +# +#: Okay +this = 'é́́́́xá́́́́mplé́́́́ há́́́́s ló́́́́ts ó́́́́f có́́́́mbí́́́́ní́́́́ng dí́́́́á́́́́crí́́́́tí́́́́cs, á́́́́nd thá́́́́t í́́́́s ó́́́́ká́́́́y.' #: E501 E225 E226 very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines) #