From ed954b4c9f8800cd0bb34c4fed5968fd5540f4ab Mon Sep 17 00:00:00 2001 From: Rob Reynolds Date: Thu, 30 May 2019 10:08:33 -0600 Subject: [PATCH 1/3] Fix #856 I also alphabetized the import statements by moving `bisect` to the top. --- pycodestyle.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pycodestyle.py b/pycodestyle.py index ec6b894d..d6d0b733 100755 --- a/pycodestyle.py +++ b/pycodestyle.py @@ -49,6 +49,7 @@ """ from __future__ import with_statement +import bisect import inspect import keyword import os @@ -56,8 +57,8 @@ import sys import time import tokenize +import unicodedata import warnings -import bisect try: from functools import lru_cache @@ -283,7 +284,8 @@ def maximum_line_length(physical_line, max_line_length, multiline, Reports error E501. """ line = physical_line.rstrip() - length = len(line) + # compute length ignoring combining diacritics + length = sum(not unicodedata.combining(c) for c in line) if length > max_line_length and not noqa: # Special case: ignore long shebang lines. if line_number == 1 and line.startswith('#!'): @@ -299,7 +301,8 @@ def maximum_line_length(physical_line, max_line_length, multiline, if hasattr(line, 'decode'): # Python 2 # The line could contain multi-byte characters try: - length = len(line.decode('utf-8')) + length = sum(not unicodedata.combining(c) + for c in line.decode('utf-8')) except UnicodeError: pass if length > max_line_length: From 11c88ac5d1dd87370782ff39dd5ca7d44b4816b7 Mon Sep 17 00:00:00 2001 From: reynoldsnlp Date: Thu, 30 May 2019 11:11:30 -0600 Subject: [PATCH 2/3] refactor E501 and add test --- pycodestyle.py | 20 +++++++++++--------- testsuite/E50.py | 4 ++++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pycodestyle.py b/pycodestyle.py index d6d0b733..7eaa3aca 100755 --- a/pycodestyle.py +++ b/pycodestyle.py @@ -284,8 +284,7 @@ def maximum_line_length(physical_line, max_line_length, multiline, Reports error E501. """ line = physical_line.rstrip() - # compute length ignoring combining diacritics - length = sum(not unicodedata.combining(c) for c in line) + length = len(line) if length > max_line_length and not noqa: # Special case: ignore long shebang lines. if line_number == 1 and line.startswith('#!'): @@ -298,13 +297,16 @@ def maximum_line_length(physical_line, max_line_length, multiline, (len(chunks) == 2 and chunks[0] == '#')) and \ len(line) - len(chunks[-1]) < max_line_length - 7: return - if hasattr(line, 'decode'): # Python 2 - # The line could contain multi-byte characters - try: - length = sum(not unicodedata.combining(c) - for c in line.decode('utf-8')) - except UnicodeError: - pass + # Special case: multi-byte chars and combining diacritics + try: + length = sum(not unicodedata.combining(c) for c in line) + except TypeError: # Python 2 str + if hasattr(line, 'decode'): # Python 2 str + try: + length = sum(not unicodedata.combining(c) + for c in line.decode('utf-8')) + except UnicodeError: + pass if length > max_line_length: return (max_line_length, "E501 line too long " "(%d > %d characters)" % (length, max_line_length)) diff --git a/testsuite/E50.py b/testsuite/E50.py index bcf3bdce..4b406006 100644 --- a/testsuite/E50.py +++ b/testsuite/E50.py @@ -59,6 +59,10 @@ (''' ''' + ' \ ') + +# +#: Okay +this = 'é́́́́xá́́́́mplé́́́́ há́́́́s ló́́́́ts ó́́́́f có́́́́mbí́́́́ní́́́́ng dí́́́́á́́́́crí́́́́tí́́́́cs, á́́́́nd thá́́́́t í́́́́s ó́́́́ká́́́́y.' #: E501 E225 E226 very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines) # From 281424c4ce5a375fea4bcb91e8425c7502534c08 Mon Sep 17 00:00:00 2001 From: reynoldsnlp Date: Thu, 30 May 2019 12:30:35 -0600 Subject: [PATCH 3/3] E501 explicit version check --- pycodestyle.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pycodestyle.py b/pycodestyle.py index 7eaa3aca..213eacf1 100755 --- a/pycodestyle.py +++ b/pycodestyle.py @@ -298,15 +298,14 @@ def maximum_line_length(physical_line, max_line_length, multiline, len(line) - len(chunks[-1]) < max_line_length - 7: return # Special case: multi-byte chars and combining diacritics - try: - length = sum(not unicodedata.combining(c) for c in line) - except TypeError: # Python 2 str - if hasattr(line, 'decode'): # Python 2 str - try: - length = sum(not unicodedata.combining(c) - for c in line.decode('utf-8')) - except UnicodeError: - pass + if sys.version_info >= (3,): + line_text = line + else: + try: + line_text = line.decode('UTF-8') + except UnicodeDecodeError: + line_text = u'' + length = sum(not unicodedata.combining(c) for c in line_text) if length > max_line_length: return (max_line_length, "E501 line too long " "(%d > %d characters)" % (length, max_line_length))