From ed954b4c9f8800cd0bb34c4fed5968fd5540f4ab Mon Sep 17 00:00:00 2001
From: Rob Reynolds <reynoldsnlp@users.noreply.github.com>
Date: Thu, 30 May 2019 10:08:33 -0600
Subject: [PATCH 1/3] Fix #856: ignore combining diacritics in E501 line length

I also alphabetized the import statements by moving `bisect` to the top.
---
 pycodestyle.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pycodestyle.py b/pycodestyle.py
index ec6b894d..d6d0b733 100755
--- a/pycodestyle.py
+++ b/pycodestyle.py
@@ -49,6 +49,7 @@
 """
 from __future__ import with_statement
 
+import bisect
 import inspect
 import keyword
 import os
@@ -56,8 +57,8 @@
 import sys
 import time
 import tokenize
+import unicodedata
 import warnings
-import bisect
 
 try:
     from functools import lru_cache
@@ -283,7 +284,8 @@ def maximum_line_length(physical_line, max_line_length, multiline,
     Reports error E501.
     """
     line = physical_line.rstrip()
-    length = len(line)
+    # compute length ignoring combining diacritics
+    length = sum(not unicodedata.combining(c) for c in line)
     if length > max_line_length and not noqa:
         # Special case: ignore long shebang lines.
         if line_number == 1 and line.startswith('#!'):
@@ -299,7 +301,8 @@ def maximum_line_length(physical_line, max_line_length, multiline,
         if hasattr(line, 'decode'):   # Python 2
             # The line could contain multi-byte characters
             try:
-                length = len(line.decode('utf-8'))
+                length = sum(not unicodedata.combining(c)
+                             for c in line.decode('utf-8'))
             except UnicodeError:
                 pass
         if length > max_line_length:

From 11c88ac5d1dd87370782ff39dd5ca7d44b4816b7 Mon Sep 17 00:00:00 2001
From: reynoldsnlp <reynoldsnlp@users.noreply.github.com>
Date: Thu, 30 May 2019 11:11:30 -0600
Subject: [PATCH 2/3] refactor E501 and add test

---
 pycodestyle.py   | 20 +++++++++++---------
 testsuite/E50.py |  4 ++++
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/pycodestyle.py b/pycodestyle.py
index d6d0b733..7eaa3aca 100755
--- a/pycodestyle.py
+++ b/pycodestyle.py
@@ -284,8 +284,7 @@ def maximum_line_length(physical_line, max_line_length, multiline,
     Reports error E501.
     """
     line = physical_line.rstrip()
-    # compute length ignoring combining diacritics
-    length = sum(not unicodedata.combining(c) for c in line)
+    length = len(line)
     if length > max_line_length and not noqa:
         # Special case: ignore long shebang lines.
         if line_number == 1 and line.startswith('#!'):
@@ -298,13 +297,16 @@ def maximum_line_length(physical_line, max_line_length, multiline,
             (len(chunks) == 2 and chunks[0] == '#')) and \
                 len(line) - len(chunks[-1]) < max_line_length - 7:
             return
-        if hasattr(line, 'decode'):   # Python 2
-            # The line could contain multi-byte characters
-            try:
-                length = sum(not unicodedata.combining(c)
-                             for c in line.decode('utf-8'))
-            except UnicodeError:
-                pass
+        # Special case: multi-byte chars and combining diacritics
+        try:
+            length = sum(not unicodedata.combining(c) for c in line)
+        except TypeError:  # Python 2 str
+            if hasattr(line, 'decode'):  # Python 2 str
+                try:
+                    length = sum(not unicodedata.combining(c)
+                                 for c in line.decode('utf-8'))
+                except UnicodeError:
+                    pass
         if length > max_line_length:
             return (max_line_length, "E501 line too long "
                     "(%d > %d characters)" % (length, max_line_length))
diff --git a/testsuite/E50.py b/testsuite/E50.py
index bcf3bdce..4b406006 100644
--- a/testsuite/E50.py
+++ b/testsuite/E50.py
@@ -59,6 +59,10 @@
 ('''
     ''' + ' \
 ')
+
+#
+#: Okay
+this = 'é́́́́xá́́́́mplé́́́́ há́́́́s ló́́́́ts ó́́́́f có́́́́mbí́́́́ní́́́́ng dí́́́́á́́́́crí́́́́tí́́́́cs, á́́́́nd thá́́́́t í́́́́s ó́́́́ká́́́́y.'
 #: E501 E225 E226
 very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines)
 #

From 281424c4ce5a375fea4bcb91e8425c7502534c08 Mon Sep 17 00:00:00 2001
From: reynoldsnlp <reynoldsnlp@users.noreply.github.com>
Date: Thu, 30 May 2019 12:30:35 -0600
Subject: [PATCH 3/3] E501: use explicit Python version check instead of try/except TypeError

---
 pycodestyle.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/pycodestyle.py b/pycodestyle.py
index 7eaa3aca..213eacf1 100755
--- a/pycodestyle.py
+++ b/pycodestyle.py
@@ -298,15 +298,14 @@ def maximum_line_length(physical_line, max_line_length, multiline,
                 len(line) - len(chunks[-1]) < max_line_length - 7:
             return
         # Special case: multi-byte chars and combining diacritics
-        try:
-            length = sum(not unicodedata.combining(c) for c in line)
-        except TypeError:  # Python 2 str
-            if hasattr(line, 'decode'):  # Python 2 str
-                try:
-                    length = sum(not unicodedata.combining(c)
-                                 for c in line.decode('utf-8'))
-                except UnicodeError:
-                    pass
+        if sys.version_info >= (3,):
+            line_text = line
+        else:
+            try:
+                line_text = line.decode('UTF-8')
+            except UnicodeDecodeError:
+                line_text = u''
+        length = sum(not unicodedata.combining(c) for c in line_text)
         if length > max_line_length:
             return (max_line_length, "E501 line too long "
                     "(%d > %d characters)" % (length, max_line_length))