google · lizawang · Aug 18, 2022 · Aug 30, 2022 · Aug 30, 2022 · Aug 30, 2022
diff --git a/CHANGELOG b/CHANGELOG
@@ -2,6 +2,10 @@
 # All notable changes to this project will be documented in this file.
 # This project adheres to [Semantic Versioning](http://semver.org/).
 
+## [0.41.1] 2022-08-30
+### Added
+- Add 4 new knobs to align assignment operators and dictionary colons. They are align_assignment, align_argument_assignment, align_dict_colon and new_alignment_after_commentline.
+
 ## [0.40.0] UNRELEASED
 ### Added
 - Add a new Python parser to generate logical lines.

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
@@ -15,3 +15,4 @@ Sam Clegg <[email protected]>
 Łukasz Langa <[email protected]>
 Oleg Butuzov <[email protected]>
 Mauricio Herrera Cuadra <[email protected]>
+Xiao Wang <[email protected]>
diff --git a/README.rst b/README.rst
@@ -390,6 +390,61 @@ Options::
 Knobs
 =====
 
+``ALIGN_ASSIGNMENT``
+    Align assignment or augmented assignment operators.
+    If there is a blank line or a newline comment or a multiline object
+    (e.g. a dictionary, a list, a function call) in between,
+    it will start new block alignment. Lines in the same block have the same
+    indentation level.
+
+    .. code-block:: python
+
+        a   = 1
+        abc = 2
+        if condition == None:
+            var       += ''
+            var_long  -= 4
+        b  = 3
+        bc = 4
+
+``ALIGN_ARGUMENT_ASSIGNMENT``
+    Align assignment operators in the argument list if they are all split on newlines.
+    Arguments without assignment in between will initiate a new block alignment calculation;
+    for example, a comment line.
+    Multiline objects in between will also initiate a new alignment block.
+
+    .. code-block:: python
+
+        rglist = test(
+            var_first  = 0,
+            var_second = '',
+            var_dict   = {
+                "key_1" : '',
+                "key_2" : 2,
+                "key_3" : True,
+            },
+            var_third     = 1,
+            var_very_long = None )
+
+``ALIGN_DICT_COLON``
+    Align the colons in the dictionary if all entries in the dictionary are split on newlines
+    or 'EACH_DICT_ENTRY_ON_SEPARATE_LINE' is set to True.
+    A comment line or multi-line object in between will start a new alignment block.
+
+    .. code-block:: python
+
+        fields =
+            {
+                "field" : "ediid",
+                "type"  : "text",
+                # key: value
+                "required" : True,
+            }
+
+``NEW_ALIGNMENT_AFTER_COMMENTLINE``
+    Start a new alignment block for assignment alignment and
+    colon alignment after a comment line (optional; off by default).
+
 ``ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT``
     Align closing bracket with visual indentation.
 

diff --git a/yapf/pytree/split_penalty.py b/yapf/pytree/split_penalty.py
@@ -89,7 +89,7 @@ def Visit_classdef(self, node):  # pylint: disable=invalid-name
     if len(node.children) > 4:
       # opening '('
       _SetUnbreakable(node.children[2])
-    # ':'
+      # ':'
     _SetUnbreakable(node.children[-2])
     self.DefaultNodeVisit(node)
 

diff --git a/yapf/pytree/subtype_assigner.py b/yapf/pytree/subtype_assigner.py
@@ -240,6 +240,7 @@ def Visit_argument(self, node):  # pylint: disable=invalid-name
     # argument ::=
     #     test [comp_for] | test '=' test
     self._ProcessArgLists(node)
+    #TODO add a subtype to each argument?
 
   def Visit_arglist(self, node):  # pylint: disable=invalid-name
     # arglist ::=

diff --git a/yapf/yapflib/format_decision_state.py b/yapf/yapflib/format_decision_state.py
@@ -978,6 +978,7 @@ def _GetNewlineColumn(self):
         not self.param_list_stack[-1].SplitBeforeClosingBracket(
             top_of_stack.indent) and top_of_stack.indent
         == ((self.line.depth + 1) * style.Get('INDENT_WIDTH'))):
+      # NOTE: comment inside argument list is not excluded in subtype assigner
       if (subtypes.PARAMETER_START in current.subtypes or
           (previous.is_comment and
            subtypes.PARAMETER_START in previous.subtypes)):

diff --git a/yapf/yapflib/format_token.py b/yapf/yapflib/format_token.py
@@ -322,3 +322,24 @@ def is_pytype_comment(self):
   def is_copybara_comment(self):
     return self.is_comment and re.match(
         r'#.*\bcopybara:\s*(strip|insert|replace)', self.value)
+
+  @property
+  def is_dict_colon(self):
+    # True when this token is a ':' and the token right before it
+    # is part of a dictionary key (see is_dict_key).
+    return self.value == ':' and self.previous_token.is_dict_key
+
+  @property
+  def is_dict_key(self):
+    # True when the token carries the DICTIONARY_KEY_PART subtype.
+    # NOTE(review): presumably excludes '**' unpacking and comp_for - confirm.
+    return subtypes.DICTIONARY_KEY_PART in self.subtypes
+
+  @property
+  def is_dict_key_start(self):
+    # True when the token carries the DICTIONARY_KEY subtype (dictionary key start).
+    return subtypes.DICTIONARY_KEY in self.subtypes
+
+  @property
+  def is_dict_value(self):
+    return subtypes.DICTIONARY_VALUE in self.subtypes  # has DICTIONARY_VALUE subtype
diff --git a/yapf/yapflib/reformatter.py b/yapf/yapflib/reformatter.py
@@ -22,6 +22,7 @@
 from __future__ import unicode_literals
 
 import collections
+from distutils.errors import LinkError
 import heapq
 import re
 
@@ -102,6 +103,10 @@ def Reformat(llines, verify=False, lines=None):
     final_lines.append(lline)
     prev_line = lline
 
+  if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and
+      style.Get('ALIGN_DICT_COLON')):
+    _AlignDictColon(final_lines)
+
   _AlignTrailingComments(final_lines)
   return _FormatFinalLines(final_lines, verify)
 
@@ -394,6 +399,186 @@ def _AlignTrailingComments(final_lines):
       final_lines_index += 1
 
 
+def _AlignDictColon(final_lines):
+  """Align colons in a dict to the same column (mutates tokens in final_lines)."""
+  """NOTE One (nested) dict/list is one logical line!"""
+  final_lines_index = 0
+  while final_lines_index < len(final_lines):
+    line = final_lines[final_lines_index]
+    if line.disable:
+      final_lines_index += 1
+      continue
+
+    assert line.tokens
+    process_content = False
+
+    for tok in line.tokens:
+      # only process a line that contains a dict key token which starts
+      # on its own line (comment tokens are ignored)
+      if (tok.is_dict_key and
+          tok.formatted_whitespace_prefix.startswith('\n') and
+          not tok.is_comment):
+
+        this_line = line
+
+        line_tokens = this_line.tokens
+        for open_index in range(len(line_tokens)):
+          line_tok = line_tokens[open_index]
+
+          # only handle a '{' whose following token starts on a new line
+          if line_tok.value == '{' and line_tok.next_token.formatted_whitespace_prefix.startswith(
+              '\n'):
+            index = open_index
+            # skip the tokens (e.g. comments) before the first dict key
+            index += 1
+            line_tok = line_tokens[index]
+            while not line_tok.is_dict_key and index < len(line_tokens) - 1:
+              index += 1
+              line_tok = line_tokens[index]
+            # in case of an empty dict, check again that we stopped on a dict key
+            if line_tok.is_dict_key and line_tok.formatted_whitespace_prefix.startswith(
+                '\n'):
+              closing = False  # the closing bracket in dict '}'.
+              keys_content = ''
+              all_dict_keys_lengths = []
+              dict_keys_lengths = []
+
+              # record the column number of the first key
+              first_key_column = len(
+                  line_tok.formatted_whitespace_prefix.lstrip('\n'))
+              key_column = first_key_column
+
+              # walk the tokens until the matching closing bracket is found
+              while not closing:
+                prefix = line_tok.formatted_whitespace_prefix
+                newline = prefix.startswith('\n')
+                if newline:
+                  # if there is a comment in between, save, reset and continue to calculate a new alignment
+                  if (style.Get('NEW_ALIGNMENT_AFTER_COMMENTLINE') and
+                      dict_keys_lengths and line_tok.is_comment):
+                    all_dict_keys_lengths.append(dict_keys_lengths)
+                    dict_keys_lengths = []
+                    index += 1
+                    line_tok = line_tokens[index]
+                    continue
+                  if line_tok.is_dict_key_start:
+                    keys_content = ''
+                    prefix = prefix.lstrip('\n')
+                    key_column = len(prefix)
+                  # if the dict key is so long that it spans multiple lines,
+                  # only calculate the last line, which has the colon
+                  elif line_tok.is_dict_key:
+                    keys_content = ''
+                    prefix = prefix.lstrip('\n')
+                elif line_tok.is_dict_key_start:
+                  key_column = line_tok.column
+
+                if line_tok.is_dict_colon and key_column == first_key_column:
+                  dict_keys_lengths.append(len(keys_content))
+                elif line_tok.is_dict_key and key_column == first_key_column:
+                  keys_content += '{}{}'.format(prefix, line_tok.value)
+
+                index += 1
+                if index < len(line_tokens):
+                  line_tok = line_tokens[index]
+                # when the matching closing bracket is never found
+                # due to edge cases where the closing bracket
+                # is not indented or dedented, e.g. ']}', with another bracket before
+                else:
+                  all_dict_keys_lengths.append(dict_keys_lengths)
+                  break
+
+                # if there is a new object (list/tuple/dict) with its entries on newlines,
+                # or a function call with any of its arguments on newlines,
+                # save, reset and continue to calculate a new alignment
+                if (line_tok.value in ['(', '[', '{'] and
+                    not line_tok.is_pseudo and line_tok.next_token and
+                    line_tok.next_token.formatted_whitespace_prefix.startswith(
+                        '\n')):
+                  if dict_keys_lengths:
+                    all_dict_keys_lengths.append(dict_keys_lengths)
+                  dict_keys_lengths = []
+                  index += 1
+                  line_tok = line_tokens[index]
+                  continue
+                # the matching closing bracket is dedented relative to the
+                # first key's column (close_column < first_key_column);
+                # the first one found ends the while loop
+                if line_tok.value == '}':
+                  if line_tok.formatted_whitespace_prefix.startswith('\n'):
+                    close_column = len(
+                        line_tok.formatted_whitespace_prefix.lstrip('\n'))
+                  else:
+                    close_column = line_tok.column
+                  if close_column < first_key_column:
+                    if dict_keys_lengths:
+                      all_dict_keys_lengths.append(dict_keys_lengths)
+                    closing = True
+
+              # update the alignment once one dict is processed
+              if all_dict_keys_lengths:
+                max_keys_length = 0
+                all_dict_keys_lengths_index = 0
+                dict_keys_lengths = all_dict_keys_lengths[
+                    all_dict_keys_lengths_index]
+                max_keys_length = max(dict_keys_lengths or [0]) + 2
+                keys_lengths_index = 0
+                for token in line_tokens[open_index + 1:index]:
+                  if token.is_dict_colon:
+                    # check if the key has multiple tokens and
+                    # get the first key token in this key
+                    key_token = token.previous_token
+                    while key_token.is_dict_key and not key_token.is_dict_key_start:
+                      key_token = key_token.previous_token
+                    key_column = len(
+                        key_token.formatted_whitespace_prefix.lstrip('\n'))
+
+                    if key_column == first_key_column:
+
+                      if keys_lengths_index == len(dict_keys_lengths):
+                        all_dict_keys_lengths_index += 1
+                        dict_keys_lengths = all_dict_keys_lengths[
+                            all_dict_keys_lengths_index]
+                        max_keys_length = max(dict_keys_lengths or [0]) + 2
+                        keys_lengths_index = 0
+
+                      if keys_lengths_index < len(dict_keys_lengths):
+                        assert dict_keys_lengths[
+                            keys_lengths_index] < max_keys_length
+
+                        padded_spaces = ' ' * (
+                            max_keys_length -
+                            dict_keys_lengths[keys_lengths_index] - 1)
+                        keys_lengths_index += 1
+                        # NOTE: the existing whitespace may be longer than the padded spaces
+                        existing_whitespace_prefix = \
+                              token.formatted_whitespace_prefix.lstrip('\n')
+                        colon_content = '{}{}'.format(padded_spaces,
+                                                      token.value.strip())
+
+                        # in case the existing spaces are at least as long as the padded spaces
+                        if (len(padded_spaces) == 1 or
+                            len(padded_spaces) > 1 and
+                            len(existing_whitespace_prefix)
+                            >= len(padded_spaces)):
+                          # remove the existing spaces
+                          token.whitespace_prefix = ''
+                        elif colon_content.startswith(
+                            existing_whitespace_prefix):
+                          colon_content = colon_content[
+                              len(existing_whitespace_prefix):]
+
+                        token.value = colon_content
+
+        final_lines_index += 1
+
+        process_content = True
+        break
+
+    if not process_content:
+      final_lines_index += 1
+
+
 def _FormatFinalLines(final_lines, verify):
   """Compose the final output from the finalized lines."""
   formatted_code = []

diff --git a/yapf/yapflib/style.py b/yapf/yapflib/style.py
@@ -54,6 +54,14 @@ def SetGlobalStyle(style):
 _STYLE_HELP = dict(
     ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=textwrap.dedent("""\
       Align closing bracket with visual indentation."""),
+    ALIGN_DICT_COLON=textwrap.dedent("""\
+      Align the colons in the dictionary
+      if all entries in dictionay are split on newlines.
+      or 'EACH_DICT_ENTRY_ON_SEPERATE_LINE' is set True.
+      """),
+    NEW_ALIGNMENT_AFTER_COMMENTLINE=textwrap.dedent("""\
+      Start new assignment or colon alignment when there is a newline comment in between."""
+                                                   ),
     ALLOW_MULTILINE_LAMBDAS=textwrap.dedent("""\
       Allow lambdas to be formatted on more than one line."""),
     ALLOW_MULTILINE_DICTIONARY_KEYS=textwrap.dedent("""\
@@ -419,6 +427,8 @@ def CreatePEP8Style():
   """Create the PEP8 formatting style."""
   return dict(
       ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=True,
+      ALIGN_DICT_COLON=False,
+      NEW_ALIGNMENT_AFTER_COMMENTLINE=False,
       ALLOW_MULTILINE_LAMBDAS=False,
       ALLOW_MULTILINE_DICTIONARY_KEYS=False,
       ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS=True,
@@ -607,6 +617,8 @@ def _IntOrIntListConverter(s):
 # Note: this dict has to map all the supported style options.
 _STYLE_OPTION_VALUE_CONVERTER = dict(
     ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=_BoolConverter,
+    ALIGN_DICT_COLON=_BoolConverter,
+    NEW_ALIGNMENT_AFTER_COMMENTLINE=_BoolConverter,
     ALLOW_MULTILINE_LAMBDAS=_BoolConverter,
     ALLOW_MULTILINE_DICTIONARY_KEYS=_BoolConverter,
     ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS=_BoolConverter,