Skip to content

Improve non ascii checker #5643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -595,3 +595,6 @@ contributors:
* Eero Vuojolahti: contributor

* Kian-Meng, Ang: contributor

* Carli* Freudenberg (CarliJoy): contributor
- Improve non-ascii-name checker
9 changes: 9 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ Release date: TBA

Closes #5588

* Rewrote the checker for ``non-ascii-name``.
It now ensures that **all** Python names are ASCII and also properly
checks the names of imports (``non-ascii-module-import``) as
well as file names (``non-ascii-file-name``), emitting their respective new warnings.

Non-ASCII characters can be homoglyphs (look-alike characters) and are hard to
enter on a non-specialized keyboard.
See `Confusable Characters in PEP 672 <https://www.python.org/dev/peps/pep-0672/#confusable-characters-in-identifiers>`_.

* When run in parallel mode ``pylint`` now pickles the data passed to subprocesses with
the ``dill`` package. The ``dill`` package has therefore been added as a dependency.

Expand Down
9 changes: 9 additions & 0 deletions doc/whatsnew/2.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ New checkers

Closes #5460

* Rewrote the checker for ``non-ascii-name``.
It now ensures that **all** Python names are ASCII and also properly
checks the names of imports (``non-ascii-module-import``) as
well as file names (``non-ascii-file-name``), emitting their respective new warnings.

Non-ASCII characters can be homoglyphs (look-alike characters) and are hard to
enter on a non-specialized keyboard.
See `Confusable Characters in PEP 672 <https://www.python.org/dev/peps/pep-0672/#confusable-characters-in-identifiers>`_.

Removed checkers
================

Expand Down
9 changes: 8 additions & 1 deletion pylint/checkers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,14 @@
15: stdlib
16: python3
17: refactoring
18-50: not yet used: reserved for future internal checkers.
.
.
.
25: non-ascii-names
26-50: not yet used: reserved for future internal checkers.
This file is not updated. Use
script/get_unused_message_id_category.py
to get the next free checker id.
51-99: perhaps used: reserved for external checkers

The raw_metrics checker has no number associated since it doesn't emit any
Expand Down
56 changes: 13 additions & 43 deletions pylint/checkers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@
import astroid
from astroid import nodes

from pylint import checkers, constants, interfaces
from pylint import constants, interfaces
from pylint import utils as lint_utils
from pylint.checkers import utils
from pylint.checkers import base_checker, utils
from pylint.checkers.utils import (
infer_all,
is_overload_stub,
Expand Down Expand Up @@ -456,12 +456,7 @@ def x(self, value): self._x = value
return False


class _BasicChecker(checkers.BaseChecker):
__implements__ = interfaces.IAstroidChecker
name = "basic"


class BasicErrorChecker(_BasicChecker):
class BasicErrorChecker(base_checker._BasicChecker):
msgs = {
"E0100": (
"__init__ method is a generator",
Expand Down Expand Up @@ -937,7 +932,7 @@ def _check_redefinition(self, redeftype, node):
)


class BasicChecker(_BasicChecker):
class BasicChecker(base_checker._BasicChecker):
"""checks for :
* doc strings
* number of arguments, local variables, branches, returns and statements in
Expand Down Expand Up @@ -1719,7 +1714,7 @@ def _create_naming_options():
return tuple(name_options)


class NameChecker(_BasicChecker):
class NameChecker(base_checker._NameCheckerBase):
msgs = {
"C0103": (
'%s name "%s" doesn\'t conform to %s',
Expand All @@ -1737,11 +1732,6 @@ class NameChecker(_BasicChecker):
]
},
),
"C0144": (
'%s name "%s" contains a non-ASCII unicode character',
"non-ascii-name",
"Used when the name contains at least one non-ASCII unicode character.",
),
"W0111": (
"Name %s will become a keyword in Python %s",
"assign-to-new-keyword",
Expand Down Expand Up @@ -1838,7 +1828,6 @@ def __init__(self, linter):
self._name_hints = {}
self._good_names_rgxs_compiled = []
self._bad_names_rgxs_compiled = []
self._non_ascii_rgx_compiled = re.compile("[^\u0000-\u007F]")

def open(self):
self.linter.stats.reset_bad_names()
Expand Down Expand Up @@ -1878,7 +1867,7 @@ def _create_naming_rules(self):

return regexps, hints

@utils.check_messages("disallowed-name", "invalid-name", "non-ascii-name")
@utils.check_messages("disallowed-name", "invalid-name")
def visit_module(self, node: nodes.Module) -> None:
self._check_name("module", node.name.split(".")[-1], node)
self._bad_names = {}
Expand All @@ -1904,19 +1893,15 @@ def leave_module(self, _: nodes.Module) -> None:
for args in warnings:
self._raise_name_warning(prevalent_group, *args)

@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
def visit_classdef(self, node: nodes.ClassDef) -> None:
self._check_assign_to_new_keyword_violation(node.name, node)
self._check_name("class", node.name, node)
for attr, anodes in node.instance_attrs.items():
if not any(node.instance_attr_ancestors(attr)):
self._check_name("attr", attr, anodes[0])

@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
def visit_functiondef(self, node: nodes.FunctionDef) -> None:
# Do not emit any warnings if the method is just an implementation
# of a base class method.
Expand Down Expand Up @@ -1944,14 +1929,12 @@ def visit_functiondef(self, node: nodes.FunctionDef) -> None:

visit_asyncfunctiondef = visit_functiondef

@utils.check_messages("disallowed-name", "invalid-name", "non-ascii-name")
@utils.check_messages("disallowed-name", "invalid-name")
def visit_global(self, node: nodes.Global) -> None:
for name in node.names:
self._check_name("const", name, node)

@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
def visit_assignname(self, node: nodes.AssignName) -> None:
"""check module level assigned names"""
self._check_assign_to_new_keyword_violation(node.name, node)
Expand Down Expand Up @@ -1991,14 +1974,6 @@ def visit_assignname(self, node: nodes.AssignName) -> None:
else:
self._check_name("class_attribute", node.name, node)

def _recursive_check_names(self, args):
"""check names in a possibly recursive list <arg>"""
for arg in args:
if isinstance(arg, nodes.AssignName):
self._check_name("argument", arg.name, arg)
else:
self._recursive_check_names(arg.elts)

def _find_name_group(self, node_type):
return self._name_group.get(node_type, node_type)

Expand Down Expand Up @@ -2041,11 +2016,6 @@ def _name_disallowed_by_regex(self, name: str) -> bool:

def _check_name(self, node_type, name, node, confidence=interfaces.HIGH):
"""check for a name using the type's regexp"""
non_ascii_match = self._non_ascii_rgx_compiled.match(name)
if non_ascii_match is not None:
self._raise_name_warning(
None, node, node_type, name, confidence, warning="non-ascii-name"
)

def _should_exempt_from_invalid_name(node):
if node_type == "variable":
Expand Down Expand Up @@ -2092,7 +2062,7 @@ def _name_became_keyword_in_version(name, rules):
return None


class DocStringChecker(_BasicChecker):
class DocStringChecker(base_checker._BasicChecker):
msgs = {
"C0112": (
"Empty %s docstring",
Expand Down Expand Up @@ -2258,7 +2228,7 @@ def _check_docstring(
)


class PassChecker(_BasicChecker):
class PassChecker(base_checker._BasicChecker):
"""check if the pass statement is really necessary"""

msgs = {
Expand Down Expand Up @@ -2300,7 +2270,7 @@ def _infer_dunder_doc_attribute(node):
return docstring.value


class ComparisonChecker(_BasicChecker):
class ComparisonChecker(base_checker._BasicChecker):
"""Checks for comparisons

- singleton comparison: 'expr == True', 'expr == False' and 'expr == None'
Expand Down
32 changes: 31 additions & 1 deletion pylint/checkers/base_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
import functools
from inspect import cleandoc
from typing import Any, Optional
from typing import Any, Iterable, Optional

from astroid import nodes

from pylint import interfaces
from pylint.config import OptionsProviderMixIn
from pylint.constants import _MSG_ORDER, WarningScope
from pylint.exceptions import InvalidMessageError
Expand Down Expand Up @@ -201,3 +202,32 @@ class BaseTokenChecker(BaseChecker):
def process_tokens(self, tokens):
"""Should be overridden by subclasses."""
raise NotImplementedError()


class _BasicChecker(BaseChecker):
    """Common base class for the checkers grouped under the ``basic`` name.

    It only pins the checker name and declares the astroid checker
    interface; it registers no messages or options of its own.
    """

    # Checker name shared by every subclass of this base.
    name = "basic"
    # Interface declaration: subclasses are astroid (AST-visiting) checkers.
    __implements__ = interfaces.IAstroidChecker


class _NameCheckerBase(_BasicChecker):
    """Helpers shared by NameChecker and NonAsciiNameChecker."""

    def _check_name(
        self, node_type: str, name: str, node: nodes.NodeNG, confidence=interfaces.HIGH
    ):
        """Placeholder that every implementing class has to override.

        This base version performs no check and always raises.

        Note: the keyword arguments accepted by the overriding methods
        differ between implementing classes.

        :raises NotImplementedError: always, when not overridden.
        """
        raise NotImplementedError

    def _recursive_check_names(self, args: Iterable[nodes.AssignName]):
        """Run ``_check_name`` on each name of a possibly nested list <args>.

        ``AssignName`` entries are checked with the ``"argument"`` node type;
        any other entry is descended into through its ``elts`` attribute.
        """
        for entry in args:
            if not isinstance(entry, nodes.AssignName):
                # pylint: disable-next=fixme
                # TODO: Check if we can remove this if branch because of
                # the up to date astroid version used
                self._recursive_check_names(entry.elts)
                continue
            self._check_name("argument", entry.name, entry)
Loading