Skip to content

Commit 0d83595

Browse files
authored
Merge pull request #342 from gdiscry/encodings
Handle files not encoded with UTF-8
2 parents 2a04746 + 95a02f1 commit 0d83595

File tree

9 files changed

+207
-72
lines changed

9 files changed

+207
-72
lines changed

CHANGES.rst

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Added
1212
Fixed
1313
-----
1414
- Avoid memory leak from using ``@lru_cache`` on a method.
15+
- Handle files encoded with an encoding other than UTF-8 without raising an exception.
1516

1617

1718
1.4.2_ - 2022-03-12

CONTRIBUTORS.rst

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
- Eric Riddoch (@phitoduck)
2626
- Filippos Giannakos (@philipgian)
2727
- Fox_white (@foxwhite25)
28+
- Georges Discry (@gdiscry)
2829
- Giel van Schijndel (@muggenhor)
2930
- Hugo Dupras (@jabesq)
3031
- Iryna (@irynahryshanovich)

README.rst

+23-12
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,17 @@ Thanks goes to these wonderful people (`emoji key`_):
977977
<br />
978978
<a href="https://github.com/akaihola/darker/search?q=foxwhite25" title="Bug reports">🐛</a>
979979
</td>
980+
<td align="center">
981+
<a href="https://github.com/gdiscry">
982+
<img src="https://avatars.githubusercontent.com/u/476823?v=3" width="100px;" alt="@gdiscry" />
983+
<br />
984+
<sub>
985+
<b>Georges Discry</b>
986+
</sub>
987+
</a>
988+
<br />
989+
<a href="https://github.com/akaihola/darker/pulls?q=is%3Apr+author%3Agdiscry" title="Code">💻</a>
990+
</td>
980991
<td align="center">
981992
<a href="https://github.com/muggenhor">
982993
<img src="https://avatars.githubusercontent.com/u/484066?v=3" width="100px;" alt="@muggenhor" />
@@ -1011,6 +1022,8 @@ Thanks goes to these wonderful people (`emoji key`_):
10111022
<br />
10121023
<a href="https://github.com/akaihola/darker/issues?q=author%3Airynahryshanovich" title="Bug reports">🐛</a>
10131024
</td>
1025+
</tr>
1026+
<tr>
10141027
<td align="center">
10151028
<a href="https://github.com/jasleen19">
10161029
<img src="https://avatars.githubusercontent.com/u/30443449?v=3" width="100px;" alt="@jasleen19" />
@@ -1023,8 +1036,6 @@ Thanks goes to these wonderful people (`emoji key`_):
10231036
<a href="https://github.com/akaihola/darker/issues?q=author%3Ajasleen19" title="Bug reports">🐛</a>
10241037
<a href="https://github.com/akaihola/darker/pulls?q=is%3Apr+reviewed-by%3Ajasleen19" title="Reviewed Pull Requests">👀</a>
10251038
</td>
1026-
</tr>
1027-
<tr>
10281039
<td align="center">
10291040
<a href="https://github.com/jedie">
10301041
<img src="https://avatars.githubusercontent.com/u/71315?v=3" width="100px;" alt="@jedie" />
@@ -1080,6 +1091,8 @@ Thanks goes to these wonderful people (`emoji key`_):
10801091
<br />
10811092
<a href="https://github.com/akaihola/darker/issues?q=author%3Amagnunm" title="Bug reports">🐛</a>
10821093
</td>
1094+
</tr>
1095+
<tr>
10831096
<td align="center">
10841097
<a href="https://github.com/markddavidoff">
10851098
<img src="https://avatars.githubusercontent.com/u/1360543?v=3" width="100px;" alt="@markddavidoff" />
@@ -1091,8 +1104,6 @@ Thanks goes to these wonderful people (`emoji key`_):
10911104
<br />
10921105
<a href="https://github.com/akaihola/darker/issues?q=author%3Amarkddavidoff" title="Bug reports">🐛</a>
10931106
</td>
1094-
</tr>
1095-
<tr>
10961107
<td align="center">
10971108
<a href="https://github.com/matclayton">
10981109
<img src="https://avatars.githubusercontent.com/u/126218?v=3" width="100px;" alt="@matclayton" />
@@ -1153,6 +1164,8 @@ Thanks goes to these wonderful people (`emoji key`_):
11531164
<a href="https://github.com/akaihola/darker/issues?q=author%3Anjhuffman" title="Bug reports">🐛</a>
11541165
<a href="https://github.com/akaihola/darker/commits?author=njhuffman" title="Code">💻</a>
11551166
</td>
1167+
</tr>
1168+
<tr>
11561169
<td align="center">
11571170
<a href="https://github.com/CircleOnCircles">
11581171
<img src="https://avatars.githubusercontent.com/u/8089231?v=3" width="100px;" alt="@CircleOnCircles" />
@@ -1164,8 +1177,6 @@ Thanks goes to these wonderful people (`emoji key`_):
11641177
<br />
11651178
<a href="https://github.com/akaihola/darker/issues?q=author%3ACircleOnCircles" title="Bug reports">🐛</a>
11661179
</td>
1167-
</tr>
1168-
<tr>
11691180
<td align="center">
11701181
<a href="https://github.com/Pacu2">
11711182
<img src="https://avatars.githubusercontent.com/u/21290461?v=3" width="100px;" alt="@Pacu2" />
@@ -1224,6 +1235,8 @@ Thanks goes to these wonderful people (`emoji key`_):
12241235
<br />
12251236
<a href="https://github.com/akaihola/darker/issues?q=author%3Aroniemartinez" title="Bug reports">🐛</a>
12261237
</td>
1238+
</tr>
1239+
<tr>
12271240
<td align="center">
12281241
<a href="https://github.com/rossbar">
12291242
<img src="https://avatars.githubusercontent.com/u/1268991?v=3" width="100px;" alt="@rossbar" />
@@ -1235,8 +1248,6 @@ Thanks goes to these wonderful people (`emoji key`_):
12351248
<br />
12361249
<a href="https://github.com/akaihola/darker/issues?q=author%3Arossbar" title="Bug reports">🐛</a>
12371250
</td>
1238-
</tr>
1239-
<tr>
12401251
<td align="center">
12411252
<a href="https://github.com/sherbie">
12421253
<img src="https://avatars.githubusercontent.com/u/15087653?v=3" width="100px;" alt="@sherbie" />
@@ -1292,6 +1303,8 @@ Thanks goes to these wonderful people (`emoji key`_):
12921303
<br />
12931304
<a href="https://github.com/akaihola/darker/issues?q=author%3Aguettli" title="Bug reports">🐛</a>
12941305
</td>
1306+
</tr>
1307+
<tr>
12951308
<td align="center">
12961309
<a href="https://github.com/tobiasdiez">
12971310
<img src="https://avatars.githubusercontent.com/u/5037600?v=3" width="100px;" alt="@tobiasdiez" />
@@ -1302,8 +1315,6 @@ Thanks goes to these wonderful people (`emoji key`_):
13021315
</a>
13031316
<br />
13041317
</td>
1305-
</tr>
1306-
<tr>
13071318
<td align="center">
13081319
<a href="https://github.com/yoursvivek">
13091320
<img src="https://avatars.githubusercontent.com/u/163296?v=3" width="100px;" alt="@yoursvivek" />
@@ -1362,6 +1373,8 @@ Thanks goes to these wonderful people (`emoji key`_):
13621373
<a href="https://github.com/conda-forge/staged-recipes/search?q=darker&type=issues&author=martinRenou" title="Code">💻</a>
13631374
<a href="https://github.com/akaihola/darker/pulls?q=is%3Apr+reviewed-by%3AmartinRenou" title="Reviewed Pull Requests">👀</a>
13641375
</td>
1376+
</tr>
1377+
<tr>
13651378
<td align="center">
13661379
<a href="https://github.com/mayk0gan">
13671380
<img src="https://avatars.githubusercontent.com/u/96263702?v=3" width="100px;" alt="@mayk0gan" />
@@ -1373,8 +1386,6 @@ Thanks goes to these wonderful people (`emoji key`_):
13731386
<br />
13741387
<a href="https://github.com/akaihola/darker/issues?q=author%3Amayk0gan" title="Bug reports">🐛</a>
13751388
</td>
1376-
</tr>
1377-
<tr>
13781389
<td align="center">
13791390
<a href="https://github.com/overratedpro">
13801391
<img src="https://avatars.githubusercontent.com/u/1379994?v=3" width="100px;" alt="@overratedpro" />

contributors.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ flying-sheep:
5858
- {link_type: issues, type: Bug reports}
5959
foxwhite25:
6060
- {link_type: search, type: Bug reports}
61+
gdiscry:
62+
- {link_type: pulls-author, type: Code}
6163
guettli:
6264
- {link_type: issues, type: Bug reports}
6365
hauntsaninja:

src/darker/git.py

+35-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from functools import lru_cache
1010
from pathlib import Path
1111
from subprocess import DEVNULL, PIPE, CalledProcessError, check_output, run # nosec
12-
from typing import Dict, Iterable, List, Set, Tuple
12+
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union, overload
1313

1414
from darker.diff import diff_and_get_opcodes, opcodes_to_edit_linenums
1515
from darker.utils import GIT_DATEFORMAT, TextDocument
@@ -79,8 +79,8 @@ def git_get_content_at_revision(path: Path, revision: str, cwd: Path) -> TextDoc
7979
return TextDocument.from_file(abspath)
8080
cmd = ["show", f"{revision}:./{path.as_posix()}"]
8181
try:
82-
return TextDocument.from_lines(
83-
_git_check_output_lines(cmd, cwd, exit_on_error=False),
82+
return TextDocument.from_bytes(
83+
_git_check_output(cmd, cwd, exit_on_error=False),
8484
mtime=git_get_mtime_at_commit(path, revision, cwd),
8585
)
8686
except CalledProcessError as exc_info:
@@ -207,15 +207,45 @@ def _git_check_output_lines(
207207
cmd: List[str], cwd: Path, exit_on_error: bool = True
208208
) -> List[str]:
209209
"""Log command line, run Git, split stdout to lines, exit with 123 on error"""
210+
return _git_check_output(
211+
cmd,
212+
cwd,
213+
exit_on_error=exit_on_error,
214+
encoding="utf-8",
215+
).splitlines()
216+
217+
218+
@overload
219+
def _git_check_output(
220+
cmd: List[str], cwd: Path, *, exit_on_error: bool = ..., encoding: None = ...
221+
) -> bytes:
222+
...
223+
224+
225+
@overload
226+
def _git_check_output(
227+
cmd: List[str], cwd: Path, *, exit_on_error: bool = ..., encoding: str
228+
) -> str:
229+
...
230+
231+
232+
def _git_check_output(
233+
cmd: List[str],
234+
cwd: Path,
235+
*,
236+
exit_on_error: bool = True,
237+
encoding: Optional[str] = None,
238+
) -> Union[str, bytes]:
239+
"""Log command line, run Git, return stdout, exit with 123 on error"""
210240
logger.debug("[%s]$ git %s", cwd, " ".join(cmd))
211241
try:
212242
return check_output( # nosec
213243
["git"] + cmd,
214244
cwd=str(cwd),
215-
encoding="utf-8",
245+
encoding=encoding,
216246
stderr=PIPE,
217247
env=_make_git_env(),
218-
).splitlines()
248+
)
219249
except CalledProcessError as exc_info:
220250
if not exit_on_error:
221251
raise

src/darker/tests/conftest.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
from pathlib import Path
55
from subprocess import check_call # nosec
6-
from typing import Dict, Optional
6+
from typing import Dict, Union
77

88
import pytest
99
from black import find_project_root as black_find_project_root
@@ -25,7 +25,7 @@ def create_repository(cls, root: Path) -> "GitRepoFixture":
2525
env = {"HOME": str(root), "LC_ALL": "C", "PATH": os.environ["PATH"]}
2626
instance = cls(root, env)
2727
# pylint: disable=protected-access
28-
instance._run("init")
28+
instance._run("init", "--initial-branch=master")
2929
instance._run("config", "user.email", "[email protected]")
3030
instance._run("config", "user.name", "CI system")
3131
return instance
@@ -39,7 +39,7 @@ def _run_and_get_first_line(self, *args: str) -> str:
3939
return _git_check_output_lines(list(args), Path(self.root))[0]
4040

4141
def add(
42-
self, paths_and_contents: Dict[str, Optional[str]], commit: str = None
42+
self, paths_and_contents: Dict[str, Union[str, bytes, None]], commit: str = None
4343
) -> Dict[str, Path]:
4444
"""Add/remove/modify files and optionally commit the changes
4545
@@ -58,10 +58,12 @@ def add(
5858
path = absolute_paths[relative_path]
5959
if content is None:
6060
self._run("rm", "--", relative_path)
61-
else:
62-
path.parent.mkdir(parents=True, exist_ok=True)
63-
path.write_bytes(content.encode("utf-8"))
64-
self._run("add", "--", relative_path)
61+
continue
62+
if isinstance(content, str):
63+
content = content.encode("utf-8")
64+
path.parent.mkdir(parents=True, exist_ok=True)
65+
path.write_bytes(content)
66+
self._run("add", "--", relative_path)
6567
if commit:
6668
self._run("commit", "-m", commit)
6769
return absolute_paths

src/darker/tests/test_git.py

+73-20
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,17 @@ def test_revisionrange_parse(revision_range, expect):
139139
assert result == expect
140140

141141

142+
def git_call(cmd, encoding=None):
143+
"""Returns a mocked call to git"""
144+
return call(
145+
cmd.split(),
146+
cwd=str(Path("/path")),
147+
encoding=encoding,
148+
stderr=PIPE,
149+
env={"LC_ALL": "C", "PATH": os.environ["PATH"]},
150+
)
151+
152+
142153
@pytest.mark.kwparametrize(
143154
dict(
144155
revision=":WORKTREE:",
@@ -147,31 +158,31 @@ def test_revisionrange_parse(revision_range, expect):
147158
dict(
148159
revision="HEAD",
149160
expect_git_calls=[
150-
"git show HEAD:./my.txt",
151-
"git log -1 --format=%ct HEAD -- my.txt",
161+
git_call("git show HEAD:./my.txt"),
162+
git_call("git log -1 --format=%ct HEAD -- my.txt", encoding="utf-8"),
152163
],
153164
expect_textdocument_calls=[
154-
call.from_lines([b"1627107028"], mtime="2021-07-24 06:10:28.000000 +0000")
165+
call.from_bytes(b"1627107028", mtime="2021-07-24 06:10:28.000000 +0000")
155166
],
156167
),
157168
dict(
158169
revision="HEAD^",
159170
expect_git_calls=[
160-
"git show HEAD^:./my.txt",
161-
"git log -1 --format=%ct HEAD^ -- my.txt",
171+
git_call("git show HEAD^:./my.txt"),
172+
git_call("git log -1 --format=%ct HEAD^ -- my.txt", encoding="utf-8"),
162173
],
163174
expect_textdocument_calls=[
164-
call.from_lines([b"1627107028"], mtime="2021-07-24 06:10:28.000000 +0000")
175+
call.from_bytes(b"1627107028", mtime="2021-07-24 06:10:28.000000 +0000")
165176
],
166177
),
167178
dict(
168179
revision="master",
169180
expect_git_calls=[
170-
"git show master:./my.txt",
171-
"git log -1 --format=%ct master -- my.txt",
181+
git_call("git show master:./my.txt"),
182+
git_call("git log -1 --format=%ct master -- my.txt", encoding="utf-8"),
172183
],
173184
expect_textdocument_calls=[
174-
call.from_lines([b"1627107028"], mtime="2021-07-24 06:10:28.000000 +0000")
185+
call.from_bytes(b"1627107028", mtime="2021-07-24 06:10:28.000000 +0000")
175186
],
176187
),
177188
expect_git_calls=[],
@@ -189,17 +200,7 @@ def test_git_get_content_at_revision_obtain_file_content(
189200

190201
git.git_get_content_at_revision(Path("my.txt"), revision, Path("/path"))
191202

192-
expected_calls = [
193-
call(
194-
expected_call.split(),
195-
cwd=str(Path("/path")),
196-
encoding="utf-8",
197-
stderr=PIPE,
198-
env={"LC_ALL": "C", "PATH": os.environ["PATH"]},
199-
)
200-
for expected_call in expect_git_calls
201-
]
202-
assert check_output.call_args_list == expected_calls
203+
assert check_output.call_args_list == expect_git_calls
203204
assert text_document_class.method_calls == expect_textdocument_calls
204205

205206

@@ -403,6 +404,58 @@ def test_git_get_content_at_revision_stderr(git_repo, capfd, caplog):
403404
assert caplog.text == ""
404405

405406

407+
@pytest.fixture(scope="module")
408+
def encodings_repo(tmp_path_factory):
409+
"""Create an example Git repository using various encodings for the same file"""
410+
tmpdir = tmp_path_factory.mktemp("branched_repo")
411+
git_repo = GitRepoFixture.create_repository(tmpdir)
412+
# Commit without an encoding cookie, defaults to utf-8
413+
git_repo.add({"file.py": "darker = 'plus foncé'\n"}, commit="Default encoding")
414+
git_repo.create_tag("default")
415+
# Commit without an encoding cookie but with a utf-8 signature
416+
content = "darker = 'plus foncé'\n".encode("utf-8-sig")
417+
git_repo.add({"file.py": content}, commit="utf-8-sig")
418+
git_repo.create_tag("utf-8-sig")
419+
# Commit with an iso-8859-1 encoding cookie
420+
content = "# coding: iso-8859-1\ndarker = 'plus foncé'\n".encode("iso-8859-1")
421+
git_repo.add({"file.py": content}, commit="iso-8859-1")
422+
git_repo.create_tag("iso-8859-1")
423+
# Commit with a utf-8 encoding cookie
424+
content = "# coding: utf-8\npython = 'パイソン'\n".encode("utf-8")
425+
git_repo.add({"file.py": content}, commit="utf-8")
426+
git_repo.create_tag("utf-8")
427+
# Current worktree content (not committed) with a shiftjis encoding cookie
428+
content = "# coding: shiftjis\npython = 'パイソン'\n".encode("shiftjis")
429+
git_repo.add({"file.py": content})
430+
return git_repo
431+
432+
433+
@pytest.mark.kwparametrize(
434+
dict(commit="default", encoding="utf-8", lines=("darker = 'plus foncé'",)),
435+
dict(commit="utf-8-sig", encoding="utf-8-sig", lines=("darker = 'plus foncé'",)),
436+
dict(
437+
commit="iso-8859-1",
438+
encoding="iso-8859-1",
439+
lines=("# coding: iso-8859-1", "darker = 'plus foncé'"),
440+
),
441+
dict(
442+
commit="utf-8", encoding="utf-8", lines=("# coding: utf-8", "python = 'パイソン'")
443+
),
444+
dict(
445+
commit=":WORKTREE:",
446+
encoding="shiftjis",
447+
lines=("# coding: shiftjis", "python = 'パイソン'"),
448+
),
449+
)
450+
def test_git_get_content_at_revision_encoding(encodings_repo, commit, encoding, lines):
451+
"""Git file is loaded using its historical encoding"""
452+
result = git.git_get_content_at_revision(
453+
Path("file.py"), commit, encodings_repo.root
454+
)
455+
assert result.encoding == encoding
456+
assert result.lines == lines
457+
458+
406459
@pytest.mark.kwparametrize(
407460
dict(retval=0, expect=True),
408461
dict(retval=1, expect=False),

0 commit comments

Comments
 (0)