Skip to content

Commit 88ad799

Browse files
committed
status: use PyGit2 for much faster, simpler operation
rename email to avoid flake8 conflict
1 parent ae22b46 commit 88ad799

File tree

9 files changed

+216
-166
lines changed

9 files changed

+216
-166
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
strategy:
1515
matrix:
1616
os: [windows-latest, macos-latest, ubuntu-latest]
17-
python-version: ['3.9', '3.x']
17+
python-version: ['3.9', '3.12']
1818

1919
steps:
2020
- uses: actions/checkout@v4

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ python -m pip install -e .
4444
`gitbranch`
4545
: Lists any non-master branches under directory ~/code
4646

47-
`python -m gitutils.email`
47+
`python -m gitutils.git_email`
4848
: list all contributor email addresses. To fix unwanted emails use [Git-filter-repo](https://www.scivision.dev/git-update-email-address/)
4949

5050
`find_missing_file`

pyproject.toml

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ classifiers = [
1818
]
1919
dynamic = ["version", "readme"]
2020
requires-python = ">=3.9"
21+
dependencies = ["pygit2"]
2122

2223
[project.optional-dependencies]
2324
tests = ["pytest"]
@@ -41,3 +42,4 @@ line-length = 100
4142

4243
[tool.mypy]
4344
files = ["src"]
45+
ignore_missing_imports = true

src/gitutils/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
"""
2-
These Git utilities use nothing beyond pure Python and command-line Git.
2+
These Git utilities use pygit2 where possible for much more efficient operations
3+
than using subprocesses even with asyncio.
4+
35
Speed is emphasized, with pipelining and concurrent `asyncio` routines used throughout
46
for fastest operation on large numbers of repos.
57
"""
68

79
import logging
810

9-
__version__ = "1.12.1"
11+
__version__ = "2.0.0"
1012

1113

1214
def _log(verbose: bool):

src/gitutils/email.py renamed to src/gitutils/git_email.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ def cli():
6969

7070
for d, emails in gitemail(P.path, P.exclude, timeout=P.timeout):
7171
print(MAGENTA + d.stem + BLACK)
72-
for email in emails:
73-
print(*email)
72+
for e in emails:
73+
print(*e)
7474

7575

7676
if __name__ == "__main__":

src/gitutils/status.py

+25-151
Original file line numberDiff line numberDiff line change
@@ -1,180 +1,54 @@
11
"""
2-
detect Git local repo modifications. Crazy fast by not invoking remote.
3-
4-
replaced by git status --porcelain:
5-
git ls-files -o -d --exclude-standard: # check for uncommitted files
6-
git --no-pager diff HEAD , # check for uncommitted work
7-
8-
DOES NOT WORK git log --branches --not --remotes # check for uncommitted branches
2+
detect Git local repo modifications
93
"""
104

115
from __future__ import annotations
126
import argparse
13-
import subprocess
14-
import logging
7+
import typing
158
from pathlib import Path
169
import asyncio
10+
from pprint import pprint
1711

18-
from . import _log
19-
from .git import gitdirs, git_exe, subprocess_asyncio, MAGENTA, BLACK, TIMEOUT
20-
21-
C0 = ["rev-parse", "--abbrev-ref", "HEAD"] # get branch name
22-
C1 = ["status", "--porcelain"] # uncommitted or changed files
23-
24-
__all__ = ["git_porcelain"]
25-
26-
27-
def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool:
28-
"""
29-
detects if single Git repo is porcelain i.e. clean.
30-
May not have been pushed or fetched.
31-
32-
Parameters
33-
----------
34-
35-
path: pathlib.Path
36-
path to Git repo
37-
38-
Returns
39-
-------
40-
41-
is_porcelain: bool
42-
true if local Git is clean
43-
"""
44-
45-
if not path.is_dir():
46-
raise NotADirectoryError(path)
47-
48-
ret = subprocess.run(
49-
[git_exe(), "-C", str(path)] + C1,
50-
stdout=subprocess.PIPE,
51-
text=True,
52-
timeout=timeout,
53-
)
54-
if ret.returncode != 0:
55-
logging.error(f"{path.name} return code {ret.returncode} {C1}")
56-
return False
57-
return not ret.stdout
58-
59-
60-
async def _git_status(path: Path, timeout: float) -> tuple[str, str] | None:
61-
"""
62-
Notes which Git repos have local changes that haven't been pushed to remote
63-
64-
Parameters
65-
----------
66-
path : pathlib.Path
67-
Git repo directory
68-
69-
Returns
70-
-------
71-
changes : tuple of pathlib.Path, str
72-
Git repo local changes
73-
"""
74-
75-
code, out, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C1, timeout=timeout)
76-
if code != 0:
77-
logging.error(f"{path.name} return code {code} {C1} {err}")
78-
return None
79-
80-
logging.info(path.name)
81-
82-
# %% uncommitted changes
83-
if out:
84-
return path.name, out
85-
86-
# %% detect committed, but not pushed
87-
code, branch, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C0, timeout=timeout)
88-
if code != 0:
89-
logging.error(f"{path.name} return code {code} {C0} {err}")
90-
return None
91-
92-
C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
93-
code, out, err = await subprocess_asyncio(C2, timeout=timeout)
94-
if code != 0:
95-
logging.error(f"{path.name} return code {code} {branch} {out} {err}")
96-
return None
97-
98-
if out:
99-
return path.name, out
100-
101-
return None
102-
103-
104-
def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> tuple[str, str] | None:
105-
"""
106-
107-
Notes which Git repos have local changes that haven't been pushed to remote
108-
109-
Parameters
110-
----------
111-
path : pathlib.Path
112-
Git repo directory
113-
114-
Returns
115-
-------
116-
changes : tuple of pathlib.Path, str
117-
Git repo local changes
118-
"""
119-
120-
out = subprocess.check_output(
121-
[git_exe(), "-C", str(path)] + C1, text=True, timeout=timeout
122-
).strip()
123-
124-
logging.info(path.name)
125-
126-
# %% uncommitted changes
127-
if out:
128-
return path.name, out
129-
130-
# %% detect committed, but not pushed
131-
branch = subprocess.check_output(
132-
[git_exe(), "-C", str(path)] + C0, text=True, timeout=timeout
133-
).strip()
134-
135-
C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
136-
out = subprocess.check_output(C2, text=True, timeout=timeout).strip()
137-
138-
if out:
139-
return path.name, out
140-
141-
return None
12+
import pygit2
14213

14+
from . import _log
15+
from .git import gitdirs
16+
from .status_cmd import git_status_serial, git_status_async
14317

144-
async def git_status(path: Path, verbose: bool, timeout: float) -> list[str]:
145-
c = MAGENTA if verbose else ""
14618

147-
changed = []
148-
futures = [_git_status(d, timeout) for d in gitdirs(path)]
149-
for r in asyncio.as_completed(futures, timeout=timeout):
150-
if changes := await r:
151-
changed.append(changes[0])
152-
print(c + changes[0])
153-
if verbose:
154-
print(BLACK + changes[1])
19+
def git_status(path: Path, verbose: bool) -> typing.Iterator[dict]:
15520

156-
return changed
21+
for d in gitdirs(path):
22+
repo = pygit2.Repository(d)
23+
if status := repo.status():
24+
yield status
15725

15826

15927
def cli():
16028
p = argparse.ArgumentParser(description="get status of many Git repos")
16129
p.add_argument("path", help="path to look under", nargs="?", default="~/code")
16230
p.add_argument("-v", "--verbose", action="store_true")
16331
p.add_argument("-t", "--timeout", type=float)
164-
p.add_argument("--serial", help="don't use asyncio parallel execution", action="store_true")
32+
p.add_argument(
33+
"-method",
34+
help="use Git command line serial execution",
35+
choices=["pygit2", "serial", "async"], default="pygit2",
36+
)
16537
P = p.parse_args()
16638

16739
_log(P.verbose)
16840

169-
if P.serial:
170-
c = MAGENTA if P.verbose else ""
41+
if P.method == "pygit2":
42+
for s in git_status(P.path, P.verbose):
43+
pprint(s)
44+
elif P.method == "serial":
17145
for d in gitdirs(P.path):
17246
if changes := git_status_serial(d, P.timeout):
173-
print(c + changes[0])
47+
print(changes[0])
17448
if P.verbose:
175-
print(BLACK + changes[1])
176-
else:
177-
asyncio.run(git_status(P.path, P.verbose, P.timeout))
49+
print(changes[1])
50+
elif P.method == "async":
51+
asyncio.run(git_status_async(P.path, P.verbose, P.timeout))
17852

17953

18054
if __name__ == "__main__":

0 commit comments

Comments
 (0)