Skip to content

Commit 9f9dded

Browse files
committed
status: use PyGit2 for much faster, simpler
1 parent ae22b46 commit 9f9dded

File tree

6 files changed

+204
-156
lines changed

6 files changed

+204
-156
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
strategy:
1515
matrix:
1616
os: [windows-latest, macos-latest, ubuntu-latest]
17-
python-version: ['3.9', '3.x']
17+
python-version: ['3.9', '3.12']
1818

1919
steps:
2020
- uses: actions/checkout@v4

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,4 @@ line-length = 100
4141

4242
[tool.mypy]
4343
files = ["src"]
44+
ignore_missing_imports = true

src/gitutils/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
"""
2-
These Git utilities use nothing beyond pure Python and command-line Git.
2+
These Git utilities use pygit2 where possible for much more efficient operations
3+
than using subprocesses even with asyncio.
4+
35
Speed is emphasized throughout, with pipelining and concurrent `asyncio` routines
46
for fastest operation on large numbers of repos.
57
"""
68

79
import logging
810

9-
__version__ = "1.12.1"
11+
__version__ = "2.0.0"
1012

1113

1214
def _log(verbose: bool):

src/gitutils/status.py

+25-145
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
detect Git local repo modifications. Crazy fast by not invoking remote.
2+
detect Git local repo modifications.
33
44
replaced by git status --porcelain:
55
git ls-files -o -d --exclude-standard: # check for uncommitted files
@@ -10,171 +10,51 @@
1010

1111
from __future__ import annotations
1212
import argparse
13-
import subprocess
14-
import logging
13+
import typing
1514
from pathlib import Path
1615
import asyncio
16+
from pprint import pprint
1717

18-
from . import _log
19-
from .git import gitdirs, git_exe, subprocess_asyncio, MAGENTA, BLACK, TIMEOUT
20-
21-
C0 = ["rev-parse", "--abbrev-ref", "HEAD"] # get branch name
22-
C1 = ["status", "--porcelain"] # uncommitted or changed files
23-
24-
__all__ = ["git_porcelain"]
25-
26-
27-
def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool:
28-
"""
29-
detects if single Git repo is porcelain i.e. clean.
30-
May not have been pushed or fetched.
31-
32-
Parameters
33-
----------
34-
35-
path: pathlib.Path
36-
path to Git repo
37-
38-
Returns
39-
-------
40-
41-
is_porcelain: bool
42-
true if local Git is clean
43-
"""
44-
45-
if not path.is_dir():
46-
raise NotADirectoryError(path)
47-
48-
ret = subprocess.run(
49-
[git_exe(), "-C", str(path)] + C1,
50-
stdout=subprocess.PIPE,
51-
text=True,
52-
timeout=timeout,
53-
)
54-
if ret.returncode != 0:
55-
logging.error(f"{path.name} return code {ret.returncode} {C1}")
56-
return False
57-
return not ret.stdout
58-
59-
60-
async def _git_status(path: Path, timeout: float) -> tuple[str, str] | None:
61-
"""
62-
Notes which Git repos have local changes that haven't been pushed to remote
63-
64-
Parameters
65-
----------
66-
path : pathlib.Path
67-
Git repo directory
68-
69-
Returns
70-
-------
71-
changes : tuple of pathlib.Path, str
72-
Git repo local changes
73-
"""
74-
75-
code, out, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C1, timeout=timeout)
76-
if code != 0:
77-
logging.error(f"{path.name} return code {code} {C1} {err}")
78-
return None
79-
80-
logging.info(path.name)
81-
82-
# %% uncommitted changes
83-
if out:
84-
return path.name, out
85-
86-
# %% detect committed, but not pushed
87-
code, branch, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C0, timeout=timeout)
88-
if code != 0:
89-
logging.error(f"{path.name} return code {code} {C0} {err}")
90-
return None
91-
92-
C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
93-
code, out, err = await subprocess_asyncio(C2, timeout=timeout)
94-
if code != 0:
95-
logging.error(f"{path.name} return code {code} {branch} {out} {err}")
96-
return None
97-
98-
if out:
99-
return path.name, out
100-
101-
return None
102-
103-
104-
def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> tuple[str, str] | None:
105-
"""
106-
107-
Notes which Git repos have local changes that haven't been pushed to remote
108-
109-
Parameters
110-
----------
111-
path : pathlib.Path
112-
Git repo directory
113-
114-
Returns
115-
-------
116-
changes : tuple of pathlib.Path, str
117-
Git repo local changes
118-
"""
119-
120-
out = subprocess.check_output(
121-
[git_exe(), "-C", str(path)] + C1, text=True, timeout=timeout
122-
).strip()
123-
124-
logging.info(path.name)
125-
126-
# %% uncommitted changes
127-
if out:
128-
return path.name, out
129-
130-
# %% detect committed, but not pushed
131-
branch = subprocess.check_output(
132-
[git_exe(), "-C", str(path)] + C0, text=True, timeout=timeout
133-
).strip()
134-
135-
C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
136-
out = subprocess.check_output(C2, text=True, timeout=timeout).strip()
137-
138-
if out:
139-
return path.name, out
140-
141-
return None
18+
import pygit2
14219

20+
from . import _log
21+
from .git import gitdirs
22+
from .status_cmd import git_status_serial, git_status_async
14323

144-
async def git_status(path: Path, verbose: bool, timeout: float) -> list[str]:
145-
c = MAGENTA if verbose else ""
14624

147-
changed = []
148-
futures = [_git_status(d, timeout) for d in gitdirs(path)]
149-
for r in asyncio.as_completed(futures, timeout=timeout):
150-
if changes := await r:
151-
changed.append(changes[0])
152-
print(c + changes[0])
153-
if verbose:
154-
print(BLACK + changes[1])
25+
def git_status(path: Path, verbose: bool) -> typing.Iterator[dict]:
    """
    Yield the pygit2 status of every Git repo under a directory that reports changes.

    Parameters
    ----------
    path : pathlib.Path
        top-level directory handed to gitdirs() to enumerate Git repos
    verbose : bool
        accepted for signature compatibility; not used by this function

    Yields
    ------
    status : dict
        the non-empty result of pygit2.Repository.status() for one repo.
        Repos whose status is empty are skipped.
        NOTE(review): the yielded value does not identify which repo it came from.
    """

    for repo_dir in gitdirs(path):
        changes = pygit2.Repository(repo_dir).status()
        if not changes:
            # nothing modified, staged or untracked was reported for this repo
            continue
        yield changes
15731

15832

15933
def cli():
    """
    Command-line entry point: report Git repos under a directory that have local changes.

    The backend is selected with -method / --method:

    * "pygit2" (default): git_status(), pretty-printing each non-empty status dict
    * "serial": git_status_serial() on each repo in turn, printing the repo name
      and, with --verbose, the Git output
    * "async": git_status_async() run under asyncio.run()

    The --timeout value is only forwarded to the "serial" and "async" backends.
    """
    p = argparse.ArgumentParser(description="get status of many Git repos")
    p.add_argument("path", help="path to look under", nargs="?", default="~/code")
    p.add_argument("-v", "--verbose", action="store_true")
    p.add_argument(
        "-t",
        "--timeout",
        type=float,
        help="timeout for Git command line calls (serial and async methods only)",
    )
    # "-method" is kept for backward compatibility; "--method" is the conventional
    # spelling. argparse derives dest="method" from the long option either way.
    p.add_argument(
        "-method",
        "--method",
        help="how to query status: pygit2 library (default), "
        "or Git command line run serially or concurrently with asyncio",
        choices=["pygit2", "serial", "async"],
        default="pygit2",
    )
    P = p.parse_args()

    _log(P.verbose)

    if P.method == "pygit2":
        for s in git_status(P.path, P.verbose):
            pprint(s)
    elif P.method == "serial":
        for d in gitdirs(P.path):
            if changes := git_status_serial(d, P.timeout):
                print(changes[0])
                if P.verbose:
                    print(changes[1])
    elif P.method == "async":
        asyncio.run(git_status_async(P.path, P.verbose, P.timeout))

17959

18060
if __name__ == "__main__":

src/gitutils/status_cmd.py

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""
2+
This was the original implementation of mass-checking of Git status
3+
using asyncio and subprocesses. It is much more efficient to use
4+
libgit2 via pygit2, which is the current implementation.
5+
"""
6+
7+
import subprocess
8+
import logging
9+
from pathlib import Path
10+
import asyncio
11+
12+
from .git import gitdirs, git_exe, subprocess_asyncio, MAGENTA, BLACK, TIMEOUT
13+
14+
C0 = ["rev-parse", "--abbrev-ref", "HEAD"] # get branch name
15+
C1 = ["status", "--porcelain"] # uncommitted or changed files
16+
17+
__all__ = ["git_porcelain"]
18+
19+
20+
def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool:
    """
    Report whether a single Git repo is porcelain, i.e. has a clean working tree.

    Runs "git status --porcelain" in the repo and treats empty output as clean.
    The repo may still have commits that were not pushed or fetched.

    Parameters
    ----------

    path: pathlib.Path
        path to Git repo
    timeout: float
        forwarded to subprocess.run() as its timeout

    Returns
    -------

    is_porcelain: bool
        True if Git printed nothing; False if it printed anything
        or exited with a non-zero return code (which is also logged as an error)

    Raises
    ------

    NotADirectoryError
        if path is not a directory
    """

    if not path.is_dir():
        raise NotADirectoryError(path)

    cmd = [git_exe(), "-C", str(path), *C1]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, text=True, timeout=timeout)

    if proc.returncode == 0:
        # any output at all means something is modified, staged or untracked
        return not proc.stdout

    logging.error(f"{path.name} return code {proc.returncode} {C1}")
    return False
51+
52+
53+
async def _git_status(path: Path, timeout: float) -> "tuple[str, str] | None":
    """
    Check one Git repo for uncommitted changes, then for commits that differ from origin.

    The return annotation is written as a string because this module does not use
    "from __future__ import annotations", and evaluating "tuple[str, str] | None"
    at definition time fails on Python 3.9.

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        forwarded to each subprocess_asyncio() call

    Returns
    -------
    changes : tuple of str, str, or None
        (repo directory name, Git output) when "git status --porcelain" or
        "git diff --stat origin/<branch>.." printed something.
        None when both were empty or when any Git command returned non-zero
        (the failure is logged as an error).
    """

    code, out, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C1, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C1} {err}")
        return None

    logging.info(path.name)

    # %% uncommitted changes
    if out:
        return path.name, out

    # %% detect committed, but not pushed
    code, branch, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C0, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C0} {err}")
        return None

    # A trailing newline would corrupt the "origin/<branch>.." revision range.
    # git_status_serial() strips its branch name the same way.
    # NOTE(review): harmless if subprocess_asyncio already strips its output.
    branch = branch.strip()

    C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
    code, out, err = await subprocess_asyncio(C2, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {branch} {out} {err}")
        return None

    if out:
        return path.name, out

    return None
95+
96+
97+
def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> "tuple[str, str] | None":
    """
    Check one Git repo for uncommitted changes, then for commits that differ from origin,
    using blocking Git command line calls.

    The return annotation is written as a string because this module does not use
    "from __future__ import annotations", and evaluating "tuple[str, str] | None"
    at definition time fails on Python 3.9.

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        forwarded to each subprocess.check_output() call

    Returns
    -------
    changes : tuple of str, str, or None
        (repo directory name, stripped Git output) when "git status --porcelain" or
        "git diff --stat origin/<branch>.." printed something; None when both were empty.

    Raises
    ------
    subprocess.CalledProcessError
        if any Git command exits non-zero (not caught here)
    subprocess.TimeoutExpired
        if any Git command exceeds timeout (not caught here)
    """

    out = subprocess.check_output(
        [git_exe(), "-C", str(path)] + C1, text=True, timeout=timeout
    ).strip()

    logging.info(path.name)

    # %% uncommitted changes
    if out:
        return path.name, out

    # %% detect committed, but not pushed
    branch = subprocess.check_output(
        [git_exe(), "-C", str(path)] + C0, text=True, timeout=timeout
    ).strip()

    C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
    out = subprocess.check_output(C2, text=True, timeout=timeout).strip()

    if out:
        return path.name, out

    return None
135+
136+
137+
async def git_status_async(path: Path, verbose: bool, timeout: float) -> list[str]:
    """
    Concurrently check every Git repo under a directory and print those with changes.

    Parameters
    ----------
    path : pathlib.Path
        top-level directory handed to gitdirs() to enumerate Git repos
    verbose : bool
        if True, print repo names prefixed with MAGENTA and follow each
        with the Git output prefixed with BLACK
    timeout : float
        forwarded to each _git_status() call and to asyncio.as_completed()

    Returns
    -------
    changed : list of str
        names of the repos that reported changes, in completion order
    """
    name_prefix = MAGENTA if verbose else ""

    dirty: list[str] = []
    pending = [_git_status(repo_dir, timeout) for repo_dir in gitdirs(path)]
    for finished in asyncio.as_completed(pending, timeout=timeout):
        result = await finished
        if not result:
            # clean repo, or a Git command failed and was already logged
            continue
        repo_name, detail = result
        dirty.append(repo_name)
        print(name_prefix + repo_name)
        if verbose:
            print(BLACK + detail)

    return dirty

0 commit comments

Comments
 (0)