Skip to content

Commit 29e2325

Browse files
Merge pull request #1 from augmentable-dev/parse-git-log
add a git log output parser instead of using go-git
2 parents 9f4431b + ed6f2bf commit 29e2325

File tree

3 files changed

+206
-52
lines changed

3 files changed

+206
-52
lines changed

cmd/root.go

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,35 @@
11
package cmd
22

33
import (
4+
"context"
45
"fmt"
5-
"log"
6+
"io/ioutil"
67
"math"
78
"os"
89
"sort"
910
"text/tabwriter"
10-
"time"
1111

12+
"github.com/augmentable-dev/gitpert/pkg/gitlog"
1213
"github.com/go-git/go-git/v5"
13-
"github.com/go-git/go-git/v5/plumbing/object"
14-
"github.com/go-git/go-git/v5/storage/memory"
1514
"github.com/spf13/cobra"
1615
"github.com/src-d/enry/v2"
1716
)
1817

1918
func handleError(err error) {
2019
if err != nil {
21-
log.Fatalln(err)
20+
fmt.Println(err)
21+
os.Exit(1)
2222
}
2323
}
2424

2525
var (
2626
decayDays int
27-
full bool
2827
remote bool
2928
)
3029

3130
func init() {
3231
rootCmd.Flags().IntVarP(&decayDays, "decay-rate", "d", 30, "determines how long it takes for the impact of a commit to halve, based on how recently the commit was made")
3332
rootCmd.Flags().BoolVarP(&remote, "remote", "r", false, "whether or not this is a remote repository")
34-
rootCmd.Flags().BoolVarP(&full, "full", "f", false, "include all commits when calculating scores")
3533
}
3634

3735
var rootCmd = &cobra.Command{
@@ -50,43 +48,32 @@ var rootCmd = &cobra.Command{
5048
repoPath = p
5149
}
5250

53-
var repo *git.Repository
54-
// if the remote flag is set, clone the repo (using repoPath) into memory
5551
if remote {
56-
r, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
57-
URL: repoPath,
58-
SingleBranch: true,
59-
})
52+
dir, err := ioutil.TempDir("", "gitpert_remote_repo")
6053
handleError(err)
61-
repo = r
62-
} else { // otherwise, open the specified repo
63-
r, err := git.PlainOpen(repoPath)
54+
defer os.RemoveAll(dir)
55+
56+
_, err = git.PlainClone(dir, false, &git.CloneOptions{
57+
URL: repoPath,
58+
// Progress: os.Stdout,
59+
})
6460
handleError(err)
65-
repo = r
61+
62+
repoPath = dir
6663
}
6764

68-
var fileName *string
65+
var fileName string
6966
if len(args) > 1 {
70-
fileName = &args[1]
67+
fileName = args[1]
7168
}
7269

70+
commits, err := gitlog.Exec(context.Background(), repoPath, fileName, []string{})
71+
handleError(err)
72+
7373
// TODO (patrickdevivo) at some point this entire scoring logic should be brought out into a subpackage with some tests
7474
// this could also make it possible for other projects to import the implementation.
7575
decayHours := 24 * decayDays
7676

77-
// this ignores any commits older than 100 half-lives,
78-
var since time.Time
79-
if !full {
80-
since = time.Now().Add(-(time.Duration(decayHours) * time.Hour * 10))
81-
}
82-
commitIter, err := repo.Log(&git.LogOptions{
83-
Order: git.LogOrderCommitterTime,
84-
FileName: fileName,
85-
Since: &since,
86-
})
87-
handleError(err)
88-
defer commitIter.Close()
89-
9077
type authorAggregate struct {
9178
email string
9279
name string
@@ -96,7 +83,11 @@ var rootCmd = &cobra.Command{
9683
}
9784
authors := map[string]*authorAggregate{}
9885
var authorEmails []string
99-
commitIter.ForEach(func(commit *object.Commit) error {
86+
var firstCommit *gitlog.Commit
87+
for _, commit := range commits {
88+
if firstCommit == nil {
89+
firstCommit = commit
90+
}
10091
authorEmail := commit.Author.Email
10192
if _, ok := authors[authorEmail]; !ok {
10293
authors[authorEmail] = &authorAggregate{
@@ -107,37 +98,24 @@ var rootCmd = &cobra.Command{
10798
}
10899

109100
agg := authors[authorEmail]
110-
hoursAgo := time.Now().Sub(commit.Author.When).Hours()
101+
hoursAgo := firstCommit.Author.When.Sub(commit.Author.When).Hours()
111102
agg.commits++
112103

113-
// TODO this is a bit hacky, we're absorbing any panics that occur
114-
// in particular, it's meant to capture an index out of range error occurring
115-
// under some conditions in the underlying git/diff dependency. Maybe another reason to use native git...
116-
defer func() {
117-
if err := recover(); err != nil {
118-
agg.score += math.Exp2(-hoursAgo / float64(decayHours))
119-
}
120-
}()
121-
122-
fileStats, err := commit.Stats()
123-
handleError(err)
124-
125104
var additions int
126105
var deletions int
127-
for _, stat := range fileStats {
106+
for file, stat := range commit.Stats {
128107
// ignore diffs in vendor files
129108
// TODO perhaps it's worth allowing for the user to supply file path patterns be ignored?
130-
if enry.IsVendor(stat.Name) {
109+
if enry.IsVendor(file) {
131110
continue
132111
}
133-
additions += stat.Addition
134-
deletions += stat.Deletion
112+
additions += stat.Additions
113+
deletions += stat.Deletions
135114
}
136115
agg.impact += additions + deletions
137116

138117
agg.score += float64(additions+deletions) * math.Exp2(-hoursAgo/float64(decayHours))
139-
return nil
140-
})
118+
}
141119

142120
sort.SliceStable(authorEmails, func(i, j int) bool {
143121
return authors[authorEmails[j]].score < authors[authorEmails[i]].score

pkg/gitlog/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## gitlog
2+
3+
Package for parsing the output of `git log` for the purposes of `gitpert`.

pkg/gitlog/gitlog.go

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
package gitlog
2+
3+
import (
4+
"bufio"
5+
"context"
6+
"fmt"
7+
"io"
8+
"io/ioutil"
9+
"os/exec"
10+
"strconv"
11+
"strings"
12+
"time"
13+
)
14+
15+
// Data model produced by parsing `git log` output.
type (
	// Commit represents a parsed commit from git log.
	Commit struct {
		// SHA is the text that follows the "commit " prefix in the log output.
		SHA string
		// Author is who authored the commit and when.
		Author Event
		// Committer is who committed the commit and when.
		Committer Event
		// Stats maps a file path, as printed by --numstat, to its diff stat.
		Stats map[string]Stat
	}

	// Event represents the who and when of a commit event.
	Event struct {
		Name  string
		Email string
		When  time.Time
	}

	// Stat holds the diff stat of a file.
	Stat struct {
		// Additions is the number of added lines (0 when git reports "-").
		Additions int
		// Deletions is the number of deleted lines (0 when git reports "-").
		Deletions int
	}

	// Result is a list of commits.
	Result []*Commit
)
38+
39+
func parseLog(reader io.Reader) (Result, error) {
40+
scanner := bufio.NewScanner(reader)
41+
res := make(Result, 0)
42+
43+
// line prefixes for the `fuller` formatted output
44+
const (
45+
commit = "commit "
46+
author = "Author: "
47+
authorDate = "AuthorDate: "
48+
49+
committer = "Commit: "
50+
commitDate = "CommitDate: "
51+
)
52+
53+
var currentCommit *Commit
54+
for scanner.Scan() {
55+
line := scanner.Text()
56+
switch {
57+
case strings.HasPrefix(line, commit):
58+
if currentCommit != nil { // if we're seeing a new commit but already have a current commit, we've finished a commit
59+
res = append(res, currentCommit)
60+
}
61+
currentCommit = &Commit{
62+
SHA: strings.TrimPrefix(line, commit),
63+
Stats: make(map[string]Stat),
64+
}
65+
case strings.HasPrefix(line, author):
66+
s := strings.TrimPrefix(line, author)
67+
spl := strings.Split(s, " ")
68+
email := strings.Trim(spl[len(spl)-1], "<>")
69+
name := strings.Join(spl[:len(spl)-1], " ")
70+
currentCommit.Author.Email = strings.Trim(email, "<>")
71+
currentCommit.Author.Name = strings.TrimSpace(name)
72+
case strings.HasPrefix(line, authorDate):
73+
authorDateString := strings.TrimPrefix(line, authorDate)
74+
aD, err := time.Parse(time.RFC3339, authorDateString)
75+
if err != nil {
76+
return nil, err
77+
}
78+
currentCommit.Author.When = aD
79+
case strings.HasPrefix(line, committer):
80+
s := strings.TrimPrefix(line, committer)
81+
spl := strings.Split(s, " ")
82+
email := strings.Trim(spl[len(spl)-1], "<>")
83+
name := strings.Join(spl[:len(spl)-1], " ")
84+
currentCommit.Committer.Email = strings.Trim(email, "<>")
85+
currentCommit.Committer.Name = strings.TrimSpace(name)
86+
case strings.HasPrefix(line, commitDate):
87+
commitDateString := strings.TrimPrefix(line, commitDate)
88+
cD, err := time.Parse(time.RFC3339, commitDateString)
89+
if err != nil {
90+
return nil, err
91+
}
92+
currentCommit.Committer.When = cD
93+
case strings.HasPrefix(line, " "): // ignore commit message lines
94+
case strings.TrimSpace(line) == "": // ignore empty lines
95+
default:
96+
s := strings.Split(line, "\t")
97+
var additions int
98+
var deletions int
99+
var err error
100+
if s[0] != "-" {
101+
additions, err = strconv.Atoi(s[0])
102+
if err != nil {
103+
return nil, err
104+
}
105+
}
106+
if s[1] != "-" {
107+
deletions, err = strconv.Atoi(s[1])
108+
if err != nil {
109+
return nil, err
110+
}
111+
}
112+
currentCommit.Stats[s[2]] = Stat{
113+
Additions: additions,
114+
Deletions: deletions,
115+
}
116+
}
117+
}
118+
if currentCommit != nil {
119+
res = append(res, currentCommit)
120+
}
121+
122+
return res, nil
123+
}
124+
125+
// Exec runs the git log command
126+
func Exec(ctx context.Context, repoPath string, filePattern string, additionalFlags []string) (Result, error) {
127+
gitPath, err := exec.LookPath("git")
128+
if err != nil {
129+
return nil, fmt.Errorf("could not find git: %w", err)
130+
}
131+
132+
args := []string{"log"}
133+
134+
args = append(args, "--numstat", "--format=fuller", "--no-merges", "--no-decorate", "--date=iso8601-strict", "-w")
135+
args = append(args, additionalFlags...)
136+
if filePattern != "" {
137+
args = append(args, filePattern)
138+
}
139+
140+
cmd := exec.CommandContext(ctx, gitPath, args...)
141+
cmd.Dir = repoPath
142+
143+
stdout, err := cmd.StdoutPipe()
144+
if err != nil {
145+
return nil, err
146+
}
147+
148+
stderr, err := cmd.StderrPipe()
149+
if err != nil {
150+
return nil, err
151+
}
152+
153+
if err := cmd.Start(); err != nil {
154+
return nil, err
155+
}
156+
157+
res, err := parseLog(stdout)
158+
if err != nil {
159+
return nil, err
160+
}
161+
162+
errs, err := ioutil.ReadAll(stderr)
163+
if err != nil {
164+
return nil, err
165+
}
166+
167+
if err := cmd.Wait(); err != nil {
168+
fmt.Println(string(errs))
169+
return nil, err
170+
}
171+
172+
return res, nil
173+
}

0 commit comments

Comments
 (0)