Skip to content

Commit 8589402

Browse files
committed
pkg/vcs: return multiple base commit candidates
Return the commits that represent unique sets of branches. Sort the list topologically, breaking ties by commit date.
1 parent ce94b10 commit 8589402

File tree

3 files changed

+67
-29
lines changed

3 files changed

+67
-29
lines changed

pkg/vcs/git.go

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,7 @@ func (git *gitRepo) initRepo(reason error) error {
232232
}
233233

234234
func (git *gitRepo) Contains(commit string) (bool, error) {
235-
_, err := git.Run("merge-base", "--is-ancestor", commit, HEAD)
236-
return err == nil, nil
235+
return git.containedIn(HEAD, commit)
237236
}
238237

239238
const gitDateFormat = "Mon Jan 2 15:04:05 2006 -0700"
@@ -753,9 +752,11 @@ type BaseCommit struct {
753752
Branches []string
754753
}
755754

756-
// BaseForDiff selects the most recent commit that could have been the base commit
755+
// BaseForDiff returns a list of commits that could have been the base commit
757756
// for the specified git patch.
758-
func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCommit, error) {
757+
// The returned list is minimized to only contain the commits that are represented in different
758+
// subsets of branches.
759+
func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*BaseCommit, error) {
759760
// We can't just query git log with --find-object=HASH because that will only return
760761
// the revisions where the hashed content was introduced or removed, while what we actually
761762
// want is the latest revision(s) where the content modified in the diff is still in place.
@@ -771,7 +772,7 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
771772
"--no-renames",
772773
// Note that we cannot accelerate it by specifying "--since"
773774
"-n", "100",
774-
`--format=%P`,
775+
`--format=%H:%P`,
775776
}
776777
var fileNames []string
777778
nameToHash := map[string]string{}
@@ -812,22 +813,25 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
812813
for s.Scan() {
813814
// TODO: we can further reduce the search space by adding "--raw" to args
814815
// and only considering the commits that introduce the blobs from the diff.
815-
firstParent, _, _ := strings.Cut(s.Text(), " ")
816-
if firstParent == "" {
817-
continue
816+
commit, parents, _ := strings.Cut(s.Text(), ":")
817+
// Focus on the first parent.
818+
candidate, _, _ := strings.Cut(parents, " ")
819+
if candidate == "" {
820+
// For the first commit, there's no parent.
821+
candidate = commit
818822
}
819823
// Only focus on branches that are still alive.
820824
const cutOffDays = 60
821-
list, err := git.branchesThatContain(firstParent, time.Now().Add(-time.Hour*24*cutOffDays))
825+
list, err := git.branchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays))
822826
if err != nil {
823827
return nil, fmt.Errorf("failed to query branches: %w", err)
824828
}
825829
for _, info := range list {
826-
record(firstParent, info.Branch)
830+
record(candidate, info.Branch)
827831
record(info.Commit, info.Branch)
828832
}
829833
}
830-
var ret *BaseCommit
834+
var ret []*BaseCommit
831835
for commit, branches := range commitBranches {
832836
tracer.Log("considering %q [%q]", commit, branches)
833837
fileHashes, err := git.fileHashes(commit, fileNames)
@@ -853,13 +857,39 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
853857
if err != nil {
854858
return nil, fmt.Errorf("failed to extract commit info: %w", err)
855859
}
856-
tracer.Log("commit date is %v", info.CommitDate)
857-
// Select the most recent commit.
858-
if ret == nil || ret.CommitDate.Before(info.CommitDate) {
859-
ret = &BaseCommit{Commit: info, Branches: branchList}
860+
tracer.Log("hashes match, commit date is %v, branches %v", info.CommitDate, branchList)
861+
ret = append(ret, &BaseCommit{Commit: info, Branches: branchList})
862+
}
863+
return git.minimizeBaseCommits(ret)
864+
}
865+
866+
func (git Git) minimizeBaseCommits(list []*BaseCommit) ([]*BaseCommit, error) {
867+
// We want to preserve commits that are present in different subsets of branches.
868+
// Then, we want to sort them topologically and break ties by date.
869+
lastCommit := map[string]*BaseCommit{}
870+
for _, item := range list {
871+
key := strings.Join(item.Branches, ":")
872+
prev, ok := lastCommit[key]
873+
if !ok {
874+
lastCommit[key] = item
875+
continue
876+
}
877+
isNewer, err := git.containedIn(item.Hash, prev.Hash)
878+
if err != nil {
879+
return nil, fmt.Errorf("topological sort step failed: %w", err)
880+
}
881+
if isNewer {
882+
lastCommit[key] = item
860883
}
861884
}
862-
return ret, nil
885+
var filtered []*BaseCommit
886+
for _, item := range lastCommit {
887+
filtered = append(filtered, item)
888+
}
889+
sort.Slice(filtered, func(i, j int) bool {
890+
return filtered[i].CommitDate.After(filtered[j].CommitDate)
891+
})
892+
return filtered, nil
863893
}
864894

865895
// fileHashes returns the blob SHA hashes for a particular list of files on a particular commit.
@@ -924,6 +954,11 @@ func (git Git) verifyHash(hash string) (bool, error) {
924954
return true, nil
925955
}
926956

957+
func (git Git) containedIn(parent, commit string) (bool, error) {
958+
_, err := git.Run("merge-base", "--is-ancestor", commit, parent)
959+
return err == nil, nil
960+
}
961+
927962
func (git Git) Diff(commitA, commitB string) ([]byte, error) {
928963
return git.Run("diff", commitA+".."+commitB)
929964
}

pkg/vcs/git_test.go

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -523,22 +523,21 @@ func TestBaseForDiff(t *testing.T) {
523523
require.NoError(t, err)
524524
// Create a different change on top of commit2.
525525
repo.Git("checkout", "-b", "branch-a")
526-
time.Sleep(time.Second)
527526
repo.commitChangeset("patch a.txt",
528527
writeFile{"a.txt", "another change to a.txt"},
529528
)
530-
// Yet the patch could only be applied to commit2
529+
// Yet the patch could only be applied to commit1 or commit2.
531530
base, err := repo.repo.BaseForDiff(diff, &debugtracer.TestTracer{T: t})
532531
require.NoError(t, err)
533-
require.NotNil(t, base)
534-
assert.Equal(t, []string{"branch-a", "master"}, base.Branches)
535-
assert.Equal(t, commit2.Hash, base.Hash)
532+
require.Len(t, base, 1)
533+
assert.Equal(t, []string{"branch-a", "master"}, base[0].Branches)
534+
assert.Equal(t, commit2.Hash, base[0].Hash)
536535
})
537536
t.Run("choose latest", func(t *testing.T) {
538537
_, err := repo.repo.SwitchCommit(commit2.Hash)
539538
require.NoError(t, err)
540-
// Wait a second and add one more commit.
541-
// (Otherwise the test might be flaky).
539+
// Wait a second before adding another commit.
540+
// Git does not remember milliseconds, so otherwise the commit sorting may be flaky.
542541
time.Sleep(time.Second)
543542
repo.Git("checkout", "-b", "branch-b")
544543
commit4 := repo.commitChangeset("unrelated commit",
@@ -547,9 +546,10 @@ func TestBaseForDiff(t *testing.T) {
547546
// Since the commit did not touch a.txt, it's the expected one.
548547
base, err := repo.repo.BaseForDiff(diff, &debugtracer.TestTracer{T: t})
549548
require.NoError(t, err)
550-
require.NotNil(t, base)
551-
assert.Equal(t, []string{"branch-b"}, base.Branches)
552-
assert.Equal(t, commit4.Hash, base.Hash)
549+
require.Len(t, base, 2)
550+
assert.Equal(t, []string{"branch-b"}, base[0].Branches)
551+
assert.Equal(t, commit4.Hash, base[0].Hash)
552+
assert.Equal(t, commit2.Hash, base[1].Hash)
553553
})
554554
t.Run("ignore unknown objects", func(t *testing.T) {
555555
// It's okay if the diff contains unknown hashes.
@@ -564,7 +564,6 @@ index f70f10e..0000000
564564
twoDiffs := append(append([]byte{}, diff...), diff2...)
565565
base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t})
566566
require.NoError(t, err)
567-
require.NotNil(t, base)
568-
assert.Equal(t, []string{"branch-b"}, base.Branches)
567+
require.Len(t, base, 2)
569568
})
570569
}

syz-cluster/pkg/triage/git.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,9 @@ func (ops *GitTreeOps) ApplySeries(commit string, patches [][]byte) error {
5858
}
5959

6060
func (ops *GitTreeOps) BaseForDiff(patch []byte, tracer debugtracer.DebugTracer) (*vcs.BaseCommit, error) {
61-
return ops.Git.BaseForDiff(patch, tracer)
61+
list, err := ops.Git.BaseForDiff(patch, tracer)
62+
if len(list) == 0 || err != nil {
63+
return nil, err
64+
}
65+
return list[0], nil
6266
}

0 commit comments

Comments
 (0)