@@ -232,8 +232,7 @@ func (git *gitRepo) initRepo(reason error) error {
232232}
233233
234234func (git * gitRepo ) Contains (commit string ) (bool , error ) {
235- _ , err := git .Run ("merge-base" , "--is-ancestor" , commit , HEAD )
236- return err == nil , nil
235+ return git .containedIn (HEAD , commit )
237236}
238237
239238const gitDateFormat = "Mon Jan 2 15:04:05 2006 -0700"
@@ -753,9 +752,11 @@ type BaseCommit struct {
753752 Branches []string
754753}
755754
756- // BaseForDiff selects the most recent commit that could have been the base commit
755+ // BaseForDiff returns a list of commits that could have been the base commit
757756// for the specified git patch.
758- func (git Git ) BaseForDiff (diff []byte , tracer debugtracer.DebugTracer ) (* BaseCommit , error ) {
757+ // The returned list is minimized to only contain the commits that are represented in different
758+ // subsets of branches.
759+ func (git Git ) BaseForDiff (diff []byte , tracer debugtracer.DebugTracer ) ([]* BaseCommit , error ) {
759760 // We can't just query git log with --find-object=HASH because that will only return
760761 // the revisions where the hashed content was introduced or removed, while what we actually
761762 // want is the latest revision(s) where the content modified in the diff is still in place.
@@ -771,7 +772,7 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
771772 "--no-renames" ,
772773 // Note that we cannot accelerate it by specifying "--since"
773774 "-n" , "100" ,
774- `--format=%P` ,
775+ `--format=%H:% P` ,
775776 }
776777 var fileNames []string
777778 nameToHash := map [string ]string {}
@@ -812,22 +813,25 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
812813 for s .Scan () {
813814 // TODO: we can further reduce the search space by adding "--raw" to args
814815 // and only considering the commits that introduce the blobs from the diff.
815- firstParent , _ , _ := strings .Cut (s .Text (), " " )
816- if firstParent == "" {
817- continue
816+ commit , parents , _ := strings .Cut (s .Text (), ":" )
817+ // Focus on the first parent.
818+ candidate , _ , _ := strings .Cut (parents , " " )
819+ if candidate == "" {
820+ // For the first commit, there's no parent.
821+ candidate = commit
818822 }
819823 // Only focus on branches that are still alive.
820824 const cutOffDays = 60
821- list , err := git .branchesThatContain (firstParent , time .Now ().Add (- time .Hour * 24 * cutOffDays ))
825+ list , err := git .branchesThatContain (candidate , time .Now ().Add (- time .Hour * 24 * cutOffDays ))
822826 if err != nil {
823827 return nil , fmt .Errorf ("failed to query branches: %w" , err )
824828 }
825829 for _ , info := range list {
826- record (firstParent , info .Branch )
830+ record (candidate , info .Branch )
827831 record (info .Commit , info .Branch )
828832 }
829833 }
830- var ret * BaseCommit
834+ var ret [] * BaseCommit
831835 for commit , branches := range commitBranches {
832836 tracer .Log ("considering %q [%q]" , commit , branches )
833837 fileHashes , err := git .fileHashes (commit , fileNames )
@@ -853,13 +857,39 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
853857 if err != nil {
854858 return nil , fmt .Errorf ("failed to extract commit info: %w" , err )
855859 }
856- tracer .Log ("commit date is %v" , info .CommitDate )
857- // Select the most recent commit.
858- if ret == nil || ret .CommitDate .Before (info .CommitDate ) {
859- ret = & BaseCommit {Commit : info , Branches : branchList }
860+ tracer .Log ("hashes match, commit date is %v, branches %v" , info .CommitDate , branchList )
861+ ret = append (ret , & BaseCommit {Commit : info , Branches : branchList })
862+ }
863+ return git .minimizeBaseCommits (ret )
864+ }
865+
866+ func (git Git ) minimizeBaseCommits (list []* BaseCommit ) ([]* BaseCommit , error ) {
867+ // We want to preserve commits that are present in different subsets of branches.
868+ // Then, we want to sort them topologically and break ties by date.
869+ lastCommit := map [string ]* BaseCommit {}
870+ for _ , item := range list {
871+ key := strings .Join (item .Branches , ":" )
872+ prev , ok := lastCommit [key ]
873+ if ! ok {
874+ lastCommit [key ] = item
875+ continue
876+ }
877+ isNewer , err := git .containedIn (item .Hash , prev .Hash )
878+ if err != nil {
879+ return nil , fmt .Errorf ("topological sort step failed: %w" , err )
880+ }
881+ if isNewer {
882+ lastCommit [key ] = item
860883 }
861884 }
862- return ret , nil
885+ var filtered []* BaseCommit
886+ for _ , item := range lastCommit {
887+ filtered = append (filtered , item )
888+ }
889+ sort .Slice (filtered , func (i , j int ) bool {
890+ return filtered [i ].CommitDate .After (filtered [j ].CommitDate )
891+ })
892+ return filtered , nil
863893}
864894
865895// fileHashes returns the blob SHA hashes for a particular list of files on a particular commit.
@@ -924,6 +954,11 @@ func (git Git) verifyHash(hash string) (bool, error) {
924954 return true , nil
925955}
926956
957+ func (git Git ) containedIn (parent , commit string ) (bool , error ) {
958+ _ , err := git .Run ("merge-base" , "--is-ancestor" , commit , parent )
959+ return err == nil , nil
960+ }
961+
927962func (git Git ) Diff (commitA , commitB string ) ([]byte , error ) {
928963 return git .Run ("diff" , commitA + ".." + commitB )
929964}
0 commit comments