From 3ea5d86f81b466703174dc9ecfcbe0223df2b7f7 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Wed, 18 Mar 2026 12:22:18 +0000 Subject: [PATCH] Detect AI contribution records in git-notes (git-ai standard) Add a gitnotes detector that reads authorship logs from refs/notes/ai, parsing the git-ai standard v3.0.0 format to extract tool and model information from the metadata section's prompt records. Signed-off-by: Andrew Nesbitt --- cmd/cmd.go | 2 + detection/detection.go | 1 + detection/gitnotes/gitnotes.go | 91 +++++++++++++ detection/gitnotes/gitnotes_test.go | 196 ++++++++++++++++++++++++++++ gitops/gitops.go | 77 ++++++++++- scan/scan.go | 1 + scan/scan_test.go | 96 ++++++++++++++ 7 files changed, 459 insertions(+), 5 deletions(-) create mode 100644 detection/gitnotes/gitnotes.go create mode 100644 detection/gitnotes/gitnotes_test.go diff --git a/cmd/cmd.go b/cmd/cmd.go index ef384b7..4d41d6f 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -8,6 +8,7 @@ import ( "github.com/chaoss/ai-detection-action/detection" "github.com/chaoss/ai-detection-action/detection/coauthor" "github.com/chaoss/ai-detection-action/detection/committer" + "github.com/chaoss/ai-detection-action/detection/gitnotes" "github.com/chaoss/ai-detection-action/detection/message" "github.com/chaoss/ai-detection-action/detection/toolmention" "github.com/chaoss/ai-detection-action/output" @@ -28,6 +29,7 @@ func allDetectors() []detection.Detector { return []detection.Detector{ &committer.Detector{}, &coauthor.Detector{}, + &gitnotes.Detector{}, &message.Detector{}, &toolmention.Detector{}, } diff --git a/detection/detection.go b/detection/detection.go index 04cacfd..41f0821 100644 --- a/detection/detection.go +++ b/detection/detection.go @@ -36,6 +36,7 @@ type Input struct { CommitHash string CommitEmail string CommitMessage string + Notes string // Content from refs/notes/ai, if any Text string // For text-only scans (PR body, comments) RepoPath string } diff --git a/detection/gitnotes/gitnotes.go 
b/detection/gitnotes/gitnotes.go
new file mode 100644
index 0000000..a0a1f49
--- /dev/null
+++ b/detection/gitnotes/gitnotes.go
@@ -0,0 +1,112 @@
+package gitnotes
+
+import (
+	"encoding/json"
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/chaoss/ai-detection-action/detection"
+)
+
+// metadata represents the JSON metadata section of a git-ai authorship log.
+type metadata struct {
+	SchemaVersion string                  `json:"schema_version"`
+	Prompts       map[string]promptRecord `json:"prompts"`
+}
+
+// promptRecord is one entry of the "prompts" object. Only the agent
+// identification is decoded; all other fields are ignored.
+type promptRecord struct {
+	AgentID agentID `json:"agent_id"`
+}
+
+// agentID names the tool and model recorded for a prompt.
+type agentID struct {
+	Tool  string `json:"tool"`
+	Model string `json:"model"`
+}
+
+// Detector reports AI tools recorded in a git-ai authorship log.
+type Detector struct{}
+
+// Name returns the identifier stored in the Detector field of each finding.
+func (d *Detector) Name() string { return "gitnotes" }
+
+// Detect parses input.Notes as a git-ai authorship log: an attestation
+// section, a line containing only "---", then a JSON metadata object. It
+// returns one high-confidence finding per distinct tool named in the
+// metadata prompts, or nil when the note is empty, has no separator, is not
+// valid JSON, or does not declare an "authorship/" schema version.
+func (d *Detector) Detect(input detection.Input) []detection.Finding {
+	if input.Notes == "" {
+		return nil
+	}
+
+	parts := strings.SplitN(input.Notes, "\n---\n", 2)
+	if len(parts) != 2 {
+		return nil
+	}
+
+	attestation := parts[0]
+	jsonSection := parts[1]
+
+	var meta metadata
+	if err := json.Unmarshal([]byte(jsonSection), &meta); err != nil {
+		return nil
+	}
+
+	if !strings.HasPrefix(meta.SchemaVersion, "authorship/") {
+		return nil
+	}
+
+	// Count attributed files from the attestation section
+	fileCount := 0
+	for _, line := range strings.Split(attestation, "\n") {
+		if line == "" {
+			continue
+		}
+		// File paths start at column 0, attestation entries are indented
+		if !strings.HasPrefix(line, " ") {
+			fileCount++
+		}
+	}
+
+	// Map iteration order is random, so visit prompts by sorted ID. This
+	// keeps the order of findings, and the model reported for a tool that
+	// appears in several prompts, stable across runs.
+	promptIDs := make([]string, 0, len(meta.Prompts))
+	for id := range meta.Prompts {
+		promptIDs = append(promptIDs, id)
+	}
+	sort.Strings(promptIDs)
+
+	seen := map[string]bool{}
+	var findings []detection.Finding
+
+	for _, id := range promptIDs {
+		prompt := meta.Prompts[id]
+		tool := prompt.AgentID.Tool
+		if tool == "" || seen[tool] {
+			continue
+		}
+		seen[tool] = true
+
+		detail := fmt.Sprintf("git-ai authorship log (refs/notes/ai) attributes code to %s", tool)
+		if prompt.AgentID.Model != "" {
+			detail += fmt.Sprintf(" (model: %s)", prompt.AgentID.Model)
+		}
+		if fileCount > 0 {
+			detail += fmt.Sprintf(", %d file(s) attributed", fileCount)
+		}
+
+		findings = append(findings, detection.Finding{
+			Detector:   d.Name(),
+			Tool:       tool,
+			Confidence: detection.ConfidenceHigh,
+			Detail:     detail,
+		})
+	}
+
+	return findings
+}
diff --git a/detection/gitnotes/gitnotes_test.go b/detection/gitnotes/gitnotes_test.go
new file mode 100644
index 0000000..5f45897
--- /dev/null
+++ b/detection/gitnotes/gitnotes_test.go
@@ -0,0 +1,181 @@
+package gitnotes
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/chaoss/ai-detection-action/detection"
+)
+
+func TestDetect(t *testing.T) {
+	d := &Detector{}
+
+	validNote := `src/main.rs
+  abcd1234abcd1234 1-10,15-20
+src/lib.rs
+  abcd1234abcd1234 1-50
+---
+{
+  "schema_version": "authorship/3.0.0",
+  "base_commit_sha": "7734793b756b3921c88db5375a8c156e9532447b",
+  "prompts": {
+    "abcd1234abcd1234": {
+      "agent_id": {
+        "tool": "cursor",
+        "id": "6ef2299e-a67f-432b-aa80-3d2fb4d28999",
+        "model": "claude-4.5-opus"
+      },
+      "total_additions": 25,
+      "total_deletions": 5,
+      "accepted_lines": 20,
+      "overriden_lines": 0
+    }
+  }
+}`
+
+	multiToolNote := `src/main.rs
+  abcd1234abcd1234 1-10
+  efgh5678efgh5678 25,30-35
+---
+{
+  "schema_version": "authorship/3.0.0",
+  "base_commit_sha": "abc123",
+  "prompts": {
+    "abcd1234abcd1234": {
+      "agent_id": {
+        "tool": "cursor",
+        "model": "claude-4.5-opus"
+      },
+      "total_additions": 10,
+      "total_deletions": 0,
+      "accepted_lines": 10,
+      "overriden_lines": 0
+    },
+    "efgh5678efgh5678": {
+      "agent_id": {
+        "tool": "claude-code",
+        "model": "claude-3-sonnet"
+      },
+      "total_additions": 6,
+      "total_deletions": 0,
+      "accepted_lines": 6,
+      "overriden_lines": 0
+    }
+  }
+}`
+
+	tests := []struct {
+		name      string
+		notes     string
+		wantTools []string
+	}{
+		{
+			name:      "valid git-ai note with single tool",
+			notes:     validNote,
+			wantTools: []string{"cursor"},
+		},
+		{
+			name:      "multiple tools in note",
+			notes:     multiToolNote,
+			wantTools: []string{"cursor", "claude-code"},
+		},
+		{
+			name:      "empty notes",
+			notes:     "",
+			wantTools: nil,
+		},
+		{
+			name:      "no separator",
+			notes:     "just some random text in notes",
+			wantTools: nil,
+		},
+		{
+			name:      "invalid JSON in metadata",
+			notes:     "src/main.rs\n  abc 1-10\n---\nnot json",
+			wantTools: nil,
+		},
+		{
+			name:      "wrong schema version",
+			notes:     "src/main.rs\n  abc 1-10\n---\n{\"schema_version\": \"wrong/1.0\", \"prompts\": {}}",
+			wantTools: nil,
+		},
+		{
+			name:      "no tool in agent_id",
+			notes:     "src/main.rs\n  abc 1-10\n---\n{\"schema_version\": \"authorship/3.0.0\", \"prompts\": {\"abc\": {\"agent_id\": {\"tool\": \"\"}}}}",
+			wantTools: nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			findings := d.Detect(detection.Input{Notes: tt.notes})
+			gotTools := make([]string, len(findings))
+			for i, f := range findings {
+				gotTools[i] = f.Tool
+				if f.Confidence != detection.ConfidenceHigh {
+					t.Errorf("confidence = %d, want %d", f.Confidence, detection.ConfidenceHigh)
+				}
+				if f.Detector != "gitnotes" {
+					t.Errorf("detector = %q, want %q", f.Detector, "gitnotes")
+				}
+			}
+
+			if len(gotTools) == 0 {
+				gotTools = nil
+			}
+
+			if len(gotTools) != len(tt.wantTools) {
+				t.Errorf("tools = %v, want %v", gotTools, tt.wantTools)
+				return
+			}
+
+			// Findings follow sorted prompt IDs, so the order is fixed.
+			for i, g := range gotTools {
+				if g != tt.wantTools[i] {
+					t.Errorf("tools = %v, want %v", gotTools, tt.wantTools)
+					break
+				}
+			}
+		})
+	}
+}
+
+func TestDetectDetailIncludesModel(t *testing.T) {
+	d := &Detector{}
+	note := `src/main.rs
+  abcd1234abcd1234 1-10
+---
+{
+  "schema_version": "authorship/3.0.0",
+  "base_commit_sha": "abc",
+  "prompts": {
+    "abcd1234abcd1234": {
+      "agent_id": {
+        "tool": "cursor",
+        "model": "claude-4.5-opus"
+      },
+      "total_additions": 10,
+      "total_deletions": 0,
+      "accepted_lines": 10,
+      "overriden_lines": 0
+    }
+  }
+}`
+
+	findings := d.Detect(detection.Input{Notes: note})
+	if len(findings) != 1 {
+		t.Fatalf("expected 1 finding, got %d", len(findings))
+	}
+
+	if findings[0].Detail == "" {
+		t.Error("expected non-empty detail")
+	}
+
+	if !strings.Contains(findings[0].Detail, "claude-4.5-opus") {
+		t.Errorf("detail should mention model, got: %s", findings[0].Detail)
+	}
+
+	if !strings.Contains(findings[0].Detail, "1 file(s)") {
+		t.Errorf("detail should mention file count, got: %s", findings[0].Detail)
+	}
+}
diff --git a/gitops/gitops.go b/gitops/gitops.go
index c76c693..22e3d7a 100644
--- a/gitops/gitops.go
+++ b/gitops/gitops.go
@@ -2,6 +2,7 @@ package gitops
 
 import (
 	"fmt"
+	"io"
 	"strings"
 
 	"github.com/go-git/go-git/v5"
@@ -15,15 +16,18 @@ type Commit struct {
 	AuthorEmail    string
 	CommitterEmail string
 	Message        string
+	Notes          string // Content from refs/notes/ai, if any
 }
 
-func commitFromObject(c *object.Commit) Commit {
-	return Commit{
+func commitFromObject(c *object.Commit, repo *git.Repository) Commit {
+	commit := Commit{
 		Hash:           c.Hash.String(),
 		AuthorEmail:    c.Author.Email,
 		CommitterEmail: c.Committer.Email,
 		Message:        c.Message,
 	}
+	commit.Notes = readNote(repo, c.Hash)
+	return commit
 }
 
 // GetCommit reads a single commit by hash from the repository at repoPath.
@@ -39,7 +43,7 @@ func GetCommit(repoPath string, hash string) (Commit, error) {
 		return Commit{}, fmt.Errorf("reading commit %s: %w", hash, err)
 	}
 
-	return commitFromObject(c), nil
+	return commitFromObject(c, repo), nil
 }
 
 // ListCommits returns commits in the given range.
The range format is "BASE..HEAD"
@@ -113,7 +117,7 @@ func listAllCommits(repo *git.Repository) ([]Commit, error) {
 
 	var commits []Commit
 	err = iter.ForEach(func(c *object.Commit) error {
-		commits = append(commits, commitFromObject(c))
+		commits = append(commits, commitFromObject(c, repo))
 		return nil
 	})
 	if err != nil {
@@ -123,6 +127,86 @@ func listAllCommits(repo *git.Repository) ([]Commit, error) {
 	return commits, nil
 }
 
+// notesRefs lists the git-notes namespaces we check for AI authorship logs,
+// in priority order. refs/notes/ai is the git-ai standard namespace.
+var notesRefs = []string{
+	"refs/notes/ai",
+}
+
+// readNote reads the git note attached to commitHash under the AI notes refs.
+// Refs are tried in notesRefs order and the first readable note wins.
+// Returns empty string when no note exists or none can be read.
+func readNote(repo *git.Repository, commitHash plumbing.Hash) string {
+	for _, refName := range notesRefs {
+		if content, ok := readNoteFromRef(repo, refName, commitHash); ok {
+			return content
+		}
+	}
+
+	return ""
+}
+
+// readNoteFromRef returns the note for commitHash stored under refName. The
+// boolean is false when the ref, the note, or any object on the way to it is
+// missing or cannot be read.
+func readNoteFromRef(repo *git.Repository, refName string, commitHash plumbing.Hash) (string, bool) {
+	ref, err := repo.Reference(plumbing.ReferenceName(refName), true)
+	if err != nil {
+		return "", false
+	}
+
+	notesCommit, err := repo.CommitObject(ref.Hash())
+	if err != nil {
+		return "", false
+	}
+
+	tree, err := notesCommit.Tree()
+	if err != nil {
+		return "", false
+	}
+
+	entry, err := findNoteEntry(tree, commitHash.String())
+	if err != nil {
+		return "", false
+	}
+
+	blob, err := repo.BlobObject(entry.Hash)
+	if err != nil {
+		return "", false
+	}
+
+	reader, err := blob.Reader()
+	if err != nil {
+		return "", false
+	}
+
+	content, err := io.ReadAll(reader)
+	closeErr := reader.Close()
+	if err != nil || closeErr != nil {
+		return "", false
+	}
+
+	return string(content), true
+}
+
+// findNoteEntry locates the blob holding the note for hashStr, the hex hash
+// of the annotated commit. Git names note blobs by that hash, flat while the
+// notes tree is small and with leading pairs of hex digits turned into
+// directories (ab/cdef..., then ab/cd/ef..., and so on) as it grows, so each
+// fanout depth is tried in turn. The last lookup error is returned when no
+// layout matches.
+func findNoteEntry(tree *object.Tree, hashStr string) (*object.TreeEntry, error) {
+	path := hashStr
+	entry, err := tree.FindEntry(path)
+	for depth := 1; err != nil && 2*depth < len(hashStr); depth++ {
+		// Slashes already inserted shift the next boundary by depth-1.
+		cut := 3*depth - 1
+		path = path[:cut] + "/" + path[cut:]
+		entry, err = tree.FindEntry(path)
+	}
+	return entry, err
+}
+
 func listCommitRange(repo *git.Repository, base, head plumbing.Hash) ([]Commit, error) {
 	// Collect all commits reachable from head
 	headCommit, err := repo.CommitObject(head)
@@ -150,7 +234,7 @@ func listCommitRange(repo *git.Repository, base, head plumbing.Hash) ([]Commit,
 		if baseExclude[c.Hash] {
 			return nil
 		}
-		commits = append(commits, commitFromObject(c))
+		commits = append(commits, commitFromObject(c, repo))
 		return nil
 	})
 	if err != nil {
diff --git a/scan/scan.go b/scan/scan.go
index 84fa4fe..dde02c7 100644
--- a/scan/scan.go
+++ b/scan/scan.go
@@ -66,6 +66,7 @@ func scanOneCommit(c gitops.Commit, detectors []detection.Detector) CommitResult
 		CommitHash:    c.Hash,
 		CommitEmail:   c.CommitterEmail,
 		CommitMessage: c.Message,
+		Notes:         c.Notes,
 	}
 
 	var findings []detection.Finding
diff --git a/scan/scan_test.go b/scan/scan_test.go
index 554fa9d..7bdd2f0 100644
--- a/scan/scan_test.go
+++ b/scan/scan_test.go
@@ -2,6 +2,7 @@ package scan
 
 import (
 	"os"
+	"os/exec"
 	"path/filepath"
 	"testing"
 	"time"
@@ -9,6 +10,7 @@ import (
 	"github.com/chaoss/ai-detection-action/detection"
 	"github.com/chaoss/ai-detection-action/detection/coauthor"
 	"github.com/chaoss/ai-detection-action/detection/committer"
+	"github.com/chaoss/ai-detection-action/detection/gitnotes"
 	"github.com/chaoss/ai-detection-action/detection/message"
 	"github.com/chaoss/ai-detection-action/detection/toolmention"
 	"github.com/go-git/go-git/v5"
@@ -19,6 +21,7
@@ func allDetectors() []detection.Detector {
 	return []detection.Detector{
 		&committer.Detector{},
 		&coauthor.Detector{},
+		&gitnotes.Detector{},
 		&message.Detector{},
 		&toolmention.Detector{},
 	}
@@ -176,6 +179,107 @@ func TestScanTextNoFindings(t *testing.T) {
 	}
 }
 
+// TestScanCommitWithGitNotes commits a file with go-git, attaches a git-ai
+// note under refs/notes/ai using the git CLI, and checks that ScanCommit
+// reports a high-confidence gitnotes finding for the tool named in the note.
+func TestScanCommitWithGitNotes(t *testing.T) {
+	// The note is attached with the git CLI, so skip where git is missing.
+	if _, err := exec.LookPath("git"); err != nil {
+		t.Skip("git executable not found in PATH")
+	}
+
+	dir := t.TempDir()
+
+	repo, err := git.PlainInit(dir, false)
+	if err != nil {
+		t.Fatalf("init repo: %v", err)
+	}
+
+	wt, err := repo.Worktree()
+	if err != nil {
+		t.Fatalf("worktree: %v", err)
+	}
+
+	filename := filepath.Join(dir, "main.rs")
+	if err := os.WriteFile(filename, []byte("fn main() {}"), 0644); err != nil {
+		t.Fatalf("write file: %v", err)
+	}
+	if _, err := wt.Add("main.rs"); err != nil {
+		t.Fatalf("add: %v", err)
+	}
+
+	hash, err := wt.Commit("add main", &git.CommitOptions{
+		Author: &object.Signature{
+			Name:  "Test",
+			Email: "human@example.com",
+			When:  time.Now(),
+		},
+		Committer: &object.Signature{
+			Name:  "Test",
+			Email: "human@example.com",
+			When:  time.Now(),
+		},
+	})
+	if err != nil {
+		t.Fatalf("commit: %v", err)
+	}
+
+	// Attach a git-ai note using the git CLI
+	noteContent := `src/main.rs
+  abcd1234abcd1234 1
+---
+{
+  "schema_version": "authorship/3.0.0",
+  "base_commit_sha": "0000000000000000000000000000000000000000",
+  "prompts": {
+    "abcd1234abcd1234": {
+      "agent_id": {
+        "tool": "cursor",
+        "model": "claude-4.5-opus"
+      },
+      "total_additions": 1,
+      "total_deletions": 0,
+      "accepted_lines": 1,
+      "overriden_lines": 0
+    }
+  }
+}`
+
+	// Configure git identity for the notes commit (CI runners may not have one)
+	for _, kv := range [][2]string{{"user.name", "Test"}, {"user.email", "test@test.com"}} {
+		cfg := exec.Command("git", "config", kv[0], kv[1])
+		cfg.Dir = dir
+		if out, err := cfg.CombinedOutput(); err != nil {
+			t.Fatalf("git config %s: %v\n%s", kv[0], err, out)
+		}
+	}
+
+	cmd := exec.Command("git", "notes", "--ref=refs/notes/ai", "add", "-m", noteContent, hash.String())
+	cmd.Dir = dir
+	if out, err := cmd.CombinedOutput(); err != nil {
+		t.Fatalf("git notes add: %v\n%s", err, out)
+	}
+
+	detectors := allDetectors()
+	result, err := ScanCommit(dir, hash.String(), detectors)
+	if err != nil {
+		t.Fatalf("ScanCommit: %v", err)
+	}
+
+	foundGitNotes := false
+	for _, f := range result.Findings {
+		if f.Detector == "gitnotes" && f.Tool == "cursor" {
+			foundGitNotes = true
+			if f.Confidence != detection.ConfidenceHigh {
+				t.Errorf("confidence = %d, want high(%d)", f.Confidence, detection.ConfidenceHigh)
+			}
+		}
+	}
+	if !foundGitNotes {
+		t.Errorf("expected gitnotes finding for cursor, got findings: %v", result.Findings)
+	}
+}
+
 func TestReportSummaryByConfidence(t *testing.T) {
 	dir, hashes := initTestRepo(t)
 	detectors := allDetectors()