Skip to content

Commit de8fcbd

Browse files
leifericf and claude committed
feat(git): extract issue references from commit messages
Extract URLs, GitHub-style (#123), and Jira-style (PROJ-456) issue references from commit messages and store them as :commit/issue-refs (cardinality-many). Extracted deterministically during import via regex. Existing commits are backfilled during sync/update. Adds two named queries: commits-by-issue and issue-refs. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 65c2613 commit de8fcbd

7 files changed

Lines changed: 114 additions & 1 deletion

File tree

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{:name "commits-by-issue"
2+
:description "Commits that reference a given issue (exact match on an extracted key or URL)"
3+
:query [:find ?sha ?msg ?name ?date
4+
:in $ ?ref
5+
:where
6+
[?commit :commit/issue-refs ?ref]
7+
[?commit :git/sha ?sha]
8+
[?commit :commit/message ?msg]
9+
[?commit :commit/author ?person]
10+
[?person :person/name ?name]
11+
[?commit :commit/authored-at ?date]]
12+
:params {"ref" "Issue reference to search for, e.g. #123 or PROJ-456"}}

resources/queries/index.edn

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"co-changed-files"
1414
"commit-kinds"
1515
"commit-spread"
16+
"commits-by-issue"
1617
"complex-hotspots"
1718
"cross-dir-imports"
1819
"dependency-drift"
@@ -30,6 +31,7 @@
3031
"fix-authors"
3132
"hotspots"
3233
"import-hotspots"
34+
"issue-refs"
3335
"llm-cost-by-file"
3436
"llm-cost-by-model"
3537
"llm-cost-total"

resources/queries/issue-refs.edn

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{:name "issue-refs"
2+
:description "All unique issue references with commit count"
3+
:query [:find ?ref (count ?commit)
4+
:where
5+
[?commit :commit/issue-refs ?ref]]}

resources/schema/core.edn

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@
8383
:db/cardinality :db.cardinality/one
8484
:db/doc "Total lines deleted across all files in this commit"}
8585

86+
{:db/ident :commit/issue-refs
87+
:db/valueType :db.type/string
88+
:db/cardinality :db.cardinality/many
89+
:db/doc "Issue references extracted from commit message — keys (#123, PROJ-456) and URLs"}
90+
8691
;; --- Persons ---
8792
{:db/ident :person/email
8893
:db/valueType :db.type/string

src/noumenon/git.clj

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,26 @@
165165
:message (str/trim body)}
166166
numstat)))))
167167

168+
;; --- Issue reference extraction ---
169+
170+
(def ^:private issue-ref-patterns
  "Regexes applied to a commit message to find issue references.
  No pattern contains a capture group, so `re-seq` yields plain strings."
  [;; URLs: stop at whitespace, closing brackets and quotes. The final
   ;; character class additionally rejects sentence punctuation, so that
   ;; \"see https://host/issues/1.\" does not capture the trailing period.
   ;; Punctuation inside the URL (dots, query strings) is still allowed.
   #"https?://[^\s\)\]>\"']*[^\s\)\]>\"'.,;:!?]"
   ;; Jira-style keys (PROJ-123). The lookbehind rejects a preceding
   ;; alphanumeric or `/`, so keys inside words or URL paths are skipped.
   #"(?<![A-Za-z0-9/])[A-Z][A-Z0-9]+-\d+"
   ;; GitHub-style numbers (#123) at start of text, after whitespace, or after `(`.
   #"(?<=\s|^|\()#\d+"
   ;; GitHub alternative form (GH-42), same leading-context rule.
   #"(?<=\s|^|\()GH-\d+"])
176+
177+
(defn extract-issue-refs
  "Collect the distinct issue references (URLs and keys such as #123 or
  PROJ-456) that `issue-ref-patterns` finds in `text`.
  Yields a sorted set of strings; yields nil for blank text or when
  nothing matches."
  [text]
  (if (str/blank? text)
    nil
    (let [collect (comp (mapcat (fn [pattern] (re-seq pattern text)))
                        (map str/trim))]
      ;; not-empty maps an empty set to nil and passes a non-empty set through.
      (not-empty (into (sorted-set) collect issue-ref-patterns)))))
187+
168188
;; --- Commit classification ---
169189

170190
(def ^:private conventional-prefix-re
@@ -241,7 +261,9 @@
241261
(pos? (or deletions 0)) (assoc :commit/deletions deletions)
242262
(seq parent-shas) (assoc :commit/parents
243263
(mapv #(vector :git/sha %) parent-shas))
244-
(seq file-tids) (assoc :commit/changed-files file-tids))]
264+
(seq file-tids) (assoc :commit/changed-files file-tids)
265+
(extract-issue-refs message) (assoc :commit/issue-refs
266+
(extract-issue-refs message)))]
245267
(-> (person-tx-data author-tid author-email author-name
246268
committer-tid committer-email committer-name)
247269
(into [commit

src/noumenon/sync.clj

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,27 @@
195195
:tx/source :deterministic})}))
196196
(count stale)))
197197

198+
(defn backfill-issue-refs!
  "Populate :commit/issue-refs for commits that have a message but no refs yet.
  Each message is scanned with `git/extract-issue-refs`; commits whose message
  yields nothing are left untouched. All assertions go into a single
  transaction together with the tx metadata map. Returns the number of
  commits updated."
  [conn]
  (let [candidates (d/q '[:find ?e ?msg
                          :where
                          [?e :commit/message ?msg]
                          (not [?e :commit/issue-refs])]
                        (d/db conn))
        tx-maps    (vec (for [[eid msg] candidates
                              :let  [refs (git/extract-issue-refs msg)]
                              :when refs]
                          {:db/id eid :commit/issue-refs refs}))
        tx-meta    {:db/id     "datomic.tx"
                    :tx/op     :import
                    :tx/source :deterministic}]
    ;; Skip the transaction entirely when there is nothing to assert.
    (when-not (empty? tx-maps)
      (d/transact conn {:tx-data (conj tx-maps tx-meta)}))
    (count tx-maps)))
218+
198219
;; --- Sync orchestration ---
199220

200221
(defn update-repo!
@@ -229,6 +250,9 @@
229250
reclass-n (reclassify-commits! conn)
230251
_ (when (pos? reclass-n)
231252
(log! (str "Reclassified " reclass-n " commit kinds")))
253+
issues-n (backfill-issue-refs! conn)
254+
_ (when (pos? issues-n)
255+
(log! (str "Extracted issue refs for " issues-n " commits")))
232256
files-r (files/import-files! conn repo-path repo-uri)
233257
post-r (when (or fresh?
234258
(seq (:added changes))

test/noumenon/git_test.clj

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,49 @@
186186
(is (nil? (:commit/parents entity)))
187187
(is (nil? (:commit/changed-files entity)))))
188188

189+
;; --- Issue reference extraction ---
190+
191+
(deftest extract-issue-refs-github-style
  ;; Parenthesised #N references are each extracted.
  (let [message "fix(auth): resolve login bug (#123) and (#456)"
        found   (git/extract-issue-refs message)]
    (is (= #{"#123" "#456"} found))))
194+
195+
(deftest extract-issue-refs-jira-style
  ;; PROJECT-N keys are found at the start of the message and mid-sentence.
  (let [message "PROJ-42 implement feature, see also PROJ-999"
        found   (git/extract-issue-refs message)]
    (is (= #{"PROJ-42" "PROJ-999"} found))))
198+
199+
(deftest extract-issue-refs-urls
  ;; A full issue URL is extracted as a single reference.
  (let [message "Closes https://github.com/foo/bar/issues/123"
        found   (git/extract-issue-refs message)]
    (is (= #{"https://github.com/foo/bar/issues/123"} found))))
202+
203+
(deftest extract-issue-refs-mixed
  ;; One message carrying a GitHub number, a Jira key and a URL yields all three kinds.
  (let [message "Fix #55 (JIRA-100) see https://jira.example.com/browse/JIRA-100"
        refs    (git/extract-issue-refs message)
        https?  (fn [r] (str/starts-with? r "https://"))]
    (is (contains? refs "#55"))
    (is (contains? refs "JIRA-100"))
    (is (some https? refs))))
208+
209+
(deftest extract-issue-refs-none
  ;; Reference-free text, nil and the empty string all produce nil.
  (doseq [input ["chore: bump deps" nil ""]]
    (is (nil? (git/extract-issue-refs input)))))
213+
214+
(deftest tx-data-includes-issue-refs
  ;; The tx map carrying :git/sha exposes the refs extracted from the message.
  (let [stamp  #inst "2024-01-01"
        commit {:sha             "abc"
                :parent-shas     []
                :message         "fix(api): handle timeout (#42, PROJ-7)"
                :author-name     "A"
                :author-email    "a@x.com"
                :authored-at     stamp
                :committer-name  "A"
                :committer-email "a@x.com"
                :committed-at    stamp
                :changed-files   []}
        entity (->> (git/commit->tx-data "test://repo" commit)
                    (filter :git/sha)
                    first)]
    (is (= #{"#42" "PROJ-7"} (:commit/issue-refs entity)))))
222+
223+
(deftest tx-data-omits-issue-refs-when-none
  ;; A message without references leaves :commit/issue-refs off the tx map.
  (let [stamp  #inst "2024-01-01"
        commit {:sha             "abc"
                :parent-shas     []
                :message         "chore: update deps"
                :author-name     "A"
                :author-email    "a@x.com"
                :authored-at     stamp
                :committer-name  "A"
                :committer-email "a@x.com"
                :committed-at    stamp
                :changed-files   []}
        entity (->> (git/commit->tx-data "test://repo" commit)
                    (filter :git/sha)
                    first)]
    (is (nil? (:commit/issue-refs entity)))))
231+
189232
;; --- Rename path resolution (regression: Flask import crash) ---
190233

191234
(deftest resolve-rename-path-directory

0 commit comments

Comments
 (0)