Skip to content

Commit 748ce1b

Browse files
leifericf and claude committed
feat(analyze): add --reanalyze flag for scoped re-analysis
Allow users to explicitly re-analyze files that already have semantic metadata, without manually retracting Datomic attributes. Supports four scopes: all, prompt-changed, model-changed, and stale (files modified by commits since their last analysis). Available in both CLI and MCP. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2259f77 commit 748ce1b

7 files changed

Lines changed: 181 additions & 47 deletions

File tree

CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ This project has its own MCP server (`noumenon`) that provides a knowledge graph
2222
| `noumenon_ask` | Ask a natural-language question — AI-powered iterative querying |
2323
| `noumenon_import` | Import git history and file structure (idempotent — safe to re-run) |
2424
| `noumenon_update` | Sync knowledge graph with latest git state (import + enrich; pass `analyze=true` for LLM analysis) |
25-
| `noumenon_analyze` | Run LLM analysis on files not yet analyzed — enriches the graph with semantic metadata |
25+
| `noumenon_analyze` | Run LLM analysis on files — by default only unanalyzed; pass `reanalyze` for re-analysis (all, prompt-changed, model-changed, stale) |
2626
| `noumenon_enrich` | Extract cross-file import/dependency graph deterministically (no LLM calls) |
2727
| `noumenon_list_databases` | List all noumenon databases with entity counts, pipeline stages, and cost |
2828
| `noumenon_digest` | Run the full pipeline: import, enrich, analyze, benchmark (each step idempotent, skippable) |

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ The CLI and [MCP](https://modelcontextprotocol.io) server expose the same capabi
146146
| Command | CLI | MCP tool | Description |
147147
|---|---|---|---|
148148
| Import | `import <path>` | `noumenon_import` | Import git history and file structure |
149-
| Analyze | `analyze <path>` | `noumenon_analyze` | Enrich files with LLM semantic metadata |
149+
| Analyze | `analyze <path>` | `noumenon_analyze` | Enrich files with LLM semantic metadata (`--reanalyze` for re-analysis) |
150150
| Enrich | `enrich <path>` | `noumenon_enrich` | Extract cross-file import graph (no LLM) |
151151
| Update | `update <path>` | `noumenon_update` | Sync knowledge graph with latest git state |
152152
| Digest | `digest <path>` | `noumenon_digest` | Run full pipeline: import, enrich, analyze, benchmark |

src/noumenon/analyze.clj

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,64 @@
306306
(log! (str "Skipping " (count sensitive) " sensitive file(s) from analysis")))
307307
(sort-by :file/path safe)))
308308

309+
;; Scope keywords accepted by `files-for-reanalysis` (checked in its :pre).
(def ^:private valid-reanalyze-scopes
  #{:all
    :prompt-changed
    :model-changed
    :stale})
311+
312+
(defn files-for-reanalysis
  "Return already-analyzed files (entities with a :sem/summary) matching `scope`.

  Scopes:
    :all            - every file that has a :sem/summary
    :prompt-changed - the tx that asserted the summary has a :prov/prompt-hash
                      different from (:prompt-hash opts)
    :model-changed  - the tx that asserted the summary has a :prov/model-version
                      different from (:model-id opts)
    :stale          - a commit listing the file in :commit/changed-files has a
                      :commit/committed-at later than the summary tx's
                      :prov/analyzed-at

  `opts` must include :prompt-hash for :prompt-changed and :model-id for
  :model-changed. An ex-info is thrown when the required key is missing, so a
  nil value never reaches the `not=` predicate (where it would either match
  every analyzed file or fail inside the query engine).

  Paths flagged by `files/sensitive-path?` are dropped; their count is logged.
  Returns maps {:file/path ... :file/lang ...} sorted by path, same shape as
  `files-needing-analysis`."
  [db scope opts]
  {:pre [(valid-reanalyze-scopes scope)]}
  (let [require-opt (fn [k]
                      (let [v (get opts k)]
                        (when (nil? v)
                          (throw (ex-info (str "Re-analysis scope " scope
                                               " requires " k " in opts")
                                          {:scope scope :missing-opt k})))
                        v))
        raw (case scope
              :all
              (d/q '[:find ?path ?lang
                     :where
                     [?e :file/path ?path]
                     [?e :file/lang ?lang]
                     [?e :sem/summary _]]
                   db)

              :prompt-changed
              (d/q '[:find ?path ?lang
                     :in $ ?current-hash
                     :where
                     [?e :file/path ?path]
                     [?e :file/lang ?lang]
                     ;; ?tx is the transaction that asserted the current summary
                     [?e :sem/summary _ ?tx]
                     [?tx :prov/prompt-hash ?h]
                     [(not= ?h ?current-hash)]]
                   db (require-opt :prompt-hash))

              :model-changed
              (d/q '[:find ?path ?lang
                     :in $ ?current-model
                     :where
                     [?e :file/path ?path]
                     [?e :file/lang ?lang]
                     [?e :sem/summary _ ?tx]
                     [?tx :prov/model-version ?m]
                     [(not= ?m ?current-model)]]
                   db (require-opt :model-id))

              :stale
              (d/q '[:find ?path ?lang
                     :where
                     [?e :file/path ?path]
                     [?e :file/lang ?lang]
                     [?e :sem/summary _ ?tx]
                     [?tx :prov/analyzed-at ?at]
                     [?c :commit/changed-files ?e]
                     [?c :commit/committed-at ?ct]
                     [(> ?ct ?at)]]
                   db))
        candidates (mapv (fn [[path lang]] {:file/path path :file/lang lang}) raw)
        {sensitive true safe false} (group-by #(files/sensitive-path? (:file/path %))
                                              candidates)]
    (when (seq sensitive)
      (log! (str "Skipping " (count sensitive) " sensitive file(s) from re-analysis")))
    (sort-by :file/path safe)))
366+
309367
;; --- Orchestration ---
310368

311369
(defn repo-name

src/noumenon/cli.clj

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,14 @@
111111
[specs valid-set]
112112
(mapv #(if (= "--provider" (:flag %)) (assoc % :valid valid-set) %) specs))
113113

114+
;; CLI flag spec for --reanalyze: takes a string value stored under :reanalyze.
(def ^:private reanalyze-flag
  {:key           :reanalyze
   :flag          "--reanalyze"
   :parse         :string
   :error-missing :missing-reanalyze-value
   :desc          "Re-analyze files: all, prompt-changed, model-changed, stale"})
118+
114119
(def ^:private analyze-flags
115120
(vec (concat [model-flag (assoc provider-flag :valid all-valid-providers)
116-
max-files-flag db-dir-flag]
121+
max-files-flag reanalyze-flag db-dir-flag]
117122
verbose-flags concurrency-flags)))
118123

119124
;; --- Declarative command specs ---
@@ -234,7 +239,7 @@
234239
"analyze" {:spec analyze-command-spec
235240
:summary "Enrich imported files with LLM-driven semantic analysis"
236241
:usage "analyze [options] <repo-path>"
237-
:epilog "Sensitive files (.env, *.pem, credentials, SSH keys, etc.) are\nautomatically excluded — their contents are never sent to the LLM."}
242+
:epilog "Sensitive files (.env, *.pem, credentials, SSH keys, etc.) are\nautomatically excluded — their contents are never sent to the LLM.\n\nRe-analysis scopes (--reanalyze):\n all Re-analyze every file\n prompt-changed Files analyzed with a different prompt template\n model-changed Files analyzed with a different model\n stale Files modified by commits since their last analysis"}
238243
"enrich" {:spec enrich-command-spec
239244
:summary "Extract cross-file import graph deterministically"
240245
:usage "enrich [options] <repo-path>"

src/noumenon/main.clj

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,29 @@
103103
{:db-path (db-path ctx)
104104
:next-step (str cli/program-name " enrich " repo-path)})}))))
105105

106+
;; Accepted string values for the --reanalyze option.
(def ^:private valid-reanalyze-scopes
  (hash-set "all" "prompt-changed" "model-changed" "stale"))
108+
109+
(defn- prepare-reanalysis!
  "When `reanalyze` (a scope string) is given, find the files in that scope via
  `analyze/files-for-reanalysis` and retract their analysis attributes with
  `sync/retract-analysis!`.
  Logs and returns the number of files marked (0 when nothing matched);
  returns nil when no scope is given."
  [conn db reanalyze {:keys [prompt-hash model-id]}]
  (when reanalyze
    (let [query-opts {:prompt-hash prompt-hash
                      :model-id    model-id}
          targets    (->> (analyze/files-for-reanalysis db (keyword reanalyze) query-opts)
                          (mapv :file/path))
          marked     (if (empty? targets)
                       0
                       (sync/retract-analysis! conn targets))]
      (log! (str "Marked " marked " file(s) for re-analysis (scope: " reanalyze ")"))
      marked)))
121+
106122
(defn do-analyze
107123
"Run the analyze subcommand. Returns {:exit n :result map-or-nil}."
108-
[{:keys [repo-path model provider concurrency min-delay max-files] :as opts}]
124+
[{:keys [repo-path model provider concurrency min-delay max-files reanalyze] :as opts}]
125+
(when (and reanalyze (not (valid-reanalyze-scopes reanalyze)))
126+
(print-error! (str "Invalid --reanalyze scope: " reanalyze
127+
". Must be one of: all, prompt-changed, model-changed, stale"))
128+
(System/exit 1))
109129
(with-valid-repo
110130
opts
111131
(fn [ctx]
@@ -115,15 +135,18 @@
115135
(fn [{:keys [conn]}]
116136
(let [{:keys [prompt-fn model-id]}
117137
(llm/wrap-as-prompt-fn-from-opts {:provider provider :model model})
118-
result (analyze/analyze-repo! conn repo-path prompt-fn
119-
(cond-> {:model-id model-id
120-
:concurrency (or concurrency 3)
121-
:min-delay-ms (or min-delay 0)}
122-
max-files (assoc :max-files max-files)))]
123-
(log! (str "Next: run '" cli/program-name " query <query-name> " repo-path
124-
"' or '" cli/program-name " ask -q \"...\" " repo-path
125-
"' to explore the knowledge graph."))
126-
{:exit 0 :result result})))
138+
prompt-hash (analyze/prompt-hash (:template (analyze/load-prompt-template)))]
139+
(prepare-reanalysis! conn (d/db conn) reanalyze
140+
{:prompt-hash prompt-hash :model-id model-id})
141+
(let [result (analyze/analyze-repo! conn repo-path prompt-fn
142+
(cond-> {:model-id model-id
143+
:concurrency (or concurrency 3)
144+
:min-delay-ms (or min-delay 0)}
145+
max-files (assoc :max-files max-files)))]
146+
(log! (str "Next: run '" cli/program-name " query <query-name> " repo-path
147+
"' or '" cli/program-name " ask -q \"...\" " repo-path
148+
"' to explore the knowledge graph."))
149+
{:exit 0 :result result}))))
127150
(catch clojure.lang.ExceptionInfo e
128151
(print-error! (.getMessage e))
129152
(when-let [help (cli/format-subcommand-help "analyze")]

src/noumenon/mcp.clj

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@
148148
"continue_from" {:type "string" :description "Session ID from a budget-exhausted run — resumes the agent from where it left off"}})
149149
:required ["question" "repo_path"]}}
150150
{:name "noumenon_analyze"
151-
:description "Run LLM analysis on repository files to enrich the knowledge graph with semantic metadata. Only analyzes files not yet analyzed. Requires a prior import."
151+
:description "Run LLM analysis on repository files to enrich the knowledge graph with semantic metadata. By default only analyzes files not yet analyzed. Pass reanalyze to re-analyze files: all, prompt-changed, model-changed, or stale. Requires a prior import."
152152
:inputSchema {:type "object"
153153
:properties (merge repo-path-prop
154154
{"provider" {:type "string"
@@ -158,7 +158,9 @@
158158
"concurrency" {:type "integer"
159159
:description "Number of concurrent LLM calls (default: 3, max: 20)"}
160160
"max_files" {:type "integer"
161-
:description "Stop after analyzing N files (useful for sampling)"}})
161+
:description "Stop after analyzing N files (useful for sampling)"}
162+
"reanalyze" {:type "string"
163+
:description "Re-analyze scope: all, prompt-changed, model-changed, stale (default: only unanalyzed files)"}})
162164
:required ["repo_path"]}}
163165
{:name "noumenon_enrich"
164166
:description "Extract cross-file import graph deterministically. No LLM calls — uses language-specific parsers. Requires a prior import."
@@ -358,29 +360,53 @@
358360
(tool-result (or answer
359361
(str "No answer found (status: " (name (:status result)) ")"))))))))
360362

363+
;; Accepted string values for the `reanalyze` tool argument.
(def ^:private valid-reanalyze-scopes
  #{"all"
    "prompt-changed"
    "model-changed"
    "stale"})
365+
366+
(defn- prepare-reanalysis!
  "If a `reanalyze` scope string is supplied, resolve it to file paths with
  `analyze/files-for-reanalysis` and retract those files' analysis attributes
  through `sync/retract-analysis!`.
  Returns the count of files marked for re-analysis (0 if the scope matched
  nothing), or nil when `reanalyze` is absent."
  [conn db reanalyze {:keys [prompt-hash model-id]}]
  (when reanalyze
    (let [matches   (analyze/files-for-reanalysis
                     db
                     (keyword reanalyze)
                     {:prompt-hash prompt-hash :model-id model-id})
          paths     (into [] (map :file/path) matches)
          retracted (if (seq paths)
                      (sync/retract-analysis! conn paths)
                      0)]
      (log! (str "Marked " retracted " file(s) for re-analysis (scope: " reanalyze ")"))
      retracted)))
378+
361379
(defn- handle-analyze [args defaults]
362380
(validate-llm-inputs! args)
363-
(with-conn args defaults
364-
(fn [{:keys [conn repo-path]}]
365-
(let [{:keys [prompt-fn model-id]}
366-
(llm/wrap-as-prompt-fn-from-opts {:provider (or (args "provider") (:provider defaults))
367-
:model (or (args "model") (:model defaults))})
368-
concurrency (min (or (args "concurrency") 3) 20)
369-
max-files (args "max_files")
370-
result (analyze/analyze-repo! conn repo-path prompt-fn
371-
(cond-> {:model-id model-id
372-
:concurrency concurrency}
373-
max-files (assoc :max-files max-files)))]
374-
(tool-result (str "Analysis complete. "
375-
(:files-analyzed result 0) " files analyzed"
376-
(when (pos? (:files-parse-errored result 0))
377-
(str ", " (:files-parse-errored result 0) " parse errors"))
378-
(when (pos? (:files-errored result 0))
379-
(str ", " (:files-errored result 0) " errors"))
380-
". " (get-in result [:total-usage :input-tokens] 0)
381-
" in / " (get-in result [:total-usage :output-tokens] 0) " out tokens"
382-
(when-let [c (get-in result [:total-usage :cost-usd])]
383-
(when (pos? c) (str " ($" (format "%.2f" c) ")")))))))))
381+
(let [reanalyze (args "reanalyze")]
382+
(when (and reanalyze (not (valid-reanalyze-scopes reanalyze)))
383+
(throw (ex-info (str "Invalid reanalyze scope: " reanalyze
384+
". Must be one of: all, prompt-changed, model-changed, stale")
385+
{:scope reanalyze})))
386+
(with-conn args defaults
387+
(fn [{:keys [conn repo-path]}]
388+
(let [{:keys [prompt-fn model-id]}
389+
(llm/wrap-as-prompt-fn-from-opts {:provider (or (args "provider") (:provider defaults))
390+
:model (or (args "model") (:model defaults))})
391+
prompt-hash (analyze/prompt-hash (:template (analyze/load-prompt-template)))]
392+
(prepare-reanalysis! conn (d/db conn) reanalyze
393+
{:prompt-hash prompt-hash :model-id model-id})
394+
(let [concurrency (min (or (args "concurrency") 3) 20)
395+
max-files (args "max_files")
396+
result (analyze/analyze-repo! conn repo-path prompt-fn
397+
(cond-> {:model-id model-id
398+
:concurrency concurrency}
399+
max-files (assoc :max-files max-files)))]
400+
(tool-result (str "Analysis complete. "
401+
(:files-analyzed result 0) " files analyzed"
402+
(when (pos? (:files-parse-errored result 0))
403+
(str ", " (:files-parse-errored result 0) " parse errors"))
404+
(when (pos? (:files-errored result 0))
405+
(str ", " (:files-errored result 0) " errors"))
406+
". " (get-in result [:total-usage :input-tokens] 0)
407+
" in / " (get-in result [:total-usage :output-tokens] 0) " out tokens"
408+
(when-let [c (get-in result [:total-usage :cost-usd])]
409+
(when (pos? c) (str " ($" (format "%.2f" c) ")")))))))))))
384410

385411
(defn- handle-enrich [args defaults]
386412
(with-conn args defaults

src/noumenon/sync.clj

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,17 @@
7070

7171
;; --- Retraction ---
7272

73-
(def ^:private mutable-file-attrs
74-
"Attributes to retract on modified/deleted files so the pipeline re-processes them."
75-
[:file/size :file/lines :file/imports
76-
:sem/summary :sem/purpose :sem/tags :sem/complexity
73+
(def ^:private analysis-file-attrs
  "Attributes written by analysis; retracted when a file is re-analyzed.
  Excludes the import/enrich attributes (:file/size, :file/lines, :file/imports)."
  [;; semantic metadata
   :sem/summary
   :sem/purpose
   :sem/tags
   :sem/complexity
   :sem/patterns
   :sem/category
   :sem/dependencies
   ;; architecture classification
   :arch/layer
   :arch/subsystem
   ;; provenance
   :prov/confidence])
8079

80+
(def ^:private mutable-file-attrs
  "Everything retracted from a modified/deleted file so the pipeline re-processes
  it: the import/enrich attributes followed by all of `analysis-file-attrs`."
  (vec (concat [:file/size :file/lines :file/imports]
               analysis-file-attrs)))
83+
8184
(defn- find-file-eid
8285
"Look up a file entity ID by path. Returns nil if not found."
8386
[db path]
@@ -88,11 +91,11 @@
8891
[v]
8992
(if (map? v) (:db/id v) v))
9093

91-
(defn- retract-file-attrs
92-
"Build retraction tx-data for mutable attributes on a file entity."
93-
[db eid]
94-
(let [entity (d/pull db mutable-file-attrs eid)]
95-
(->> mutable-file-attrs
94+
(defn- retract-attrs
95+
"Build retraction tx-data for the given attributes on a file entity."
96+
[db eid attrs]
97+
(let [entity (d/pull db attrs eid)]
98+
(->> attrs
9699
(mapcat (fn [attr]
97100
(let [v (get entity attr)]
98101
(cond
@@ -116,7 +119,26 @@
116119
results (->> paths
117120
(keep (fn [path]
118121
(when-let [eid (find-file-eid db path)]
119-
(let [tx (into (retract-file-attrs db eid)
122+
(let [tx (into (retract-attrs db eid mutable-file-attrs)
123+
(retract-code-segments db eid))]
124+
(when (seq tx) tx)))))
125+
vec)
126+
tx-data (into [] cat results)]
127+
(when (seq tx-data)
128+
(d/transact conn {:tx-data tx-data}))
129+
(count results))))
130+
131+
(defn retract-analysis!
132+
"Retract analysis attributes and code segments for the given file paths.
133+
Does not retract import/enrich attrs (:file/size, :file/lines, :file/imports).
134+
Returns count of files actually retracted."
135+
[conn paths]
136+
(when (seq paths)
137+
(let [db (d/db conn)
138+
results (->> paths
139+
(keep (fn [path]
140+
(when-let [eid (find-file-eid db path)]
141+
(let [tx (into (retract-attrs db eid analysis-file-attrs)
120142
(retract-code-segments db eid))]
121143
(when (seq tx) tx)))))
122144
vec)

0 commit comments

Comments
 (0)