Skip to content

Commit c19ecf3

Browse files
authored
Introduce staged DSL mining pipeline with candidate tests (#1165)
1 parent d40d455 commit c19ecf3

33 files changed

Lines changed: 2309 additions & 49 deletions

sandbox_common_core/src/main/java/org/sandbox/jdt/triggerpattern/llm/AiRuleInferenceEngine.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ private Optional<CommitEvaluation> validateAndEnrich(CommitEvaluation evaluation
162162
evaluation.categoryReason(), evaluation.canImplementInCurrentDsl(),
163163
evaluation.dslRule(), evaluation.targetHintFile(),
164164
evaluation.languageChangeNeeded(), evaluation.dslRuleAfterChange(),
165-
evaluation.summary(), validationResult);
165+
evaluation.summary(), validationResult,
166+
evaluation.beforeExample(), evaluation.afterExample(), evaluation.negativeExample());
166167

167168
return Optional.of(enriched);
168169
}

sandbox_common_core/src/main/java/org/sandbox/jdt/triggerpattern/llm/CommitEvaluation.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
* @param dslRuleAfterChange DSL rule that would work after language change
4242
* @param summary human-readable summary
4343
* @param dslValidationResult result of DSL validation ("VALID" or error message), null if not validated
44+
* @param beforeExample Java code example that should be matched by the rule (for candidate testing)
45+
* @param afterExample expected Java code after the rule is applied (for candidate testing)
46+
* @param negativeExample Java code example that should NOT match the rule (for candidate testing)
4447
*/
4548
public record CommitEvaluation(
4649
String commitHash,
@@ -65,7 +68,10 @@ public record CommitEvaluation(
6568
String languageChangeNeeded,
6669
String dslRuleAfterChange,
6770
String summary,
68-
String dslValidationResult) {
71+
String dslValidationResult,
72+
String beforeExample,
73+
String afterExample,
74+
String negativeExample) {
6975

7076
/**
7177
* Traffic light assessment for a commit evaluation.
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Carsten Hammer.
3+
*
4+
* This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Public License 2.0
6+
* which accompanies this distribution, and is available at
7+
* https://www.eclipse.org/legal/epl-2.0/
8+
*
9+
* SPDX-License-Identifier: EPL-2.0
10+
*
11+
* Contributors:
12+
* Carsten Hammer
13+
*******************************************************************************/
14+
package org.sandbox.jdt.triggerpattern.llm;
15+
16+
import org.sandbox.jdt.triggerpattern.llm.CommitEvaluation.TrafficLight;
17+
18+
/**
19+
* Lightweight first-stage LLM result used to decide whether a commit is worth
20+
* a full candidate-generation request.
21+
* <p>
22+
* This deliberately omits expensive fields such as DSL rules, before/after
23+
* examples, and negative examples. Those should only be requested for commits
24+
* that pass this cheap screening stage.
25+
* </p>
26+
*
27+
* @param commitHash the commit hash
28+
* @param relevant whether the commit is relevant for mining
29+
* @param trafficLight coarse suitability assessment
30+
* @param category coarse category, if known
31+
* @param confidence confidence in the assessment, between {@code 0.0} and {@code 1.0}
32+
* @param reason short explanation for the decision
33+
*/
34+
public record CommitScreening(
35+
String commitHash,
36+
boolean relevant,
37+
TrafficLight trafficLight,
38+
String category,
39+
double confidence,
40+
String reason) {
41+
42+
/** Default confidence threshold for a full candidate request. */
43+
public static final double DEFAULT_CANDIDATE_THRESHOLD = 0.75d;
44+
45+
/**
46+
* @return whether this result should continue to full candidate generation
47+
*/
48+
public boolean shouldRequestCandidateDetails() {
49+
return relevant && trafficLight == TrafficLight.GREEN
50+
&& confidence >= DEFAULT_CANDIDATE_THRESHOLD;
51+
}
52+
}

sandbox_common_core/src/main/java/org/sandbox/jdt/triggerpattern/llm/GeminiClient.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,10 @@ private static CommitEvaluation createEvaluation(JsonObject eval, String commitH
651651
getStringOrNull(eval, "languageChangeNeeded"), //$NON-NLS-1$
652652
dslRuleAfterChange,
653653
getStringOrNull(eval, "summary"), //$NON-NLS-1$
654-
null);
654+
null,
655+
getStringOrNull(eval, "beforeExample"), //$NON-NLS-1$
656+
getStringOrNull(eval, "afterExample"), //$NON-NLS-1$
657+
getStringOrNull(eval, "negativeExample")); //$NON-NLS-1$
655658
}
656659

657660
/**

sandbox_common_core/src/main/java/org/sandbox/jdt/triggerpattern/llm/OpenAiCompatibleClient.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,10 @@ static CommitEvaluation createEvaluation(JsonObject eval, String commitHash,
412412
getStringOrNull(eval, "languageChangeNeeded"), //$NON-NLS-1$
413413
getStringOrNull(eval, "dslRuleAfterChange"), //$NON-NLS-1$
414414
getStringOrNull(eval, "summary"), //$NON-NLS-1$
415-
null);
415+
null,
416+
getStringOrNull(eval, "beforeExample"), //$NON-NLS-1$
417+
getStringOrNull(eval, "afterExample"), //$NON-NLS-1$
418+
getStringOrNull(eval, "negativeExample")); //$NON-NLS-1$
416419
}
417420

418421
static String getStringOrNull(JsonObject obj, String key) {

sandbox_common_core/src/main/java/org/sandbox/jdt/triggerpattern/llm/PromptBuilder.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ private static void appendJsonSchema(StringBuilder sb, boolean asArray) {
270270
sb.append(indent).append("\"replacesPlugin\": \"name of Java plugin this would replace, or null\",\n"); //$NON-NLS-1$
271271
sb.append(indent).append("\"previouslyProposed\": \"summary of similar prior rule, or null\",\n"); //$NON-NLS-1$
272272
sb.append(indent).append("\"sourceVersion\": 11,\n"); //$NON-NLS-1$
273+
sb.append(indent).append("\"beforeExample\": \"minimal self-contained Java class showing code BEFORE the fix (must compile)\",\n"); //$NON-NLS-1$
274+
sb.append(indent).append("\"afterExample\": \"same class showing code AFTER the fix (must compile)\",\n"); //$NON-NLS-1$
275+
sb.append(indent).append("\"negativeExample\": \"minimal Java class that looks similar but should NOT be changed by the rule\",\n"); //$NON-NLS-1$
273276
sb.append(indent).append("\"summary\": \"brief summary of the analysis\"\n"); //$NON-NLS-1$
274277
if (asArray) {
275278
sb.append(" },\n ...\n]\n\n"); //$NON-NLS-1$
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Carsten Hammer.
3+
*
4+
* This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Public License 2.0
6+
* which accompanies this distribution, and is available at
7+
* https://www.eclipse.org/legal/epl-2.0/
8+
*
9+
* SPDX-License-Identifier: EPL-2.0
10+
*
11+
* Contributors:
12+
* Carsten Hammer
13+
*******************************************************************************/
14+
package org.sandbox.jdt.triggerpattern.llm;
15+
16+
import java.util.List;
17+
18+
import org.sandbox.jdt.triggerpattern.llm.PromptBuilder.CommitData;
19+
20+
/**
21+
* Builds compact first-stage prompts for cheap commit screening.
22+
* <p>
23+
* The screening prompt intentionally avoids full DSL context and does not ask
24+
* for DSL rules or code examples. It is intended for Gemini free-tier friendly
25+
* mining runs where only promising commits should continue to the expensive
26+
* candidate-generation stage.
27+
* </p>
28+
*/
29+
public class ScreeningPromptBuilder {
30+
31+
private static final int DEFAULT_MAX_DIFF_CHARS = 4_000;
32+
33+
private final int maxDiffChars;
34+
35+
/** Create a builder with the default diff truncation limit. */
36+
public ScreeningPromptBuilder() {
37+
this(DEFAULT_MAX_DIFF_CHARS);
38+
}
39+
40+
/**
41+
* Create a builder with an explicit diff truncation limit.
42+
*
43+
* @param maxDiffChars maximum characters of each diff included in the prompt
44+
*/
45+
public ScreeningPromptBuilder(int maxDiffChars) {
46+
if (maxDiffChars < 500) {
47+
throw new IllegalArgumentException("maxDiffChars must be at least 500"); //$NON-NLS-1$
48+
}
49+
this.maxDiffChars = maxDiffChars;
50+
}
51+
52+
/**
53+
* Builds a compact batch prompt for first-stage screening.
54+
*
55+
* @param commits commits to screen
56+
* @return prompt text
57+
*/
58+
public String buildScreeningPrompt(List<CommitData> commits) {
59+
StringBuilder sb = new StringBuilder();
60+
sb.append("You are screening commits for reusable Java cleanup/refactoring patterns.\n"); //$NON-NLS-1$
61+
sb.append("Do not produce DSL rules. Do not produce before/after examples.\n"); //$NON-NLS-1$
62+
sb.append("Only decide whether each commit is worth a later, more expensive candidate-generation request.\n\n"); //$NON-NLS-1$
63+
sb.append("Return a JSON array with exactly ").append(commits.size()); //$NON-NLS-1$
64+
sb.append(" objects, one per commit, in the same order as presented.\n"); //$NON-NLS-1$
65+
sb.append("Schema per object:\n"); //$NON-NLS-1$
66+
sb.append("{\n"); //$NON-NLS-1$
67+
sb.append(" \"commitHash\": \"hash\",\n"); //$NON-NLS-1$
68+
sb.append(" \"relevant\": true,\n"); //$NON-NLS-1$
69+
sb.append(" \"trafficLight\": \"GREEN|YELLOW|RED|NOT_APPLICABLE\",\n"); //$NON-NLS-1$
70+
sb.append(" \"category\": \"short category or null\",\n"); //$NON-NLS-1$
71+
sb.append(" \"confidence\": 0.0,\n"); //$NON-NLS-1$
72+
sb.append(" \"reason\": \"one short sentence\"\n"); //$NON-NLS-1$
73+
sb.append("}\n"); //$NON-NLS-1$
74+
sb.append("confidence MUST be a decimal between 0.0 and 1.0 (not 0–100).\n\n"); //$NON-NLS-1$
75+
for (int i = 0; i < commits.size(); i++) {
76+
CommitData commit = commits.get(i);
77+
sb.append("## Commit ").append(i).append(" (").append(commit.commitHash()).append(")\n"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
78+
sb.append("Message:\n").append(commit.commitMessage()).append("\n\n"); //$NON-NLS-1$ //$NON-NLS-2$
79+
sb.append("Diff:\n```\n"); //$NON-NLS-1$
80+
sb.append(truncate(commit.diff())).append("\n```\n\n"); //$NON-NLS-1$
81+
}
82+
return sb.toString();
83+
}
84+
85+
private String truncate(String diff) {
86+
if (diff == null) {
87+
return ""; //$NON-NLS-1$
88+
}
89+
if (diff.length() <= maxDiffChars) {
90+
return diff;
91+
}
92+
return diff.substring(0, maxDiffChars) + "\n... [diff truncated for screening]"; //$NON-NLS-1$
93+
}
94+
}

sandbox_common_core/src/test/java/org/sandbox/jdt/triggerpattern/internal/HintFileStoreTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void testRegisterInferredRulesBatchSkipsIrrelevant() {
7979
"commit1", "msg", "repo", Instant.now(), null,
8080
false, "not relevant", false, null,
8181
0, 0, 0, CommitEvaluation.TrafficLight.NOT_APPLICABLE,
82-
null, false, null, false, null, null, null, null, null, null);
82+
null, false, null, false, null, null, null, null, null, null, null, null, null);
8383

8484
List<String> ids = store.registerInferredRules(List.of(irrelevant), "src");
8585

@@ -221,6 +221,6 @@ private static CommitEvaluation createEvaluation(String commitHash, String dslRu
221221
5, 5, 3, CommitEvaluation.TrafficLight.GREEN,
222222
"TestCategory", false, null,
223223
dslRule != null, dslRule, null,
224-
null, null, "Test summary", "VALID");
224+
null, null, "Test summary", "VALID", null, null, null);
225225
}
226226
}

sandbox_common_core/src/test/java/org/sandbox/jdt/triggerpattern/llm/AiRuleInferenceEngineTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ private static CommitEvaluation createMockEvaluation(boolean relevant,
201201
5, 5, 3, trafficLight,
202202
"TestCategory", false, null, //$NON-NLS-1$
203203
dslRule != null, dslRule, null,
204-
null, null, "Test summary", null); //$NON-NLS-1$
204+
null, null, "Test summary", null, null, null, null); //$NON-NLS-1$
205205
}
206206

207207
/**
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Carsten Hammer.
3+
*
4+
* This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Public License 2.0
6+
* which accompanies this distribution, and is available at
7+
* https://www.eclipse.org/legal/epl-2.0/
8+
*
9+
* SPDX-License-Identifier: EPL-2.0
10+
*
11+
* Contributors:
12+
* Carsten Hammer
13+
*******************************************************************************/
14+
package org.sandbox.jdt.triggerpattern.llm;
15+
16+
import static org.junit.jupiter.api.Assertions.assertFalse;
17+
import static org.junit.jupiter.api.Assertions.assertTrue;
18+
19+
import java.util.List;
20+
21+
import org.junit.jupiter.api.Test;
22+
import org.sandbox.jdt.triggerpattern.llm.CommitEvaluation.TrafficLight;
23+
import org.sandbox.jdt.triggerpattern.llm.PromptBuilder.CommitData;
24+
25+
/**
26+
* Tests for {@link ScreeningPromptBuilder} and {@link CommitScreening}.
27+
*/
28+
class ScreeningPromptBuilderTest {
29+
30+
@Test
31+
void testScreeningPromptDoesNotAskForExpensiveCandidateFields() {
32+
CommitData commit = new CommitData("abc123", "Replace legacy API", "diff"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
33+
34+
String prompt = new ScreeningPromptBuilder().buildScreeningPrompt(List.of(commit));
35+
36+
assertTrue(prompt.contains("trafficLight")); //$NON-NLS-1$
37+
assertTrue(prompt.contains("confidence")); //$NON-NLS-1$
38+
assertFalse(prompt.contains("dslRule")); //$NON-NLS-1$
39+
assertFalse(prompt.contains("beforeExample")); //$NON-NLS-1$
40+
assertFalse(prompt.contains("afterExample")); //$NON-NLS-1$
41+
assertFalse(prompt.contains("negativeExample")); //$NON-NLS-1$
42+
}
43+
44+
@Test
45+
void testScreeningPromptConstrainsOrderingAndConfidenceScale() {
46+
CommitData c1 = new CommitData("aaa111", "First commit", "diff1"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
47+
CommitData c2 = new CommitData("bbb222", "Second commit", "diff2"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
48+
49+
String prompt = new ScreeningPromptBuilder().buildScreeningPrompt(List.of(c1, c2));
50+
51+
assertTrue(prompt.contains("in the same order as presented")); //$NON-NLS-1$
52+
assertTrue(prompt.contains("0.0 and 1.0")); //$NON-NLS-1$
53+
}
54+
55+
@Test
56+
void testDiffIsTruncatedForScreening() {
57+
String longDiff = "x".repeat(2_000); //$NON-NLS-1$
58+
CommitData commit = new CommitData("abc123", "Long diff", longDiff); //$NON-NLS-1$ //$NON-NLS-2$
59+
60+
String prompt = new ScreeningPromptBuilder(1_000).buildScreeningPrompt(List.of(commit));
61+
62+
assertTrue(prompt.contains("[diff truncated for screening]")); //$NON-NLS-1$
63+
}
64+
65+
@Test
66+
void testCommitScreeningCandidateThreshold() {
67+
CommitScreening good = new CommitScreening("abc123", true, //$NON-NLS-1$
68+
TrafficLight.GREEN, "api-modernization", 0.9d, "Reusable API replacement"); //$NON-NLS-1$ //$NON-NLS-2$
69+
CommitScreening weak = new CommitScreening("def456", true, //$NON-NLS-1$
70+
TrafficLight.GREEN, "api-modernization", 0.4d, "Unclear pattern"); //$NON-NLS-1$ //$NON-NLS-2$
71+
CommitScreening yellow = new CommitScreening("ghi789", true, //$NON-NLS-1$
72+
TrafficLight.YELLOW, "api-modernization", 0.95d, "Needs DSL change"); //$NON-NLS-1$ //$NON-NLS-2$
73+
74+
assertTrue(good.shouldRequestCandidateDetails());
75+
assertFalse(weak.shouldRequestCandidateDetails());
76+
assertFalse(yellow.shouldRequestCandidateDetails());
77+
}
78+
}

0 commit comments

Comments
 (0)