Skip to content

Commit de2da44

Browse files
committed
feat: implement WebEditBridgePageStateExtractor for enhanced page state extraction #532
1 parent 9237786 commit de2da44

File tree

3 files changed

+932
-12
lines changed

3 files changed

+932
-12
lines changed

mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/e2etest/planner/TestActionPlanner.kt

Lines changed: 157 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,29 @@
11
package cc.unitmesh.agent.e2etest.planner
22

33
import cc.unitmesh.agent.e2etest.E2ETestContext
import cc.unitmesh.agent.e2etest.currentTimeMillis
import cc.unitmesh.agent.e2etest.model.*
import cc.unitmesh.llm.LLMService
import kotlin.coroutines.cancellation.CancellationException
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.contentOrNull
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.jsonPrimitive
712

813
/**
914
* Plans test actions based on natural language instructions and page state.
10-
*
15+
*
1116
* Uses LLM to understand user intent and generate appropriate test actions.
1217
* Maintains memory to prevent loops and provide context.
13-
*
18+
*
1419
* @see <a href="https://github.com/phodal/auto-dev/issues/532">Issue #532</a>
1520
*/
1621
class TestActionPlanner(
1722
private val llmService: LLMService,
1823
private val config: PlannerConfig = PlannerConfig()
1924
) {
20-
private val json = Json {
21-
ignoreUnknownKeys = true
25+
private val json = Json {
26+
ignoreUnknownKeys = true
2227
isLenient = true
2328
}
2429

@@ -51,28 +56,146 @@ class TestActionPlanner(
5156
}
5257

5358
/**
 * Generate a test scenario from a natural language description using the LLM.
 *
 * Builds a prompt from the inputs, sends it to [llmService] and parses the reply
 * with [parseScenarioResponse].
 *
 * @param description natural language description of the scenario; also used as the
 *   fallback scenario name/description when the LLM reply omits them
 * @param startUrl URL stored on the resulting scenario
 * @param pageState page state passed to the prompt builder
 * @return the parsed scenario, or `null` when the LLM call fails or its reply cannot be parsed
 * @throws CancellationException when the calling coroutine is cancelled
 */
suspend fun generateScenario(
    description: String,
    startUrl: String,
    pageState: PageState
): TestScenario? {
    val prompt = buildScenarioGenerationPrompt(description, startUrl, pageState)

    return try {
        val response = llmService.sendPrompt(prompt)
        parseScenarioResponse(response, description, startUrl)
    } catch (e: CancellationException) {
        // Cancellation is cooperative: swallowing it here would keep a cancelled
        // coroutine running, so it must propagate to the caller.
        throw e
    } catch (e: Exception) {
        // Best effort: any other failure is reported as "no scenario".
        null
    }
}
6775

6876
/**
 * Plan the next action by asking the LLM.
 *
 * Builds a planning prompt from [context], sends it to [llmService] and parses the
 * reply with [parseActionResponse].
 *
 * @param context current test context handed to the prompt builder
 * @return the planned action, or `null` when the LLM call fails or its reply yields no action
 * @throws CancellationException when the calling coroutine is cancelled
 */
private suspend fun planWithLLM(context: E2ETestContext): PlannedAction? {
    val prompt = buildPlanningPrompt(context)

    return try {
        val response = llmService.sendPrompt(prompt)
        parseActionResponse(response)
    } catch (e: CancellationException) {
        // Never swallow cancellation inside a suspend function; rethrow so that
        // structured concurrency can unwind the caller.
        throw e
    } catch (e: Exception) {
        // Best effort: any other failure is reported as "no action".
        null
    }
}
89+
90+
/**
 * Parse an LLM response into a [PlannedAction].
 *
 * The JSON object is located with [extractJson] and read field by field:
 * `action_type` (required), `target_id`, `value`, `reasoning` (defaults to
 * "LLM planned action") and `confidence` (defaults to 0.8).
 *
 * @param response raw LLM output
 * @return the planned action, or `null` when no JSON is found, decoding fails,
 *   `action_type` is missing, unknown or "done", or a field required by the
 *   chosen action (`target_id` / `value`) is absent
 */
private fun parseActionResponse(response: String): PlannedAction? {
    val jsonStr = extractJson(response) ?: return null

    return try {
        val jsonObj = json.decodeFromString<JsonObject>(jsonStr)

        // contentOrNull maps an explicit JSON null to Kotlin null; plain `content`
        // would hand back the literal string "null" and e.g. navigate to "null".
        fun text(key: String): String? = jsonObj[key]?.jsonPrimitive?.contentOrNull

        val actionType = text("action_type") ?: return null
        val targetId = jsonObj["target_id"]?.jsonPrimitive?.intOrNull
        val value = text("value")
        val reasoning = text("reasoning") ?: "LLM planned action"
        val confidence = text("confidence")?.toDoubleOrNull() ?: 0.8

        val action = when (actionType.lowercase()) {
            "click" -> targetId?.let { TestAction.Click(it) }
            "type" -> targetId?.let { id -> value?.let { TestAction.Type(id, it) } }
            "scroll_down" -> TestAction.Scroll(ScrollDirection.DOWN)
            "scroll_up" -> TestAction.Scroll(ScrollDirection.UP)
            "wait" -> TestAction.Wait(WaitCondition.Duration(value?.toLongOrNull() ?: 1000))
            "navigate" -> value?.let { TestAction.Navigate(it) }
            "go_back" -> TestAction.GoBack
            "press_key" -> value?.let { TestAction.PressKey(it) }
            "assert_visible" -> targetId?.let { TestAction.Assert(it, AssertionType.Visible) }
            "assert_text" -> targetId?.let { id ->
                value?.let { TestAction.Assert(id, AssertionType.TextContains(it)) }
            }
            "done" -> null // Signal completion
            else -> null
        }

        action?.let {
            PlannedAction(
                action = it,
                reasoning = reasoning,
                confidence = confidence
            )
        }
    } catch (e: Exception) {
        // Malformed JSON or a non-primitive field value: treat as "no action".
        null
    }
}
132+
133+
/**
 * Turn an LLM reply into a [TestScenario].
 *
 * The JSON payload is located with [extractJson] and decoded as a [ScenarioResponse].
 * Steps whose action cannot be resolved by [parseStepAction] fall back to a 100 ms wait.
 *
 * @param response raw LLM output
 * @param fallbackName used for both name and description when the reply omits them
 * @param startUrl URL stored on the resulting scenario
 * @return the scenario, or `null` when no JSON is found or decoding fails
 */
private fun parseScenarioResponse(response: String, fallbackName: String, startUrl: String): TestScenario? {
    val payload = extractJson(response) ?: return null

    return try {
        val decoded = json.decodeFromString<ScenarioResponse>(payload)
        val scenarioId = "scenario_${currentTimeMillis()}"

        val testSteps = decoded.steps.mapIndexed { position, rawStep ->
            val resolvedAction = parseStepAction(rawStep)
                ?: TestAction.Wait(WaitCondition.Duration(100))
            TestStep(
                id = "step_$position",
                description = rawStep.description,
                action = resolvedAction,
                expectedOutcome = rawStep.expected_outcome
            )
        }

        TestScenario(
            id = scenarioId,
            name = decoded.name ?: fallbackName,
            description = decoded.description ?: fallbackName,
            startUrl = startUrl,
            steps = testSteps
        )
    } catch (e: Exception) {
        null
    }
}
159+
160+
/**
 * Map a decoded scenario step onto a [TestAction].
 *
 * Supports the same action vocabulary as [parseActionResponse], including
 * `assert_text`, so a text assertion in a generated scenario is not silently
 * replaced by the caller's fallback action.
 *
 * @param step decoded step from the LLM reply
 * @return the action, or `null` when `action_type` is missing or unknown, or a
 *   field required by the action (`target_id` / `value`) is absent
 */
private fun parseStepAction(step: StepResponse): TestAction? {
    val actionType = step.action_type ?: return null
    val targetId = step.target_id
    val value = step.value

    return when (actionType.lowercase()) {
        "click" -> targetId?.let { TestAction.Click(it) }
        "type" -> targetId?.let { id -> value?.let { TestAction.Type(id, it) } }
        "scroll_down" -> TestAction.Scroll(ScrollDirection.DOWN)
        "scroll_up" -> TestAction.Scroll(ScrollDirection.UP)
        "wait" -> TestAction.Wait(WaitCondition.Duration(value?.toLongOrNull() ?: 1000))
        "navigate" -> value?.let { TestAction.Navigate(it) }
        "go_back" -> TestAction.GoBack
        "press_key" -> value?.let { TestAction.PressKey(it) }
        "assert_visible" -> targetId?.let { TestAction.Assert(it, AssertionType.Visible) }
        "assert_text" -> targetId?.let { id ->
            value?.let { TestAction.Assert(id, AssertionType.TextContains(it)) }
        }
        else -> null
    }
}
178+
179+
/**
 * Pull the JSON payload out of an LLM reply.
 *
 * A fenced markdown code block (optionally tagged `json`) takes precedence; its
 * trimmed content is returned as-is. Without a fence, the span from the first `{`
 * to the last `}` of the trimmed reply is returned.
 *
 * @param response raw LLM output
 * @return the candidate JSON text, or `null` when neither form is present
 */
private fun extractJson(response: String): String? {
    val text = response.trim()

    // First choice: the content of a triple-backtick fenced block.
    Regex("```(?:json)?\\s*([\\s\\S]*?)```").find(text)?.let { fenced ->
        return fenced.groupValues[1].trim()
    }

    // Second choice: a raw object spanning the outermost braces.
    val open = text.indexOf('{')
    val close = text.lastIndexOf('}')
    return if (open < 0 || close <= open) null else text.substring(open, close + 1)
}
78201

@@ -209,3 +332,25 @@ data class PlannerConfig(
209332
*/
210333
val enableCoT: Boolean = true
211334
)
335+
336+
/**
 * Wire format of the LLM reply for scenario generation.
 *
 * Every field has a default, so a reply that omits any of them still decodes.
 *
 * @property name scenario name; `null` when absent from the reply
 * @property description scenario description; `null` when absent from the reply
 * @property steps ordered steps; empty when absent from the reply
 */
@Serializable
internal data class ScenarioResponse(
    val name: String? = null,
    val description: String? = null,
    val steps: List<StepResponse> = emptyList(),
)
345+
346+
/**
 * Wire format of a single step inside a [ScenarioResponse].
 *
 * Property names are snake_case because they double as the JSON keys.
 *
 * @property description human-readable step description; empty when absent
 * @property action_type action keyword such as "click" or "type"; `null` when absent
 * @property target_id numeric id of the element the action targets, if any
 * @property value free-form action argument (text, URL, key, wait duration), if any
 * @property expected_outcome expected result of the step, if any
 */
@Serializable
internal data class StepResponse(
    val description: String = "",
    val action_type: String? = null,
    val target_id: Int? = null,
    val value: String? = null,
    val expected_outcome: String? = null,
)

0 commit comments

Comments
 (0)