Skip to content

Commit cc77f24

Browse files
committed
refactor(e2e): add CSS selector support to TestAction and DSL
- Add optional 'selector' field to TestAction.Click, Type, Hover, Scroll, Assert, Select, UploadFile
- Add optional 'selector' field to WaitCondition.ElementVisible, ElementHidden, ElementEnabled
- Update E2EDslParser to parse CSS selectors from strings
- Update E2EDslGenerator to output selectors when available
- Update E2EDslLLMGenerator prompts to use CSS selectors instead of SoM tags
- Fix all affected tests

This allows DSL generation without page state, using CSS selectors directly.
1 parent 6632d50 commit cc77f24

File tree

6 files changed

+242
-94
lines changed

6 files changed

+242
-94
lines changed

mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/e2etest/model/TestAction.kt

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,52 +4,79 @@ import kotlinx.serialization.Serializable
44

55
/**
66
* Action space for E2E testing, inspired by WebArena and Browser-use.
7-
*
7+
*
88
* Each action represents an atomic operation that can be executed on a web page.
9-
* The targetId refers to a Set-of-Mark tag number when using visual grounding,
10-
* or can be mapped to a CSS selector.
11-
*
9+
*
10+
* Element identification supports two modes:
11+
* 1. **Set-of-Mark (SoM)**: Use `targetId` (Int) when visual grounding is available
12+
* 2. **CSS Selector**: Use `selector` (String) for direct selector-based targeting
13+
*
14+
* When both are provided, `selector` takes precedence for execution.
15+
*
1216
* @see <a href="https://github.com/phodal/auto-dev/issues/532">Issue #532</a>
1317
*/
1418
@Serializable
1519
sealed class TestAction {
1620
/**
17-
* Click on an element identified by targetId (SoM tag number)
21+
* Click on an element
22+
*
23+
* @param targetId Set-of-Mark tag number (for visual grounding)
24+
* @param selector CSS selector (for direct targeting)
25+
* @param button Mouse button to use
26+
* @param clickCount Number of clicks (1 for single, 2 for double)
1827
*/
1928
@Serializable
2029
data class Click(
21-
val targetId: Int,
30+
val targetId: Int = 0,
31+
val selector: String? = null,
2232
val button: MouseButton = MouseButton.LEFT,
2333
val clickCount: Int = 1
2434
) : TestAction()
2535

2636
/**
2737
* Type text into an element
38+
*
39+
* @param targetId Set-of-Mark tag number (for visual grounding)
40+
* @param selector CSS selector (for direct targeting)
41+
* @param text Text to type
42+
* @param clearFirst Whether to clear existing content first
43+
* @param pressEnter Whether to press Enter after typing
2844
*/
2945
@Serializable
3046
data class Type(
31-
val targetId: Int,
47+
val targetId: Int = 0,
48+
val selector: String? = null,
3249
val text: String,
3350
val clearFirst: Boolean = false,
3451
val pressEnter: Boolean = false
3552
) : TestAction()
3653

3754
/**
3855
* Hover over an element
56+
*
57+
* @param targetId Set-of-Mark tag number (for visual grounding)
58+
* @param selector CSS selector (for direct targeting)
3959
*/
4060
@Serializable
4161
data class Hover(
42-
val targetId: Int
62+
val targetId: Int = 0,
63+
val selector: String? = null
4364
) : TestAction()
4465

4566
/**
4667
* Scroll the page or a specific element
68+
*
69+
* @param direction Scroll direction
70+
* @param amount Scroll amount in pixels
71+
* @param targetId Set-of-Mark tag number of element to scroll (optional)
72+
* @param selector CSS selector of element to scroll (optional)
4773
*/
4874
@Serializable
4975
data class Scroll(
5076
val direction: ScrollDirection,
5177
val amount: Int = 300,
52-
val targetId: Int? = null
78+
val targetId: Int? = null,
79+
val selector: String? = null
5380
) : TestAction()
5481

5582
/**
@@ -98,31 +125,49 @@ sealed class TestAction {
98125

99126
/**
100127
* Assert a condition on an element
128+
*
129+
* @param targetId Set-of-Mark tag number (for visual grounding)
130+
* @param selector CSS selector (for direct targeting)
131+
* @param assertion Type of assertion to perform
132+
* @param expected Expected value (for text/attribute assertions)
101133
*/
102134
@Serializable
103135
data class Assert(
104-
val targetId: Int,
136+
val targetId: Int = 0,
137+
val selector: String? = null,
105138
val assertion: AssertionType,
106139
val expected: String? = null
107140
) : TestAction()
108141

109142
/**
110143
* Select an option from a dropdown
144+
*
145+
* @param targetId Set-of-Mark tag number (for visual grounding)
146+
* @param selector CSS selector (for direct targeting)
147+
* @param value Option value to select
148+
* @param label Option label to select
149+
* @param index Option index to select
111150
*/
112151
@Serializable
113152
data class Select(
114-
val targetId: Int,
153+
val targetId: Int = 0,
154+
val selector: String? = null,
115155
val value: String? = null,
116156
val label: String? = null,
117157
val index: Int? = null
118158
) : TestAction()
119159

120160
/**
121161
* Upload a file to a file input
162+
*
163+
* @param targetId Set-of-Mark tag number (for visual grounding)
164+
* @param selector CSS selector (for direct targeting)
165+
* @param filePath Path to the file to upload
122166
*/
123167
@Serializable
124168
data class UploadFile(
125-
val targetId: Int,
169+
val targetId: Int = 0,
170+
val selector: String? = null,
126171
val filePath: String
127172
) : TestAction()
128173

@@ -154,13 +199,13 @@ enum class KeyModifier {
154199
@Serializable
155200
sealed class WaitCondition {
156201
@Serializable
157-
data class ElementVisible(val targetId: Int) : WaitCondition()
202+
data class ElementVisible(val targetId: Int = 0, val selector: String? = null) : WaitCondition()
158203

159204
@Serializable
160-
data class ElementHidden(val targetId: Int) : WaitCondition()
205+
data class ElementHidden(val targetId: Int = 0, val selector: String? = null) : WaitCondition()
161206

162207
@Serializable
163-
data class ElementEnabled(val targetId: Int) : WaitCondition()
208+
data class ElementEnabled(val targetId: Int = 0, val selector: String? = null) : WaitCondition()
164209

165210
@Serializable
166211
data class TextPresent(val text: String) : WaitCondition()

mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/e2etest/planner/TestActionPlanner.kt

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,17 @@ class TestActionPlanner(
116116
val confidence = jsonObj["confidence"]?.jsonPrimitive?.content?.toDoubleOrNull() ?: 0.8
117117

118118
val action = when (actionType.lowercase()) {
119-
"click" -> targetId?.let { TestAction.Click(it) }
120-
"type" -> targetId?.let { id -> value?.let { TestAction.Type(id, it) } }
119+
"click" -> targetId?.let { TestAction.Click(targetId = it) }
120+
"type" -> targetId?.let { id -> value?.let { TestAction.Type(targetId = id, text = it) } }
121121
"scroll_down" -> TestAction.Scroll(ScrollDirection.DOWN)
122122
"scroll_up" -> TestAction.Scroll(ScrollDirection.UP)
123123
"wait" -> TestAction.Wait(WaitCondition.Duration(value?.toLongOrNull() ?: 1000))
124124
"navigate" -> value?.let { TestAction.Navigate(it) }
125125
"go_back" -> TestAction.GoBack
126126
"press_key" -> value?.let { TestAction.PressKey(it) }
127-
"assert_visible" -> targetId?.let { TestAction.Assert(it, AssertionType.Visible) }
127+
"assert_visible" -> targetId?.let { TestAction.Assert(targetId = it, assertion = AssertionType.Visible) }
128128
"assert_text" -> targetId?.let { id ->
129-
value?.let { TestAction.Assert(id, AssertionType.TextContains(it)) }
129+
value?.let { TestAction.Assert(targetId = id, assertion = AssertionType.TextContains(it)) }
130130
}
131131
"done" -> null // Signal completion
132132
else -> null
@@ -177,15 +177,15 @@ class TestActionPlanner(
177177
val value = step.value
178178

179179
return when (actionType.lowercase()) {
180-
"click" -> targetId?.let { TestAction.Click(it) }
181-
"type" -> targetId?.let { id -> value?.let { TestAction.Type(id, it) } }
180+
"click" -> targetId?.let { TestAction.Click(targetId = it) }
181+
"type" -> targetId?.let { id -> value?.let { TestAction.Type(targetId = id, text = it) } }
182182
"scroll_down" -> TestAction.Scroll(ScrollDirection.DOWN)
183183
"scroll_up" -> TestAction.Scroll(ScrollDirection.UP)
184184
"wait" -> TestAction.Wait(WaitCondition.Duration(value?.toLongOrNull() ?: 1000))
185185
"navigate" -> value?.let { TestAction.Navigate(it) }
186186
"go_back" -> TestAction.GoBack
187187
"press_key" -> value?.let { TestAction.PressKey(it) }
188-
"assert_visible" -> targetId?.let { TestAction.Assert(it, AssertionType.Visible) }
188+
"assert_visible" -> targetId?.let { TestAction.Assert(targetId = it, assertion = AssertionType.Visible) }
189189
else -> null
190190
}
191191
}
@@ -612,13 +612,13 @@ internal class SimpleDslParser {
612612
val targetId = extractTargetId(parts.getOrNull(1))
613613
val text = parts.drop(2).firstOrNull { it.startsWith("\"") }?.let { extractQuotedString(it) }
614614
if (targetId != null && text != null) {
615-
TestAction.Type(targetId, text)
615+
TestAction.Type(targetId = targetId, text = text)
616616
} else null
617617
}
618618

619619
"hover" -> {
620620
val targetId = extractTargetId(parts.getOrNull(1))
621-
targetId?.let { TestAction.Hover(it) }
621+
targetId?.let { TestAction.Hover(targetId = it) }
622622
}
623623

624624
"scroll" -> {
@@ -686,7 +686,7 @@ internal class SimpleDslParser {
686686
"textcontains" -> value?.let { AssertionType.TextContains(it) }
687687
else -> AssertionType.Visible
688688
}
689-
assertion?.let { TestAction.Assert(targetId, it) }
689+
assertion?.let { TestAction.Assert(targetId = targetId, assertion = it) }
690690
} else null
691691
}
692692

xiuper-e2e/src/commonMain/kotlin/cc/unitmesh/e2e/dsl/E2EDslGenerator.kt

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class E2EDslGenerator {
6363
return when (action) {
6464
is TestAction.Click -> generateClickAction(action)
6565
is TestAction.Type -> generateTypeAction(action)
66-
is TestAction.Hover -> "hover #${action.targetId}"
66+
is TestAction.Hover -> generateHoverAction(action)
6767
is TestAction.Scroll -> generateScrollAction(action)
6868
is TestAction.Wait -> generateWaitAction(action)
6969
is TestAction.PressKey -> generatePressKeyAction(action)
@@ -73,13 +73,24 @@ class E2EDslGenerator {
7373
is TestAction.Refresh -> "refresh"
7474
is TestAction.Assert -> generateAssertAction(action)
7575
is TestAction.Select -> generateSelectAction(action)
76-
is TestAction.UploadFile -> "uploadFile #${action.targetId} \"${escapeString(action.filePath)}\""
76+
is TestAction.UploadFile -> generateUploadFileAction(action)
7777
is TestAction.Screenshot -> generateScreenshotAction(action)
7878
}
7979
}
8080

81+
/**
82+
* Generate target reference - prefers selector over targetId
83+
*/
84+
private fun generateTarget(targetId: Int, selector: String?): String {
85+
return if (selector != null) {
86+
"\"${escapeString(selector)}\""
87+
} else {
88+
"#$targetId"
89+
}
90+
}
91+
8192
private fun generateClickAction(action: TestAction.Click): String {
82-
val parts = mutableListOf("click", "#${action.targetId}")
93+
val parts = mutableListOf("click", generateTarget(action.targetId, action.selector))
8394

8495
if (action.button != MouseButton.LEFT) {
8596
parts.add(action.button.name.lowercase())
@@ -93,7 +104,7 @@ class E2EDslGenerator {
93104
}
94105

95106
private fun generateTypeAction(action: TestAction.Type): String {
96-
val parts = mutableListOf("type", "#${action.targetId}", "\"${escapeString(action.text)}\"")
107+
val parts = mutableListOf("type", generateTarget(action.targetId, action.selector), "\"${escapeString(action.text)}\"")
97108

98109
if (action.clearFirst) {
99110
parts.add("clearFirst")
@@ -106,15 +117,21 @@ class E2EDslGenerator {
106117
return parts.joinToString(" ")
107118
}
108119

120+
private fun generateHoverAction(action: TestAction.Hover): String {
121+
return "hover ${generateTarget(action.targetId, action.selector)}"
122+
}
123+
109124
private fun generateScrollAction(action: TestAction.Scroll): String {
110125
val parts = mutableListOf("scroll", action.direction.name.lowercase())
111126

112127
if (action.amount != 300) {
113128
parts.add(action.amount.toString())
114129
}
115130

116-
action.targetId?.let {
117-
parts.add("#$it")
131+
action.selector?.let { sel ->
132+
parts.add("\"${escapeString(sel)}\"")
133+
} ?: action.targetId?.let { id ->
134+
parts.add("#$id")
118135
}
119136

120137
return parts.joinToString(" ")
@@ -123,9 +140,9 @@ class E2EDslGenerator {
123140
private fun generateWaitAction(action: TestAction.Wait): String {
124141
val conditionStr = when (val condition = action.condition) {
125142
is WaitCondition.Duration -> "duration ${condition.ms}"
126-
is WaitCondition.ElementVisible -> "visible #${condition.targetId}"
127-
is WaitCondition.ElementHidden -> "hidden #${condition.targetId}"
128-
is WaitCondition.ElementEnabled -> "enabled #${condition.targetId}"
143+
is WaitCondition.ElementVisible -> "visible ${generateTarget(condition.targetId, condition.selector)}"
144+
is WaitCondition.ElementHidden -> "hidden ${generateTarget(condition.targetId, condition.selector)}"
145+
is WaitCondition.ElementEnabled -> "enabled ${generateTarget(condition.targetId, condition.selector)}"
129146
is WaitCondition.TextPresent -> "textPresent \"${escapeString(condition.text)}\""
130147
is WaitCondition.UrlContains -> "urlContains \"${escapeString(condition.substring)}\""
131148
is WaitCondition.PageLoaded -> "pageLoaded"
@@ -163,11 +180,11 @@ class E2EDslGenerator {
163180
is AssertionType.HasClass -> "hasClass \"${escapeString(assertion.className)}\""
164181
}
165182

166-
return "assert #${action.targetId} $assertionStr"
183+
return "assert ${generateTarget(action.targetId, action.selector)} $assertionStr"
167184
}
168185

169186
private fun generateSelectAction(action: TestAction.Select): String {
170-
val parts = mutableListOf("select", "#${action.targetId}")
187+
val parts = mutableListOf("select", generateTarget(action.targetId, action.selector))
171188

172189
action.value?.let {
173190
parts.add("value")
@@ -187,6 +204,10 @@ class E2EDslGenerator {
187204
return parts.joinToString(" ")
188205
}
189206

207+
private fun generateUploadFileAction(action: TestAction.UploadFile): String {
208+
return "uploadFile ${generateTarget(action.targetId, action.selector)} \"${escapeString(action.filePath)}\""
209+
}
210+
190211
private fun generateScreenshotAction(action: TestAction.Screenshot): String {
191212
val parts = mutableListOf("screenshot", "\"${escapeString(action.name)}\"")
192213

0 commit comments

Comments
 (0)