Skip to content

Commit 285eab4

Browse files
authored
fix: resolve snapshot -C and screenshot --annotate hang over WSS (#842)
* fix: resolve snapshot -C and screenshot --annotate hang over WSS (#841) Root cause: sequential CDP round-trips per element in find_cursor_interactive_elements() and collect_annotations() caused timeouts over high-latency WSS connections (~200ms × 200+ elements exceeds the 30s CDP timeout). Fix: - snapshot -C: Replace per-element CDP calls with a single JS eval that detects cursor:pointer/onclick/tabindex elements in-browser, then batch-resolve via DOM.querySelectorAll + concurrent DOM.describeNode calls using join_all - screenshot --annotate: Replace sequential DOM.resolveNode + getRect calls with concurrent join_all, matching v0.19.0's Promise.all() pattern Behavioral parity with v0.19.0 (Node.js/Playwright): - cursor:pointer detection via getComputedStyle - Inherited cursor:pointer dedup (skip children of pointer parents) - interactiveTags and interactive ARIA roles exclusion - Role differentiation: clickable vs focusable - Text dedup against ARIA tree ref names and quoted strings - Edge case: -i -C shows cursor elements even when ARIA tree is empty Tests: - 5 unit tests for build_dedup_set() helper - 3 e2e regression tests: cursor-interactive detection, annotation scaling to 50 elements, cursor scaling to 100 elements * fix: add hidden/aria-hidden filtering, contentEditable support, and cleanup robustness - Restore hidden/aria-hidden element filtering in cursor-interactive JS (was present in old code, dropped during rewrite) - Add contentEditable detection with 'editable' role and hint - Replace fire-and-forget cleanup with warning on failure - Simplify build_dedup_set to use ref_map only (eliminates fragile tree-text quote parsing; ref_map already has all ref-bearing names) --------- Co-authored-by: ctate <366502+ctate@users.noreply.github.com>
1 parent 5b74604 commit 285eab4

File tree

3 files changed

+590
-105
lines changed

3 files changed

+590
-105
lines changed

cli/src/native/e2e_tests.rs

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,3 +1982,232 @@ async fn e2e_material_checkbox_check_uncheck() {
19821982
let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
19831983
assert_success(&resp);
19841984
}
1985+
1986+
// ---------------------------------------------------------------------------
1987+
// Issue #841 – snapshot -C and screenshot --annotate must not hang over WSS
1988+
// ---------------------------------------------------------------------------
1989+
1990+
/// Verifies that `snapshot -C` (cursor-interactive mode) detects elements with
1991+
/// cursor:pointer / onclick / tabindex, produces the correct v0.19.0-compatible
1992+
/// output format, deduplicates against the ARIA tree, and completes in bounded
1993+
/// time (no sequential CDP round-trip explosion).
1994+
#[tokio::test]
1995+
#[ignore]
1996+
async fn e2e_snapshot_cursor_interactive() {
1997+
let mut state = DaemonState::new();
1998+
1999+
let resp = execute_command(
2000+
&json!({ "id": "1", "action": "launch", "headless": true }),
2001+
&mut state,
2002+
)
2003+
.await;
2004+
assert_success(&resp);
2005+
2006+
// Page with:
2007+
// - <button> and <a> (standard interactive – ARIA tree, NOT in cursor section)
2008+
// - <div cursor:pointer onclick> (clickable – cursor section)
2009+
// - <div tabindex=0> (focusable – cursor section)
2010+
// - <span cursor:pointer> (clickable – cursor section)
2011+
// - <span cursor:pointer> child of <div cursor:pointer> (inherited – skip)
2012+
let html = concat!(
2013+
"<html><body>",
2014+
"<a href='#'>Link</a>",
2015+
"<button>Btn</button>",
2016+
"<div style='cursor:pointer' onclick='x()'>ClickDiv</div>",
2017+
"<div tabindex='0'>FocusDiv</div>",
2018+
"<span style='cursor:pointer'>PointerSpan</span>",
2019+
"<div style='cursor:pointer'><span>InheritChild</span></div>",
2020+
"</body></html>",
2021+
);
2022+
2023+
let resp = execute_command(
2024+
&json!({ "id": "2", "action": "setcontent", "html": html }),
2025+
&mut state,
2026+
)
2027+
.await;
2028+
assert_success(&resp);
2029+
2030+
// snapshot -i -C: interactive tree + cursor section
2031+
let start = std::time::Instant::now();
2032+
let resp = execute_command(
2033+
&json!({ "id": "3", "action": "snapshot", "interactive": true, "cursor": true }),
2034+
&mut state,
2035+
)
2036+
.await;
2037+
let elapsed = start.elapsed();
2038+
assert_success(&resp);
2039+
2040+
let snapshot = get_data(&resp)["snapshot"].as_str().unwrap();
2041+
2042+
// Cursor section must appear
2043+
assert!(
2044+
snapshot.contains("# Cursor-interactive elements:") || snapshot.contains("clickable"),
2045+
"Cursor section missing from snapshot -i -C:\n{}",
2046+
snapshot,
2047+
);
2048+
2049+
// v0.19.0 output format: role + hints
2050+
assert!(
2051+
snapshot.contains("clickable") && snapshot.contains("[cursor:pointer"),
2052+
"Expected v0.19.0-format cursor output with hints:\n{}",
2053+
snapshot,
2054+
);
2055+
2056+
// Role differentiation: tabindex-only → focusable
2057+
assert!(
2058+
snapshot.contains("focusable") && snapshot.contains("[tabindex]"),
2059+
"Expected focusable role for tabindex-only element:\n{}",
2060+
snapshot,
2061+
);
2062+
2063+
// Text dedup: "Link" and "Btn" are in the ARIA tree, so must NOT appear
2064+
// in the cursor section.
2065+
let cursor_section = snapshot
2066+
.split("# Cursor-interactive elements:")
2067+
.nth(1)
2068+
.unwrap_or(snapshot);
2069+
assert!(
2070+
!cursor_section.contains("\"Link\""),
2071+
"ARIA link text should be deduped from cursor section"
2072+
);
2073+
assert!(
2074+
!cursor_section.contains("\"Btn\""),
2075+
"ARIA button text should be deduped from cursor section"
2076+
);
2077+
2078+
// Must complete quickly (< 5s), not hit the 30s CDP timeout
2079+
assert!(
2080+
elapsed.as_secs() < 5,
2081+
"snapshot -C took {:?}, expected < 5s (Issue #841 regression)",
2082+
elapsed,
2083+
);
2084+
2085+
let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
2086+
assert_success(&resp);
2087+
}
2088+
2089+
/// Verifies that `screenshot --annotate` completes in bounded time even with
2090+
/// many interactive elements. Guards against the sequential CDP round-trip
2091+
/// regression that caused hangs over high-latency WSS (Issue #841).
2092+
#[tokio::test]
2093+
#[ignore]
2094+
async fn e2e_screenshot_annotate_many_elements() {
2095+
let mut state = DaemonState::new();
2096+
2097+
let resp = execute_command(
2098+
&json!({ "id": "1", "action": "launch", "headless": true }),
2099+
&mut state,
2100+
)
2101+
.await;
2102+
assert_success(&resp);
2103+
2104+
// 50 buttons: old sequential code would do 50×2×200ms ≈ 20s over WSS.
2105+
let mut html = String::from("<html><body>");
2106+
for i in 1..=50 {
2107+
html.push_str(&format!("<button>Button {}</button>", i));
2108+
}
2109+
html.push_str("</body></html>");
2110+
2111+
let resp = execute_command(
2112+
&json!({ "id": "2", "action": "setcontent", "html": html }),
2113+
&mut state,
2114+
)
2115+
.await;
2116+
assert_success(&resp);
2117+
2118+
let start = std::time::Instant::now();
2119+
let resp = execute_command(
2120+
&json!({ "id": "3", "action": "screenshot", "annotate": true }),
2121+
&mut state,
2122+
)
2123+
.await;
2124+
let elapsed = start.elapsed();
2125+
assert_success(&resp);
2126+
2127+
let annotations = get_data(&resp)["annotations"]
2128+
.as_array()
2129+
.expect("Annotated screenshot should return annotations");
2130+
2131+
assert!(
2132+
annotations.len() >= 50,
2133+
"Expected at least 50 annotations, got {}",
2134+
annotations.len(),
2135+
);
2136+
2137+
// Must complete quickly (< 10s), not hit the 30s CDP timeout
2138+
assert!(
2139+
elapsed.as_secs() < 10,
2140+
"screenshot --annotate with 50 elements took {:?}, expected < 10s (Issue #841)",
2141+
elapsed,
2142+
);
2143+
2144+
let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
2145+
assert_success(&resp);
2146+
}
2147+
2148+
/// Verifies `snapshot -C` with many cursor-interactive elements completes in
2149+
/// bounded time. Direct regression test for Issue #841's root cause: N×2
2150+
/// sequential CDP round-trips per cursor-interactive element.
2151+
#[tokio::test]
2152+
#[ignore]
2153+
async fn e2e_snapshot_cursor_many_elements() {
2154+
let mut state = DaemonState::new();
2155+
2156+
let resp = execute_command(
2157+
&json!({ "id": "1", "action": "launch", "headless": true }),
2158+
&mut state,
2159+
)
2160+
.await;
2161+
assert_success(&resp);
2162+
2163+
// 100 cursor-interactive divs: old code = 200 sequential CDP calls,
2164+
// at 200ms WSS latency = 40s timeout. New code must finish in seconds.
2165+
let mut html = String::from("<html><body>");
2166+
for i in 1..=100 {
2167+
html.push_str(&format!(
2168+
"<div style='cursor:pointer' onclick='x()'>Item {}</div>",
2169+
i,
2170+
));
2171+
}
2172+
html.push_str("</body></html>");
2173+
2174+
let resp = execute_command(
2175+
&json!({ "id": "2", "action": "setcontent", "html": html }),
2176+
&mut state,
2177+
)
2178+
.await;
2179+
assert_success(&resp);
2180+
2181+
let start = std::time::Instant::now();
2182+
let resp = execute_command(
2183+
&json!({ "id": "3", "action": "snapshot", "interactive": true, "cursor": true }),
2184+
&mut state,
2185+
)
2186+
.await;
2187+
let elapsed = start.elapsed();
2188+
assert_success(&resp);
2189+
2190+
let snapshot = get_data(&resp)["snapshot"].as_str().unwrap();
2191+
2192+
// All 100 items should appear
2193+
assert!(
2194+
snapshot.contains("Item 1") && snapshot.contains("Item 100"),
2195+
"Expected all 100 cursor-interactive items in output",
2196+
);
2197+
2198+
// All should have v0.19.0-format hints
2199+
assert!(
2200+
snapshot.contains("[cursor:pointer, onclick]"),
2201+
"Expected v0.19.0-format hints",
2202+
);
2203+
2204+
// Must complete quickly
2205+
assert!(
2206+
elapsed.as_secs() < 10,
2207+
"snapshot -C with 100 cursor elements took {:?}, expected < 10s (Issue #841)",
2208+
elapsed,
2209+
);
2210+
2211+
let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
2212+
assert_success(&resp);
2213+
}

cli/src/native/screenshot.rs

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -223,35 +223,87 @@ async fn collect_annotations(
223223
session_id: &str,
224224
ref_map: &RefMap,
225225
) -> Result<Vec<RawAnnotation>, String> {
226-
let mut annotations = Vec::new();
226+
let entries = ref_map.entries_sorted();
227+
if entries.is_empty() {
228+
return Ok(Vec::new());
229+
}
227230

228-
for (ref_id, entry) in ref_map.entries_sorted() {
229-
let object_id =
230-
match super::element::resolve_element_object_id(client, session_id, ref_map, &ref_id)
231-
.await
232-
{
233-
Ok(id) => id,
234-
Err(_) => continue,
235-
};
231+
// Collect entries that have backend_node_ids for batch resolution.
232+
let with_backend_ids: Vec<(String, super::element::RefEntry, i64)> = entries
233+
.iter()
234+
.filter_map(|(ref_id, entry)| {
235+
entry
236+
.backend_node_id
237+
.map(|bid| (ref_id.clone(), entry.clone(), bid))
238+
})
239+
.collect();
236240

237-
let Some(rect) = get_rect_for_object(client, session_id, &object_id).await? else {
238-
continue;
239-
};
241+
if with_backend_ids.is_empty() {
242+
return Ok(Vec::new());
243+
}
240244

241-
if rect.width <= 0.0 || rect.height <= 0.0 {
242-
continue;
245+
// Batch-resolve all backend_node_ids to object IDs using concurrent CDP calls.
246+
let resolve_futures: Vec<_> = with_backend_ids
247+
.iter()
248+
.map(|(_, _, backend_node_id)| {
249+
client.send_command(
250+
"DOM.resolveNode",
251+
Some(serde_json::json!({
252+
"backendNodeId": backend_node_id,
253+
"objectGroup": "agent-browser-annotate"
254+
})),
255+
Some(session_id),
256+
)
257+
})
258+
.collect();
259+
260+
let resolve_results = futures_util::future::join_all(resolve_futures).await;
261+
262+
// Collect resolved object IDs paired with their ref info.
263+
let mut resolved: Vec<(String, super::element::RefEntry, String)> = Vec::new();
264+
for (i, result) in resolve_results.into_iter().enumerate() {
265+
if let Ok(val) = result {
266+
if let Some(oid) = val
267+
.get("object")
268+
.and_then(|o| o.get("objectId"))
269+
.and_then(|v| v.as_str())
270+
{
271+
let (ref_id, entry, _) = &with_backend_ids[i];
272+
resolved.push((ref_id.clone(), entry.clone(), oid.to_string()));
273+
}
243274
}
275+
}
276+
277+
if resolved.is_empty() {
278+
return Ok(Vec::new());
279+
}
280+
281+
// Batch-get bounding rects for all resolved elements using concurrent CDP calls.
282+
let rect_futures: Vec<_> = resolved
283+
.iter()
284+
.map(|(_, _, object_id)| get_rect_for_object(client, session_id, object_id))
285+
.collect();
286+
287+
let rect_results = futures_util::future::join_all(rect_futures).await;
288+
289+
let mut annotations = Vec::new();
290+
for (i, rect_result) in rect_results.into_iter().enumerate() {
291+
let rect = match rect_result {
292+
Ok(Some(r)) if r.width > 0.0 && r.height > 0.0 => r,
293+
_ => continue,
294+
};
244295

296+
let (ref_id, entry, _) = &resolved[i];
245297
let number = ref_id
246298
.strip_prefix('e')
247299
.and_then(|n| n.parse::<u64>().ok())
248300
.unwrap_or(0);
249301

250302
annotations.push(RawAnnotation {
251-
ref_id,
303+
ref_id: ref_id.clone(),
252304
number,
253-
role: entry.role,
254-
name: (!entry.name.is_empty()).then_some(entry.name),
305+
role: entry.role.clone(),
306+
name: (!entry.name.is_empty()).then_some(entry.name.clone()),
255307
rect,
256308
});
257309
}

0 commit comments

Comments
 (0)