Skip to content

Commit 391a155

Browse files
mehmetnadir and claude committed
feat: add smart commands — natural language element interaction without LLM
Like Stagehand's act("Click login") but zero LLM cost:

- `smart-click <text>` — click by visible text, fuzzy matching across textContent, aria-label, title, placeholder, value. Shows score + alternatives.
- `smart-fill <label> <value>` — fill input by label/placeholder text, React-compatible, searches associated labels, aria-label, name, id.
- `smart-select <label> <option>` — select dropdown option by text.

All use fuzzy scoring: exact(100) > startsWith(80) > includes(60) > partial(20-50).
No CSS selectors needed. No LLM API calls. Pure DOM intelligence.

Available as MCP tools: browser_smart_click, browser_smart_fill, browser_smart_select

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 55c7b67 commit 391a155

1 file changed

Lines changed: 299 additions & 0 deletions

File tree

src/cdpilot.py

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2984,6 +2984,293 @@ async def cmd_observe():
29842984
print(result)
29852985

29862986

2987+
# ─── Smart Commands (LLM-free intelligence) ───
2988+
2989+
async def cmd_smart_click(text):
    """Click the best-matching visible element for ``text`` — no CSS selector needed.

    Like Stagehand's act("Click login") but without LLM.
    Searches textContent, aria-label, title, placeholder, value and alt of
    links, buttons, role-based widgets, labels and other clickable elements.

    Scoring is case-insensitive on trimmed text:
    exact (100) > startsWith (80) > includes (60) > partial word match (20-50).
    Ties are broken in favour of the shortest matching text, then the most
    deeply nested element, so a wrapper whose textContent merely repeats a
    button's label does not win over the button itself. Disabled controls are
    ignored because clicking them has no effect.

    Args:
        text: Visible text of the element to click.

    Exits with status 1 (message on stderr) when ``text`` is blank, when the
    in-page script fails, or when no element matches.

    Usage:
        cdpilot smart-click "Login"
        cdpilot smart-click "Submit Order"
        cdpilot smart-click "Learn more"
    """
    # Normalise once here: an empty search would otherwise "startWith"-match
    # every element in the page and click an arbitrary one.
    search = (text or "").strip().lower()
    if not search:
        print("smart-click: text must not be empty", file=sys.stderr)
        sys.exit(1)

    ws_url, _ = get_page_ws()
    safe_text = json.dumps(search)
    js = f"""
    (function() {{
        var search = {safe_text};
        var words = search.split(/\\s+/);
        var candidates = [];

        // Score: exact > startsWith > includes > partial
        function score(str) {{
            if (!str) return 0;
            var s = String(str).toLowerCase().trim();
            if (!s) return 0;
            if (s === search) return 100;
            if (s.startsWith(search)) return 80;
            if (s.includes(search)) return 60;
            // Partial word match
            var matched = words.filter(function(w) {{ return s.includes(w); }}).length;
            if (matched > 0) return 20 + (matched / words.length) * 30;
            return 0;
        }}

        // Number of ancestors; used to prefer the innermost element on ties.
        function depth(el) {{
            var d = 0;
            for (var p = el.parentNode; p; p = p.parentNode) d++;
            return d;
        }}

        var els = document.querySelectorAll(
            'a, button, input[type=submit], input[type=button], ' +
            '[role=button], [role=link], [role=tab], [role=menuitem], ' +
            'summary, label, [onclick], [tabindex]'
        );

        Array.from(els).forEach(function(el) {{
            if (el.disabled === true) return;
            var rect = el.getBoundingClientRect();
            if (rect.width === 0 && rect.height === 0) return;
            var style = window.getComputedStyle(el);
            if (style.display === 'none' || style.visibility === 'hidden') return;

            var texts = [
                el.textContent || '',
                el.getAttribute('aria-label') || '',
                el.getAttribute('title') || '',
                el.getAttribute('placeholder') || '',
                el.value || '',
                el.getAttribute('alt') || ''
            ];

            var bestScore = 0;
            var bestMatch = '';
            var bestLen = 0;
            texts.forEach(function(raw) {{
                // String(): el.value is numeric on some elements (e.g. li with a value attribute).
                var t = String(raw).trim();
                var s = score(t);
                if (s > bestScore) {{
                    bestScore = s;
                    bestMatch = t.substring(0, 60);
                    bestLen = t.length;
                }}
            }});

            if (bestScore > 0) {{
                candidates.push({{
                    el: el,
                    score: bestScore,
                    match: bestMatch,
                    len: bestLen,
                    depth: depth(el),
                    tag: el.tagName.toLowerCase()
                }});
            }}
        }});

        if (candidates.length === 0) return JSON.stringify({{found: false}});

        // Highest score first; on ties the most specific (shortest text), then the
        // innermost element. Remaining ties keep document order (stable sort).
        candidates.sort(function(a, b) {{
            if (a.score !== b.score) return b.score - a.score;
            if (a.len !== b.len) return a.len - b.len;
            return b.depth - a.depth;
        }});
        var best = candidates[0];
        best.el.scrollIntoView({{block: 'center'}});
        // Measure after scrolling so the reported point is where the element now is.
        var rect = best.el.getBoundingClientRect();
        best.el.click();
        return JSON.stringify({{
            found: true,
            tag: best.tag,
            text: best.match,
            score: best.score,
            x: rect.x + rect.width / 2,
            y: rect.y + rect.height / 2,
            alternatives: candidates.slice(1, 4).map(function(c) {{
                return c.tag + ' "' + c.match + '" (score:' + c.score + ')';
            }})
        }});
    }})()
    """
    r = await cdp_send(ws_url, [(1, "Runtime.evaluate", {"expression": js, "returnByValue": True})])
    reply = r.get(1, {})
    raw = reply.get("result", {}).get("value", "")
    try:
        data = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        # NOTE(review): assumes cdp_send hands back the raw Runtime.evaluate
        # result, where a thrown page error is reported under "exceptionDetails".
        exc = reply.get("exceptionDetails") or {}
        detail = (exc.get("exception") or {}).get("description") or exc.get("text") or ""
        message = "Error parsing result"
        if detail:
            message = f"{message}: {detail}"
        print(message, file=sys.stderr)
        # Fail loudly: a silent return would look like a successful click.
        sys.exit(1)

    if not data.get("found"):
        print(f'No element found matching: "{text}"', file=sys.stderr)
        sys.exit(1)

    await _vfx_ripple(ws_url, data["x"], data["y"])
    print(f'Clicked: {data["tag"].upper()} "{data["text"]}" (score:{data["score"]})')
    if data.get("alternatives"):
        print(f' Also found: {", ".join(data["alternatives"])}')
3097+
3098+
3099+
async def cmd_smart_fill(text, value):
    """Fill an input located by its label/placeholder text — no CSS selector needed.

    Candidate fields are input, textarea, select and contenteditable elements
    with a non-empty bounding box. Each is scored (exact 100 > startsWith 80 >
    includes 60, case-insensitive) against its placeholder, aria-label, name,
    id, ``<label for>`` text, wrapping ``<label>`` text and the text of the
    preceding sibling element. The highest-scoring field is filled and
    ``input`` / ``change`` events are dispatched so framework listeners run.

    Args:
        text: Label, placeholder, aria-label, name or id text identifying the field.
        value: Value to write into the field.

    Exits with status 1 (message on stderr) when ``text`` is blank, when the
    in-page script fails, or when no field matches.

    Usage:
        cdpilot smart-fill "Email" "test@example.com"
        cdpilot smart-fill "Password" "secret123"
        cdpilot smart-fill "Search" "cdpilot"
    """
    # A blank label would "startWith"-match every field that has any text.
    search = (text or "").strip().lower()
    if not search:
        print("smart-fill: label text must not be empty", file=sys.stderr)
        sys.exit(1)

    ws_url, _ = get_page_ws()
    safe_text = json.dumps(search)
    safe_value = json.dumps(value)
    js = f"""
    (function() {{
        var search = {safe_text};
        var value = {safe_value};
        var candidates = [];
        // Input types with no user-editable text; setting .value on them is
        // meaningless, and on a file input it throws.
        var skipTypes = ['button', 'submit', 'reset', 'image', 'checkbox', 'radio', 'file'];

        function score(str) {{
            if (!str) return 0;
            var s = String(str).toLowerCase().trim();
            if (!s) return 0;
            if (s === search) return 100;
            if (s.startsWith(search)) return 80;
            if (s.includes(search)) return 60;
            return 0;
        }}

        // First label per target id, matched by attribute comparison rather than
        // a concatenated selector, so ids containing quotes or backslashes are safe.
        var labelById = Object.create(null);
        Array.from(document.querySelectorAll('label[for]')).forEach(function(l) {{
            var target = l.getAttribute('for');
            if (!(target in labelById)) labelById[target] = l;
        }});

        var inputs = document.querySelectorAll('input, textarea, select, [contenteditable=true]');
        Array.from(inputs).forEach(function(el) {{
            var tag = el.tagName.toLowerCase();
            if (tag === 'input' && skipTypes.indexOf(el.type) !== -1) return;
            var rect = el.getBoundingClientRect();
            if (rect.width === 0 && rect.height === 0) return;

            var scores = [];
            // Check placeholder
            scores.push(score(el.getAttribute('placeholder') || ''));
            // Check aria-label
            scores.push(score(el.getAttribute('aria-label') || ''));
            // Check name/id
            scores.push(score(el.name || ''));
            scores.push(score(el.id || ''));
            // Check associated label
            if (el.id && labelById[el.id]) {{
                scores.push(score(labelById[el.id].textContent || ''));
            }}
            // Check parent label
            var parentLabel = el.closest('label');
            if (parentLabel) scores.push(score(parentLabel.textContent || ''));
            // Check preceding sibling element (often an unassociated label)
            var prev = el.previousElementSibling;
            if (prev) scores.push(score(prev.textContent || ''));

            var bestScore = Math.max.apply(null, scores);
            if (bestScore > 0) {{
                candidates.push({{el: el, score: bestScore, tag: tag, type: el.type || ''}});
            }}
        }});

        if (candidates.length === 0) return JSON.stringify({{found: false}});

        candidates.sort(function(a, b) {{ return b.score - a.score; }});
        var best = candidates[0];

        // React-compatible value setting: call the prototype's native setter so an
        // instance-level override of 'value' is bypassed. The setter must come from
        // the element's own interface; the input setter throws on a textarea/select.
        var ctorByTag = {{
            input: window.HTMLInputElement,
            textarea: window.HTMLTextAreaElement,
            select: window.HTMLSelectElement
        }};
        var ctor = ctorByTag[best.tag];
        if (ctor) {{
            var desc = Object.getOwnPropertyDescriptor(ctor.prototype, 'value');
            if (desc && desc.set) {{
                desc.set.call(best.el, value);
            }} else {{
                best.el.value = value;
            }}
        }} else {{
            // contenteditable host: it has no 'value', its text is its content.
            best.el.textContent = value;
        }}
        best.el.dispatchEvent(new Event('input', {{bubbles: true}}));
        best.el.dispatchEvent(new Event('change', {{bubbles: true}}));

        return JSON.stringify({{
            found: true,
            tag: best.tag,
            type: best.type,
            score: best.score,
            placeholder: best.el.getAttribute('placeholder') || '',
            name: best.el.name || best.el.id || ''
        }});
    }})()
    """
    r = await cdp_send(ws_url, [(1, "Runtime.evaluate", {"expression": js, "returnByValue": True})])
    reply = r.get(1, {})
    raw = reply.get("result", {}).get("value", "")
    try:
        data = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        # NOTE(review): assumes cdp_send hands back the raw Runtime.evaluate
        # result, where a thrown page error is reported under "exceptionDetails".
        exc = reply.get("exceptionDetails") or {}
        detail = (exc.get("exception") or {}).get("description") or exc.get("text") or ""
        message = "Error parsing result"
        if detail:
            message = f"{message}: {detail}"
        print(message, file=sys.stderr)
        # Fail loudly: a silent return would look like a successful fill.
        sys.exit(1)

    if not data.get("found"):
        print(f'No input found matching: "{text}"', file=sys.stderr)
        sys.exit(1)

    ident = data.get("placeholder") or data.get("name") or data["tag"]
    print(f'Filled: {data["tag"].upper()}[{data["type"]}] "{ident}" = {value} (score:{data["score"]})')
3201+
3202+
3203+
async def cmd_smart_select(text, option_text):
    """Select a dropdown option by label text — no CSS selector needed.

    Every ``<select>`` is scored (exact 100 > startsWith 80 > includes 60,
    case-insensitive) against its aria-label, name, id, ``<label for>`` text
    and wrapping ``<label>`` text; the first select with the highest score is
    used. Within it, an option whose trimmed text equals ``option_text`` is
    preferred, falling back to the first option whose text contains it. The
    option is chosen by index so options sharing the same ``value`` are not
    confused, then ``input`` and ``change`` events are dispatched.

    Args:
        text: Label, aria-label, name or id text identifying the dropdown.
        option_text: Visible text of the option to select.

    Exits with status 1 (message on stderr) when ``text`` is blank, when the
    in-page script fails, when no select matches, or when no option matches
    (the first ten available options are listed in that case).

    Usage:
        cdpilot smart-select "Country" "Turkey"
        cdpilot smart-select "Size" "Large"
    """
    # A blank label would match every select through includes('').
    search = (text or "").strip().lower()
    if not search:
        print("smart-select: label text must not be empty", file=sys.stderr)
        sys.exit(1)
    wanted_option = (option_text or "").strip().lower()

    ws_url, _ = get_page_ws()
    safe_text = json.dumps(search)
    safe_option = json.dumps(wanted_option)
    js = f"""
    (function() {{
        var search = {safe_text};
        var optSearch = {safe_option};
        var best = null;
        var bestScore = 0;

        function score(str) {{
            if (!str) return 0;
            var s = String(str).toLowerCase().trim();
            if (!s) return 0;
            if (s === search) return 100;
            if (s.startsWith(search)) return 80;
            if (s.includes(search)) return 60;
            return 0;
        }}

        // First label per target id, matched by attribute comparison rather than
        // a concatenated selector, so ids containing quotes or backslashes are safe.
        var labelById = Object.create(null);
        Array.from(document.querySelectorAll('label[for]')).forEach(function(l) {{
            var target = l.getAttribute('for');
            if (!(target in labelById)) labelById[target] = l;
        }});

        Array.from(document.querySelectorAll('select')).forEach(function(sel) {{
            var texts = [
                sel.getAttribute('aria-label') || '',
                sel.name || '',
                sel.id || ''
            ];
            if (sel.id && labelById[sel.id]) {{
                texts.push(labelById[sel.id].textContent || '');
            }}
            var parent = sel.closest('label');
            if (parent) texts.push(parent.textContent || '');

            texts.forEach(function(t) {{
                var sc = score(t);
                if (sc > bestScore) {{ bestScore = sc; best = sel; }}
            }});
        }});

        if (!best) return JSON.stringify({{found: false}});

        // Find matching option: exact text first, then substring. The substring
        // fallback is skipped for an empty query, which would match any option.
        var options = Array.from(best.options);
        var match = options.find(function(o) {{
            return o.text.toLowerCase().trim() === optSearch;
        }}) || (optSearch && options.find(function(o) {{
            return o.text.toLowerCase().includes(optSearch);
        }}));

        if (!match) return JSON.stringify({{found: true, optionFound: false, available: options.map(function(o) {{ return o.text; }}).slice(0, 10)}});

        // Select by position: assigning select.value picks the first option with
        // that value, which is the wrong one when values are duplicated.
        best.selectedIndex = match.index;
        best.dispatchEvent(new Event('input', {{bubbles: true}}));
        best.dispatchEvent(new Event('change', {{bubbles: true}}));
        return JSON.stringify({{found: true, optionFound: true, selected: match.text, value: match.value}});
    }})()
    """
    r = await cdp_send(ws_url, [(1, "Runtime.evaluate", {"expression": js, "returnByValue": True})])
    reply = r.get(1, {})
    raw = reply.get("result", {}).get("value", "")
    try:
        data = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        # NOTE(review): assumes cdp_send hands back the raw Runtime.evaluate
        # result, where a thrown page error is reported under "exceptionDetails".
        exc = reply.get("exceptionDetails") or {}
        detail = (exc.get("exception") or {}).get("description") or exc.get("text") or ""
        message = "Error parsing result"
        if detail:
            message = f"{message}: {detail}"
        print(message, file=sys.stderr)
        # Fail loudly: a silent return would look like a successful selection.
        sys.exit(1)

    if not data.get("found"):
        print(f'No select found matching: "{text}"', file=sys.stderr)
        sys.exit(1)
    if not data.get("optionFound"):
        print(f'Option "{option_text}" not found. Available: {", ".join(data.get("available", []))}', file=sys.stderr)
        sys.exit(1)
    print(f'Selected: "{data["selected"]}" (value={data["value"]})')
3272+
3273+
29873274
async def cmd_run_script(script_path):
29883275
"""Run a .cdp script file — sequential commands, one per line.
29893276
@@ -3878,6 +4165,12 @@ def _register_tools(self):
38784165
"inputSchema": {"type": "object", "properties": {"selector": {"type": "string", "description": "CSS selector to match elements (e.g. 'table tr', '.product', 'a')"}, "format": {"type": "string", "enum": ["text", "json", "list"], "description": "Output format: 'text' (one per line), 'json' (full structure with attrs), 'list' (numbered)", "default": "text"}}, "required": ["selector"]}},
38794166
{"name": "browser_observe", "description": "List all interactive elements on the current page with their available actions (CLICK, FILL, NAVIGATE, TOGGLE, SELECT, SUBMIT, UPLOAD). Like Stagehand observe() but deterministic — no LLM needed. Shows what you CAN DO on the page. Use this to understand page structure before acting.",
38804167
"inputSchema": {"type": "object", "properties": {}}},
4168+
{"name": "browser_smart_click", "description": "Click an element by its visible text — no CSS selector needed. Uses fuzzy matching across text content, aria-label, title, placeholder, and value. Returns match score and alternatives. Like Stagehand act('Click login') but without LLM cost. Use when you know WHAT to click but not the exact selector.",
4169+
"inputSchema": {"type": "object", "properties": {"text": {"type": "string", "description": "Visible text of the element to click (e.g. 'Login', 'Submit Order', 'Learn more')"}}, "required": ["text"]}},
4170+
{"name": "browser_smart_fill", "description": "Fill an input by its label or placeholder text — no CSS selector needed. Finds input by associated label, placeholder, aria-label, name, or id. React-compatible value setting. Use when you know WHAT field to fill but not the exact selector.",
4171+
"inputSchema": {"type": "object", "properties": {"label": {"type": "string", "description": "Label or placeholder text of the input (e.g. 'Email', 'Password', 'Search')"}, "value": {"type": "string", "description": "Value to fill in the input"}}, "required": ["label", "value"]}},
4172+
{"name": "browser_smart_select", "description": "Select a dropdown option by label and option text — no CSS selector needed. Finds the select element by label/name, then selects the matching option. Use for dropdown interactions without knowing selectors.",
4173+
"inputSchema": {"type": "object", "properties": {"label": {"type": "string", "description": "Label text of the select dropdown"}, "option": {"type": "string", "description": "Text of the option to select"}}, "required": ["label", "option"]}},
38814174
{"name": "browser_describe", "description": "Get a comprehensive page description combining three data sources: (1) accessibility tree with @N references for interactive elements, (2) a PNG screenshot saved to disk, and (3) visible text content. Use this when browser_a11y alone is insufficient — for canvas/WebGL content, visual verification, or complex dynamic UIs. This is the vision fallback tool.",
38824175
"inputSchema": {"type": "object", "properties": {}}},
38834176
{"name": "browser_assert", "description": "Assert that an element matching the CSS selector exists and optionally contains expected text. Returns PASS or FAIL with details. Use this for automated testing — verify page state after navigation or interaction. Checks visibility by default.",
@@ -3954,6 +4247,9 @@ def _execute_tool(self, req_id, tool_name, args):
39544247
"browser_close": lambda a: ["close"],
39554248
"browser_extract": lambda a: ["extract", a.get("selector", "")] + ([f"--{a['format']}"] if a.get("format") and a["format"] != "text" else []),
39564249
"browser_observe": lambda a: ["observe"],
4250+
"browser_smart_click": lambda a: ["smart-click", a.get("text", "")],
4251+
"browser_smart_fill": lambda a: ["smart-fill", a.get("label", ""), a.get("value", "")],
4252+
"browser_smart_select": lambda a: ["smart-select", a.get("label", ""), a.get("option", "")],
39574253
"browser_describe": lambda a: ["describe"],
39584254
"browser_assert": lambda a: ["assert", a.get("selector", "")] + ([a["text"]] if a.get("text") else []),
39594255
"browser_wait_for": lambda a: ["wait-for", a.get("selector", "")] + ([str(a["timeout"])] if a.get("timeout") else []),
@@ -4121,6 +4417,9 @@ def require_args(n, usage):
41214417
'extract': lambda: (require_args(1, 'extract <selector> [--json|--list|--attrs=href,title]'), None)[1] if not args else cmd_extract(args[0], next((a.lstrip('-') for a in args[1:] if a.startswith('--')), "text")),
41224418
'observe': cmd_observe,
41234419
'run': lambda: (require_args(1, 'run <script.cdp>'), None)[1] if not args else cmd_run_script(args[0]),
4420+
'smart-click': lambda: (require_args(1, 'smart-click <text>'), None)[1] if not args else cmd_smart_click(" ".join(args)),
4421+
'smart-fill': lambda: (require_args(2, 'smart-fill <label> <value>'), None)[1] if len(args) < 2 else cmd_smart_fill(args[0], " ".join(args[1:])),
4422+
'smart-select': lambda: (require_args(2, 'smart-select <label> <option>'), None)[1] if len(args) < 2 else cmd_smart_select(args[0], " ".join(args[1:])),
41244423
'assert': lambda: (require_args(1, 'assert <selector> [text]'), None)[1] if not args else cmd_assert(args[0], args[1] if len(args) > 1 else None),
41254424
'wait-for': lambda: (require_args(1, 'wait-for <selector> [timeout_ms]'), None)[1] if not args else cmd_wait_for(args[0], int(args[1]) if len(args) > 1 else 5000),
41264425
'check': lambda: cmd_check(args[0] if args else None),

0 commit comments

Comments
 (0)