Skip to content

Commit e0b4ddd

Browse files
j-mendez and claude committed
feat(agent,scripting): client reuse + usage tracking (v2.51.204)
`agent.fetch` now inherits the engine's proxy-configured HTTP client instead of using a process-wide static — scripts go through the same proxy / TLS / headers as the rest of the agent. Per-call fetch + script activity is tracked in lock-free atomics on the engine.

## Client reuse

* `ScriptEngine` gains a `default_client: reqwest::Client` field plus a `with_client(...)` builder method.
* New per-call methods `run_python_with_client` and `run_javascript_with_client` accept an explicit client override.
* Chrome dispatcher (`run_embedded_script`) now reads `engine.client.clone()` (cheap — `reqwest::Client` is internally Arc'd) and routes through the `*_with_client` variant when set. Scripts inherit the agent's proxy without any extra wiring.
* `Job` carries the client into the worker; native `agent.fetch` hooks use that client instead of the static fallback. No more per-script client construction.

## Usage tracking

* New `ScriptUsage` struct with atomic counters: `scripts_run`, `scripts_timed_out`, `scripts_failed`, `fetch_calls`, `fetch_errors`, `fetch_bytes_in`.
* No mutexes — counters are `AtomicU64` with `Ordering::Relaxed` (we want monotonic activity totals, not synchronization).
* Engine exposes `usage_snapshot() -> ScriptUsageSnapshot` for plain-data reads + `usage_handle() -> Arc<ScriptUsage>` for live observation.

## Tests

All 20 scripting tests still pass (parallel + `--test-threads=1`). `cargo fmt` + `clippy` clean across scripting + chrome+scripting.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6aea8e5 commit e0b4ddd

14 files changed

Lines changed: 289 additions & 75 deletions

File tree

Cargo.lock

Lines changed: 17 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

spider/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider"
3-
version = "2.51.203"
3+
version = "2.51.204"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "A web crawler and scraper, building blocks for data curation workloads."
66
repository = "https://github.com/spider-rs/spider"

spider_agent/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider_agent"
3-
version = "2.51.203"
3+
version = "2.51.204"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "A concurrent-safe multimodal agent for web automation and research."
66
repository = "https://github.com/spider-rs/spider"
@@ -28,8 +28,8 @@ parking_lot = "0.12"
2828
base64 = "0.22"
2929

3030
# Extracted types and HTML processing
31-
spider_agent_types = { version = "2.51.203", path = "../spider_agent_types" }
32-
spider_agent_html = { version = "2.51.203", path = "../spider_agent_html" }
31+
spider_agent_types = { version = "2.51.204", path = "../spider_agent_types" }
32+
spider_agent_html = { version = "2.51.204", path = "../spider_agent_html" }
3333

3434
# HTML processing (still needed for engine internals)
3535
lol_html = "2"

spider_agent/src/automation/browser.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5835,9 +5835,22 @@ async fn run_embedded_script(
58355835
};
58365836
let timeout = timeout_ms.map(std::time::Duration::from_millis);
58375837

5838-
let result = match action {
5839-
"RunPython" => script.run_python(code, ctx, timeout).await,
5840-
"RunJavaScript" => script.run_javascript(code, ctx, timeout).await,
5838+
// Inherit the agent's HTTP client (proxy / TLS / headers) so `agent.fetch`
5839+
// from inside the script doesn't bypass the configured outbound path.
5840+
// Cheap clone — reqwest::Client is Arc-internal.
5841+
let result = match (action, engine.client.clone()) {
5842+
("RunPython", Some(client)) => {
5843+
script
5844+
.run_python_with_client(code, ctx, timeout, client)
5845+
.await
5846+
}
5847+
("RunPython", None) => script.run_python(code, ctx, timeout).await,
5848+
("RunJavaScript", Some(client)) => {
5849+
script
5850+
.run_javascript_with_client(code, ctx, timeout, client)
5851+
.await
5852+
}
5853+
("RunJavaScript", None) => script.run_javascript(code, ctx, timeout).await,
58415854
_ => return ActionOutcome::fail(action, "unreachable: action mismatch"),
58425855
};
58435856

spider_agent/src/scripting/js.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ struct CallState {
3434
runtime: tokio::runtime::Handle,
3535
sandbox: Option<Arc<SandboxedDir>>,
3636
allow_network: bool,
37+
/// Engine-provided HTTP client (proxy/TLS config from `engine.client`).
38+
client: reqwest::Client,
39+
/// Process-wide usage counters; updated atomically by `agent.fetch`.
40+
usage: Arc<super::ScriptUsage>,
3741
}
3842

3943
thread_local! {
@@ -101,6 +105,8 @@ pub(crate) fn run(job: &Job) -> Result<ScriptResult, String> {
101105
runtime: job.runtime.clone(),
102106
sandbox: sandbox.clone(),
103107
allow_network: job.config.allow_network,
108+
client: job.client.clone(),
109+
usage: job.usage.clone(),
104110
});
105111

106112
// Install per-call state; ensure it's cleared even on panic/early return.
@@ -373,9 +379,11 @@ fn js_fetch(_this: &JsValue, args: &[JsValue], ctx: &mut Context) -> JsResult<Js
373379
} else {
374380
FetchRequest::default()
375381
};
376-
let resp = try_with_state(|s| agent_fetch_blocking(&s.runtime, &s.interrupt, &url, req))
377-
.map_err(|_| state_missing_err())?
378-
.map_err(|e| JsNativeError::error().with_message(e))?;
382+
let resp = try_with_state(|s| {
383+
agent_fetch_blocking(&s.client, &s.runtime, &s.interrupt, &s.usage, &url, req)
384+
})
385+
.map_err(|_| state_missing_err())?
386+
.map_err(|e| JsNativeError::error().with_message(e))?;
379387
let resp_json = serde_json::to_value(&resp)
380388
.map_err(|e| JsNativeError::error().with_message(format!("serialize: {e}")))?;
381389
JsValue::from_json(&resp_json, ctx)

0 commit comments

Comments
 (0)