Skip to content

Commit 57f5348

Browse files
j-mendez and claude committed
feat(cache): dump chrome responses to remote cache server
After `put_hybrid_cache` writes to the local in-memory `CACACHE_MANAGER`, spawn a fire-and-forget task that calls `dump_to_remote_cache_parts` to POST the response to the shared `hybrid_cache_server`. This allows all ECS tasks to serve cache hits without re-rendering through Chrome.

Bump v2.47.66

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 00f01e8 commit 57f5348

9 files changed

Lines changed: 87 additions & 35 deletions

File tree

Cargo.lock

Lines changed: 15 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

spider/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider"
3-
version = "2.47.65"
3+
version = "2.47.66"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "A web crawler and scraper, building blocks for data curation workloads."
66
repository = "https://github.com/spider-rs/spider"
@@ -122,11 +122,11 @@ features = ["serde", "headers", "dynamic-versions"]
122122

123123
[dependencies.spider_agent_types]
124124
path = "../spider_agent_types"
125-
version = "2.47.65"
125+
version = "2.47.66"
126126

127127
[dependencies.spider_agent]
128128
path = "../spider_agent"
129-
version = "2.47.65"
129+
version = "2.47.66"
130130
optional = true
131131
default-features = false
132132

spider/src/utils/mod.rs

Lines changed: 60 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1750,18 +1750,19 @@ pub async fn cache_chrome_response(
17501750
};
17511751

17521752
if let Ok(u) = url::Url::parse(target_url) {
1753+
let chromey_version = match chrome_http_req_res.protocol.as_str() {
1754+
"http/0.9" => HttpVersion::Http09,
1755+
"http/1" | "http/1.0" => HttpVersion::Http10,
1756+
"http/1.1" => HttpVersion::Http11,
1757+
"http/2.0" | "http/2" => HttpVersion::H2,
1758+
"http/3.0" | "http/3" => HttpVersion::H3,
1759+
_ => HttpVersion::Http11,
1760+
};
17531761
let http_response = HttpResponse {
17541762
url: u,
17551763
body,
17561764
status: chrome_http_req_res.status_code.into(),
1757-
version: match chrome_http_req_res.protocol.as_str() {
1758-
"http/0.9" => HttpVersion::Http09,
1759-
"http/1" | "http/1.0" => HttpVersion::Http10,
1760-
"http/1.1" => HttpVersion::Http11,
1761-
"http/2.0" | "http/2" => HttpVersion::H2,
1762-
"http/3.0" | "http/3" => HttpVersion::H3,
1763-
_ => HttpVersion::Http11,
1764-
},
1765+
version: chromey_version,
17651766
headers: chrome_http_req_res.response_headers,
17661767
};
17671768
let auth_opt = match cache_options {
@@ -1776,13 +1777,64 @@ pub async fn cache_chrome_response(
17761777
auth_opt.map(|token| token.as_ref()),
17771778
);
17781779

1780+
// Clone data needed for remote cache dump before put_hybrid_cache consumes them.
1781+
#[cfg(feature = "chrome_remote_cache")]
1782+
let remote_dump_data = {
1783+
let cache_site =
1784+
chromiumoxide::cache::manager::site_key_for_target_url(target_url, None);
1785+
Some((
1786+
cache_key.clone(),
1787+
cache_site,
1788+
http_response.body.clone(),
1789+
http_response.status,
1790+
chrome_http_req_res.request_headers.clone(),
1791+
http_response.headers.clone(),
1792+
chromey_version,
1793+
chrome_http_req_res.method.clone(),
1794+
))
1795+
};
1796+
17791797
put_hybrid_cache(
17801798
&cache_key,
17811799
http_response,
17821800
&chrome_http_req_res.method,
17831801
chrome_http_req_res.request_headers,
17841802
)
17851803
.await;
1804+
1805+
// Best-effort async dump to the shared remote cache server so other
1806+
// ECS tasks / processes can serve cache hits without re-rendering.
1807+
#[cfg(feature = "chrome_remote_cache")]
1808+
if let Some((key, site, body, status, req_hdrs, resp_hdrs, version, method)) =
1809+
remote_dump_data
1810+
{
1811+
let target = target_url.to_string();
1812+
let remote_version = match version {
1813+
HttpVersion::Http09 => chromiumoxide::http::HttpVersion::Http09,
1814+
HttpVersion::Http10 => chromiumoxide::http::HttpVersion::Http10,
1815+
HttpVersion::H2 => chromiumoxide::http::HttpVersion::H2,
1816+
HttpVersion::H3 => chromiumoxide::http::HttpVersion::H3,
1817+
_ => chromiumoxide::http::HttpVersion::Http11,
1818+
};
1819+
tokio::spawn(async move {
1820+
let _ = tokio::time::timeout(
1821+
Duration::from_secs(5),
1822+
chromiumoxide::cache::remote::dump_to_remote_cache_parts(
1823+
&key,
1824+
&site,
1825+
&target,
1826+
&body,
1827+
&method,
1828+
status,
1829+
&req_hdrs,
1830+
&resp_hdrs,
1831+
&remote_version,
1832+
Some("true"),
1833+
),
1834+
)
1835+
.await;
1836+
});
1837+
}
17861838
}
17871839
}
17881840

spider_agent/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider_agent"
3-
version = "2.47.65"
3+
version = "2.47.66"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "A concurrent-safe multimodal agent for web automation and research."
66
repository = "https://github.com/spider-rs/spider"
@@ -28,8 +28,8 @@ parking_lot = "0.12"
2828
base64 = "0.22"
2929

3030
# Extracted types and HTML processing
31-
spider_agent_types = { version = "2.47.65", path = "../spider_agent_types" }
32-
spider_agent_html = { version = "2.47.65", path = "../spider_agent_html" }
31+
spider_agent_types = { version = "2.47.66", path = "../spider_agent_types" }
32+
spider_agent_html = { version = "2.47.66", path = "../spider_agent_html" }
3333

3434
# HTML processing (still needed for engine internals)
3535
lol_html = "2"

spider_agent_html/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider_agent_html"
3-
version = "2.47.65"
3+
version = "2.47.66"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "HTML processing utilities for spider_agent — cleaning, content analysis, and diffing."
66
repository = "https://github.com/spider-rs/spider"
@@ -24,7 +24,7 @@ serde = { version = "1", features = ["derive"] }
2424
serde_json = "1"
2525

2626
# Types from our types crate
27-
spider_agent_types = { version = "2.47.65", path = "../spider_agent_types" }
27+
spider_agent_types = { version = "2.47.66", path = "../spider_agent_types" }
2828

2929
[dev-dependencies]
3030
serde_json = "1"

spider_agent_types/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider_agent_types"
3-
version = "2.47.65"
3+
version = "2.47.66"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "Pure data types and constants for spider_agent automation. Zero heavy dependencies."
66
repository = "https://github.com/spider-rs/spider"

spider_cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider_cli"
3-
version = "2.47.65"
3+
version = "2.47.66"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "The fastest web crawler CLI written in Rust."
66
repository = "https://github.com/spider-rs/spider"

spider_utils/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider_utils"
3-
version = "2.47.65"
3+
version = "2.47.66"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "Utilities to use for Spider Web Crawler."
66
repository = "https://github.com/spider-rs/spider"

spider_worker/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider_worker"
3-
version = "2.47.65"
3+
version = "2.47.66"
44
authors = ["j-mendez <jeff@spider.cloud>"]
55
description = "The fastest web crawler as a worker or proxy."
66
repository = "https://github.com/spider-rs/spider"

0 commit comments

Comments
 (0)