Skip to content

feat: add on_link_blocked_callback for robots.txt blocked URLs #3743

feat: add on_link_blocked_callback for robots.txt blocked URLs

feat: add on_link_blocked_callback for robots.txt blocked URLs #3743

Workflow file for this run

# CI for the Rust workspace. Three independent jobs:
#   build    - checks the whole workspace and runs the spider_agent tests
#   chrome   - stack-frame regression guard for spider built with `chrome`
#   io_uring - checks/tests spider with and without the `io_uring` feature
name: Rust

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the jobs in this file only check out the repository,
# restore/save caches and run cargo, so the GITHUB_TOKEN needs nothing beyond
# read access to repository contents. Widen per job if one ever needs more.
permissions:
  contents: read

env:
  CARGO_TERM_COLOR: always

jobs:
  # Workspace-wide compile check plus the spider_agent test suites.
  build:
    runs-on: ubuntu-latest
    env:
      # Credentials and the live-test switch are exposed to every step of
      # this job through the environment.
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      OPEN_ROUTER: ${{ secrets.OPEN_ROUTER }}
      SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
      RUN_LIVE_TESTS: ${{ secrets.RUN_LIVE_TESTS }}
    steps:
      - uses: actions/checkout@v4
      # Cache cargo's downloaded sources/binaries and the build output,
      # keyed on the runner OS and the contents of every Cargo.lock.
      - uses: actions/cache@v4
        id: cache
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
      - name: Cargo check workspace
        run: cargo check --workspace
      - name: Cargo test spider_agent (default)
        run: cargo test -p spider_agent
      - name: Cargo test spider_agent (openai + search_serper)
        run: cargo test -p spider_agent --features "openai search_serper"
      # Opt-in live smoke test: runs only when RUN_LIVE_TESTS is '1' or
      # 'true'. Expression string comparison ignores case, so 'true' also
      # matches 'TRUE' and no separate upper-case clause is needed.
      - name: Cargo test spider_agent live smoke
        if: env.RUN_LIVE_TESTS == '1' || env.RUN_LIVE_TESTS == 'true'
        run: cargo test -p spider_agent --features "openai search_serper" --test live_env_smoke -- --nocapture

  # Regression guard for spider compiled with the `chrome` feature.
  chrome:
    runs-on: ubuntu-latest
    env:
      RUN_LIVE_TESTS: ${{ secrets.RUN_LIVE_TESTS }}
    steps:
      - uses: actions/checkout@v4
      # Same cache layout as the build job, under a chrome-specific key so
      # the two jobs do not share cache entries.
      - uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-chrome-${{ hashFiles('**/Cargo.lock') }}
      # Stack-frame regression guard. `website::scrape` does not launch
      # chrome (default config uses the HTTP path), so no browser binary
      # is needed — but compiling with `--features chrome` folds the
      # chrome page-fetch frames into the crawl future, and if they
      # regrow unboxed this SIGABRTs on the 2MB stack (the v2.51.188
      # regression). Network-bound (hits choosealicense.com), so gated
      # on RUN_LIVE_TESTS like the spider_agent live smoke step.
      # As there, 'true' matches case-insensitively and covers 'TRUE'.
      - name: Cargo test spider scrape (chrome frame guard)
        if: env.RUN_LIVE_TESTS == '1' || env.RUN_LIVE_TESTS == 'true'
        run: cargo test -p spider --features chrome website::scrape -- --exact --nocapture

  # Compile check and `uring_fs` tests for spider, with the io_uring feature
  # enabled and with it disabled.
  io_uring:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Same cache layout again, under an io_uring-specific key.
      - uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-uring-${{ hashFiles('**/Cargo.lock') }}
      - name: Cargo check spider with io_uring
        run: cargo check -p spider --features io_uring
      - name: Cargo test spider uring_fs
        run: cargo test -p spider --features io_uring uring_fs
      # Same test filter, built with only the `basic` feature, to cover the
      # code path used when io_uring is not enabled.
      - name: Cargo test spider (no io_uring fallback)
        run: cargo test -p spider --no-default-features --features basic uring_fs