name: Functional Tests

# Pull requests are handled through pull_request_target so that PRs coming
# from forks can be given secrets. A dedicated gate job, which only ever
# checks out the base ref, decides whether a PR is authorized before the test
# job checks out the PR head (the same arrangement e2e.yml uses).

# Start from an empty token permission set; every job in this workflow
# declares the scopes it needs itself.
permissions: {}
8+
on:
  push:
    branches:
      - main
    # SYNC-WITH: grep regex in "Check for functional-test-relevant changes" step
    paths:
      - 'eval/**'
      - 'internal/scaffold/**'
      - '.github/workflows/functional-tests.yml'
      - '.github/scripts/**'
  # No paths filter on PR events: the test job filters by changed files
  # itself. `labeled` is listed so that applying the ok-to-test label
  # re-triggers the workflow (the gate job ignores every other label).
  pull_request_target:
    types:
      - opened
      - synchronize
      - reopened
      - labeled
  merge_group:
  workflow_dispatch:
1522
16- permissions :
17- contents : read
18- id-token : write
19-
# One concurrency group per pull request (pull_request_target) or per
# workflow + ref (every other event).
#
# A `labeled` event whose label is not `ok-to-test` makes the gate job skip,
# so that run does no work. Such runs are placed in a throwaway group keyed by
# run id; otherwise adding any unrelated label to a PR would join the PR's
# group and cancel a test run that is already in progress.
#
# All continuation lines share one indent on purpose so the folded scalar
# collapses them into a single-line expression.
concurrency:
  group: >-
    ${{ github.event_name == 'pull_request_target'
    && format('functional-{0}{1}',
    github.event.pull_request.number,
    github.event.action == 'labeled'
    && github.event.label.name != 'ok-to-test'
    && format('-skipped-{0}', github.run_id)
    || '')
    || format('{0}-{1}', github.workflow, github.ref) }}
  # Newer runs supersede older ones for PRs and for non-main refs; runs on
  # refs/heads/main are never cancelled.
  cancel-in-progress: >-
    ${{ github.event_name == 'pull_request_target'
    || github.ref != 'refs/heads/main' }}
2331
2432jobs :
# Authorization gate for pull_request_target runs.
# It is a job of its own so that pull-requests: write is never granted to the
# job that checks out fork code and runs tests with secrets.
# This job must never check out github.event.pull_request.head.sha.
gate:
  runs-on: ubuntu-latest
  timeout-minutes: 5
  permissions:
    contents: read
    pull-requests: write
  # PR events only; a `labeled` event counts only for the ok-to-test label.
  if: >-
    github.event_name == 'pull_request_target' &&
    (github.event.action != 'labeled' || github.event.label.name == 'ok-to-test')
  outputs:
    authorized: ${{ steps.auth.outputs.authorized }}
  steps:
    # Base branch only — the PR head is never checked out in the gate.
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      with:
        ref: ${{ github.sha }}

    # Local action taken from the base checkout above; its `authorized`
    # output becomes this job's output.
    - id: auth
      name: Check PR authorization
      uses: ./.github/actions/check-e2e-authorization
      with:
        repository: ${{ github.repository }}
        pr_number: ${{ github.event.pull_request.number }}
        pr_author_association: ${{ github.event.pull_request.author_association }}
        event_action: ${{ github.event.action }}
        pr_updated_at: ${{ github.event.pull_request.updated_at }}
61+
# Test job.
# On pull_request_target it runs only when the gate job reported
# authorized=true; a skipped or failed gate is never treated as authorization.
# On every other event the gate is skipped, and !cancelled() lets this job
# run regardless.
# This job checks out untrusted PR head code, so it must never be granted
# pull-requests: write.
functional-tests:
  needs: gate
  if: >-
    !cancelled() &&
    (github.event_name != 'pull_request_target' || needs.gate.outputs.authorized == 'true')
  runs-on: ubuntu-latest
  timeout-minutes: 45
  permissions:
    contents: read
    id-token: write
    # Read-only scope for the "changed files" step, which lists the PR's files
    # through the REST API with github.token. Without it the call only works
    # for public repositories; elsewhere it fails and the step falls back to
    # always running the tests.
    pull-requests: read
  steps:
29- - uses : actions/checkout@v6.0.2
# Path filtering for PR events: sets the `relevant` output to true/false
# depending on whether the PR touches functional-test inputs. The step is
# skipped on every other event, leaving `relevant` empty.
- name: Check for functional-test-relevant changes
  id: changes
  if: github.event_name == 'pull_request_target'
  env:
    REPO: ${{ github.repository }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    GH_TOKEN: ${{ github.token }}
  # SYNC-WITH: push.paths filter above
  run: |
    set_relevant() { echo "relevant=$1" >> "$GITHUB_OUTPUT"; }

    # Fail open: if the file list cannot be fetched, run the tests anyway.
    if ! changed_files=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); then
      echo "::warning::Failed to fetch PR files — running functional tests as a precaution"
      set_relevant true
      exit 0
    fi

    if grep -qE '^eval/|^internal/scaffold/|^\.github/workflows/functional-tests\.yml$|^\.github/scripts/' <<<"$changed_files"; then
      set_relevant true
    else
      echo "::notice::No functional-test-relevant files changed — skipping tests"
      set_relevant false
    fi
96+
# Checks out the PR head commit on pull_request_target (untrusted code) and
# github.sha on every other event.
# The `!= 'false'` test is deliberate: when the changes step is skipped its
# output is empty, which must still count as "run".
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
  if: steps.changes.outputs.relevant != 'false'
  with:
    submodules: true
    persist-credentials: false
    ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}
    # checkout@v7 blocks fork PR head checkouts on pull_request_target by default.
    # Safe here: gate job authorizes before this job runs; no pull-requests: write.
    allow-unsafe-pr-checkout: ${{ github.event_name == 'pull_request_target' }}
32106
# Go toolchain; the version is taken from go.mod.
- if: steps.changes.outputs.relevant != 'false'
  uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c  # v6.4.0
  with:
    go-version-file: go.mod

# Python interpreter; the version is quoted so it stays a string.
- if: steps.changes.outputs.relevant != 'false'
  uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
  with:
    python-version: '3.12'
40116
- name: Install uv
  uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78  # v7.6.0
  if: steps.changes.outputs.relevant != 'false'

# Editable install of the harness from a path inside the checkout
# (presumably the submodule fetched via `submodules: true` — confirm).
- name: Install agent-eval-harness
  if: steps.changes.outputs.relevant != 'false'
  run: |
    uv pip install --system -e 'eval/.agent-eval-harness[anthropic]'
47124
# Downloads the pinned yq release binary (v4.47.1, linux/amd64) and makes it
# executable.
- name: Install yq
  if: steps.changes.outputs.relevant != 'false'
  run: |
    yq_dest=/usr/local/bin/yq
    curl -sSfL "https://github.com/mikefarah/yq/releases/download/v4.47.1/yq_linux_amd64" -o "${yq_dest}"
    chmod +x "${yq_dest}"
52130
# Global git author identity for the job.
- name: Configure git identity
  if: steps.changes.outputs.relevant != 'false'
  run: |
    bot_name='fullsend-eval[bot]'
    git config --global user.name "${bot_name}"
    git config --global user.email "${bot_name}@users.noreply.github.com"

- name: Build fullsend
  if: steps.changes.outputs.relevant != 'false'
  run: |
    make go-build

# Appends <workspace>/bin to GITHUB_PATH so later steps find it on PATH.
- name: Add bin to PATH
  if: steps.changes.outputs.relevant != 'false'
  run: |
    echo "${{ github.workspace }}/bin" >> "$GITHUB_PATH"
63144
64- # TODO: The openshell setup below (version, CLI, gateway, Podman,
65- # gateway start) is duplicated from action.yml. Extract into a
66- # shared script (e.g. .github/scripts/setup-openshell.sh) so the
67- # version and config stay in sync across both places.
68- - name : Set OpenShell version
69- run : echo "OPENSHELL_VERSION=0.0.38" >> "${GITHUB_ENV}"
70-
71- - name : Install OpenShell CLI
# Writes the gateway bind address into the user's OpenShell config directory.
# NOTE(review): presumably consumed by the install script / gateway started
# below — confirm.
- name: Configure OpenShell gateway
  if: steps.changes.outputs.relevant != 'false'
  run: |
    openshell_cfg="$HOME/.config/openshell"
    mkdir -p "${openshell_cfg}"
    echo "OPENSHELL_BIND_ADDRESS=0.0.0.0" > "${openshell_cfg}/gateway.env"

# Runs the repository's OpenShell install script.
- name: Install OpenShell CLI
  if: steps.changes.outputs.relevant != 'false'
  run: |
    .github/scripts/install-openshell.sh
90154
- name: Install Podman
  if: steps.changes.outputs.relevant != 'false'
  run: |
    sudo apt-get update
    sudo apt-get install -y podman

# Ensures the current user has subordinate uid/gid ranges, then migrates
# Podman's state.
- name: Configure rootless Podman
  if: steps.changes.outputs.relevant != 'false'
  run: |
    current_user="$(whoami)"
    # Only add the ranges when /etc/subuid has no entry for this user yet.
    if ! grep -q "^${current_user}:" /etc/subuid; then
      sudo usermod --add-subuids 100000-165535 --add-subgids 100000-165535 "${current_user}"
    fi
    podman system migrate
101167
102168 - name : Start Podman API service
169+ if : steps.changes.outputs.relevant != 'false'
103170 run : |
104171 SOCKET_PATH="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}/podman/podman.sock"
105172 if [ ! -S "${SOCKET_PATH}" ]; then
@@ -112,47 +179,38 @@ jobs:
112179 [ -S "${SOCKET_PATH}" ] || { echo "::error::Podman socket not ready"; exit 1; }
113180 fi
114181
115- - name : Start openshell-gateway
116- run : |
117- set -euo pipefail
118- OPENSHELL_SSH_HANDSHAKE_SECRET="ci-$(openssl rand -hex 16)"
119- export OPENSHELL_SSH_HANDSHAKE_SECRET
120- echo "::add-mask::${OPENSHELL_SSH_HANDSHAKE_SECRET}"
121- export OPENSHELL_SUPERVISOR_IMAGE="ghcr.io/nvidia/openshell/supervisor:dfd47683e7da4f1a4a8fa5d77f92d3696e6a41f9"
122- "${{ runner.temp }}/openshell-gateway" \
123- --bind-address 0.0.0.0 \
124- --health-port 8081 \
125- --drivers podman \
126- --disable-tls \
127- --db-url "sqlite:/tmp/gateway.db?mode=rwc" \
128- >/tmp/gateway.log 2>&1 &
129- for _i in $(seq 1 30); do
130- curl -sf http://127.0.0.1:8081/healthz >/dev/null 2>&1 && break
131- sleep 2
132- done
133- curl -sf http://127.0.0.1:8081/healthz >/dev/null 2>&1 || {
134- echo "::error::Gateway health check failed"
135- cat /tmp/gateway.log 2>/dev/null || true
136- exit 1
137- }
138- openshell gateway add http://127.0.0.1:8080 --local --name local
139- openshell gateway select local
140-
- name: Install validation dependencies
  if: steps.changes.outputs.relevant != 'false'
  run: |
    pip install --quiet "jsonschema>=4.18.0"

# Sets the `available` output to true/false depending on whether the GCP
# workload-identity provider secret is non-empty. Later steps key off it,
# so missing secrets produce a warning instead of a failure.
- name: Check for secrets
  id: secrets-check
  if: steps.changes.outputs.relevant != 'false'
  env:
    WIF_PROVIDER: ${{ secrets.E2E_GCP_WIF_PROVIDER }}
  run: |
    secrets_available=true
    if [ -z "$WIF_PROVIDER" ]; then
      echo "::warning::GCP secrets are not configured. Skipping functional tests."
      secrets_available=false
    fi
    echo "available=${secrets_available}" >> "$GITHUB_OUTPUT"
198+
# Runs only when the changes are relevant and the secrets check passed.
- name: Authenticate to GCP
  if: steps.changes.outputs.relevant != 'false' && steps.secrets-check.outputs.available == 'true'
  uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093  # v3.0.0
  with:
    service_account: ${{ secrets.E2E_GCP_SERVICE_ACCOUNT }}
    workload_identity_provider: ${{ secrets.E2E_GCP_WIF_PROVIDER }}

# Exports the credentials path under a HOST_-prefixed name for later steps,
# then runs the repository's credential preparation script.
# NOTE(review): GOOGLE_APPLICATION_CREDENTIALS is presumably set by the auth
# step above — confirm.
- name: Prepare sandbox credentials
  if: steps.changes.outputs.relevant != 'false' && steps.secrets-check.outputs.available == 'true'
  run: |
    printf 'HOST_GOOGLE_APPLICATION_CREDENTIALS=%s\n' "$GOOGLE_APPLICATION_CREDENTIALS" >> "$GITHUB_ENV"
    bash internal/scaffold/fullsend-repo/scripts/prepare-sandbox-credentials.sh
154211
155212 - name : Run functional tests
213+ if : steps.changes.outputs.relevant != 'false' && steps.secrets-check.outputs.available == 'true'
156214 env :
157215 EVAL_ORG : ${{ vars.EVAL_ORG }}
158216 GH_TOKEN : ${{ secrets.EVAL_GH_TOKEN }}
@@ -163,12 +221,12 @@ jobs:
163221 run : make functional-tests
164222
# Best-effort removal of .eval-env files from both result locations; runs even
# after a failed or cancelled test step, and never fails the job.
- name: Scrub secrets from eval results
  if: always() && steps.changes.outputs.relevant != 'false' && steps.secrets-check.outputs.available == 'true'
  run: |
    for results_dir in eval/runs/ /tmp/agent-eval/; do
      find "${results_dir}" -name '.eval-env' -delete 2>/dev/null || true
    done
168226
169227 - name : Upload eval results
170- if : always()
171- uses : actions/upload-artifact@v4
228+ if : always() && steps.changes.outputs.relevant != 'false' && steps.secrets-check.outputs.available == 'true'
229+ uses : actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
172230 with :
173231 name : eval-results
174232 path : |
0 commit comments