# File: .github/workflows/coprocessor-gpu-tests.yml
# Compile and test Coprocessor on a single L40 GPU, on hyperstack
name: coprocessor-gpu-tests

# No permissions are granted at workflow level; each job declares its own.
permissions: {}

# Environment shared by every job and step of this workflow.
env:
  CARGO_TERM_COLOR: always
  # Link to the current workflow run.
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
  # Secrets will be available only to zama-ai organization members
  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}

on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
jobs:
  # Detects whether any file relevant to the GPU coprocessor changed, so the
  # jobs below can be skipped on pull requests that do not touch it.
  check-changes:
    name: coprocessor-gpu-tests/check-changes
    permissions:
      actions: 'read' # Required to read workflow run information
      contents: 'read' # Required to checkout repository code
      pull-requests: 'read' # Required to read pull request information
    runs-on: ubuntu-latest
    outputs:
      changes-coprocessor-gpu: ${{ steps.filter.outputs.coprocessor-gpu }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: 'false'

      - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
        id: filter
        with:
          filters: |
            coprocessor-gpu:
              - coprocessor/fhevm-engine/Cargo.toml
              - coprocessor/fhevm-engine/tfhe-worker/Cargo.toml
              - coprocessor/fhevm-engine/tfhe-worker/build.rs
              - coprocessor/fhevm-engine/tfhe-worker/src/**
              - coprocessor/fhevm-engine/scheduler/src/**
              - coprocessor/fhevm-engine/scheduler/Cargo.toml
              - coprocessor/fhevm-engine/scheduler/build.rs
              - coprocessor/proto/**
              - '.github/workflows/coprocessor-gpu-tests.yml'
              - ci/slab.toml

  # Starts the remote GPU runner through slab (hyperstack backend, l40 profile)
  # and publishes its label for the test and teardown jobs.
  # Runs on manual dispatch, or when check-changes reported relevant changes.
  setup-instance:
    name: coprocessor-gpu-tests/setup-instance
    needs: check-changes
    if: ${{ github.event_name == 'workflow_dispatch' || needs.check-changes.outputs.changes-coprocessor-gpu == 'true' }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read' # Required to checkout repository code
    outputs:
      runner-name: ${{ steps.start-remote-instance.outputs.label }}
    steps:
      # NOTE(review): when SECRETS_AVAILABLE is not 'true' this step is skipped,
      # so `runner-name` is empty while the job still succeeds. Confirm how the
      # test job is expected to obtain a runner in that case.
      - name: Start remote instance
        id: start-remote-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: hyperstack
          profile: l40

  # Builds and runs the GPU test suites of the worker crates on the runner
  # started by setup-instance.
  coprocessor-gpu:
    name: coprocessor-gpu-tests/tests (bpr)
    needs: [ check-changes, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    # One run per workflow and branch: a newer run cancels the one in progress.
    concurrency:
      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    permissions:
      contents: 'read' # Required to checkout repository code
      packages: 'read' # Required to read GitHub packages/container registry
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
      matrix:
        include:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
    # Toolchain locations derived from the matrix entry above.
    env:
      CUDA_PATH: "/usr/local/cuda-${{ matrix.cuda }}"
      CUDA_MODULE_LOADER: "EAGER"
      CC: "/usr/bin/gcc-${{ matrix.gcc }}"
      CXX: "/usr/bin/g++-${{ matrix.gcc }}"
      CUDAHOSTCXX: "/usr/bin/g++-${{ matrix.gcc }}"
    steps:
      # git-lfs is installed before the checkout, which is done with `lfs: true`.
      - name: Install git LFS
        run: |
          sudo apt-get update
          sudo apt-get install -y git-lfs
          git lfs install

      - name: Checkout fhevm
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: 'false'
          lfs: true

      - name: Checkout LFS objects
        run: git lfs checkout

      - name: Setup Hyperstack dependencies
        uses: ./.github/actions/gpu_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      # Makes the CUDA toolchain visible to the following steps.
      # $GITHUB_PATH takes one bare directory per line (it is prepended to PATH
      # for later steps), so only the directory is written, not `PATH=...`.
      # $GITHUB_ENV takes `NAME=value` lines.
      - name: Export CUDA variables
        shell: bash
        run: |
          echo "${CUDA_PATH}/bin" >> "${GITHUB_PATH}"
          echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib64:${LD_LIBRARY_PATH}" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

      # Installs the system build dependencies and docker, gives the current
      # user access to the docker socket, then installs sqlx-cli.
      # NOTE(review): `newgrp docker` starts a new shell inside a
      # non-interactive script; access to the socket appears to come from the
      # `setfacl` call that follows. Confirm whether `newgrp` is still needed.
      - name: Install cargo dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
            libclang-dev docker-compose-v2 docker.io acl
          sudo usermod -aG docker "$USER"
          newgrp docker
          sudo setfacl --modify user:"$USER":rw /var/run/docker.sock
          cargo install sqlx-cli --version 0.7.2 --no-default-features --features postgres --locked

      - name: Install foundry
        uses: foundry-rs/foundry-toolchain@de808b1eea699e761c404bda44ba8f21aba30b2c

      # NOTE(review): the cargo steps run with working-directory
      # coprocessor/fhevm-engine, while `target` below is resolved from the
      # workspace root. Confirm this is where the build output actually lands.
      - name: Cache cargo
        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-cargo-

      - name: Login to GitHub Container Registry
        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Login to GitHub Chainguard Registry
        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
        with:
          registry: cgr.dev
          username: ${{ secrets.CGR_USERNAME }}
          password: ${{ secrets.CGR_PASSWORD }}

      # Launched in the background with output sent to /tmp/db-init.log; the
      # "Wait for database migration" step below blocks on its completion.
      - name: Start database services (background)
        run: |
          nohup docker compose up -d --build db-migration > /tmp/db-init.log 2>&1 &
        working-directory: coprocessor/fhevm-engine/tfhe-worker

      - name: Use Node.js
        uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
        with:
          node-version: 20.x

      # Prepare and compile the host contracts.
      - run: cp ./host-contracts/.env.example ./host-contracts/.env
      - run: npm --workspace=host-contracts ci --include=optional
      - run: "cd host-contracts && npm run deploy:emptyProxies && npx hardhat compile"
        env:
          HARDHAT_NETWORK: hardhat

      # Builds the test binaries without running them (`--no-run`).
      - name: Compile GPU test artifacts
        run: |
          SQLX_OFFLINE=true cargo test --no-run \
            -p tfhe-worker \
            -p sns-worker \
            -p zkproof-worker \
            --release \
            --features=gpu
        working-directory: coprocessor/fhevm-engine

      # Polls for up to 15 minutes until the db-migration container exists,
      # then waits for it to exit and fails the step on a non-zero exit code.
      - name: Wait for database migration
        run: |
          SECONDS=0
          while ! docker container inspect db-migration > /dev/null 2>&1; do
            if [ "$SECONDS" -ge 900 ]; then
              echo "Timed out waiting for db-migration container after 15 minutes"
              cat /tmp/db-init.log
              exit 1
            fi
            echo "Waiting for db-migration container to be created..."
            sleep 2
          done
          EXIT_CODE=$(docker wait db-migration)
          if [ "$EXIT_CODE" != "0" ]; then
            echo "Database migration failed with exit code $EXIT_CODE"
            docker logs db-migration
            cat /tmp/db-init.log
            exit 1
          fi
          echo "Database migration completed"

      - name: Run GPU tests for the worker services.
        env:
          # '1' when the head ref is a mergify merge-queue branch, '0' otherwise.
          IS_MERGE_QUEUE: ${{ startsWith(github.head_ref, 'mergify/merge-queue/') && '1' || '0' }}
        run: |
          export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/coprocessor
          # Merge queue: leave unset so supported_types() defaults to full matrix.
          # PR CI: run only FHEUint64 for faster feedback.
          if [ "$IS_MERGE_QUEUE" != "1" ]; then
            export TFHE_WORKER_EVENT_TYPE_MATRIX=uint64
          fi
          cargo test \
            -p tfhe-worker \
            -p sns-worker \
            -p zkproof-worker \
            --release \
            --features=gpu \
            -- \
            --test-threads=1
        working-directory: coprocessor/fhevm-engine

  # Stops the remote runner. `always()` makes this run even when the test job
  # fails or is cancelled, provided setup-instance itself succeeded.
  teardown-instance:
    name: coprocessor-gpu-tests/teardown
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, coprocessor-gpu ]
    runs-on: ubuntu-latest
    permissions:
      contents: 'read' # Required to checkout repository code
    steps:
      - name: Stop remote instance
        id: stop-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}