-
Notifications
You must be signed in to change notification settings - Fork 4
222 lines (198 loc) · 9.7 KB
/
build-deploy-changes.yaml
File metadata and controls
222 lines (198 loc) · 9.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# Builds, pushes and deploys the services whose sources changed in a pull request.
name: Build & Deploy Changed Services

# Triggered by pull requests that target main, dev or any release/* branch.
on:
  pull_request:
    branches:
      - main
      - dev
      - "release/*"

# Scopes granted to GITHUB_TOKEN for this workflow.
permissions:
  contents: read
  packages: write

env:
  # Run number of this workflow, exported as TAG.
  TAG: ${{ github.run_number }}
jobs:
  build:
    name: Build and Deploy
    # Runs on a self-hosted runner that carries the "paicicd" label.
    runs-on:
      - self-hosted
      - paicicd
    timeout-minutes: 120
    environment: auto-test
    container:
      image: ubuntu:24.04
      # Expose the host Docker socket inside the job container.
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock
    # NOTE(review): the original nesting of this env block (job vs. container)
    # is not recoverable from the flattened source; job level is assumed.
    env:
      DOCKER_BUILDKIT: "1"
    steps:
# Installs git into the job container ahead of the checkout step.
- name: Install git
  run: |
    export DEBIAN_FRONTEND=noninteractive
    apt update
    apt install -y git
# Clones the repository with full history and without submodules.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    # Pull requests check out the PR head commit; any other event uses the ref name.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.ref_name }}
    # Full history; the change-detection step computes a merge base from it.
    fetch-depth: 0
    submodules: false
# Computes the list of service folders (first path component below src/) touched
# by this change and publishes it as the step output "folders"
# (space separated, empty when nothing under src/<service>/ changed).
- name: Get Changed Folders (Services)
  id: changes
  env:
    # Context values are handed over as environment variables instead of being
    # spliced into the script text, so their content is never parsed as shell.
    EVENT_NAME: ${{ github.event_name }}
    BASE_REF: ${{ github.event.pull_request.base.ref }}
    PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
    BEFORE_SHA: ${{ github.event.before }}
    COMMIT_SHA: ${{ github.sha }}
  run: |
    git config --global --add safe.directory "$GITHUB_WORKSPACE"
    if [ "$EVENT_NAME" = "pull_request" ]; then
      echo "Pull request detected"
      # Fetch the base branch and diff against the merge base to get only PR changes
      git fetch origin "$BASE_REF" --depth=50
      base_sha=$(git merge-base "origin/$BASE_REF" "$PR_HEAD_SHA")
      head_sha="$PR_HEAD_SHA"
    else
      if [ "$BEFORE_SHA" = "0000000000000000000000000000000000000000" ]; then
        # An all-zero "before" SHA: fall back to the previous commit on the branch
        base_sha=$(git rev-parse "$COMMIT_SHA^")
      else
        base_sha="$BEFORE_SHA"
      fi
      head_sha="$COMMIT_SHA"
    fi
    echo "Comparing $base_sha...$head_sha"
    changed_files=$(git diff --name-only "$base_sha" "$head_sha")
    echo "Changed files: $changed_files"
    # Extract service folders under src/. NF > 2 requires src/<service>/<file>,
    # so a file sitting directly in src/ is not mistaken for a service.
    # webportal-dind and dev-box are excluded.
    folders=$(printf '%s\n' "$changed_files" \
      | awk -F'/' '$1 == "src" && NF > 2 { print $2 }' \
      | sort -u \
      | grep -v -E '^(webportal-dind|dev-box)$' \
      | tr '\n' ' ' || true)
    echo "Changed folders: $folders"
    # Export as output for next steps
    echo "folders=$folders" >> "$GITHUB_OUTPUT"
# Publishes has_changed=true when the "changes" step reported at least one
# folder, has_changed=false otherwise. Later steps are gated on this output.
- name: Check if folders are empty
  id: check
  run: |
    if [ -n "${{ steps.changes.outputs.folders }}" ]; then
      echo "has_changed=true" >> $GITHUB_OUTPUT
    else
      echo "has_changed=false" >> $GITHUB_OUTPUT
    fi
# Installs the OS packages, the Azure CLI and Docker needed by the later steps.
- name: Install Package
  if: steps.check.outputs.has_changed == 'true'
  run: |
    DEBIAN_FRONTEND=noninteractive apt install -y \
      python3 python-is-python3 pip git unzip \
      ca-certificates curl apt-transport-https lsb-release gnupg \
      parallel
    # Azure CLI via the installer script
    curl -sL https://aka.ms/InstallAzureCLIDeb | bash
    # Docker via the convenience script
    curl -fsSL https://get.docker.com | sh
# Installs the Python packages into the system interpreter of the container.
- name: Install python libs
  if: steps.check.outputs.has_changed == 'true'
  run: |
    python -m pip install --break-system-packages \
      pyyaml jinja2 paramiko etcd3 protobuf==3.20.3 kubernetes gitpython
# Restores the base64-encoded config archive from the CONFIG_FILE_B64 secret
# and unpacks it into $GITHUB_WORKSPACE/config.
- name: Decode and unzip config file
  if: steps.check.outputs.has_changed == 'true'
  run: |
    config_dir=$GITHUB_WORKSPACE/config
    echo "${{ secrets.CONFIG_FILE_B64 }}" | base64 -d > config.zip
    mkdir -p $config_dir
    # -o: overwrite existing files without prompting
    unzip -o config.zip -d $config_dir
    ls -l $config_dir
# Moves the unpacked auth-configuration directory to /tmp, replacing any copy
# left there earlier.
- name: Arrange Config Files
  if: steps.check.outputs.has_changed == 'true'
  run: |
    auth_dir=/tmp/auth-configuration
    rm -rf $auth_dir
    mv $GITHUB_WORKSPACE/config/auth-configuration /tmp/
    ls -l $auth_dir
# Logs the Docker client in to ghcr.io as the triggering actor.
- name: Login to GHCR
  if: steps.check.outputs.has_changed == 'true'
  env:
    GHCR_USER: ${{ github.actor }}
    GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # The token is fed through stdin so it never appears in the process
    # argument list (docker itself warns that -p is insecure).
    printf '%s' "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USER" --password-stdin
# Builds the images of every changed service. alert-manager is handled
# separately: only an explicit list of its images is built.
- name: Build Images of Changed Services
  if: steps.check.outputs.has_changed == 'true'
  env:
    # Folder names originate from paths in the pull request; pass them as data
    # rather than splicing them into the script text.
    CHANGED_SERVICES: ${{ steps.changes.outputs.folders }}
  run: |
    echo "Building: $CHANGED_SERVICES"
    # Split the list into alert-manager and everything else using exact word
    # comparison, so a service whose name merely contains "alert-manager" is
    # left intact. Only POSIX sh constructs are used here.
    build_alert_manager=false
    other_services=""
    for svc in $CHANGED_SERVICES; do
      if [ "$svc" = "alert-manager" ]; then
        build_alert_manager=true
      else
        other_services="$other_services $svc"
      fi
    done
    if [ "$build_alert_manager" = "true" ]; then
      echo "alert-manager is in the changed services"
      # Build specific images in alert-manager. Every option line must end
      # with a backslash, otherwise the -i list runs as a separate command.
      echo "Building specific alert-manager images"
      $GITHUB_WORKSPACE/build/pai_build.py build \
        -c $GITHUB_WORKSPACE/config/cluster-configuration \
        -s alert-manager \
        -i abnormal-detector,alert-handler,alert-parser,cert-expiration-checker,cluster-utilization,job-data-recorder,job-status-change-notification,node-failure-detection,node-issue-classifier,nvidia-gpu-low-perf-fixer,redis-monitoring
    fi
    echo "Changed services after removing alert-manager: $other_services"
    # Skip the generic build when alert-manager was the only changed service;
    # calling the build with an empty -s would otherwise fail.
    if [ -n "$other_services" ]; then
      $GITHUB_WORKSPACE/build/pai_build.py build \
        -c $GITHUB_WORKSPACE/config/cluster-configuration \
        -s $other_services
    else
      echo "No other services to build"
    fi
# Signs the Azure CLI in with the managed identity whose client id is stored
# in the AZURE_MANAGED_IDENTITY_CLIENT_ID secret.
- name: Login to Azure with Managed Identity
  if: steps.check.outputs.has_changed == 'true'
  run: az login --identity --client-id ${{ secrets.AZURE_MANAGED_IDENTITY_CLIENT_ID }}
# Pushes the images of all changed services using the registry settings from
# the cluster configuration.
- name: Push Images of Changed Services to ACR
  if: steps.check.outputs.has_changed == 'true'
  env:
    # Folder names originate from paths in the pull request; pass them as data
    # rather than splicing them into the script text.
    CHANGED_SERVICES: ${{ steps.changes.outputs.folders }}
  run: |
    echo "Pushing: $CHANGED_SERVICES"
    # Deliberately unquoted: each service name becomes its own argument.
    $GITHUB_WORKSPACE/build/pai_build.py push \
      -c $GITHUB_WORKSPACE/config/cluster-configuration \
      -s $CHANGED_SERVICES
# Pushes the images of every changed service to ghcr.io under the repository
# owner's namespace. alert-manager is handled separately: only an explicit
# list of its images is pushed.
- name: Push Images of Changed Service to GHCR
  if: steps.check.outputs.has_changed == 'true'
  env:
    # Folder names originate from paths in the pull request; pass them (and
    # the credentials) as data rather than splicing them into the script text.
    CHANGED_SERVICES: ${{ steps.changes.outputs.folders }}
    GHCR_USER: ${{ github.actor }}
    GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    echo "Pushing: $CHANGED_SERVICES"
    echo "Changed services before removing alert-manager: $CHANGED_SERVICES"
    # Split the list into alert-manager and everything else using exact word
    # comparison, so a service whose name merely contains "alert-manager" is
    # left intact. Only POSIX sh constructs are used here.
    push_alert_manager=false
    other_services=""
    for svc in $CHANGED_SERVICES; do
      if [ "$svc" = "alert-manager" ]; then
        push_alert_manager=true
      else
        other_services="$other_services $svc"
      fi
    done
    if [ "$push_alert_manager" = "true" ]; then
      echo "alert-manager is in the changed services"
      # Push specific images in alert-manager to GHCR
      echo "Pushing specific alert-manager images to GHCR"
      $GITHUB_WORKSPACE/build/pai_build.py push \
        -c $GITHUB_WORKSPACE/config/cluster-configuration \
        -s alert-manager \
        -i abnormal-detector,alert-handler,alert-parser,cert-expiration-checker,cluster-utilization,job-data-recorder,job-status-change-notification,node-failure-detection,node-issue-classifier,nvidia-gpu-low-perf-fixer,redis-monitoring \
        --docker-registry ghcr.io \
        --docker-namespace "${GITHUB_REPOSITORY_OWNER}" \
        --docker-username "$GHCR_USER" \
        --docker-password "$GHCR_TOKEN"
    fi
    echo "Changed services after removing alert-manager: $other_services"
    # Skip the generic push when alert-manager was the only changed service;
    # calling the push with an empty -s would otherwise fail.
    if [ -n "$other_services" ]; then
      $GITHUB_WORKSPACE/build/pai_build.py push \
        -c $GITHUB_WORKSPACE/config/cluster-configuration \
        -s $other_services \
        --docker-registry ghcr.io \
        --docker-namespace "${GITHUB_REPOSITORY_OWNER}" \
        --docker-username "$GHCR_USER" \
        --docker-password "$GHCR_TOKEN"
    else
      echo "No other services to push"
    fi
# Fetches AKS credentials, then stops the changed services, pushes the cluster
# configuration and starts the services again through paictl.py.
- name: Azure CLI get credentials and deploy
  if: steps.check.outputs.has_changed == 'true'
  env:
    # Folder names originate from paths in the pull request; pass them as data
    # rather than splicing them into the script text.
    SERVICES_TO_DEPLOY: ${{ steps.changes.outputs.folders }}
  run: |
    az version
    az login --identity --client-id ${{ secrets.AZURE_MANAGED_IDENTITY_CLIENT_ID }}
    az aks install-cli
    az aks get-credentials \
      --resource-group ${{ secrets.AZURE_RESOURCE_GROUP }} \
      --name ${{ secrets.KUBERNETES_CLUSTER }} \
      --overwrite-existing
    kubelogin convert-kubeconfig -l azurecli
    kubectl config use-context ${{ secrets.KUBERNETES_CLUSTER }}
    services_to_deploy="$SERVICES_TO_DEPLOY"
    echo "Final services to deploy: $services_to_deploy"
    # Padding with spaces makes grep match the whole service name only.
    if echo " $services_to_deploy " | grep -q " cluster-local-storage-worker "; then
      # Rewrites value "8" to "0" on line 42 of the template.
      # NOTE(review): tied to an absolute line number; silently does nothing
      # if the template is reflowed. Confirm line 42 is still the right line.
      sed -i '42s/value: "8"/value: "0"/' $GITHUB_WORKSPACE/src/cluster-local-storage-worker/deploy/cluster-local-storage-worker.yaml.template
    fi
    # paictl.py receives the cluster name on stdin from this file.
    echo "${{ secrets.PAI_CLUSTER_NAME }}" > cluster_id
    echo "Stopping changed pai services $services_to_deploy on ${{ secrets.PAI_CLUSTER_NAME }} ..."
    $GITHUB_WORKSPACE/paictl.py service stop -n $services_to_deploy < cluster_id
    echo "Pushing config to cluster \"${{ secrets.PAI_CLUSTER_NAME }}\" ..."
    $GITHUB_WORKSPACE/paictl.py config push -m service -p $GITHUB_WORKSPACE/config/cluster-configuration < cluster_id
    echo "Starting to update $services_to_deploy on ${{ secrets.PAI_CLUSTER_NAME }} ..."
    $GITHUB_WORKSPACE/paictl.py service start -n $services_to_deploy < cluster_id
    kubectl get pod
    kubectl get service
# Smoke test of the deployed rest-server; starts only after the build job.
test:
  name: Test rest-server
  needs: build
  environment: auto-test
  # Same self-hosted "paicicd" runner selection as the build job.
  runs-on:
    - self-hosted
    - paicicd
  steps:
# Calls the rest-server info endpoint and the virtual-clusters endpoint and
# fails the job when either request fails.
- name: Test rest-server
  run: |
    echo "Testing rest-server ${{ secrets.PAI_WEB_URL }}/rest-server/api/v2/info"
    # -f makes curl exit non-zero on HTTP error responses; without it an
    # error page would still count as success.
    curl -f "${{ secrets.PAI_WEB_URL }}/rest-server/api/v2/info"
    echo "Checking virtual cluster status..."
    # The request is tested directly in the if condition: with a fail-fast
    # shell a failing plain assignment would abort the script before a
    # separate "$?" check could print the message.
    if ! vc_info=$(curl -fsS -H "Authorization: Bearer ${{ secrets.PAI_WEB_TOKEN }}" "${{ secrets.PAI_WEB_URL }}/rest-server/api/v2/virtual-clusters"); then
      echo "Failed to access virtual cluster API"
      exit 1
    fi
    echo "Virtual cluster info: $vc_info"