@@ -19,28 +19,59 @@ jobs:
1919 start-large-ec2-runner :
2020 runs-on : ubuntu-latest
2121 outputs :
22- label : ${{ steps.start-ec2-runner.outputs.label }}
23- ec2-instance-id : ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
22+ label : ${{ steps.launch-ec2-instance-with-fallback.outputs.label }}
23+ ec2-instance-id : ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-id }}
24+ ec2-instance-region : ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-region }}
2425 steps :
25- - name : Configure AWS credentials
26- uses : aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
26+ - name : Checkout "launch-ec2-runner-with-fallback" in-house CI action
27+ uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
2728 with :
28- aws-access-key-id : ${{ secrets.AWS_ACCESS_KEY_ID }}
29- aws-secret-access-key : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
30- aws-region : ${{ vars.AWS_REGION }}
31-
32- - name : Start EC2 runner
33- id : start-ec2-runner
34- uses : machulav/ec2-github-runner@a8c20fc0876503410b2b966c124abc2311984ce2 # v2.3.9
29+ repository : instructlab/ci-actions
30+ # clone the "ci-actions" repo to a local directory called "ci-actions", instead of
31+ # overwriting the current WORKDIR contents
32+ path : ci-actions
33+ ref : release-v0.1
34+ sparse-checkout : |
35+ actions/launch-ec2-runner-with-fallback
36+
37+ - name : Launch EC2 Runner with Fallback
38+ id : launch-ec2-instance-with-fallback
39+ uses : ./ci-actions/actions/launch-ec2-runner-with-fallback
40+ env :
41+ TMPDIR : "/tmp"
3542 with :
36- mode : start
37- github-token : ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
38- ec2-image-id : ${{ vars.AWS_EC2_AMI }}
39- ec2-instance-type : g6e.12xlarge
40- subnet-id : subnet-024298cefa3bedd61
41- security-group-id : sg-06300447c4a5fbef3
42- iam-role-name : instructlab-ci-runner
43- aws-resource-tags : >
43+ aws_access_key_id : ${{ secrets.AWS_ACCESS_KEY_ID }}
44+ aws_secret_access_key : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
45+ github_token : ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
46+ regions_config : >
47+ [
48+ {
49+ "region": "us-east-2",
50+ "subnets": {
51+ "us-east-2a": "${{ vars.SUBNET_US_EAST_2A }}",
52+ "us-east-2b": "${{ vars.SUBNET_US_EAST_2B }}",
53+ "us-east-2c": "${{ vars.SUBNET_US_EAST_2C }}"
54+ },
55+ "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_2 }}",
56+ "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_2 }}"
57+ },
58+ {
59+ "region": "us-east-1",
60+ "subnets": {
61+ "us-east-1a": "${{ vars.SUBNET_US_EAST_1A }}",
62+ "us-east-1b": "${{ vars.SUBNET_US_EAST_1B }}",
63+ "us-east-1c": "${{ vars.SUBNET_US_EAST_1C }}",
64+ "us-east-1d": "${{ vars.SUBNET_US_EAST_1D }}",
65+ "us-east-1e": "${{ vars.SUBNET_US_EAST_1E }}",
66+ "us-east-1f": "${{ vars.SUBNET_US_EAST_1F }}"
67+ },
68+ "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_1 }}",
69+ "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_1 }}"
70+ }
71+ ]
72+ try_spot_instance_first : false
73+ ec2_instance_type : g6e.12xlarge
74+ aws_resource_tags : >
4475 [
4576 {"Key": "Name", "Value": "instructlab-ci-github-large-runner"},
4677 {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
6091 - name : Install Packages
6192 run : |
6293 cat /etc/os-release
63- mkdir -p "${TMPDIR}"
64- sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel
94+ mkdir -p /home/tmp
95+ sudo dnf install -y gcc gcc-c++ make git-core python3.11 python3.11-devel
6596
6697 - name : Checkout instructlab/instructlab
6798 uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -144,20 +175,7 @@ jobs:
144175 - name : Install ilab
145176 working-directory : ./instructlab
146177 run : |
147- export CUDA_HOME="/usr/local/cuda"
148- export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
149- export PATH="$PATH:$CUDA_HOME/bin"
150- python3.11 -m venv --upgrade-deps venv
151- . venv/bin/activate
152- nvidia-smi
153- python3.11 -m pip cache remove llama_cpp_python
154-
155- CMAKE_ARGS="-DGGML_CUDA=on" python3.11 -m pip install -v . -c constraints-dev.txt
156-
157- # https://github.com/instructlab/instructlab/issues/1821
158- # install with Torch and build dependencies installed
159- python3.11 -m pip install -v packaging wheel setuptools-scm
160- python3.11 -m pip install -v .[cuda] -r requirements-vllm-cuda.txt -c constraints-dev.txt
178+ PYTHON="python3.11" ./scripts/install-ilab-with-cuda.sh
161179
162180 - name : Update instructlab-sdg library
163181 working-directory : ./sdg
@@ -263,7 +281,7 @@ jobs:
263281 with :
264282 aws-access-key-id : ${{ secrets.AWS_ACCESS_KEY_ID }}
265283 aws-secret-access-key : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
266- aws-region : ${{ vars.AWS_REGION }}
284+ aws-region : ${{ needs.start-large-ec2-runner.outputs.ec2-instance-region }}
267285
268286 - name : Stop EC2 runner
269287 uses : machulav/ec2-github-runner@a8c20fc0876503410b2b966c124abc2311984ce2 # v2.3.9
0 commit comments