Skip to content

Commit 5ae7a02

Browse files
vrajashkr and Ramkumar Chinchani authored
feat(cluster): Add support for request proxying for scale out (project-zot#2385)
* feat(cluster): initial commit for scale-out cluster

  Signed-off-by: Ramkumar Chinchani <[email protected]>

* feat(cluster): support shared storage scale out

  This change introduces support for scaling out a zot cluster that is backed by shared storage.

  New feature: Multiple stateless zot instances can run using the same shared storage backend, where each instance looks at a specific set of repositories based on a siphash of the repository name. This improves scale, as the load is distributed across multiple instances. For a given config, there will only be one instance that can perform dist-spec read/write on a given repository.

  What's changed?
  - Introduced a transparent request proxy for dist-spec endpoints based on a siphash of the repository name.
  - Added a new config for the scale-out cluster that specifies the list of cluster members.

  Signed-off-by: Vishwas Rajashekar <[email protected]>

---------

Signed-off-by: Ramkumar Chinchani <[email protected]>
Signed-off-by: Vishwas Rajashekar <[email protected]>
Co-authored-by: Ramkumar Chinchani <[email protected]>
1 parent be5ad66 commit 5ae7a02

30 files changed

+2320
-24
lines changed

.github/workflows/ecosystem-tools.yaml

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
go install github.com/swaggo/swag/cmd/[email protected]
2828
go mod download
2929
sudo apt-get update
30-
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap
30+
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap haproxy jq
3131
# install skopeo
3232
git clone -b v1.12.0 https://github.com/containers/skopeo.git
3333
cd skopeo
@@ -80,4 +80,37 @@ jobs:
8080
env:
8181
AWS_ACCESS_KEY_ID: fake
8282
AWS_SECRET_ACCESS_KEY: fake
83+
- name: Run cloud scale-out tests
84+
id: scale
85+
run: |
86+
make run-cloud-scale-out-tests
87+
env:
88+
AWS_ACCESS_KEY_ID: fake
89+
AWS_SECRET_ACCESS_KEY: fake
90+
continue-on-error: true
91+
- name: print service logs for scale-out
92+
run: |
93+
find /tmp/zot-ft-logs -name '*.log' -print0 | xargs -0 cat
94+
- name: multi-hop detection
95+
id: multihop
96+
run: |
97+
if find /tmp/zot-ft-logs -name '*.log' -print0 | xargs -0 cat | grep 'cannot proxy an already proxied request'; then
98+
echo "detected multi-hop"
99+
exit 1
100+
else
101+
exit 0
102+
fi
103+
continue-on-error: true
104+
- name: clean up scale-out logs
105+
run: |
106+
rm -r /tmp/zot-ft-logs
107+
- name: fail job if error
108+
if: ${{ steps.scale.outcome != 'success' || steps.multihop.outcome != 'success' }}
109+
run: |
110+
exit 1
111+
- name: Upload zb test results zip as build artifact
112+
uses: actions/upload-artifact@v4
113+
with:
114+
name: zb-cloud-scale-out-functional-results-${{ github.sha }}
115+
path: ./zb-results/
83116
- uses: ./.github/actions/teardown-localstack

.github/workflows/nightly.yaml

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ on:
66

77
permissions: read-all
88

9-
# Here we are running two tests:
9+
# The following tests are run:
1010
# 1. run zot with local storage and dedupe disabled, push images, restart zot with dedupe enabled
1111
# task scheduler will start a dedupe all blobs process at zot startup and it shouldn't interfere with clients.
1212
# 2. run zot with s3 storage and dynamodb and dedupe enabled, push images, restart zot with dedupe false and no cache
1313
# task scheduler will start a restore all blobs process at zot startup, after it finishes all blobs should be restored to their original state (have content)
14+
# 3. run a large number of zot instances with shared storage and metadata, front-ended by HAProxy. Start a long-running zb run with high concurrency and a high number of requests
15+
# to achieve a long-running sustained load on the system. The system is expected to perform well without errors and return performance data after the test.
1416
jobs:
1517
dedupe:
1618
name: Dedupe/restore blobs
@@ -195,3 +197,82 @@ jobs:
195197
- name: Run tests
196198
run: |
197199
./examples/kind/kind-ci.sh
200+
201+
cloud-scale-out:
202+
name: s3+dynamodb scale-out
203+
runs-on: ubuntu-latest-16-cores
204+
steps:
205+
- uses: actions/checkout@v4
206+
- uses: actions/setup-go@v5
207+
with:
208+
cache: false
209+
go-version: 1.22.x
210+
- name: Install dependencies
211+
run: |
212+
cd $GITHUB_WORKSPACE
213+
go install github.com/swaggo/swag/cmd/[email protected]
214+
go mod download
215+
sudo apt-get update
216+
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap haproxy jq
217+
# install skopeo
218+
git clone -b v1.12.0 https://github.com/containers/skopeo.git
219+
cd skopeo
220+
make bin/skopeo
221+
sudo cp bin/skopeo /usr/bin
222+
skopeo -v
223+
cd $GITHUB_WORKSPACE
224+
- name: Log in to GitHub Docker Registry
225+
uses: docker/login-action@v3
226+
with:
227+
registry: ghcr.io
228+
username: ${{ github.actor }}
229+
password: ${{ github.token }}
230+
- uses: actions/setup-python@v5
231+
with:
232+
python-version: '3.11'
233+
- name: Install localstack
234+
run: |
235+
pip install --upgrade pyopenssl
236+
pip install localstack==3.3.0 awscli-local[ver1] # install LocalStack cli and awslocal
237+
docker pull ghcr.io/project-zot/ci-images/localstack:3.3.0 # Make sure to pull a working version of the image
238+
localstack start -d # Start LocalStack in the background
239+
240+
echo "Waiting for LocalStack startup..." # Wait 30 seconds for the LocalStack container
241+
localstack wait -t 30 # to become ready before timing out
242+
echo "Startup complete"
243+
- name: Run cloud scale-out high scale performance tests
244+
id: scale
245+
run: |
246+
make run-cloud-scale-out-high-scale-tests
247+
env:
248+
AWS_ACCESS_KEY_ID: fake
249+
AWS_SECRET_ACCESS_KEY: fake
250+
continue-on-error: true
251+
- name: print service logs
252+
run: |
253+
sudo dmesg
254+
cat /tmp/zot-logs/*.log
255+
- name: multi-hop detection
256+
id: multihop
257+
run: |
258+
if cat /tmp/zot-logs/*.log | grep 'cannot proxy an already proxied request'; then
259+
echo "detected multi-hop"
260+
exit 1
261+
else
262+
exit 0
263+
fi
264+
continue-on-error: true
265+
- name: clean up logs
266+
run: |
267+
rm -r /tmp/zot-logs
268+
- name: fail job if error
269+
if: ${{ steps.scale.outcome != 'success' || steps.multihop.outcome != 'success' }}
270+
run: |
271+
exit 1
272+
- name: Upload zb test results zip as build artifact
273+
if: steps.scale.outcome == 'success'
274+
uses: actions/upload-artifact@v4
275+
with:
276+
name: zb-cloud-scale-out-perf-results-${{ github.sha }}
277+
path: ./zb-results/
278+
- uses: ./.github/actions/teardown-localstack

Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,17 @@ run-blackbox-tests: $(BATS_TEST_FILE_PATH) check-blackbox-prerequisites binary b
489489
echo running bats test "$(BATS_TEST_FILE_PATH)"; \
490490
$(BATS) $(BATS_FLAGS) $(BATS_TEST_FILE_PATH)
491491

492+
.PHONY: run-cloud-scale-out-tests
493+
run-cloud-scale-out-tests: check-blackbox-prerequisites check-awslocal binary bench test-prereq
494+
echo running scale out bats test; \
495+
$(BATS) $(BATS_FLAGS) test/scale-out/cloud_scale_out_no_auth.bats; \
496+
$(BATS) $(BATS_FLAGS) test/scale-out/cloud_scale_out_basic_auth_tls.bats
497+
498+
.PHONY: run-cloud-scale-out-high-scale-tests
499+
run-cloud-scale-out-high-scale-tests: check-blackbox-prerequisites check-awslocal binary bench test-prereq
500+
echo running cloud scale out bats high scale test; \
501+
$(BATS) $(BATS_FLAGS) test/scale-out/cloud_scale_out_basic_auth_tls_scale.bats
502+
492503
.PHONY: run-blackbox-ci
493504
run-blackbox-ci: check-blackbox-prerequisites binary binary-minimal cli
494505
echo running CI bats tests concurently
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"distSpecVersion": "1.1.0",
3+
"storage": {
4+
"rootDirectory": "/tmp/zot",
5+
"dedupe": false,
6+
"remoteCache": true,
7+
"storageDriver": {
8+
"name": "s3",
9+
"rootdirectory": "/zot",
10+
"region": "us-east-1",
11+
"regionendpoint": "localhost:4566",
12+
"bucket": "zot-storage",
13+
"secure": false,
14+
"skipverify": false
15+
},
16+
"cacheDriver": {
17+
"name": "dynamodb",
18+
"endpoint": "http://localhost:4566",
19+
"region": "us-east-1",
20+
"cacheTablename": "ZotBlobTable",
21+
"repoMetaTablename": "ZotRepoMetadataTable",
22+
"imageMetaTablename": "ZotImageMetaTable",
23+
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
24+
"userDataTablename": "ZotUserDataTable",
25+
"versionTablename": "ZotVersion",
26+
"apiKeyTablename": "ZotApiKeyTable"
27+
}
28+
},
29+
"http": {
30+
"address": "127.0.0.1",
31+
"port": "9000"
32+
},
33+
"log": {
34+
"level": "debug"
35+
},
36+
"cluster": {
37+
"members": [
38+
"127.0.0.1:9000",
39+
"127.0.0.1:9001",
40+
"127.0.0.1:9002"
41+
],
42+
"hashKey": "loremipsumdolors"
43+
}
44+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"distSpecVersion": "1.1.0",
3+
"storage": {
4+
"rootDirectory": "/tmp/zot",
5+
"dedupe": false,
6+
"remoteCache": true,
7+
"storageDriver": {
8+
"name": "s3",
9+
"rootdirectory": "/zot",
10+
"region": "us-east-1",
11+
"regionendpoint": "localhost:4566",
12+
"bucket": "zot-storage",
13+
"secure": false,
14+
"skipverify": false
15+
},
16+
"cacheDriver": {
17+
"name": "dynamodb",
18+
"endpoint": "http://localhost:4566",
19+
"region": "us-east-1",
20+
"cacheTablename": "ZotBlobTable",
21+
"repoMetaTablename": "ZotRepoMetadataTable",
22+
"imageMetaTablename": "ZotImageMetaTable",
23+
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
24+
"userDataTablename": "ZotUserDataTable",
25+
"versionTablename": "ZotVersion",
26+
"apiKeyTablename": "ZotApiKeyTable"
27+
}
28+
},
29+
"http": {
30+
"address": "127.0.0.1",
31+
"port": "9001"
32+
},
33+
"log": {
34+
"level": "debug"
35+
},
36+
"cluster": {
37+
"members": [
38+
"127.0.0.1:9000",
39+
"127.0.0.1:9001",
40+
"127.0.0.1:9002"
41+
],
42+
"hashKey": "loremipsumdolors"
43+
}
44+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"distSpecVersion": "1.1.0",
3+
"storage": {
4+
"rootDirectory": "/tmp/zot",
5+
"dedupe": false,
6+
"remoteCache": true,
7+
"storageDriver": {
8+
"name": "s3",
9+
"rootdirectory": "/zot",
10+
"region": "us-east-1",
11+
"regionendpoint": "localhost:4566",
12+
"bucket": "zot-storage",
13+
"secure": false,
14+
"skipverify": false
15+
},
16+
"cacheDriver": {
17+
"name": "dynamodb",
18+
"endpoint": "http://localhost:4566",
19+
"region": "us-east-1",
20+
"cacheTablename": "ZotBlobTable",
21+
"repoMetaTablename": "ZotRepoMetadataTable",
22+
"imageMetaTablename": "ZotImageMetaTable",
23+
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
24+
"userDataTablename": "ZotUserDataTable",
25+
"versionTablename": "ZotVersion",
26+
"apiKeyTablename": "ZotApiKeyTable"
27+
}
28+
},
29+
"http": {
30+
"address": "127.0.0.1",
31+
"port": "9002"
32+
},
33+
"log": {
34+
"level": "debug"
35+
},
36+
"cluster": {
37+
"members": [
38+
"127.0.0.1:9000",
39+
"127.0.0.1:9001",
40+
"127.0.0.1:9002"
41+
],
42+
"hashKey": "loremipsumdolors"
43+
}
44+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
global
2+
log /tmp/log local0
3+
log /tmp/log local1 notice
4+
maxconn 2000
5+
stats timeout 30s
6+
daemon
7+
8+
defaults
9+
log global
10+
mode http
11+
option httplog
12+
option dontlognull
13+
timeout connect 5000
14+
timeout client 50000
15+
timeout server 50000
16+
17+
frontend zot
18+
bind *:8080
19+
default_backend zot-cluster
20+
21+
backend zot-cluster
22+
balance roundrobin
23+
cookie SERVER insert indirect nocache
24+
server zot0 127.0.0.1:9000 cookie zot0
25+
server zot1 127.0.0.1:9001 cookie zot1
26+
server zot2 127.0.0.1:9002 cookie zot2
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"distSpecVersion": "1.1.0",
3+
"storage": {
4+
"rootDirectory": "/tmp/zot",
5+
"dedupe": false,
6+
"remoteCache": true,
7+
"storageDriver": {
8+
"name": "s3",
9+
"rootdirectory": "/zot",
10+
"region": "us-east-1",
11+
"regionendpoint": "localhost:4566",
12+
"bucket": "zot-storage",
13+
"secure": false,
14+
"skipverify": false
15+
},
16+
"cacheDriver": {
17+
"name": "dynamodb",
18+
"endpoint": "http://localhost:4566",
19+
"region": "us-east-1",
20+
"cacheTablename": "ZotBlobTable",
21+
"repoMetaTablename": "ZotRepoMetadataTable",
22+
"imageMetaTablename": "ZotImageMetaTable",
23+
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
24+
"userDataTablename": "ZotUserDataTable",
25+
"versionTablename": "ZotVersion",
26+
"apiKeyTablename": "ZotApiKeyTable"
27+
}
28+
},
29+
"http": {
30+
"address": "127.0.0.1",
31+
"port": "9000",
32+
"tls": {
33+
"cert": "test/data/server.cert",
34+
"key": "test/data/server.key"
35+
}
36+
},
37+
"log": {
38+
"level": "debug"
39+
},
40+
"cluster": {
41+
"members": [
42+
"127.0.0.1:9000",
43+
"127.0.0.1:9001",
44+
"127.0.0.1:9002"
45+
],
46+
"hashKey": "loremipsumdolors",
47+
"tls": {
48+
"cacert": "test/data/ca.crt"
49+
}
50+
}
51+
}

0 commit comments

Comments
 (0)