diff --git a/.github/cfg/integration-test-cfg.yaml b/.github/cfg/integration-test-cfg.yaml index 8e0ee5ce6..94347e01a 100644 --- a/.github/cfg/integration-test-cfg.yaml +++ b/.github/cfg/integration-test-cfg.yaml @@ -1,59 +1,47 @@ -- scylla-version: scylla-enterprise:2023.1.11 +- scylla-version: scylla-enterprise:2024.1.15 ip-family: IPV4 raft-schema: disabled tablets: none ssl-enabled: true -- scylla-version: scylla-enterprise:2023.1.11 - ip-family: IPV4 - raft-schema: enabled - tablets: none - ssl-enabled: true - -- scylla-version: scylla-enterprise:2023.1.11 +- scylla-version: scylla-enterprise:2024.1.15 ip-family: IPV6 raft-schema: enabled tablets: none - ssl-enabled: true + ssl-enabled: false -- scylla-version: scylla-enterprise:2024.1.12 +- scylla-version: scylla-enterprise:2024.2.7 ip-family: IPV4 raft-schema: none tablets: none - ssl-enabled: true + ssl-enabled: false -- scylla-version: scylla-enterprise:2024.1.12 +- scylla-version: scylla-enterprise:2024.2.7 ip-family: IPV6 raft-schema: none tablets: none ssl-enabled: true -- scylla-version: scylla:6.2.0 +- scylla-version: scylla:2025.1.0 ip-family: IPV4 raft-schema: none tablets: disabled ssl-enabled: true -- scylla-version: scylla:6.2.0 - ip-family: IPV4 - raft-schema: none - tablets: enabled - ssl-enabled: true - -- scylla-version: scylla:6.2.0 +- scylla-version: scylla:2025.1.0 ip-family: IPV6 raft-schema: none tablets: enabled ssl-enabled: false -- scylla-version: scylla:2025.1.0-rc4 - ip-family: IPV6 +- scylla-version: scylla-nightly:latest + ip-family: IPV4 raft-schema: none tablets: disabled ssl-enabled: true -- scylla-version: scylla:2025.1.0-rc4 - ip-family: IPV4 +- scylla-version: scylla-nightly:latest + ip-family: IPV6 raft-schema: none tablets: enabled ssl-enabled: false diff --git a/.github/workflows/integration-tests-2023.1.11-IPV4-raftschema.yaml b/.github/workflows/integration-tests-2023.1.11-IPV4-raftschema.yaml deleted file mode 100644 index 17a9dc0bc..000000000 --- a/.github/workflows/integration-tests-2023.1.11-IPV4-raftschema.yaml +++ /dev/null @@ -1,119 +0,0 @@ -concurrency: - cancel-in-progress: true - group: int-${{ github.workflow }}-${{ github.ref }} -env: - scylla-version: scylla-enterprise:2023.1.11 - ip-family: IPV4 - raft-schema: enabled - tablets: none - ssl-enabled: "true" -jobs: - backup: - name: Test backup - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/backup - repair: - name: Test repair - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/repair - restore-schema: - name: Test restore schema - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module 
directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/restore RUN='"TestRestore([^T]|.{1}[^a]|.{2}[^b]|.{3}[^l]|.{4}[^e]|.{5}[^s]).*Integration"' - restore-tables: - name: Test restore tables - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/restore RUN='"TestRestoreTables.*Integration"' - small-pkg: - name: Test other, smaller packages - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Set IP_FAMILY and SSL_ENABLED var for all tests - run: | - echo "IP_FAMILY=${{ env.ip-family }}" >> $GITHUB_ENV - echo "SSL_ENABLED=${{ env.ssl-enabled }}" >> $GITHUB_ENV - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run cqlping tests - run: make pkg-integration-test PKG=./pkg/ping/cqlping - - name: Run dynamoping tests - run: make pkg-integration-test PKG=./pkg/ping/dynamoping - - name: Run scyllaclient tests - run: make pkg-integration-test PKG=./pkg/scyllaclient - - name: Run cluster tests - run: make pkg-integration-test PKG=./pkg/service/cluster - - name: Run healthcheck tests - run: make pkg-integration-test PKG=./pkg/service/healthcheck - - name: Run scheduler tests - run: make pkg-integration-test PKG=./pkg/service/scheduler - - name: Run store tests - run: make pkg-integration-test PKG=./pkg/store - - name: Run migrate tests - run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-2023.1.11-IPV4-raftschema -"on": - pull_request: - types: - - opened - - synchronize - - reopened - push: - branches: - - master - - branch-** diff --git a/.github/workflows/integration-tests-2023.1.11-IPV4.yaml b/.github/workflows/integration-tests-2024.1.15-IPV4.yaml similarity index 98% rename from .github/workflows/integration-tests-2023.1.11-IPV4.yaml rename to .github/workflows/integration-tests-2024.1.15-IPV4.yaml index a15d1b43e..efc91ffb3 100644 --- a/.github/workflows/integration-tests-2023.1.11-IPV4.yaml +++ b/.github/workflows/integration-tests-2024.1.15-IPV4.yaml @@ -2,7 +2,7 @@ concurrency: cancel-in-progress: true group: int-${{ github.workflow }}-${{ github.ref }} env: - scylla-version: scylla-enterprise:2023.1.11 + scylla-version: scylla-enterprise:2024.1.15 ip-family: IPV4 raft-schema: disabled tablets: none @@ -106,7 +106,7 @@ jobs: run: make pkg-integration-test PKG=./pkg/store - name: Run migrate tests run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-2023.1.11-IPV4 +name: integration-tests-2024.1.15-IPV4 
"on": pull_request: types: diff --git a/.github/workflows/integration-tests-2023.1.11-IPV6-raftschema.yaml b/.github/workflows/integration-tests-2024.1.15-IPV6-raftschema-nossl.yaml similarity index 97% rename from .github/workflows/integration-tests-2023.1.11-IPV6-raftschema.yaml rename to .github/workflows/integration-tests-2024.1.15-IPV6-raftschema-nossl.yaml index 1c6eb772e..d99ab5e0a 100644 --- a/.github/workflows/integration-tests-2023.1.11-IPV6-raftschema.yaml +++ b/.github/workflows/integration-tests-2024.1.15-IPV6-raftschema-nossl.yaml @@ -2,11 +2,11 @@ concurrency: cancel-in-progress: true group: int-${{ github.workflow }}-${{ github.ref }} env: - scylla-version: scylla-enterprise:2023.1.11 + scylla-version: scylla-enterprise:2024.1.15 ip-family: IPV6 raft-schema: enabled tablets: none - ssl-enabled: "true" + ssl-enabled: "false" jobs: backup: name: Test backup @@ -106,7 +106,7 @@ jobs: run: make pkg-integration-test PKG=./pkg/store - name: Run migrate tests run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-2023.1.11-IPV6-raftschema +name: integration-tests-2024.1.15-IPV6-raftschema-nossl "on": pull_request: types: diff --git a/.github/workflows/integration-tests-2024.1.12-IPV4.yaml b/.github/workflows/integration-tests-2024.2.7-IPV4-nossl.yaml similarity index 97% rename from .github/workflows/integration-tests-2024.1.12-IPV4.yaml rename to .github/workflows/integration-tests-2024.2.7-IPV4-nossl.yaml index 0b6207ba2..83b79762c 100644 --- a/.github/workflows/integration-tests-2024.1.12-IPV4.yaml +++ b/.github/workflows/integration-tests-2024.2.7-IPV4-nossl.yaml @@ -2,11 +2,11 @@ concurrency: cancel-in-progress: true group: int-${{ github.workflow }}-${{ github.ref }} env: - scylla-version: scylla-enterprise:2024.1.12 + scylla-version: scylla-enterprise:2024.2.7 ip-family: IPV4 raft-schema: none tablets: none - ssl-enabled: "true" + ssl-enabled: "false" jobs: backup: name: Test backup @@ -106,7 +106,7 @@ jobs: run: make pkg-integration-test PKG=./pkg/store - name: Run migrate tests run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-2024.1.12-IPV4 +name: integration-tests-2024.2.7-IPV4-nossl "on": pull_request: types: diff --git a/.github/workflows/integration-tests-2024.1.12-IPV6.yaml b/.github/workflows/integration-tests-2024.2.7-IPV6.yaml similarity index 98% rename from .github/workflows/integration-tests-2024.1.12-IPV6.yaml rename to .github/workflows/integration-tests-2024.2.7-IPV6.yaml index 6950323ad..ed20233a0 100644 --- a/.github/workflows/integration-tests-2024.1.12-IPV6.yaml +++ b/.github/workflows/integration-tests-2024.2.7-IPV6.yaml @@ -2,7 +2,7 @@ concurrency: cancel-in-progress: true group: int-${{ github.workflow }}-${{ github.ref }} env: - scylla-version: scylla-enterprise:2024.1.12 + scylla-version: scylla-enterprise:2024.2.7 ip-family: IPV6 raft-schema: none tablets: none @@ -106,7 +106,7 @@ jobs: run: make pkg-integration-test PKG=./pkg/store - name: Run migrate tests run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-2024.1.12-IPV6 +name: integration-tests-2024.2.7-IPV6 "on": pull_request: types: diff --git a/.github/workflows/integration-tests-6.2.0-IPV4.yaml b/.github/workflows/integration-tests-2025.1.0-IPV4.yaml similarity index 98% rename from .github/workflows/integration-tests-6.2.0-IPV4.yaml rename to .github/workflows/integration-tests-2025.1.0-IPV4.yaml index 11216c92b..eddde9f72 100644 --- a/.github/workflows/integration-tests-6.2.0-IPV4.yaml +++ 
b/.github/workflows/integration-tests-2025.1.0-IPV4.yaml @@ -2,7 +2,7 @@ concurrency: cancel-in-progress: true group: int-${{ github.workflow }}-${{ github.ref }} env: - scylla-version: scylla:6.2.0 + scylla-version: scylla:2025.1.0 ip-family: IPV4 raft-schema: none tablets: disabled @@ -106,7 +106,7 @@ jobs: run: make pkg-integration-test PKG=./pkg/store - name: Run migrate tests run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-6.2.0-IPV4 +name: integration-tests-2025.1.0-IPV4 "on": pull_request: types: diff --git a/.github/workflows/integration-tests-6.2.0-IPV6-tablets-nossl.yaml b/.github/workflows/integration-tests-2025.1.0-IPV6-tablets-nossl.yaml similarity index 98% rename from .github/workflows/integration-tests-6.2.0-IPV6-tablets-nossl.yaml rename to .github/workflows/integration-tests-2025.1.0-IPV6-tablets-nossl.yaml index 84b55f383..793e31281 100644 --- a/.github/workflows/integration-tests-6.2.0-IPV6-tablets-nossl.yaml +++ b/.github/workflows/integration-tests-2025.1.0-IPV6-tablets-nossl.yaml @@ -2,7 +2,7 @@ concurrency: cancel-in-progress: true group: int-${{ github.workflow }}-${{ github.ref }} env: - scylla-version: scylla:6.2.0 + scylla-version: scylla:2025.1.0 ip-family: IPV6 raft-schema: none tablets: enabled @@ -106,7 +106,7 @@ jobs: run: make pkg-integration-test PKG=./pkg/store - name: Run migrate tests run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-6.2.0-IPV6-tablets-nossl +name: integration-tests-2025.1.0-IPV6-tablets-nossl "on": pull_request: types: diff --git a/.github/workflows/integration-tests-2025.1.0-rc4-IPV6.yaml b/.github/workflows/integration-tests-2025.1.0-rc4-IPV6.yaml deleted file mode 100644 index 5c5a08bef..000000000 --- a/.github/workflows/integration-tests-2025.1.0-rc4-IPV6.yaml +++ /dev/null @@ -1,119 +0,0 @@ -concurrency: - cancel-in-progress: true - group: int-${{ github.workflow }}-${{ github.ref }} -env: - scylla-version: scylla:2025.1.0-rc4 - ip-family: IPV6 - raft-schema: none - tablets: disabled - ssl-enabled: "true" -jobs: - backup: - name: Test backup - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/backup - repair: - name: Test repair - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/repair - restore-schema: - name: Test restore schema - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - 
ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/restore RUN='"TestRestore([^T]|.{1}[^a]|.{2}[^b]|.{3}[^l]|.{4}[^e]|.{5}[^s]).*Integration"' - restore-tables: - name: Test restore tables - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run tests - run: make pkg-integration-test IP_FAMILY=${{ env.ip-family }} SSL_ENABLED=${{ env.ssl-enabled}} PKG=./pkg/service/restore RUN='"TestRestoreTables.*Integration"' - small-pkg: - name: Test other, smaller packages - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - name: Set IP_FAMILY and SSL_ENABLED var for all tests - run: | - echo "IP_FAMILY=${{ env.ip-family }}" >> $GITHUB_ENV - echo "SSL_ENABLED=${{ env.ssl-enabled }}" >> $GITHUB_ENV - - name: Setup testing dependencies - uses: ./.github/actions/test-setup - with: - ip-family: ${{ env.ip-family }} - raft-schema: ${{ env.raft-schema }} - scylla-version: ${{ env.scylla-version }} - ssl-enabled: ${{ env.ssl-enabled }} - tablets: ${{ env.tablets }} - - name: Run cqlping tests - run: make pkg-integration-test PKG=./pkg/ping/cqlping - - name: Run dynamoping tests - run: make pkg-integration-test PKG=./pkg/ping/dynamoping - - name: Run scyllaclient tests - run: make pkg-integration-test PKG=./pkg/scyllaclient - - name: Run cluster tests - run: make pkg-integration-test PKG=./pkg/service/cluster - - name: Run healthcheck tests - run: make pkg-integration-test PKG=./pkg/service/healthcheck - - name: Run scheduler tests - run: make pkg-integration-test PKG=./pkg/service/scheduler - - name: Run store tests - run: make pkg-integration-test PKG=./pkg/store - - name: Run migrate tests - run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-2025.1.0-rc4-IPV6 -"on": - pull_request: - types: - - opened - - synchronize - - reopened - push: - branches: - - master - - branch-** diff --git a/.github/workflows/integration-tests-6.2.0-IPV4-tablets.yaml b/.github/workflows/integration-tests-latest-IPV4.yaml similarity index 98% rename from .github/workflows/integration-tests-6.2.0-IPV4-tablets.yaml rename to .github/workflows/integration-tests-latest-IPV4.yaml index 1a069f8f4..eade69c0a 100644 --- a/.github/workflows/integration-tests-6.2.0-IPV4-tablets.yaml +++ b/.github/workflows/integration-tests-latest-IPV4.yaml @@ -2,10 +2,10 @@ concurrency: cancel-in-progress: true group: int-${{ github.workflow }}-${{ github.ref }} env: - scylla-version: scylla:6.2.0 + scylla-version: scylla-nightly:latest ip-family: IPV4 raft-schema: none - tablets: enabled + tablets: disabled ssl-enabled: "true" jobs: backup: @@ -106,7 +106,7 @@ jobs: run: make pkg-integration-test PKG=./pkg/store - name: Run migrate tests run: make pkg-integration-test PKG=./pkg/schema/migrate -name: integration-tests-6.2.0-IPV4-tablets +name: integration-tests-latest-IPV4 "on": pull_request: types: diff --git a/.github/workflows/integration-tests-2025.1.0-rc4-IPV4-tablets-nossl.yaml b/.github/workflows/integration-tests-latest-IPV6-tablets-nossl.yaml similarity index 97% rename 
from .github/workflows/integration-tests-2025.1.0-rc4-IPV4-tablets-nossl.yaml
rename to .github/workflows/integration-tests-latest-IPV6-tablets-nossl.yaml
index 1ff89707c..a9a4d392b 100644
--- a/.github/workflows/integration-tests-2025.1.0-rc4-IPV4-tablets-nossl.yaml
+++ b/.github/workflows/integration-tests-latest-IPV6-tablets-nossl.yaml
@@ -2,8 +2,8 @@ concurrency:
   cancel-in-progress: true
   group: int-${{ github.workflow }}-${{ github.ref }}
 env:
-  scylla-version: scylla:2025.1.0-rc4
-  ip-family: IPV4
+  scylla-version: scylla-nightly:latest
+  ip-family: IPV6
   raft-schema: none
   tablets: enabled
   ssl-enabled: "false"
@@ -106,7 +106,7 @@ jobs:
       run: make pkg-integration-test PKG=./pkg/store
     - name: Run migrate tests
       run: make pkg-integration-test PKG=./pkg/schema/migrate
-name: integration-tests-2025.1.0-rc4-IPV4-tablets-nossl
+name: integration-tests-latest-IPV6-tablets-nossl
 "on":
   pull_request:
     types:
diff --git a/README.md b/README.md
index e137eb16e..6747dc27b 100644
--- a/README.md
+++ b/README.md
@@ -15,23 +15,21 @@ Scylla Manager consists of tree components:
 
 ## Scylla integration status
 
-| ScyllaDB version | Workflows | Limitations |
-|------------------|-----------|-------------|
-| **2024.1.12** | ![integration-tests-2024.1.12-IPV4] <br> ![integration-tests-2024.1.12-IPV6] | Restoration of schema into cluster with `consistant_cluster_management: true` is not supported |
-| **2023.1.11** | ![integration-tests-2023.1.11-IPV4] <br> ![integration-tests-2023.1.11-IPV4-raftschema] <br> ![integration-tests-2023.1.11-IPV6-raftschema] | Restoration of schema into cluster with `consistant_cluster_management: true` is not supported |
-| **6.2.0** | ![integration-tests-6.2.0-IPV4] <br> ![integration-tests-6.2.0-IPV4-tablets] <br> ![integration-tests-6.2.0-IPV6-tablets-nossl] | Restoration of **Authentication** and **Service Levels** is not supported <br> Restoration of schema containing **Alternator** tables is not supported |
-| **2025.1.0-rc4** | ![integration-tests-2025.1.0-rc4-IPV6] <br> ![integration-tests-2025.1.0-rc4-IPV4-tablets-nossl] | Restoration of **Authentication** and **Service Levels** is not supported <br> Restoration of schema containing **Alternator** tables is not supported |
-
-[integration-tests-2024.1.12-IPV4]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2024.1.12-IPV4.yaml/badge.svg?branch=master
-[integration-tests-2024.1.12-IPV6]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2024.1.12-IPV6.yaml/badge.svg?branch=master
-[integration-tests-2023.1.11-IPV4]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2023.1.11-IPV4.yaml/badge.svg?branch=master
-[integration-tests-2023.1.11-IPV4-raftschema]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2023.1.11-IPV4-raftschema.yaml/badge.svg?branch=master
-[integration-tests-2023.1.11-IPV6-raftschema]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2023.1.11-IPV6-raftschema.yaml/badge.svg?branch=master
-[integration-tests-6.2.0-IPV4]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-6.2.0-IPV4.yaml/badge.svg?branch=master
-[integration-tests-6.2.0-IPV4-tablets]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-6.2.0-IPV4-tablets.yaml/badge.svg?branch=master
-[integration-tests-6.2.0-IPV6-tablets-nossl]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-6.2.0-IPV6-tablets-nossl.yaml/badge.svg?branch=master
-[integration-tests-2025.1.0-rc4-IPV6]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2025.1.0-rc4-IPV6.yaml/badge.svg?branch=master
-[integration-tests-2025.1.0-rc4-IPV4-tablets-nossl]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2025.1.0-rc4-IPV4-tablets-nossl.yaml/badge.svg?branch=master
+| ScyllaDB version | Workflows | Limitations |
+|------------------|-----------|-------------|
+| **2024.1.15** | ![integration-tests-2024.1.15-IPV4] <br> ![integration-tests-2024.1.15-IPV6-raftschema-nossl] | Restoration of schema into cluster with `consistent_cluster_management: true` is not supported |
+| **2024.2.7** | ![integration-tests-2024.2.7-IPV4-nossl] <br> ![integration-tests-2024.2.7-IPV6] | Restoration of **Authentication** and **Service Levels** is not supported <br> Restoration of schema containing **Alternator** tables is not supported |
+| **2025.1.0** | ![integration-tests-2025.1.0-IPV4] <br> ![integration-tests-2025.1.0-IPV6-tablets-nossl] | Restoration of **Authentication** and **Service Levels** is not supported <br> Restoration of schema containing **Alternator** tables is not supported |
+| **latest** | ![integration-tests-latest-IPV4] <br> ![integration-tests-latest-IPV6-tablets-nossl] | Restoration of **Authentication** and **Service Levels** is not supported <br> Restoration of schema containing **Alternator** tables is not supported |
+
+[integration-tests-2024.1.15-IPV4]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2024.1.15-IPV4.yaml/badge.svg?branch=master
+[integration-tests-2024.1.15-IPV6-raftschema-nossl]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2024.1.15-IPV6-raftschema-nossl.yaml/badge.svg?branch=master
+[integration-tests-2024.2.7-IPV4-nossl]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2024.2.7-IPV4-nossl.yaml/badge.svg?branch=master
+[integration-tests-2024.2.7-IPV6]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2024.2.7-IPV6.yaml/badge.svg?branch=master
+[integration-tests-2025.1.0-IPV4]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2025.1.0-IPV4.yaml/badge.svg?branch=master
+[integration-tests-2025.1.0-IPV6-tablets-nossl]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-2025.1.0-IPV6-tablets-nossl.yaml/badge.svg?branch=master
+[integration-tests-latest-IPV4]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-latest-IPV4.yaml/badge.svg?branch=master
+[integration-tests-latest-IPV6-tablets-nossl]: https://github.com/scylladb/scylla-manager/actions/workflows/integration-tests-latest-IPV6-tablets-nossl.yaml/badge.svg?branch=master
 
 ## Installing and updating Go
diff --git a/pkg/scyllaclient/client_scylla.go b/pkg/scyllaclient/client_scylla.go
index ba573e790..8e06211ac 100644
--- a/pkg/scyllaclient/client_scylla.go
+++ b/pkg/scyllaclient/client_scylla.go
@@ -9,8 +9,10 @@ import (
 	"fmt"
 	"net"
 	"net/http"
+	"net/netip"
 	"regexp"
 	"runtime"
+	"slices"
 	"sort"
 	"strconv"
 	"strings"
@@ -25,6 +27,7 @@ import (
 	"github.com/scylladb/scylla-manager/v3/pkg/util/pointer"
 	"github.com/scylladb/scylla-manager/v3/pkg/util/prom"
 	"github.com/scylladb/scylla-manager/v3/pkg/util/slice"
+	slices2 "github.com/scylladb/scylla-manager/v3/pkg/util2/slices"
 	"github.com/scylladb/scylla-manager/v3/swagger/gen/scylla/v1/client/operations"
 	"github.com/scylladb/scylla-manager/v3/swagger/gen/scylla/v1/models"
 	"go.uber.org/multierr"
@@ -492,12 +495,12 @@ func (c *Client) describeRing(params *operations.StorageServiceDescribeRingByKey
 	ring := Ring{
 		ReplicaTokens: make([]ReplicaTokenRanges, 0),
-		HostDC:        map[string]string{},
+		HostDC:        map[netip.Addr]string{},
 	}
 	dcTokens := make(map[string]int)
 
 	replicaTokens := make(map[uint64][]TokenRange)
-	replicaHash := make(map[uint64][]string)
+	replicaHash := make(map[uint64][]netip.Addr)
 
 	isNetworkTopologyStrategy := true
 	rf := len(resp.Payload[0].Endpoints)
@@ -513,12 +516,18 @@
 			return Ring{}, errors.Wrap(err, "parse EndToken")
 		}
 
-		// Ensure deterministic order or nodes in replica set
-		sort.Strings(p.Endpoints)
+		replicaSet, err := slices2.MapWithError(p.Endpoints, netip.ParseAddr)
+		if err != nil {
+			return Ring{}, err
+		}
+		// Ensure deterministic order of nodes in replica set
+		slices.SortFunc(replicaSet, func(a, b netip.Addr) int {
+			return a.Compare(b)
+		})
 
 		// Aggregate replica set token ranges
-		hash := ReplicaHash(p.Endpoints)
-		replicaHash[hash] = p.Endpoints
+		hash := ReplicaHash(replicaSet)
+		replicaHash[hash] = replicaSet
 		replicaTokens[hash] = append(replicaTokens[hash], TokenRange{
 			StartToken: startToken,
 			EndToken:   endToken,
@@ -541,7 +550,11 @@ func (c *Client) describeRing(params 
*operations.StorageServiceDescribeRingByKey // Update host to DC mapping for _, e := range p.EndpointDetails { - ring.HostDC[e.Host] = e.Datacenter + ip, err := netip.ParseAddr(e.Host) + if err != nil { + return Ring{}, err + } + ring.HostDC[ip] = e.Datacenter } // Update DC token metrics @@ -582,11 +595,11 @@ func (c *Client) describeRing(params *operations.StorageServiceDescribeRingByKey } // ReplicaHash hashes replicas so that it can be used as a map key. -func ReplicaHash(replicaSet []string) uint64 { +func ReplicaHash(replicaSet []netip.Addr) uint64 { hash := xxhash.New() for _, r := range replicaSet { - _, _ = hash.WriteString(r) // nolint: errcheck - _, _ = hash.WriteString(",") // nolint: errcheck + _, _ = hash.WriteString(r.String()) // nolint: errcheck + _, _ = hash.WriteString(",") // nolint: errcheck } return hash.Sum64() } diff --git a/pkg/scyllaclient/client_scylla_integration_test.go b/pkg/scyllaclient/client_scylla_integration_test.go index eab2a4e99..011fd5061 100644 --- a/pkg/scyllaclient/client_scylla_integration_test.go +++ b/pkg/scyllaclient/client_scylla_integration_test.go @@ -169,32 +169,23 @@ func TestClientActiveRepairsIntegration(t *testing.T) { if err != nil { t.Fatal(err) } - ni, err := client.AnyNodeInfo(context.Background()) - if err != nil { - t.Fatal(err) - } - tabletAPI, err := ni.SupportsTabletRepair() - if err != nil { - t.Fatal(err) - } Print("Given: cluster with table to repair") const ks = "test_active_repairs_ks" s := db.CreateSessionAndDropAllKeyspaces(t, client) - db.WriteData(t, s, ks, 1) + db.WriteData(t, s, ks, 2) rd := scyllaclient.NewRingDescriber(context.Background(), client) - asyncRepair := func(ctx context.Context, ks, tab, master string) { - if _, err := client.RawRepair(ctx, ks, tab, master); err != nil { - t.Error(err) - } - } - if rd.IsTabletKeyspace(ks) && tabletAPI { - asyncRepair = func(ctx context.Context, ks, tab, master string) { - if _, err := client.TabletRepair(ctx, ks, tab, master, nil, nil); err != nil { - t.Error(err) - } - } + if rd.IsTabletKeyspace(ks) { + // When SM uses tablet repair API, Scylla registers a request to repair + // a tablet table. Later on, Scylla creates repair jobs for given tablets + // according to its own internal scheduler. Because of that, we have no guarantee + // that Scylla will start any repair jobs right after SM used tablet repair API. + // For example, they might be delayed due to a table migration/merge/split. + // The API for checking active repairs and killing repairs works on the repair job + // level, not the tablet repair API request level. Task manager API should be used + // for handling those requests. 
+		t.Skip("Checking active repairs and killing repairs is flaky with tablets")
 	}
 
 	Print("When: cluster is idle")
@@ -207,24 +198,25 @@ func TestClientActiveRepairsIntegration(t *testing.T) {
 		t.Fatal(active)
 	}
 
+	Print("When: repair is running")
+	go ExecOnHost(ManagedClusterHost(), "nodetool repair")
 	defer func() {
-		// Make sure that repairs don't spill to other tests
 		if err := client.KillAllRepairs(context.Background(), ManagedClusterHosts()...); err != nil {
 			t.Fatal(err)
 		}
 	}()
 
-	Print("When: repairs are running")
-	Print("Then: repairs are reported as active")
-	WaitCond(t, func() bool {
-		// Multiple repair requests in order to reduce flakiness
-		asyncRepair(context.Background(), ks, db.BigTableName, ManagedClusterHost())
+	Print("Then: repairs are reported as active")
+	check := func() bool {
 		active, err = client.ActiveRepairs(context.Background(), ManagedClusterHosts())
 		if err != nil {
 			t.Fatal(err)
 		}
 		return len(active) > 0
-	}, 500*time.Millisecond, 4*time.Second)
+	}
+	if !check() {
+		WaitCond(t, check, 500*time.Millisecond, 4*time.Second)
+	}
 }
 
 func TestClientSnapshotIntegration(t *testing.T) {
diff --git a/pkg/scyllaclient/client_scylla_test.go b/pkg/scyllaclient/client_scylla_test.go
index 7ec79f016..2338d2a96 100644
--- a/pkg/scyllaclient/client_scylla_test.go
+++ b/pkg/scyllaclient/client_scylla_test.go
@@ -5,6 +5,8 @@ package scyllaclient_test
 import (
 	"context"
 	"net/http"
+	"net/netip"
+	"slices"
 	"strings"
 	"testing"
 
@@ -193,25 +195,32 @@ func TestClientDescribeRing(t *testing.T) {
 	{
 		golden := scyllaclient.ReplicaTokenRanges{
-			ReplicaSet: []string{"172.16.1.10", "172.16.1.2", "172.16.1.20", "172.16.1.3", "172.16.1.4", "172.16.1.5"},
-			Ranges:     []scyllaclient.TokenRange{{StartToken: -9223128845313325022, EndToken: -9197905337938558763}},
+			ReplicaSet: []netip.Addr{
+				netip.MustParseAddr("172.16.1.2"),
+				netip.MustParseAddr("172.16.1.3"),
+				netip.MustParseAddr("172.16.1.4"),
+				netip.MustParseAddr("172.16.1.5"),
+				netip.MustParseAddr("172.16.1.10"),
+				netip.MustParseAddr("172.16.1.20"),
+			},
+			Ranges: []scyllaclient.TokenRange{{StartToken: -9223128845313325022, EndToken: -9197905337938558763}},
 		}
-		if diff := cmp.Diff(ring.ReplicaTokens[0].ReplicaSet, golden.ReplicaSet); diff != "" {
-			t.Fatal(diff)
+		if !slices.Equal(ring.ReplicaTokens[0].ReplicaSet, golden.ReplicaSet) {
+			t.Fatalf("Expected replica set %#v, got %#v", golden.ReplicaSet, ring.ReplicaTokens[0].ReplicaSet)
 		}
-		if diff := cmp.Diff(ring.ReplicaTokens[0].Ranges[0], golden.Ranges[0]); diff != "" {
-			t.Fatal(diff)
+		if !slices.Equal(ring.ReplicaTokens[0].Ranges, golden.Ranges) {
+			t.Fatalf("Expected ranges %#v, got %#v", golden.Ranges, ring.ReplicaTokens[0].Ranges)
 		}
 	}
 	{
-		golden := map[string]string{
-			"172.16.1.10": "dc1",
-			"172.16.1.2":  "dc1",
-			"172.16.1.20": "dc2",
-			"172.16.1.3":  "dc1",
-			"172.16.1.4":  "dc2",
-			"172.16.1.5":  "dc2",
+		golden := map[netip.Addr]string{
+			netip.MustParseAddr("172.16.1.10"): "dc1",
+			netip.MustParseAddr("172.16.1.2"):  "dc1",
+			netip.MustParseAddr("172.16.1.20"): "dc2",
+			netip.MustParseAddr("172.16.1.3"):  "dc1",
+			netip.MustParseAddr("172.16.1.4"):  "dc2",
+			netip.MustParseAddr("172.16.1.5"):  "dc2",
 		}
 		if diff := cmp.Diff(ring.HostDC, golden); diff != "" {
 			t.Fatal(diff)
diff --git a/pkg/scyllaclient/export_test.go b/pkg/scyllaclient/export_test.go
index 17375b331..0615d25b3 100644
--- a/pkg/scyllaclient/export_test.go
+++ b/pkg/scyllaclient/export_test.go
@@ -5,8 +5,6 @@ package scyllaclient
 import (
 	"context"
 	"time"
-
-	"github.com/scylladb/scylla-manager/v3/swagger/gen/scylla/v1/client/operations"
 )
 
 func NoRetry(ctx context.Context) context.Context {
@@ -40,16 +38,3 @@ func (p *CachedProvider) SetValidity(d time.Duration) {
 func (c *Client) Hosts(ctx context.Context) ([]string, error) {
 	return c.hosts(ctx)
 }
-
-func (c *Client) RawRepair(ctx context.Context, ks, tab, master string) (int32, error) {
-	p := operations.StorageServiceRepairAsyncByKeyspacePostParams{
-		Context:        forceHost(ctx, master),
-		Keyspace:       ks,
-		ColumnFamilies: &tab,
-	}
-	resp, err := c.scyllaOps.StorageServiceRepairAsyncByKeyspacePost(&p)
-	if err != nil {
-		return 0, err
-	}
-	return resp.GetPayload(), nil
-}
diff --git a/pkg/scyllaclient/model.go b/pkg/scyllaclient/model.go
index f4772b8d2..5a5b4898f 100644
--- a/pkg/scyllaclient/model.go
+++ b/pkg/scyllaclient/model.go
@@ -3,6 +3,7 @@
 package scyllaclient
 
 import (
+	"net/netip"
 	"reflect"
 
 	"github.com/gocql/gocql"
@@ -177,7 +178,7 @@ const (
 // Ring describes token ring of a keyspace.
 type Ring struct {
 	ReplicaTokens []ReplicaTokenRanges
-	HostDC        map[string]string
+	HostDC        map[netip.Addr]string
 	// Replication is not returned by Scylla, but assumed by SM.
 	// Don't use it for correctness.
 	Replication ReplicationStrategy
@@ -212,7 +213,7 @@ func (t *TokenRange) UnmarshalUDT(name string, info gocql.TypeInfo, data []byte)
 // ReplicaTokenRanges describes all token ranges belonging to given replica set.
 type ReplicaTokenRanges struct {
-	ReplicaSet []string     // Sorted lexicographically
+	ReplicaSet []netip.Addr // Sorted by Addr.Compare
 	Ranges     []TokenRange // Sorted by start token
 }
diff --git a/pkg/scyllaclient/model_test.go b/pkg/scyllaclient/model_test.go
index 04f9401a3..de046894d 100644
--- a/pkg/scyllaclient/model_test.go
+++ b/pkg/scyllaclient/model_test.go
@@ -3,6 +3,7 @@
 package scyllaclient
 
 import (
+	"net/netip"
 	"sort"
 	"testing"
 
@@ -44,13 +45,13 @@ func TestRingDatacenters(t *testing.T) {
 	t.Parallel()
 
 	r := Ring{
-		HostDC: map[string]string{
-			"172.16.1.10": "dc1",
-			"172.16.1.2":  "dc1",
-			"172.16.1.20": "dc2",
-			"172.16.1.3":  "dc1",
-			"172.16.1.4":  "dc2",
-			"172.16.1.5":  "dc2",
+		HostDC: map[netip.Addr]string{
+			netip.MustParseAddr("172.16.1.10"): "dc1",
+			netip.MustParseAddr("172.16.1.2"):  "dc1",
+			netip.MustParseAddr("172.16.1.20"): "dc2",
+			netip.MustParseAddr("172.16.1.3"):  "dc1",
+			netip.MustParseAddr("172.16.1.4"):  "dc2",
+			netip.MustParseAddr("172.16.1.5"):  "dc2",
 		},
 	}
 	d := r.Datacenters()
diff --git a/pkg/service/backup/model.go b/pkg/service/backup/model.go
index 80a1f22d7..ba75b9143 100644
--- a/pkg/service/backup/model.go
+++ b/pkg/service/backup/model.go
@@ -6,6 +6,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"net/netip"
 	"reflect"
 	"slices"
 	"strings"
@@ -17,6 +18,7 @@ import (
 	"github.com/scylladb/gocqlx/v2"
 	"github.com/scylladb/scylla-manager/backupspec"
 	"github.com/scylladb/scylla-manager/v3/pkg/util/inexlist/ksfilter"
+	"github.com/scylladb/scylla-manager/v3/pkg/util2/maps"
 
 	"go.uber.org/multierr"
@@ -498,13 +500,13 @@ type tabValidator interface {
 // Validates that each token range is owned by at least one live backed up node.
 // Otherwise, corresponding data wouldn't be included in the backup.
 type tokenRangesValidator struct {
-	liveNodes *strset.Set
+	liveNodes map[netip.Addr]struct{}
 	dcs       *strset.Set
 }
 
 func (v tokenRangesValidator) validate(ks, tab string, ring scyllaclient.Ring) error {
 	for _, rt := range ring.ReplicaTokens {
-		if !v.liveNodes.HasAny(rt.ReplicaSet...) {
+		if !maps.HasAnyKey(v.liveNodes, rt.ReplicaSet...) {
			return errors.Errorf("%s.%s: the whole replica set %v is filtered out, so the data owned by it can't be backed up", ks, tab, rt.ReplicaSet)
 		}
 	}
diff --git a/pkg/service/backup/service.go b/pkg/service/backup/service.go
index df21923ad..b766d3904 100644
--- a/pkg/service/backup/service.go
+++ b/pkg/service/backup/service.go
@@ -7,6 +7,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"net/netip"
 	"sort"
 	"strings"
 	"sync"
@@ -30,6 +31,8 @@ import (
 	"github.com/scylladb/scylla-manager/v3/pkg/util/jsonutil"
 	"github.com/scylladb/scylla-manager/v3/pkg/util/parallel"
 	"github.com/scylladb/scylla-manager/v3/pkg/util/query"
+	"github.com/scylladb/scylla-manager/v3/pkg/util2/maps"
+	"github.com/scylladb/scylla-manager/v3/pkg/util2/slices"
 	"github.com/scylladb/scylla-manager/v3/pkg/util/timeutc"
 	"github.com/scylladb/scylla-manager/v3/pkg/util/uuid"
@@ -193,9 +196,14 @@ func (s *Service) targetFromProperties(ctx context.Context, clusterID uuid.UUID,
 		return Target{}, errors.Wrap(err, "create cluster session")
 	}
 
+	liveNodeIPs, err := slices.MapWithError(liveNodes.Hosts(), netip.ParseAddr)
+	if err != nil {
+		return Target{}, err
+	}
+	liveNodesSet := maps.SetFromSlice(liveNodeIPs)
 	validators := []tabValidator{
 		tokenRangesValidator{
-			liveNodes: strset.New(liveNodes.Hosts()...),
+			liveNodes: liveNodesSet,
 			dcs:       strset.New(dcs...),
 		},
 	}
diff --git a/pkg/service/repair/controller.go b/pkg/service/repair/controller.go
index f04db0780..f0ba7345e 100644
--- a/pkg/service/repair/controller.go
+++ b/pkg/service/repair/controller.go
@@ -2,15 +2,17 @@
 
 package repair
 
+import "net/netip"
+
 // controller keeps the state of repairs running in the cluster
 // and informs generator about allowed repair intensity on a given replica set.
 type controller interface {
 	// TryBlock returns if it's allowed to schedule a repair job on given replica set.
 	// The second returned value is the allowed intensity of such job.
-	TryBlock(replicaSet []string) (ok bool, intensity int)
+	TryBlock(replicaSet []netip.Addr) (ok bool, intensity int)
 	// Unblock informs controller that a repair job running on replica set has finished.
 	// This makes it possible to call TryBlock on nodes from replica set.
-	Unblock(replicaSet []string)
+	Unblock(replicaSet []netip.Addr)
 	// Busy checks if there are any running repair jobs that controller is aware of.
 	Busy() bool
 }
@@ -19,7 +21,7 @@ type intensityChecker interface {
 	Intensity() Intensity
 	Parallel() int
 	MaxParallel() int
-	ReplicaSetMaxIntensity(replicaSet []string) Intensity
+	ReplicaSetMaxIntensity(replicaSet []netip.Addr) Intensity
 }
 
 // rowLevelRepairController is a specialised controller for row-level repair.
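
> Note on the `netip.Addr` migration running through the hunks above: `netip.Addr` is a small comparable value type, so it can key maps such as `Ring.HostDC` and the controller's `nodeJobs` directly, and `Addr.Compare` orders addresses numerically instead of lexicographically — which is why `describeRing` switches from `sort.Strings` to `slices.SortFunc`, and why the golden replica set in `client_scylla_test.go` is listed in numeric rather than string order. A self-contained illustration of the ordering difference (example code, not part of the patch):

```go
package main

import (
	"fmt"
	"net/netip"
	"slices"
	"sort"
)

func main() {
	raw := []string{"172.16.1.10", "172.16.1.2", "172.16.1.20", "172.16.1.3"}

	// Old ordering: lexicographic on the string form.
	s := append([]string(nil), raw...)
	sort.Strings(s)
	fmt.Println(s) // [172.16.1.10 172.16.1.2 172.16.1.20 172.16.1.3]

	// New ordering: numeric, via netip.Addr.Compare.
	a := make([]netip.Addr, 0, len(raw))
	for _, r := range raw {
		a = append(a, netip.MustParseAddr(r))
	}
	slices.SortFunc(a, func(x, y netip.Addr) int { return x.Compare(y) })
	fmt.Println(a) // [172.16.1.2 172.16.1.3 172.16.1.10 172.16.1.20]
}
```

> Since `ReplicaHash` feeds the sorted set into the hash, the two orderings also produce different replica-set hashes, so mixed-version comparisons of stored hashes should not be expected to match.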
@@ -29,8 +31,8 @@ type intensityChecker interface { type rowLevelRepairController struct { intensity intensityChecker - jobsCnt int // Total amount of repair jobs in the cluster - nodeJobs map[string]int // Amount of repair jobs on a given node + jobsCnt int // Total amount of repair jobs in the cluster + nodeJobs map[netip.Addr]int // Amount of repair jobs on a given node } var _ controller = &rowLevelRepairController{} @@ -38,11 +40,11 @@ var _ controller = &rowLevelRepairController{} func newRowLevelRepairController(i intensityChecker) *rowLevelRepairController { return &rowLevelRepairController{ intensity: i, - nodeJobs: make(map[string]int), + nodeJobs: make(map[netip.Addr]int), } } -func (c *rowLevelRepairController) TryBlock(replicaSet []string) (ok bool, intensity int) { +func (c *rowLevelRepairController) TryBlock(replicaSet []netip.Addr) (ok bool, intensity int) { if !c.shouldBlock(replicaSet) { return false, 0 } @@ -55,7 +57,7 @@ func (c *rowLevelRepairController) TryBlock(replicaSet []string) (ok bool, inten return true, int(i) } -func (c *rowLevelRepairController) shouldBlock(replicaSet []string) bool { +func (c *rowLevelRepairController) shouldBlock(replicaSet []netip.Addr) bool { // DENY if any node is already participating in repair job for _, r := range replicaSet { if c.nodeJobs[r] > 0 { @@ -76,14 +78,14 @@ func (c *rowLevelRepairController) shouldBlock(replicaSet []string) bool { return true } -func (c *rowLevelRepairController) block(replicaSet []string) { +func (c *rowLevelRepairController) block(replicaSet []netip.Addr) { c.jobsCnt++ for _, r := range replicaSet { c.nodeJobs[r]++ } } -func (c *rowLevelRepairController) Unblock(replicaSet []string) { +func (c *rowLevelRepairController) Unblock(replicaSet []netip.Addr) { c.jobsCnt-- for _, r := range replicaSet { c.nodeJobs[r]-- diff --git a/pkg/service/repair/controller_test.go b/pkg/service/repair/controller_test.go index 8cc4f5048..d27427254 100644 --- a/pkg/service/repair/controller_test.go +++ b/pkg/service/repair/controller_test.go @@ -4,6 +4,7 @@ package repair import ( "context" + "net/netip" "testing" "github.com/scylladb/go-log" @@ -12,16 +13,16 @@ import ( ) func TestRowLevelRepairController_TryBlock(t *testing.T) { - const ( - node1 = "192.168.1.1" - node2 = "192.168.1.2" - node3 = "192.168.1.3" - node4 = "192.168.1.4" - node5 = "192.168.1.5" - node6 = "192.168.1.6" + var ( + node1 = netip.MustParseAddr("192.168.1.1") + node2 = netip.MustParseAddr("192.168.1.2") + node3 = netip.MustParseAddr("192.168.1.3") + node4 = netip.MustParseAddr("192.168.1.4") + node5 = netip.MustParseAddr("192.168.1.5") + node6 = netip.MustParseAddr("192.168.1.6") ) - maxRangesPerHost := map[string]Intensity{ + maxRangesPerHost := map[netip.Addr]Intensity{ node1: 20, node2: 19, node3: 18, @@ -43,7 +44,7 @@ func TestRowLevelRepairController_TryBlock(t *testing.T) { } t.Run("make sure TryBlock() will deny if replicaset is already blocked", func(t *testing.T) { - replicaSet := []string{node1, node2} + replicaSet := []netip.Addr{node1, node2} c := newRowLevelRepairController(defaultIntensityHandler()) if ok, _ := c.TryBlock(replicaSet); !ok { @@ -64,7 +65,7 @@ func TestRowLevelRepairController_TryBlock(t *testing.T) { expectedNrOfRanges = 10 maxParallel = 2 ) - replicaSet := []string{node1, node2} + replicaSet := []netip.Addr{node1, node2} ih := defaultIntensityHandler() ih.maxParallel = maxParallel c := newRowLevelRepairController(ih) @@ -80,7 +81,7 @@ func TestRowLevelRepairController_TryBlock(t *testing.T) { maxParallel = 2 
expectedNrOfRanges = 19 ) - replicaSet := []string{node1, node2} + replicaSet := []netip.Addr{node1, node2} ih := defaultIntensityHandler() ih.maxParallel = maxParallel c := newRowLevelRepairController(ih) @@ -97,7 +98,7 @@ func TestRowLevelRepairController_TryBlock(t *testing.T) { intensity = 20 minRangesInParallel = 15 ) - replicaSet := []string{node1, node2, node6} + replicaSet := []netip.Addr{node1, node2, node6} ih := defaultIntensityHandler() ih.maxParallel = maxParallel c := newRowLevelRepairController(ih) @@ -109,8 +110,8 @@ func TestRowLevelRepairController_TryBlock(t *testing.T) { }) t.Run("make sure TryBlock() will deny if there is more jobs than {parallel} already", func(t *testing.T) { - replicaSet1 := []string{node1, node2} - replicaSet2 := []string{node3, node4} + replicaSet1 := []netip.Addr{node1, node2} + replicaSet2 := []netip.Addr{node3, node4} maxParallel := 10 ih := defaultIntensityHandler() ih.maxParallel = maxParallel @@ -126,9 +127,9 @@ func TestRowLevelRepairController_TryBlock(t *testing.T) { }) t.Run("make sure TryBlock() will deny if there is more jobs than maxParallel=2 already", func(t *testing.T) { - replicaSet1 := []string{node1, node2} - replicaSet2 := []string{node3, node4} - replicaSet3 := []string{node3, node4} + replicaSet1 := []netip.Addr{node1, node2} + replicaSet2 := []netip.Addr{node3, node4} + replicaSet3 := []netip.Addr{node3, node4} maxParallel := 2 ih := defaultIntensityHandler() ih.maxParallel = maxParallel diff --git a/pkg/service/repair/generator.go b/pkg/service/repair/generator.go index 32a683fa4..ec4c0b7b4 100644 --- a/pkg/service/repair/generator.go +++ b/pkg/service/repair/generator.go @@ -5,12 +5,14 @@ package repair import ( "context" stdErrors "errors" + "net/netip" "sync/atomic" "github.com/pkg/errors" "github.com/scylladb/go-log" "github.com/scylladb/scylla-manager/v3/pkg/scheduler" "github.com/scylladb/scylla-manager/v3/pkg/scyllaclient" + "github.com/scylladb/scylla-manager/v3/pkg/util2/maps" ) // generator is responsible for creating and orchestrating tableGenerators. 
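
> The `pkg/util2` helpers this patch leans on (`slices.Map`, `slices.MapWithError`, `maps.SetFromSlice`, `maps.HasAnyKey`, `maps.MapKeyWithError`) are not included in the diff. A sketch of what they presumably look like, with generic signatures inferred purely from the call sites in this patch (assumptions, not the actual `util2` sources):

```go
// Sketch only: signatures inferred from call sites, not the real pkg/util2 code.
package util2sketch

// Map converts []T to []V, e.g. slices.Map(reps, hostIP).
func Map[T, V any](in []T, f func(T) V) []V {
	out := make([]V, 0, len(in))
	for _, t := range in {
		out = append(out, f(t))
	}
	return out
}

// MapWithError converts []T to []V and stops at the first conversion error,
// e.g. slices.MapWithError(p.Endpoints, netip.ParseAddr).
func MapWithError[T, V any](in []T, f func(T) (V, error)) ([]V, error) {
	out := make([]V, 0, len(in))
	for _, t := range in {
		v, err := f(t)
		if err != nil {
			return nil, err
		}
		out = append(out, v)
	}
	return out, nil
}

// SetFromSlice builds a set from a slice, e.g. maps.SetFromSlice(liveNodeIPs).
func SetFromSlice[T comparable](in []T) map[T]struct{} {
	out := make(map[T]struct{}, len(in))
	for _, t := range in {
		out[t] = struct{}{}
	}
	return out
}

// HasAnyKey reports whether any of the keys is present in the map,
// e.g. maps.HasAnyKey(v.liveNodes, rt.ReplicaSet...).
func HasAnyKey[K comparable, V any](m map[K]V, keys ...K) bool {
	for _, k := range keys {
		if _, ok := m[k]; ok {
			return true
		}
	}
	return false
}

// MapKeyWithError rewrites map keys, stopping at the first conversion error,
// e.g. maps.MapKeyWithError(shards, netip.ParseAddr).
func MapKeyWithError[K, K2 comparable, V any](m map[K]V, f func(K) (K2, error)) (map[K2]V, error) {
	out := make(map[K2]V, len(m))
	for k, v := range m {
		k2, err := f(k)
		if err != nil {
			return nil, err
		}
		out[k2] = v
	}
	return out, nil
}
```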
@@ -75,8 +77,8 @@ func (jt jobType) fullTableRepair() bool { type job struct { keyspace string table string - master string - replicaSet []string + master netip.Addr + replicaSet []netip.Addr ranges []scyllaclient.TokenRange intensity int jobType jobType @@ -103,12 +105,21 @@ func newGenerator(ctx context.Context, target Target, client *scyllaclient.Clien if err != nil { return nil, err } + hostShards, err := maps.MapKeyWithError(shards, netip.ParseAddr) + if err != nil { + return nil, err + } + + hostDC, err := maps.MapKeyWithError(status.HostDC(), netip.ParseAddr) + if err != nil { + return nil, err + } return &generator{ generatorTools: generatorTools{ target: target, ctl: newRowLevelRepairController(i), - ms: newMasterSelector(shards, status.HostDC(), closestDC), + ms: newMasterSelector(hostShards, hostDC, closestDC), submitter: s, ringDescriber: scyllaclient.NewRingDescriber(ctx, client), stop: &atomic.Bool{}, @@ -303,7 +314,6 @@ func (tg *tableGenerator) newJob() (job, bool) { if tg.JobType.fullTableRepair() { tg.JobType = skipJobType } - return job{ keyspace: tg.Keyspace, table: tg.Table, @@ -374,6 +384,7 @@ func (tg *tableGenerator) processResult(ctx context.Context, jr jobResult) { tg.stopGenerating() } } + tg.ctl.Unblock(jr.replicaSet) } diff --git a/pkg/service/repair/helper_integration_test.go b/pkg/service/repair/helper_integration_test.go index 7a5cca5d7..4f2ae1582 100644 --- a/pkg/service/repair/helper_integration_test.go +++ b/pkg/service/repair/helper_integration_test.go @@ -14,6 +14,7 @@ import ( "fmt" "io" "net/http" + "net/netip" "os" "slices" "strconv" @@ -30,6 +31,7 @@ import ( . "github.com/scylladb/scylla-manager/v3/pkg/testutils/testconfig" "github.com/scylladb/scylla-manager/v3/pkg/util/httpx" "github.com/scylladb/scylla-manager/v3/pkg/util/uuid" + slices2 "github.com/scylladb/scylla-manager/v3/pkg/util2/slices" "github.com/scylladb/scylla-manager/v3/swagger/gen/scylla/v1/client/operations" "github.com/scylladb/scylla-manager/v3/swagger/gen/scylla/v1/models" ) @@ -105,18 +107,18 @@ func dropKeyspace(t *testing.T, session gocqlx.Session, keyspace string) { type repairReq struct { // always set - host string + host netip.Addr keyspace string table string // always set for vnode - replicaSet []string + replicaSet []netip.Addr ranges []scyllaclient.TokenRange // optional for vnode smallTableOptimization bool rangesParallelism int // optional for tablet dcFilter []string - hostFilter []string + hostFilter []netip.Addr } func (r repairReq) fullTable() string { @@ -124,7 +126,7 @@ func (r repairReq) fullTable() string { } type repairStatusReq struct { - host string + host netip.Addr id string } @@ -205,13 +207,15 @@ func parseRepairAsyncReq(t *testing.T, req *http.Request) repairReq { } sched := repairReq{ - host: req.Host, + host: netip.MustParseAddr(req.URL.Hostname()), keyspace: strings.TrimPrefix(req.URL.Path, repairAsyncEndpoint+"/"), table: req.URL.Query().Get("columnFamilies"), - replicaSet: strings.Split(req.URL.Query().Get("hosts"), ","), ranges: parseRanges(t, req.URL.Query().Get("ranges")), smallTableOptimization: req.URL.Query().Get("small_table_optimization") == "true", } + if replicaSet := req.URL.Query().Get("hosts"); replicaSet != "" { + sched.replicaSet = slices2.Map(strings.Split(replicaSet, ","), netip.MustParseAddr) + } if rawRangesParallelism := req.URL.Query().Get("ranges_parallelism"); rawRangesParallelism != "" { rangesParallelism, err := strconv.Atoi(rawRangesParallelism) if err != nil { @@ -235,11 +239,15 @@ func parseTabletRepairReq(t 
*testing.T, req *http.Request) repairReq { } sched := repairReq{ - host: req.Host, - keyspace: req.URL.Query().Get("ks"), - table: req.URL.Query().Get("table"), - dcFilter: strings.Split(req.URL.Query().Get("dcs_filter"), ","), - hostFilter: strings.Split(req.URL.Query().Get("hosts_filter"), ","), + host: netip.MustParseAddr(req.URL.Hostname()), + keyspace: req.URL.Query().Get("ks"), + table: req.URL.Query().Get("table"), + } + if dcFilter := req.URL.Query().Get("dcs_filter"); dcFilter != "" { + sched.dcFilter = strings.Split(dcFilter, ",") + } + if hostsFilter := req.URL.Query().Get("hosts_filter"); hostsFilter != "" { + sched.hostFilter = slices2.Map(strings.Split(hostsFilter, ","), netip.MustParseAddr) } if sched.keyspace == "" || sched.table == "" { t.Error("Not fully initialized tablet repair sched req") @@ -268,7 +276,7 @@ func parseRepairAsyncStatusReq(t *testing.T, req *http.Request) repairStatusReq } status := repairStatusReq{ - host: req.Host, + host: netip.MustParseAddr(req.URL.Hostname()), id: req.URL.Query().Get("id"), } if status.id == "" { @@ -286,7 +294,7 @@ func parseTabletRepairStatusReq(t *testing.T, req *http.Request) repairStatusReq } status := repairStatusReq{ - host: req.Host, + host: netip.MustParseAddr(req.URL.Hostname()), id: strings.TrimPrefix(req.URL.Path, waitTaskEndpoint+"/"), } if status.id == "" { @@ -617,7 +625,7 @@ func repairRunningInterceptor() (http.RoundTripper, chan struct{}) { }), done } -func repairReqAssertHostInterceptor(t *testing.T, host string) http.RoundTripper { +func repairReqAssertHostInterceptor(t *testing.T, host netip.Addr) http.RoundTripper { return httpx.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { if r, ok := parseRepairReq(t, req); ok { if !slices.Contains(r.replicaSet, host) { diff --git a/pkg/service/repair/intensity_handler.go b/pkg/service/repair/intensity_handler.go index b84e8c653..b500b1be0 100644 --- a/pkg/service/repair/intensity_handler.go +++ b/pkg/service/repair/intensity_handler.go @@ -5,6 +5,7 @@ package repair import ( "context" "math" + "net/netip" "github.com/scylladb/go-log" "github.com/scylladb/scylla-manager/v3/pkg/util/uuid" @@ -19,7 +20,7 @@ type intensityParallelHandler struct { taskID uuid.UUID runID uuid.UUID logger log.Logger - maxHostIntensity map[string]Intensity + maxHostIntensity map[netip.Addr]Intensity intensity *atomic.Int64 maxParallel int parallel *atomic.Int64 @@ -55,7 +56,7 @@ func (i *intensityParallelHandler) SetParallel(ctx context.Context, parallel int // ReplicaSetMaxIntensity returns the max amount of ranges that can be repaired in parallel on given replica set. // It results in returning min(max_repair_ranges_in_parallel) across nodes from replica set. -func (i *intensityParallelHandler) ReplicaSetMaxIntensity(replicaSet []string) Intensity { +func (i *intensityParallelHandler) ReplicaSetMaxIntensity(replicaSet []netip.Addr) Intensity { out := NewIntensity(math.MaxInt) for _, rep := range replicaSet { if ranges := i.maxHostIntensity[rep]; ranges < out { @@ -66,7 +67,7 @@ func (i *intensityParallelHandler) ReplicaSetMaxIntensity(replicaSet []string) I } // MaxHostIntensity returns max_token_ranges_in_parallel per host. 
-func (i *intensityParallelHandler) MaxHostIntensity() map[string]Intensity { +func (i *intensityParallelHandler) MaxHostIntensity() map[netip.Addr]Intensity { return i.maxHostIntensity } diff --git a/pkg/service/repair/master_selector.go b/pkg/service/repair/master_selector.go index 266aab012..1faca68fe 100644 --- a/pkg/service/repair/master_selector.go +++ b/pkg/service/repair/master_selector.go @@ -3,7 +3,10 @@ package repair import ( + "maps" "math" + "net/netip" + "slices" "sort" "github.com/scylladb/scylla-manager/v3/pkg/util/slice" @@ -12,14 +15,10 @@ import ( // masterSelector describes each host priority for being repair master. // Repair master is first chosen by smallest shard count, // then by smallest dc RTT from SM. -type masterSelector map[string]int - -func newMasterSelector(shards map[string]uint, hostDC map[string]string, closestDC []string) masterSelector { - hosts := make([]string, 0, len(shards)) - for h := range shards { - hosts = append(hosts, h) - } +type masterSelector map[netip.Addr]int +func newMasterSelector(shards map[netip.Addr]uint, hostDC map[netip.Addr]string, closestDC []string) masterSelector { + hosts := slices.Collect(maps.Keys(shards)) sort.Slice(hosts, func(i, j int) bool { if shards[hosts[i]] != shards[hosts[j]] { return shards[hosts[i]] < shards[hosts[j]] @@ -35,8 +34,8 @@ func newMasterSelector(shards map[string]uint, hostDC map[string]string, closest } // Select returns repair master from replica set. -func (ms masterSelector) Select(replicas []string) string { - var master string +func (ms masterSelector) Select(replicas []netip.Addr) netip.Addr { + var master netip.Addr p := math.MaxInt64 for _, r := range replicas { if ms[r] < p { diff --git a/pkg/service/repair/model.go b/pkg/service/repair/model.go index 7b1a0ee06..80d71b6a3 100644 --- a/pkg/service/repair/model.go +++ b/pkg/service/repair/model.go @@ -3,6 +3,7 @@ package repair import ( + "net/netip" "time" "github.com/scylladb/scylla-manager/v3/pkg/scyllaclient" @@ -31,16 +32,16 @@ func NewIntensityFromDeprecated(i float64) Intensity { // Target specifies what shall be repaired. type Target struct { - Units []Unit `json:"units"` - DC []string `json:"dc"` - Host string `json:"host,omitempty"` + Units []Unit `json:"units"` + DC []string `json:"dc"` + Host netip.Addr `json:"host,omitempty"` // Down hosts excluded from repair by the --ignore-down-hosts flag. - IgnoreHosts []string `json:"ignore_hosts,omitempty"` - FailFast bool `json:"fail_fast"` - Continue bool `json:"continue"` - Intensity Intensity `json:"intensity"` - Parallel int `json:"parallel"` - SmallTableThreshold int64 `json:"small_table_threshold"` + IgnoreHosts []netip.Addr `json:"ignore_hosts,omitempty"` + FailFast bool `json:"fail_fast"` + Continue bool `json:"continue"` + Intensity Intensity `json:"intensity"` + Parallel int `json:"parallel"` + SmallTableThreshold int64 `json:"small_table_threshold"` } // taskProperties is the main data structure of the runner.Properties blob. 
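
> Note on the `Target` change above: `encoding/json`'s `omitempty` only omits false, 0, nil pointers/interfaces, and empty arrays, slices, maps, and strings. A struct type such as `netip.Addr` is never considered empty, so after this change an unset `Host` marshals as `"host":""` (via `Addr.MarshalText`, which encodes the zero `Addr` as the empty string) instead of being dropped. A small demonstration:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/netip"
)

// target mirrors the Host field of repair.Target after this patch.
type target struct {
	Host netip.Addr `json:"host,omitempty"`
}

func main() {
	b, err := json.Marshal(target{})
	if err != nil {
		panic(err)
	}
	// omitempty does not fire for struct types, so the zero netip.Addr
	// is marshalled through MarshalText as an empty string.
	fmt.Println(string(b)) // {"host":""}
}
```

> If omitting the key matters for backwards compatibility of persisted task properties, a `*netip.Addr` or a custom marshaller would be needed; worth double-checking against stored repair targets.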
diff --git a/pkg/service/repair/plan.go b/pkg/service/repair/plan.go index fa96c282b..321d4786e 100644 --- a/pkg/service/repair/plan.go +++ b/pkg/service/repair/plan.go @@ -5,6 +5,8 @@ package repair import ( "context" "math" + "net/netip" + "slices" "sort" "sync/atomic" @@ -21,7 +23,7 @@ type plan struct { Hosts []string MaxParallel int - MaxHostIntensity map[string]Intensity + MaxHostIntensity map[netip.Addr]Intensity apiSupport apiSupport // Used for progress purposes Stats map[scyllaclient.HostKeyspaceTable]tableStats @@ -90,10 +92,9 @@ func newPlan(ctx context.Context, target Target, client *scyllaclient.Client) (* } replicaSetCnt++ - allHosts.Add(filtered...) - for _, h := range filtered { - ranges[newHostKsTable(h, u.Keyspace, t)] += len(rep.Ranges) + allHosts.Add(h.String()) + ranges[newHostKsTable(h.String(), u.Keyspace, t)] += len(rep.Ranges) } rangesCnt += len(rep.Ranges) } @@ -249,9 +250,9 @@ func (p keyspacePlans) fillSmall(smallTableThreshold int64) { // ShouldRepairRing when all ranges are replicated (len(replicaSet) > 1) in specified dcs. // If host is set, it also checks if host belongs to the dcs. -func ShouldRepairRing(ring scyllaclient.Ring, dcs []string, host string) bool { +func ShouldRepairRing(ring scyllaclient.Ring, dcs []string, host netip.Addr) bool { repairedDCs := strset.New(dcs...) - if host != "" { + if host.IsValid() { if dc, ok := ring.HostDC[host]; !ok || !repairedDCs.Has(dc) { return false } @@ -272,7 +273,7 @@ func ShouldRepairRing(ring scyllaclient.Ring, dcs []string, host string) bool { } // maxHostIntensity sets max_ranges_in_parallel for all repaired host. -func maxHostIntensity(ctx context.Context, client *scyllaclient.Client, hosts []string) (map[string]Intensity, error) { +func maxHostIntensity(ctx context.Context, client *scyllaclient.Client, hosts []string) (map[netip.Addr]Intensity, error) { shards, err := client.HostsShardCount(ctx, hosts) if err != nil { return nil, err @@ -281,15 +282,19 @@ func maxHostIntensity(ctx context.Context, client *scyllaclient.Client, hosts [] if err != nil { return nil, err } - return hostMaxRanges(shards, memory), nil + return hostMaxRanges(shards, memory) } -func hostMaxRanges(shards map[string]uint, memory map[string]int64) map[string]Intensity { - out := make(map[string]Intensity, len(shards)) +func hostMaxRanges(shards map[string]uint, memory map[string]int64) (map[netip.Addr]Intensity, error) { + out := make(map[netip.Addr]Intensity, len(shards)) for h, sh := range shards { - out[h] = maxRepairRangesInParallel(sh, memory[h]) + ip, err := netip.ParseAddr(h) + if err != nil { + return nil, err + } + out[ip] = maxRepairRangesInParallel(sh, memory[h]) } - return out + return out, nil } func maxRepairRangesInParallel(shards uint, totalMemory int64) Intensity { @@ -336,18 +341,16 @@ func MaxRingParallel(ring scyllaclient.Ring, dcs []string) int { } // Filters replica set according to --dc, --ignore-down-hosts, --host. 
-func filterReplicaSet(replicaSet []string, hostDC map[string]string, target Target) []string { - if target.Host != "" && !slice.ContainsString(replicaSet, target.Host) { +func filterReplicaSet(replicaSet []netip.Addr, hostDC map[netip.Addr]string, target Target) []netip.Addr { + if target.Host.IsValid() && !slices.Contains(replicaSet, target.Host) { return nil } - - var out []string + var out []netip.Addr for _, h := range replicaSet { - if slice.ContainsString(target.DC, hostDC[h]) && !slice.ContainsString(target.IgnoreHosts, h) { + if slice.ContainsString(target.DC, hostDC[h]) && !slices.Contains(target.IgnoreHosts, h) { out = append(out, h) } } - return out } diff --git a/pkg/service/repair/progress.go b/pkg/service/repair/progress.go index a42d14bbc..ed73d8e67 100644 --- a/pkg/service/repair/progress.go +++ b/pkg/service/repair/progress.go @@ -276,7 +276,7 @@ func (pm *dbProgressManager) OnJobStart(ctx context.Context, j job) { defer q.Release() for _, h := range j.replicaSet { - pk := newHostKsTable(h, j.keyspace, j.table) + pk := newHostKsTable(h.String(), j.keyspace, j.table) pm.mu.Lock() rp := pm.progress[pk] @@ -297,7 +297,7 @@ func (pm *dbProgressManager) OnJobStart(ctx context.Context, j job) { if err := q.Exec(); err != nil { pm.logger.Error(ctx, "Update repair progress", "key", pk, "error", err) } - pm.metrics.AddJob(pm.run.ClusterID, h, len(j.ranges)) + pm.metrics.AddJob(pm.run.ClusterID, h.String(), len(j.ranges)) } } @@ -315,7 +315,7 @@ func (pm *dbProgressManager) onJobEndProgress(ctx context.Context, result jobRes for _, h := range result.replicaSet { pm.mu.Lock() - pk := newHostKsTable(h, result.keyspace, result.table) + pk := newHostKsTable(h.String(), result.keyspace, result.table) rp := pm.progress[pk] if result.Success() { rp.Success += int64(len(result.ranges)) @@ -340,8 +340,8 @@ func (pm *dbProgressManager) onJobEndProgress(ctx context.Context, result jobRes pm.logger.Error(ctx, "Update repair progress", "key", pk, "error", err) } - pm.metrics.SubJob(pm.run.ClusterID, h, len(result.ranges)) - pm.metrics.SetTokenRanges(pm.run.ClusterID, result.keyspace, result.table, h, + pm.metrics.SubJob(pm.run.ClusterID, h.String(), len(result.ranges)) + pm.metrics.SetTokenRanges(pm.run.ClusterID, result.keyspace, result.table, h.String(), rp.TokenRanges, rp.Success, rp.Error) } } diff --git a/pkg/service/repair/progress_integration_test.go b/pkg/service/repair/progress_integration_test.go index 87297ed63..13a670d1e 100644 --- a/pkg/service/repair/progress_integration_test.go +++ b/pkg/service/repair/progress_integration_test.go @@ -7,6 +7,7 @@ package repair import ( "context" + "net/netip" "slices" "testing" "time" @@ -34,6 +35,9 @@ func TestProgressManagerIntegration(t *testing.T) { NearDurationComparer(5 * time.Millisecond), } + h1 := netip.MustParseAddr("192.168.100.11") + h2 := netip.MustParseAddr("192.168.100.12") + t.Run("progress update sequence (Init,OnJobStart,OnJobEnd)", func(t *testing.T) { var ( run = &Run{ @@ -49,13 +53,13 @@ func TestProgressManagerIntegration(t *testing.T) { EndToken: 10, } p = &plan{ - Hosts: []string{"h1", "h2"}, + Hosts: []string{h1.String(), h2.String()}, Stats: map[scyllaclient.HostKeyspaceTable]tableStats{ - newHostKsTable("h1", "k1", "t1"): { + newHostKsTable(h1.String(), "k1", "t1"): { Size: 5, Ranges: 2, }, - newHostKsTable("h2", "k1", "t1"): { + newHostKsTable(h2.String(), "k1", "t1"): { Size: 7, Ranges: 2, }, @@ -86,7 +90,7 @@ func TestProgressManagerIntegration(t *testing.T) { ClusterID: run.ClusterID, TaskID: run.TaskID, RunID: 
run.ID, - Host: "h1", + Host: h1.String(), Keyspace: "k1", Table: "t1", Size: 5, @@ -98,7 +102,7 @@ func TestProgressManagerIntegration(t *testing.T) { ClusterID: run.ClusterID, TaskID: run.TaskID, RunID: run.ID, - Host: "h2", + Host: h2.String(), Keyspace: "k1", Table: "t1", Size: 7, @@ -116,8 +120,8 @@ func TestProgressManagerIntegration(t *testing.T) { j := job{ keyspace: "k1", table: "t1", - master: "h1", - replicaSet: []string{"h1", "h2"}, + master: netip.MustParseAddr("192.168.100.11"), + replicaSet: []netip.Addr{netip.MustParseAddr("192.168.100.11"), netip.MustParseAddr("192.168.100.12")}, ranges: []scyllaclient.TokenRange{token1}, } @@ -193,10 +197,10 @@ func TestProgressManagerIntegration(t *testing.T) { } p = &plan{ // Plan containing token1 and token2 Stats: map[scyllaclient.HostKeyspaceTable]tableStats{ - newHostKsTable("h1", "k1", "t1"): { + newHostKsTable(h1.String(), "k1", "t1"): { Ranges: 1, }, - newHostKsTable("h2", "k1", "t1"): { + newHostKsTable(h2.String(), "k1", "t1"): { Ranges: 1, }, }, @@ -229,7 +233,7 @@ func TestProgressManagerIntegration(t *testing.T) { ClusterID: prevRun.ClusterID, TaskID: prevRun.TaskID, RunID: prevRun.ID, - Host: "h1", + Host: h1.String(), Keyspace: "k1", Table: "t1", TokenRanges: 1, @@ -241,7 +245,7 @@ func TestProgressManagerIntegration(t *testing.T) { ClusterID: prevRun.ClusterID, TaskID: prevRun.TaskID, RunID: prevRun.ID, - Host: "h2", + Host: h2.String(), Keyspace: "k1", Table: "t1", TokenRanges: 1, diff --git a/pkg/service/repair/repair_test.go b/pkg/service/repair/repair_test.go index 664e750cc..277e1f4e9 100644 --- a/pkg/service/repair/repair_test.go +++ b/pkg/service/repair/repair_test.go @@ -4,30 +4,33 @@ package repair_test import ( "fmt" + "net/netip" + "strings" "testing" "github.com/scylladb/scylla-manager/v3/pkg/dht" "github.com/scylladb/scylla-manager/v3/pkg/scyllaclient" "github.com/scylladb/scylla-manager/v3/pkg/service/repair" + "github.com/scylladb/scylla-manager/v3/pkg/util2/slices" ) func TestMaxRingParallel(t *testing.T) { - hostDC := map[string]string{ + hostDC := map[netip.Addr]string{ // dc1 -> 3 - "h1": "dc1", - "h2": "dc1", - "h3": "dc1", + hostIP("h1"): "dc1", + hostIP("h2"): "dc1", + hostIP("h3"): "dc1", // dc2 -> 4 - "h4": "dc2", - "h5": "dc2", - "h6": "dc2", - "h7": "dc2", + hostIP("h4"): "dc2", + hostIP("5"): "dc2", + hostIP("6"): "dc2", + hostIP("7"): "dc2", // dc3 -> 5 - "h8": "dc3", - "h9": "dc3", - "h10": "dc3", - "h11": "dc3", - "h12": "dc3", + hostIP("8"): "dc3", + hostIP("9"): "dc3", + hostIP("10"): "dc3", + hostIP("11"): "dc3", + hostIP("12"): "dc3", } testCases := []struct { @@ -109,25 +112,25 @@ func TestMaxRingParallel(t *testing.T) { } func TestShouldRepairRing(t *testing.T) { - hostDC := map[string]string{ + hostDC := map[netip.Addr]string{ // dc1 -> 1 - "h1": "dc1", + hostIP("h1"): "dc1", // dc2 -> 1 - "h2": "dc2", + hostIP("h2"): "dc2", // dc3 -> 3 - "h3": "dc3", - "h4": "dc3", - "h5": "dc3", + hostIP("h3"): "dc3", + hostIP("h4"): "dc3", + hostIP("h5"): "dc3", // dc3 -> 4 - "h6": "dc4", - "h7": "dc4", - "h8": "dc4", - "h9": "dc4", + hostIP("h6"): "dc4", + hostIP("h7"): "dc4", + hostIP("h8"): "dc4", + hostIP("h9"): "dc4", } rs := func(reps ...string) scyllaclient.ReplicaTokenRanges { return scyllaclient.ReplicaTokenRanges{ - ReplicaSet: reps, + ReplicaSet: slices.Map(reps, hostIP), Ranges: []scyllaclient.TokenRange{ { StartToken: dht.Murmur3MinToken, @@ -305,9 +308,17 @@ func TestShouldRepairRing(t *testing.T) { tc := testCases[i] t.Run(tc.Name, func(t *testing.T) { t.Parallel() - if out := 
repair.ShouldRepairRing(tc.Ring, tc.DCs, tc.Host); out != tc.Expected { + if out := repair.ShouldRepairRing(tc.Ring, tc.DCs, hostIP(tc.Host)); out != tc.Expected { t.Fatalf("Expected %v, got %v", tc.Expected, out) } }) } } + +// dummyHost is a number (possibly prefixed with "h"). +func hostIP(dummyHost string) netip.Addr { + if dummyHost == "" { + return netip.Addr{} + } + return netip.MustParseAddr("192.168.100." + strings.TrimPrefix(dummyHost, "h")) +} diff --git a/pkg/service/repair/service.go b/pkg/service/repair/service.go index b8c6dbd18..5c4f8c1e5 100644 --- a/pkg/service/repair/service.go +++ b/pkg/service/repair/service.go @@ -6,6 +6,7 @@ import ( "context" "encoding/json" "fmt" + "net/netip" "slices" "strings" "sync" @@ -27,6 +28,7 @@ import ( "github.com/scylladb/scylla-manager/v3/pkg/util/timeutc" "github.com/scylladb/scylla-manager/v3/pkg/util/uuid" "github.com/scylladb/scylla-manager/v3/pkg/util/workerpool" + slices2 "github.com/scylladb/scylla-manager/v3/pkg/util2/slices" "go.uber.org/atomic" "go.uber.org/multierr" ) @@ -89,7 +91,6 @@ func (s *Service) GetTarget(ctx context.Context, clusterID uuid.UUID, properties // Copy basic properties t := Target{ - Host: props.Host, FailFast: props.FailFast, Continue: props.Continue, Intensity: NewIntensityFromDeprecated(props.Intensity), @@ -97,6 +98,14 @@ func (s *Service) GetTarget(ctx context.Context, clusterID uuid.UUID, properties SmallTableThreshold: props.SmallTableThreshold, } + if props.Host != "" { + hostIP, err := netip.ParseAddr(props.Host) + if err != nil { + return Target{}, util.ErrValidate(errors.Wrap(err, "parse host IP address")) + } + t.Host = hostIP + } + client, err := s.scyllaClient(ctx, clusterID) if err != nil { return t, errors.Wrapf(err, "get client") @@ -120,7 +129,11 @@ func (s *Service) GetTarget(ctx context.Context, clusterID uuid.UUID, properties } // Ignore nodes with status DOWN if props.IgnoreDownHosts { - t.IgnoreHosts = status.Datacenter(t.DC).Down().Hosts() + ips, err := slices2.MapWithError(status.Datacenter(t.DC).Down().Hosts(), netip.ParseAddr) + if err != nil { + return t, util.ErrValidate(errors.Wrapf(err, "parse down hosts IP addresses")) + } + t.IgnoreHosts = ips } if err := validateIgnoreDownNodes(t, status); err != nil { return t, err @@ -189,7 +202,7 @@ func validateIgnoreDownNodes(t Target, status scyllaclient.NodeStatusInfoSlice) func validateHost(t Target, p *plan, dcMap map[string][]string) error { // Nothing to validate - no --host flag - if t.Host == "" { + if !t.Host.IsValid() { return nil } // Ensure Host is not ignored @@ -197,7 +210,9 @@ func validateHost(t Target, p *plan, dcMap map[string][]string) error { return errors.New("host can't have status down") } // Ensure Host belongs to DCs - if !hostBelongsToDCs(t.Host, t.DC, dcMap) { + if ok, err := hostBelongsToDCs(t.Host, t.DC, dcMap); err != nil { + return err + } else if !ok { return util.ErrValidate(errors.Errorf("no such host %s in DC %s", t.Host, strings.Join(t.DC, ", "))) } // Ensure Host is not used with tablet repair API @@ -218,15 +233,19 @@ func validateHost(t Target, p *plan, dcMap map[string][]string) error { return nil } -func hostBelongsToDCs(host string, dcs []string, dcMap map[string][]string) bool { +func hostBelongsToDCs(host netip.Addr, dcs []string, dcMap map[string][]string) (bool, error) { for _, dc := range dcs { for _, h := range dcMap[dc] { - if host == h { - return true + ip, err := netip.ParseAddr(h) + if err != nil { + return false, err + } + if host == ip { + return true, nil } } } - return false + 
return false, nil } // Repair performs the repair process on the Target. @@ -242,7 +261,7 @@ func (s *Service) Repair(ctx context.Context, clusterID, taskID, runID uuid.UUID TaskID: taskID, ID: runID, DC: target.DC, - Host: target.Host, + Host: target.Host.String(), Parallel: target.Parallel, Intensity: target.Intensity, StartTime: timeutc.Now(), @@ -376,7 +395,7 @@ func (s *Service) killAllRepairs(ctx context.Context, client *scyllaclient.Clien } func (s *Service) newIntensityHandler(ctx context.Context, clusterID, taskID, runID uuid.UUID, - maxHostIntensity map[string]Intensity, maxParallel int, poolController sizeSetter, + maxHostIntensity map[netip.Addr]Intensity, maxParallel int, poolController sizeSetter, ) (ih *intensityParallelHandler, cleanup func()) { ih = &intensityParallelHandler{ taskID: taskID, diff --git a/pkg/service/repair/service_repair_integration_test.go b/pkg/service/repair/service_repair_integration_test.go index 036f424de..a0b92a79a 100644 --- a/pkg/service/repair/service_repair_integration_test.go +++ b/pkg/service/repair/service_repair_integration_test.go @@ -10,6 +10,7 @@ import ( "encoding/json" "fmt" "net/http" + "net/netip" "slices" "sort" "strings" @@ -477,9 +478,13 @@ func TestServiceGetTargetIntegration(t *testing.T) { cmpopts.SortSlices(func(u1, u2 repair.Unit) bool { return u1.Keyspace < u2.Keyspace }), cmpopts.IgnoreUnexported(repair.Target{}), cmpopts.IgnoreSliceElements(func(u repair.Unit) bool { return u.Keyspace == "system_replicated_keys" || u.Keyspace == "system_auth" }), - cmpopts.IgnoreSliceElements(func(t string) bool { return t == "dicts" })); diff != "" { + cmpopts.IgnoreSliceElements(func(t string) bool { return t == "dicts" }), + cmpopts.IgnoreFields(repair.Target{}, "Host")); diff != "" { t.Fatal(diff) } + if golden.Host != v.Host { + t.Fatalf("Expected host: %s, got: %s", golden.Host, v.Host) + } }) } } @@ -512,11 +517,11 @@ func TestServiceRepairOneJobPerHostIntegration(t *testing.T) { } // The amount of currently executed repair jobs on host - jobsPerHost := make(map[string]int) + jobsPerHost := make(map[netip.Addr]int) muJPH := sync.Mutex{} // Set of hosts used for given repair job - hostsInJob := make(map[string][]string) + hostsInJob := make(map[string][]netip.Addr) muHIJ := sync.Mutex{} cnt := atomic.Int64{} @@ -545,14 +550,14 @@ func TestServiceRepairOneJobPerHostIntegration(t *testing.T) { if r, ok := parseRepairResp(t, resp); ok { muHIJ.Lock() - hostsInJob[r.host+r.id] = r.replicaSet + hostsInJob[r.host.String()+r.id] = r.replicaSet muHIJ.Unlock() } if r, ok := parseRepairStatusResp(t, resp); ok { if r.status == repairStatusDone || r.status == repairStatusFailed { muHIJ.Lock() - hosts := hostsInJob[r.host+r.id] + hosts := hostsInJob[r.host.String()+r.id] muHIJ.Unlock() muJPH.Lock() @@ -700,14 +705,14 @@ func TestServiceRepairOrderIntegration(t *testing.T) { if r, ok := parseRepairResp(t, resp); ok { // Register what table is being repaired muJT.Lock() - jobTable[r.host+r.id] = r.fullTable() + jobTable[r.host.String()+r.id] = r.fullTable() muJT.Unlock() } if r, ok := parseRepairStatusResp(t, resp); ok { if r.status == repairStatusDone || r.status == repairStatusFailed { // Add host prefix as IDs are unique only for a given host - jobID := r.host + r.id + jobID := r.host.String() + r.id muJT.Lock() fullTable := jobTable[jobID] muJT.Unlock() @@ -899,7 +904,7 @@ func TestServiceRepairResumeAllRangesIntegration(t *testing.T) { if r, ok := parseRepairResp(t, resp); ok { // Register what table is being repaired muJS.Lock() - 
jobSpec[r.host+r.id] = TableRange{ + jobSpec[r.host.String()+r.id] = TableRange{ FullTable: r.fullTable(), Ranges: r.ranges, } @@ -918,7 +923,7 @@ func TestServiceRepairResumeAllRangesIntegration(t *testing.T) { muJS.Lock() defer muJS.Unlock() - k := r.host + r.id + k := r.host.String() + r.id if tr, ok := jobSpec[k]; ok { // Make sure that retries don't result in counting redundant ranges delete(jobSpec, k) @@ -1095,7 +1100,7 @@ func TestServiceRepairIntegration(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var ignored = IPFromTestNet("12") + var ignored = ManagedClusterHost() h.stopNode(ignored) defer h.startNode(ignored, globalNodeInfo) @@ -1131,7 +1136,7 @@ func TestServiceRepairIntegration(t *testing.T) { t.Skip("This behavior is tested by test 'repair tablet API filtering'") } - host := h.GetHostsFromDC("dc1")[0] + host := netip.MustParseAddr(h.GetHostsFromDC("dc1")[0]) h.Hrt.SetInterceptor(repairReqAssertHostInterceptor(t, host)) Print("When: run repair") @@ -1681,19 +1686,12 @@ func TestServiceRepairIntegration(t *testing.T) { props["fail_fast"] = true Print("When: run repair") - i, running := repairRunningInterceptor() - h.Hrt.SetInterceptor(i) - h.runRepair(ctx, props) - - Print("When: repair is running") - chanClosedWithin(t, running, shortWait) - - Print("When: Scylla returns failures") var killRepairCalled int32 h.Hrt.SetInterceptor(combineInterceptors( countInterceptor(&killRepairCalled, isForceTerminateRepairReq), repairMockInterceptor(t, repairStatusFailed), )) + h.runRepair(ctx, props) Print("Then: repair finish with error") h.assertError(longWait) @@ -2164,13 +2162,13 @@ func TestServiceRepairIntegration(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - host := IPFromTestNet("22") + host := netip.MustParseAddr(h.GetHostsFromDC("dc2")[0]) h.Hrt.SetInterceptor(repairReqAssertHostInterceptor(t, host)) h.runRepair(ctx, map[string]any{ "dc": []string{"dc2"}, "keyspace": []string{tabletSingleDCKs, vnodeKs}, - "host": IPFromTestNet("22"), + "host": host.String(), }) h.assertDone(shortWait) }) @@ -2178,7 +2176,7 @@ func TestServiceRepairIntegration(t *testing.T) { t.Run("Repairing table with node down should fail at generating target", func(t *testing.T) { h := newRepairTestHelper(t, session, defaultConfig()) - down := IPFromTestNet("22") + down := ManagedClusterHost() h.stopNode(down) defer h.startNode(down, globalNodeInfo) @@ -2199,7 +2197,7 @@ func TestServiceRepairIntegration(t *testing.T) { t.Run("Repairing table with node down from filtered out DC should succeed", func(t *testing.T) { h := newRepairTestHelper(t, session, defaultConfig()) - down := IPFromTestNet("22") + down := h.GetHostsFromDC("dc2")[0] h.stopNode(down) defer h.startNode(down, globalNodeInfo) diff --git a/pkg/service/repair/worker.go b/pkg/service/repair/worker.go index 9617c63af..323bda772 100644 --- a/pkg/service/repair/worker.go +++ b/pkg/service/repair/worker.go @@ -11,6 +11,7 @@ import ( "github.com/scylladb/scylla-manager/v3/pkg/dht" "github.com/scylladb/scylla-manager/v3/pkg/scyllaclient" "github.com/scylladb/scylla-manager/v3/pkg/util/retry" + "github.com/scylladb/scylla-manager/v3/pkg/util2/slices" ) type worker struct { @@ -63,7 +64,7 @@ func (w *worker) runRepair(ctx context.Context, j job) (out error) { var ranges []scyllaclient.TokenRange switch { case j.jobType == tabletJobType: - return w.fullTabletTableRepair(ctx, j.keyspace, j.table, j.master) + return w.fullTabletTableRepair(ctx, j.keyspace, j.table, 
j.master.String()) case j.jobType == smallTableJobType: ranges = nil case j.jobType == mergeRangesJobType: @@ -77,7 +78,7 @@ func (w *worker) runRepair(ctx context.Context, j job) (out error) { ranges = j.ranges } - jobID, err = w.client.Repair(ctx, j.keyspace, j.table, j.master, j.replicaSet, ranges, j.intensity, j.jobType == smallTableJobType) + jobID, err = w.client.Repair(ctx, j.keyspace, j.table, j.master.String(), slices.MapToString(j.replicaSet), ranges, j.intensity, j.jobType == smallTableJobType) if err != nil { return errors.Wrap(err, "schedule repair") } @@ -92,7 +93,7 @@ func (w *worker) runRepair(ctx context.Context, j job) (out error) { "job_id", jobID, ) - status, err := w.client.RepairStatus(ctx, j.master, jobID) + status, err := w.client.RepairStatus(ctx, j.master.String(), jobID) if err != nil { return errors.Wrap(err, "get repair status") } @@ -136,7 +137,7 @@ func (w *worker) handleRunningStatus(ctx context.Context, j job) error { // Table deletion is visible only after a short while op := func() error { - exists, err := w.client.TableExists(ctx, j.master, j.keyspace, j.table) + exists, err := w.client.TableExists(ctx, j.master.String(), j.keyspace, j.table) if err != nil { return retry.Permanent(err) } @@ -155,7 +156,7 @@ func (w *worker) handleRunningStatus(ctx context.Context, j job) error { } func (w *worker) isTableDeleted(ctx context.Context, j job) bool { - exists, err := w.client.TableExists(ctx, j.master, j.keyspace, j.table) + exists, err := w.client.TableExists(ctx, j.master.String(), j.keyspace, j.table) if err != nil { w.logger.Error(ctx, "Couldn't check for table deletion", "keyspace", j.keyspace, diff --git a/pkg/util2/maps/maps.go b/pkg/util2/maps/maps.go new file mode 100644 index 000000000..ec55dcea6 --- /dev/null +++ b/pkg/util2/maps/maps.go @@ -0,0 +1,47 @@ +// Copyright (C) 2025 ScyllaDB + +package maps + +// SetFromSlice returns set with elements of 'in'. +func SetFromSlice[T comparable](in []T) map[T]struct{} { + out := make(map[T]struct{}, len(in)) + for _, t := range in { + out[t] = struct{}{} + } + return out +} + +// MapKey returns map obtained by calling 'f' on keys of 'in'. +func MapKey[T, K comparable, V any](in map[T]V, f func(T) K) map[K]V { + out := make(map[K]V, len(in)) + for t, v := range in { + out[f(t)] = v + } + return out +} + +// MapKeyWithError returns map obtained by calling 'f' on keys of 'in'. +func MapKeyWithError[T, K comparable, V any](in map[T]V, f func(T) (K, error)) (map[K]V, error) { + out := make(map[K]V, len(in)) + for t, v := range in { + k, err := f(t) + if err != nil { + return nil, err + } + out[k] = v + } + return out, nil +} + +// HasAnyKey checks if any of the passed items is a key in the map. +// It returns false if nothing is passed. +// For multiple items it returns true if any of the items exist. +func HasAnyKey[T comparable, V any](s map[T]V, ts ...T) bool { + has := false + for _, t := range ts { + if _, has = s[t]; has { + break + } + } + return has +} diff --git a/pkg/util2/slices/slices.go b/pkg/util2/slices/slices.go new file mode 100644 index 000000000..eb4fce81d --- /dev/null +++ b/pkg/util2/slices/slices.go @@ -0,0 +1,38 @@ +// Copyright (C) 2025 ScyllaDB + +package slices + +import ( + "fmt" +) + +// Map returns slice obtained by calling 'f' on elements of 'in'. +func Map[T, V any](in []T, f func(T) V) []V { + out := make([]V, len(in)) + for i, t := range in { + out[i] = f(t) + } + return out +} + +// MapWithError returns slice obtained by calling 'f' on elements of 'in'. 
+func MapWithError[T, V any](in []T, f func(T) (V, error)) ([]V, error) { + out := make([]V, len(in)) + for i, t := range in { + v, err := f(t) + if err != nil { + return nil, err + } + out[i] = v + } + return out, nil +} + +// MapToString returns slice obtained by calling 'String()' on elements of 'in'. +func MapToString[T fmt.Stringer](in []T) []string { + out := make([]string, len(in)) + for i, t := range in { + out[i] = t.String() + } + return out +}
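The util2/slices package added above exists mainly to bridge the string-based scyllaclient API and the netip.Addr-typed repair internals introduced by this change. A minimal usage sketch, assuming only the import path shown in the diff; the addresses are illustrative:

package main

import (
	"fmt"
	"net/netip"

	"github.com/scylladb/scylla-manager/v3/pkg/util2/slices"
)

func main() {
	// []string -> []netip.Addr: the first malformed entry fails the whole batch,
	// which is how GetTarget rejects unparsable down-host addresses.
	addrs, err := slices.MapWithError([]string{"192.168.100.11", "192.168.100.12"}, netip.ParseAddr)
	if err != nil {
		panic(err)
	}
	// []netip.Addr -> []string at the client boundary, as worker.runRepair does
	// when passing the replica set to client.Repair.
	fmt.Println(slices.MapToString(addrs))
}

The util2/maps helpers play the same role for map keys: MapKeyWithError can turn a map[string]V returned by the API into a map[netip.Addr]V, returning an error on the first key that fails to parse.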