Skip to content

Commit fc79598

Browse files
authored
Support running e2e tests across different Gw API versions (#12794)
Signed-off-by: sheidkamp <[email protected]> Signed-off-by: Seth Heidkamp <[email protected]>
1 parent 516b15c commit fc79598

File tree

52 files changed

+1281
-701
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1281
-701
lines changed

.github/actions/setup-kind-cluster/action.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ inputs:
1919
required: false
2020
default: "" # If this is undefined the setup-kind script has its own default value
2121
description: The version of the gateway-api to use
22+
gateway-api-channel:
23+
required: false
24+
default: "" # If this is undefined the setup-kind script has its own default value
25+
description: The channel of the gateway-api to use
2226
localstack:
2327
required: false
2428
default: "false"
@@ -38,6 +42,7 @@ runs:
3842
CLUSTER_NODE_VERSION: ${{ inputs.kind-node-version }}
3943
ISTIO_VERSION: ${{ inputs.istio-version }}
4044
CONFORMANCE_VERSION: ${{ inputs.gateway-api-version }}
45+
CONFORMANCE_CHANNEL: ${{ inputs.gateway-api-channel }}
4146
LOCALSTACK: ${{ inputs.localstack }}
4247
CONFORMANCE: true
4348
run: |

.github/workflows/e2e.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ jobs:
7575
- cluster-name: 'agent-gateway-cluster'
7676
go-test-args: '-v -timeout=25m'
7777
go-test-run-regex: '^TestAgentgatewayIntegration'
78-
agentgateway: 'true'
7978
# August 29, 2025: ~3 minutes
8079
- cluster-name: 'api-validation'
8180
go-test-args: '-v -timeout=10m'

.github/workflows/nightly-tests.yaml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ on:
2222
description: "Run load testing suite"
2323
type: boolean
2424
default: false
25+
run-e2e-tests:
26+
description: "Run e2e tests"
27+
type: boolean
28+
default: false
2529

2630
concurrency:
2731
group: ${{ github.workflow }}-${{ github.ref_name }}
@@ -59,3 +63,50 @@ jobs:
5963
with:
6064
ref: main
6165
- uses: ./.github/actions/kube-gateway-api-load-tests
66+
67+
kgateway_e2e_tests_for_gateway_api_versions:
68+
name: GwApi E2E
69+
if: ${{ (github.event_name == 'workflow_dispatch' && inputs.run-e2e-tests) || github.event.schedule == '0 5 * * *' }}
70+
runs-on: ubuntu-22.04
71+
timeout-minutes: 120
72+
strategy:
73+
fail-fast: false
74+
matrix:
75+
gateway-api-version: [ { version: 'v1.4.0', channel: 'experimental' },
76+
{ version: 'v1.4.0', channel: 'standard' },
77+
{ version: 'v1.3.0', channel: 'experimental' },
78+
{ version: 'v1.3.0', channel: 'standard' },
79+
{ version: 'v1.2.1', channel: 'experimental' },
80+
{ version: 'v1.2.1', channel: 'standard' }]
81+
steps:
82+
- uses: actions/checkout@v4
83+
- name: Prep Go Runner
84+
uses: ./.github/actions/prep-go-runner
85+
- name: Dotenv Action
86+
uses: falti/[email protected]
87+
id: dotenv
88+
with:
89+
path: "./.github/workflows/.env/nightly-tests/max_versions.env"
90+
log-variables: true
91+
- name: Setup KinD Cluster
92+
uses: ./.github/actions/setup-kind-cluster
93+
with:
94+
gateway-api-version: ${{ matrix.gateway-api-version.version }}
95+
gateway-api-channel: ${{ matrix.gateway-api-version.channel }}
96+
cluster-name: "kgw-api-e2e-${{ matrix.gateway-api-version.version }}-${{ matrix.gateway-api-version.channel }}"
97+
kubectl-version: ${{ steps.dotenv.outputs.kubectl_version }}
98+
istio-version: ${{ steps.dotenv.outputs.istio_version }}
99+
kind-node-version: ${{ steps.dotenv.outputs.node_version }}
100+
- id: run-tests
101+
uses: ./.github/actions/kubernetes-e2e-tests
102+
env:
103+
VERSION: '1.0.0-ci1'
104+
GITHUB_TOKEN: ${{ github.token }}
105+
GO_TEST_RETRIES: '3' # Use a higher number of retries because there are so many tests and we don't want to rerun
106+
GOTESTSUM_ARGS: '--format=standard-verbose --rerun-fails-max-failures 60' # High value for "rerun-fails-max-failures" because 3 failed tests can result in gotestsum thinking there's 40+ failures
107+
with:
108+
cluster-name: "kgw-api-e2e-${{ matrix.gateway-api-version.version }}-${{ matrix.gateway-api-version.channel }}"
109+
test-args: '-v -timeout=120m'
110+
run-regex: "^Test"
111+
istio-version: ${{ steps.dotenv.outputs.istio_version }}
112+
matrix-label: "nightly-kgw-api-${{ matrix.gateway-api-version.version }}-${{ matrix.gateway-api-version.channel }}"

design/12721.md

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# EP-12721: E2e testing with Gateway API Versions
2+
3+
4+
* Issue: [12721](https://github.com/kgateway-dev/kgateway/issues/12721)
5+
6+
7+
## Background
8+
The current e2e tests assume the latest supported experimental version of the `gateway.networking.k8s.io` APIs is installed. This will not always be the case in the environments in which kgateway is deployed. In order to validate functionality across a wider range of environments, we will allow testing with different versions of the Gateway API.
9+
10+
In addition to different semver-designated versions of the API, there are two channels, `standard` and `experimental`.
11+
12+
13+
### Differences in API versions
14+
* v0.3.0
15+
* TCPRoute, TLSRoute, and the unused UDPRoute added to experimental (not available in standard as of v1.4.0)
16+
* v1.1.0
17+
* SessionPersistence for HTTPRoute rules added to experimental (not available in standard as of v1.4.0)
18+
* v1.2.0
19+
* HTTPRoutes.spec.rules[].name added in experimental (promoted to standard in v1.4.0)
20+
* v1.3.0
21+
* XListenerSets added to experimental (not available in standard as of v1.4.0, planned for v1.5.0)
22+
* CORS filters added to experimental (not available in standard as of v1.4.0)
23+
* v1.4.0
24+
* BackendTLSPolicy promoted to v1 in standard and experimental. Previous v1alpha3 version is not supported.
25+
* HTTPRoutes.spec.rules[].name added to standard
26+
27+
There are a substantial number of tests that need to be modified to account for these differences.
28+
29+
30+
## Motivation
31+
Better test coverage and understanding of how kgateway works with different Gateway API versions
32+
33+
## Goals
34+
* E2E tests can be run locally or in CI with different versions (semver and channel) of the Gateway API
35+
* Consistent approach to managing resources for different versions of the API
36+
37+
## Non-Goals
38+
* Mass update of existing tests to use the BaseTestingSuite
39+
* Suites that don't use the BaseTestingSuite will continue to run all tests for any GatewayAPI version
40+
* Tests that need to implement version-dependent behavior will be migrated to BaseTestingSuite as needed
41+
* Running tests in CRC/Openshift
42+
* Updating application code to support earlier versions
43+
44+
45+
## Implementation Details
46+
### Determining the Gateway API version
47+
The Gateway API CRDs contain two relevant annotations:
48+
* `gateway.networking.k8s.io/bundle-version` - the API version, for example `gateway.networking.k8s.io/bundle-version: v1.2.0`
49+
* `gateway.networking.k8s.io/channel` - the API channel, standard or experimental, for example `gateway.networking.k8s.io/channel: standard`
50+
51+
These annotations can be examined to determine the version. If the annotations are not present, this should be considered a fatal error.
52+
53+
### Test cases
54+
The e2e tests are built up of [TestCases](https://github.com/kgateway-dev/kgateway/blob/2b04f3d1465257d0c449687922ea6e92603b822c/test/kubernetes/e2e/tests/base/base_suite.go#L33) that define the resources used for the tests.
55+
56+
In order to allow test cases to run conditionally based on the API version, we will add new fields, `MinGwApiVersion` and `MaxGwApiVersion` to the TestCase struct:
57+
58+
```
59+
// MinGwApiVersion specifies the minimum Gateway API version required per channel.
60+
// Map key is the channel (GwApiChannelStandard or GwApiChannelExperimental), value is the minimum version.
61+
// If the map is empty/nil, the test runs on any channel/version.
62+
// The test will only run if the Gateway API version is >= the specified minimum version.
63+
// For minimum requirements, if only experimental constraints exist, the test is considered experimental-only and will skip on standard channel.
64+
// Matching logic based on installed channel:
65+
// - experimental: If experimental key exists, check version; otherwise run
66+
// - standard: If standard key exists, check version; if only experimental exists, skip; otherwise runs on any standard version.
67+
MinGwApiVersion map[GwApiChannel]*GwApiVersion
68+
69+
// MaxGwApiVersion specifies the maximum Gateway API version required per channel.
70+
// Map key is the channel (GwApiChannelStandard or GwApiChannelExperimental), value is the maximum version.
71+
// If the map is empty/nil, the test runs on any channel/version.
72+
// The test will only run if the Gateway API version is < the specified maximum version.
73+
// Maximum constraints are channel-specific - experimental constraints don't affect standard channel execution.
74+
// If the maximum version is less than the minimum version, the test will be skipped.
75+
MaxGwApiVersion map[GwApiChannel]*GwApiVersion
76+
```
77+
78+
`GwApiChannel` is a typed string with the value of `experimental` or `standard`, and is used as the key of the `MinGwApiVersion` map; the corresponding value defines the minimum version of the API needed to run the test for that channel. If the currently installed version is not greater than or equal to the required version, the test will be skipped. If no `MinGwApiVersion` value is defined, the test will run on any version of the API. The exception to this logic is if the standard channel is installed and the `MinGwApiVersion` for the TestCase only defines an experimental minimum version, for example:
79+
80+
```
81+
MinGwApiVersion: map[base.GwApiChannel]*semver.Version{
82+
base.GwApiChannelExperimental: base.GwApiV1_3_0,
83+
},
84+
```
85+
86+
This will be interpreted as "the test needs to use features available in experimental API v1.3.0; these features are not yet available in the standard channel". In this case, the test will be skipped for all standard channel versions.
87+
88+
`GwApiVersion` is a wrapper around the underlying semver packages used, and was created in order to allow test suites to use semver types without having to know about the underlying implementation.
89+
90+
91+
### Test Suites
92+
93+
94+
#### SetupByVersion
95+
A common pattern used in our e2e tests is to set up a Gateway and possibly other resources during suite setup and use them for every test. This pattern allows the tests to run faster, as time is not spent deploying and removing Gateways. In the [BaseTestingSuite](https://github.com/kgateway-dev/kgateway/blob/2b04f3d1465257d0c449687922ea6e92603b822c/test/kubernetes/e2e/tests/base/base_suite.go#L49C1-L66C2), these resources are defined by the [Setup](https://github.com/kgateway-dev/kgateway/blob/2b04f3d1465257d0c449687922ea6e92603b822c/test/kubernetes/e2e/tests/base/base_suite.go#L53) field.
96+
97+
However, once we allow tests to run for different versions of the API, we are no longer in a "one configuration fits all" situation. For example, using ListenerSets requires `allowedListeners` to be defined on the Gateway, but this field will cause the resource to be rejected when using older versions of the API.
98+
99+
To accommodate this, we will add a new field `SetupByVersion` to the BaseTestingSuite:
100+
```
101+
// SetupByVersion allows defining different setup configurations for different GW API versions and channels.
102+
// The outer map key is the channel (standard or experimental).
103+
// The inner map key is the minimum version, and the value is the TestCase to use.
104+
// The system will select the setup with the highest matching version for the current channel.
105+
// If no setups match, falls back to the Setup field for backward compatibility.
106+
// Example:
107+
// SetupByVersion: map[GwApiChannel]map[*semver.Version]*TestCase{
108+
// GwApiChannelExperimental: {
109+
// GwApiV1_3_0: &setupExperimentalV1_3,
110+
// },
111+
// GwApiChannelStandard: {
112+
// GwApiV1_3_0: &setupStandardV1_4,
113+
// },
114+
// }
115+
SetupByVersion map[GwApiChannel]map[*semver.Version]*TestCase
116+
```
117+
118+
When choosing which setup to use, the suite will use the highest defined semver for the channel that is less than or equal to the current version, falling back to the existing `Setup` if there is no such version.
119+
120+
There are other data structures that could be used to store the setup information, but this approach was chosen because by making channel and version keys for the map, we guarantee that it will be unambiguous which setup to use.
121+
122+
#### MinGwApiVersion
123+
124+
`MinGwApiVersion` has also been added at the suite level to allow entire suites to be skipped.
125+
126+
This is used for the cases where all the tests in a suite require configuration not available in all Gw API versions, and it was introduced because test suites apply their setup before running (or skipping) the individual test cases. In these cases, the suite may run its setup with resources incompatible with the installed version of the Gw API, and we would not want to restore those resources.
127+
128+
### DevX
129+
* This approach requires no changes for tests and suites that aren't version sensitive
130+
* If a test needs to be skipped on certain versions, it can be configured on the test case
131+
* If a suite requires different setups/gateways based on version, once the setup is configured, additional test cases just need to be configured with the versions they run on.
132+
133+
134+
### Test Plan
135+
Successful runs of a GitHub job across versions v1.2-1.4 in both channels.
136+
137+
Tests and suites will be adapted to older versions in 2 ways:
138+
* If a test requires a feature (like XListenerSets or rule names in HTTPRoutes) that is not available in the installed Gw API version, that test will be skipped.
139+
* Some tests will fail because the suite setup or test resources have invalid config for a Gw API version, even though the test itself does not depend on that config. For example, a Gateway for the suite may be configured with `allowedListeners`, but only some tests use ListenerSets. In this case we will split the resources and use a combination of `SetupByVersion` and `MinGwApiVersion` to apply the appropriate config and run the appropriate tests for the Gw API version.
140+
141+
## Alternatives
142+
Do not test other versions of the API.
143+
144+
## Open Questions
145+
* Should we be able to set minimum version at the suite level? EG, for the listenerset suite, when we know that no tests in the suite will run?

devel/testing/nightly-tests.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,8 @@ The following are run on a schedule via a [GitHub action](/.github/workflows/nig
55
## Gateway API conformance tests
66
Kubernetes Gateway API conformance tests are run using the earliest and latest supported k8s versions.
77

8+
## Gateway Load Tests
9+
Kubernetes Gateway load tests are run using the earliest and latest supported k8s versions.
10+
11+
## E2E tests with different Gateway API versions
12+
The entire e2e suite is run against a variety of Gateway API Versions and Channels.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ require (
183183
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect
184184
github.com/MakeNowJust/heredoc v1.0.0 // indirect
185185
github.com/Masterminds/goutils v1.1.1 // indirect
186-
github.com/Masterminds/semver/v3 v3.4.0 // indirect
186+
github.com/Masterminds/semver/v3 v3.4.0
187187
github.com/Masterminds/sprig/v3 v3.3.0 // indirect
188188
github.com/Masterminds/squirrel v1.5.4 // indirect
189189
github.com/Microsoft/go-winio v0.6.2 // indirect

hack/utils/oss_compliance/osa_provided.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
Name|Version|License
22
---|---|---
33
[cel.dev/expr](https://cel.dev/expr)|v0.24.0|Apache License 2.0
4+
[semver/v3](https://github.com/Masterminds/semver)|v3.4.0|MIT License
45
[agentgateway/agentgateway](https://github.com/agentgateway/agentgateway)|v0.10.6-0.20251103234311-2f71d0e845d0|Apache License 2.0
56
[anthropics/anthropic-sdk-go](https://github.com/anthropics/anthropic-sdk-go)|v1.13.0|MIT License
67
[retry-go/v4](https://github.com/avast/retry-go)|v4.3.3|MIT License

pkg/utils/cmdutils/local.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ func (cmd *LocalCmd) WithStderr(w io.Writer) Cmd {
8181
// Run runs the command
8282
// If the returned error is non-nil, it should be of type *RunError
8383
func (cmd *LocalCmd) Run() *RunError {
84+
// Combined output is used to capture the stdout and stderr of the command for logging
8485
var combinedOutput threadsafe.Buffer
8586

8687
if printCommands {

pkg/utils/helmutils/client.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"io"
66

77
"github.com/kgateway-dev/kgateway/v2/pkg/utils/cmdutils"
8+
"github.com/kgateway-dev/kgateway/v2/pkg/utils/threadsafe"
89
)
910

1011
// Client is a utility for executing `helm` commands
@@ -26,8 +27,10 @@ func NewClient() *Client {
2627

2728
// WithReceiver sets the io.Writer that will be used by default for the stdout and stderr
2829
// of cmdutils.Cmd created by the Client
30+
// This modifies the value in place, so affects shared references to the Client and future commands run by the Client.
31+
// Wrap this in a threadsafe struct to avoid data races when wrapped in io.MultiWriter in cmdutils.
2932
func (c *Client) WithReceiver(receiver io.Writer) *Client {
30-
c.receiver = receiver
33+
c.receiver = &threadsafe.WriterWrapper{W: receiver}
3134
return c
3235
}
3336

pkg/utils/kubeutils/kubectl/cli.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/kgateway-dev/kgateway/v2/pkg/utils/cmdutils"
1717
"github.com/kgateway-dev/kgateway/v2/pkg/utils/kubeutils/portforward"
1818
"github.com/kgateway-dev/kgateway/v2/pkg/utils/requestutils/curl"
19+
"github.com/kgateway-dev/kgateway/v2/pkg/utils/threadsafe"
1920
)
2021

2122
// Cli is a utility for executing `kubectl` commands
@@ -52,8 +53,10 @@ type CurlResponse struct {
5253

5354
// WithReceiver sets the io.Writer that will be used by default for the stdout and stderr
5455
// of cmdutils.Cmd created by the Cli
56+
// This modifies the value in place, so affects shared references to the Cli and future commands run by the Cli.
57+
// Wrap this in a threadsafe struct to avoid data races when wrapped in io.MultiWriter in cmdutils.
5558
func (c *Cli) WithReceiver(receiver io.Writer) *Cli {
56-
c.receiver = receiver
59+
c.receiver = &threadsafe.WriterWrapper{W: receiver}
5760
return c
5861
}
5962

@@ -355,14 +358,12 @@ func (c *Cli) Execute(ctx context.Context, args ...string) (string, string, erro
355358
}
356359
}
357360

358-
stdout := new(strings.Builder)
359-
stderr := new(strings.Builder)
361+
stdout := threadsafe.Buffer{}
362+
stderr := threadsafe.Buffer{}
360363

361364
err := cmdutils.Command(ctx, "kubectl", args...).
362-
// For convenience, we set the stdout and stderr to the receiver
363-
// This can still be overwritten by consumers who use the commands
364-
WithStdout(stdout).
365-
WithStderr(stderr).Run().Cause()
365+
WithStdout(&stdout).
366+
WithStderr(&stderr).Run().Cause()
366367

367368
return stdout.String(), stderr.String(), err
368369
}

0 commit comments

Comments
 (0)