Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions api/v1/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@ import (
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

var _ = Describe("QdrantCluster", func() {
Context("API integration tests", func() {
const namespaceName = "test-namespace"
ctx := context.Background()
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespaceName,
},
}
BeforeEach(func() {
By("Creating the Namespace to perform the tests")
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespaceName,
},
}
err := k8sClient.Create(ctx, namespace)
Expect(err).To(Not(HaveOccurred()))
Expect(client.IgnoreAlreadyExists(err)).To(Not(HaveOccurred()))
})
It("should not flip ServicePerNode value on update", func() {
qc := QdrantCluster{
Expand All @@ -44,6 +45,26 @@ var _ = Describe("QdrantCluster", func() {
Expect(err).To(Not(HaveOccurred()))
Expect(DerefPointer(qc.Spec.ServicePerNode)).To(BeFalse())
})
It("should default OnDemandReplication to Off when omitted", func() {
qc := QdrantCluster{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespaceName,
Name: "test-cluster-on-demand-replication",
},
Spec: QdrantClusterSpec{
Id: "test-cluster-on-demand-replication",
Version: "v1.18.0",
Size: 1,
},
}
err := k8sClient.Create(ctx, &qc)
Expect(err).To(Not(HaveOccurred()))

created := &QdrantCluster{}
err = k8sClient.Get(ctx, client.ObjectKeyFromObject(&qc), created)
Expect(err).To(Not(HaveOccurred()))
Expect(created.Spec.OnDemandReplication).To(Equal(OnDemandReplicationOff))
})
})

})
Expand Down
23 changes: 23 additions & 0 deletions api/v1/qdrantcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ const (
// If the value is updated it will retrigger the restart.
// For historical reasons the key doesn't start with `operator.qdrant.com/`
RestartedAtAnnotationKey = "restartedAt"
// OnDemandReplicationRestartAnnotationKey is the annotation key to trigger an on-demand replication restart.
// The annotation should be placed on the QdrantCluster instance.
// The value should be a [RFC3339 formatted] date.
// If the value is updated it will retrigger the restart.
OnDemandReplicationRestartAnnotationKey = "operator.qdrant.com/on-demand-replication-restart"
// RecreateNodeAnnotationKey is the annotation key to recreate a certain node.
// The annotation should be placed on the pod created by the operator (for the node that needs to be recreated).
// It is allowed to add this annotation to multiple pods, the operator will handle them all.
Expand Down Expand Up @@ -58,6 +63,17 @@ const (
ByCountAndSize RebalanceStrategy = "by_count_and_size"
)

// OnDemandReplicationType specifies the on-demand replication restart mode.
// +kubebuilder:validation:Enum=Off;Auto;On
type OnDemandReplicationType string

//goland:noinspection GoUnusedConst
const (
// OnDemandReplicationOff selects the normal restart behavior: pods are restarted directly.
OnDemandReplicationOff OnDemandReplicationType = "Off"
// OnDemandReplicationAuto makes the operator check telemetry for non-replicated shards
// and use the recreate-node flow if any are found.
OnDemandReplicationAuto OnDemandReplicationType = "Auto"
// OnDemandReplicationOn always uses the recreate-node flow for eligible restart triggers.
OnDemandReplicationOn OnDemandReplicationType = "On"
)

// QdrantClusterSpec defines the desired state of QdrantCluster
// +kubebuilder:pruning:PreserveUnknownFields
type QdrantClusterSpec struct {
Expand Down Expand Up @@ -136,6 +152,13 @@ type QdrantClusterSpec struct {
// If unset, the operator is going to restart nodes concurrently if none of the collections is replicated.
// +optional
RestartAllPodsConcurrently *bool `json:"restartAllPodsConcurrently,omitempty"`
// OnDemandReplication specifies the on-demand replication restart mode.
// Off (default): Normal restart behavior. Pods are restarted directly.
// Auto: The operator checks telemetry for non-replicated shards. If found, uses the recreate-node flow.
// On: Always uses the recreate-node flow for eligible restart triggers.
// +kubebuilder:default=Off
// +optional
OnDemandReplication OnDemandReplicationType `json:"onDemandReplication,omitempty"`
// If StartupDelaySeconds is set (> 0), an additional 'sleep <value>' will be emitted to the pod startup.
// The sleep will be added when a pod is restarted, it will not force any pod to restart.
// This feature can be used for debugging the core, e.g. if a pod is in a crash loop, it provides a way
Expand Down
8 changes: 8 additions & 0 deletions api/v1/qdrantcluster_types_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
package v1

import (
"encoding/json"
"fmt"
"testing"

"github.com/stretchr/testify/assert"
"k8s.io/utils/ptr"
)

// TestQdrantClusterSpecJSONOmitsUnsetOnDemandReplication verifies that a
// zero-value QdrantClusterSpec marshals to JSON without an
// "onDemandReplication" key.
func TestQdrantClusterSpecJSONOmitsUnsetOnDemandReplication(t *testing.T) {
	var spec QdrantClusterSpec

	encoded, marshalErr := json.Marshal(spec)
	assert.NoError(t, marshalErr)

	assert.NotContains(t, string(encoded), "onDemandReplication")
}

func TestValidate(t *testing.T) {
testCases := []struct {
name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,18 @@ spec:
description: NodeSelector specifies the node selector for each Qdrant
node.
type: object
onDemandReplication:
default: "Off"
description: |-
OnDemandReplication specifies the on-demand replication restart mode.
Off (default): Normal restart behavior. Pods are restarted directly.
Auto: The operator checks telemetry for non-replicated shards. If found, uses the recreate-node flow.
On: Always uses the recreate-node flow for eligible restart triggers.
enum:
- "Off"
- Auto
- "On"
type: string
pauses:
description: |-
Pauses specifies a list of pause request by developer for manual maintenance.
Expand Down
12 changes: 12 additions & 0 deletions crds/qdrant.io_qdrantclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,18 @@ spec:
description: NodeSelector specifies the node selector for each Qdrant
node.
type: object
onDemandReplication:
default: "Off"
description: |-
OnDemandReplication specifies the on-demand replication restart mode.
Off (default): Normal restart behavior. Pods are restarted directly.
Auto: The operator checks telemetry for non-replicated shards. If found, uses the recreate-node flow.
On: Always uses the recreate-node flow for eligible restart triggers.
enum:
- "Off"
- Auto
- "On"
type: string
pauses:
description: |-
Pauses specifies a list of pause request by developer for manual maintenance.
Expand Down
20 changes: 20 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,25 @@ _Appears in:_
| `snapshotsPVCStatus` _[NodePVCStatus](#nodepvcstatus)_ | Status of the snapshots storage PVC | | Optional: \{\} <br /> |


#### OnDemandReplicationType

_Underlying type:_ _string_

OnDemandReplicationType specifies the on-demand replication restart mode.

_Validation:_
- Enum: [Off Auto On]

_Appears in:_
- [QdrantClusterSpec](#qdrantclusterspec)

| Field | Description |
| --- | --- |
| `Off` | |
| `Auto` | |
| `On` | |


#### Pause


Expand Down Expand Up @@ -976,6 +995,7 @@ _Appears in:_
| `topologySpreadConstraints` _[TopologySpreadConstraint](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#topologyspreadconstraint-v1-core)_ | TopologySpreadConstraints specifies the topology spread constraints for the cluster. | | Optional: \{\} <br /> |
| `podDisruptionBudget` _[PodDisruptionBudgetSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#poddisruptionbudgetspec-v1-policy)_ | PodDisruptionBudget specifies the pod disruption budget for the cluster. | | Optional: \{\} <br /> |
| `restartAllPodsConcurrently` _boolean_ | RestartAllPodsConcurrently specifies whether to restart all pods concurrently (also called one-shot-restart).<br />If enabled, all the pods in the cluster will be restarted concurrently in situations where multiple pods<br />need to be restarted, like when RestartedAtAnnotationKey is added/updated or the Qdrant version needs to be upgraded.<br />This helps sharded but not replicated clusters to reduce downtime to a possible minimum during restart.<br />If unset, the operator is going to restart nodes concurrently if none of the collections is replicated. | | Optional: \{\} <br /> |
| `onDemandReplication` _[OnDemandReplicationType](#ondemandreplicationtype)_ | OnDemandReplication specifies the on-demand replication restart mode.<br />Off (default): Normal restart behavior. Pods are restarted directly.<br />Auto: The operator checks telemetry for non-replicated shards. If found, uses the recreate-node flow.<br />On: Always uses the recreate-node flow for eligible restart triggers. | Off | Enum: [Off Auto On] <br />Optional: \{\} <br /> |
| `startupDelaySeconds` _integer_ | If StartupDelaySeconds is set (> 0), an additional 'sleep <value>' will be emitted to the pod startup.<br />The sleep will be added when a pod is restarted, it will not force any pod to restart.<br />This feature can be used for debugging the core, e.g. if a pod is in a crash loop, it provides a way<br />to inspect the attached storage. | | Optional: \{\} <br /> |
| `rebalanceStrategy` _[RebalanceStrategy](#rebalancestrategy)_ | RebalanceStrategy specifies the strategy to use for automatically rebalancing shards in the cluster.<br />Cluster-manager needs to be enabled for this feature to work. | | Enum: [by_count by_size by_count_and_size] <br />Optional: \{\} <br /> |
| `readClusters` _[ReadCluster](#readcluster) array_ | ReadClusters specifies the read clusters for this cluster to synchronize.<br />Cluster-manager needs to be enabled for this feature to work. | | Optional: \{\} <br /> |
Expand Down
Loading