
Commit fb0af69

chore(release): merge branch 'hotfix-v1.0.1'
Creates patch release v1.0.1
2 parents 62f82ea + 7029312


53 files changed: +1236 -504 lines changed


.github/workflows/release_artifacts.yml

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@ on:
     branches:
       - master
       - 'release/**'
+      - 'hotfix-v**'
 
 jobs:
   kubectl-plugin:

.pre-commit-config.yaml

Lines changed: 0 additions & 7 deletions
@@ -53,10 +53,3 @@ repos:
         args: [ "--changes" ]
         pass_filenames: false
         language: system
-      - id: helm-develop-deploy
-        name: Helm Generator
-        description: Ensures the deploy is updated with the develop yamls
-        entry: ./scripts/generate-deploy-yamls.sh
-        args: [ "-c", "develop" ]
-        pass_filenames: false
-        language: system

chart/templates/csi-deployment.yaml

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ spec:
         imagePullPolicy: {{ .Values.mayastorCP.pullPolicy }}
         args:
         - "--csi-socket=/var/lib/csi/sockets/pluginproxy/csi.sock"
-        - "--rest-endpoint=http://$(REST_SERVICE_HOST):8081"{{ if .Values.base.jaeger.enabled }}
+        - "--rest-endpoint=http://rest:8081"{{ if .Values.base.jaeger.enabled }}
         - "--jaeger={{ .Values.base.jaeger.agent.name }}:{{ .Values.base.jaeger.agent.port }}"{{ end }}
         env:
         - name: RUST_LOG

chart/templates/msp-deployment.yaml

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ spec:
         image: {{ .Values.mayastorCP.registry }}mayadata/mcp-msp-operator:{{ .Values.mayastorCP.tag }}
         imagePullPolicy: {{ .Values.mayastorCP.pullPolicy }}
         args:
-        - "-e http://$(REST_SERVICE_HOST):8081"
+        - "-e http://rest:8081"
         - "--interval={{ .Values.base.cache_poll_period }}"{{ if .Values.base.jaeger.enabled }}
         - "--jaeger={{ .Values.base.jaeger.agent.name }}:{{ .Values.base.jaeger.agent.port }}"{{ end }}
         env:

common/src/opentelemetry.rs

Lines changed: 15 additions & 0 deletions
@@ -19,3 +19,18 @@ pub fn default_tracing_tags(git_commit: &str, cargo_version: &str) -> Vec<KeyVal
         KeyValue::new("crate.version", cargo_version.to_string()),
     ]
 }
+
+/// Name of the OTEL_BSP_MAX_EXPORT_BATCH_SIZE variable
+pub const OTEL_BSP_MAX_EXPORT_BATCH_SIZE_NAME: &str = "OTEL_BSP_MAX_EXPORT_BATCH_SIZE";
+/// The value of OTEL_BSP_MAX_EXPORT_BATCH_SIZE to be used with JAEGER
+pub const OTEL_BSP_MAX_EXPORT_BATCH_SIZE_JAEGER: &str = "64";
+/// Set the OTEL variables for a jaeger configuration
+pub fn set_jaeger_env() {
+    // if not set, default it to our jaeger value
+    if std::env::var(OTEL_BSP_MAX_EXPORT_BATCH_SIZE_NAME).is_err() {
+        std::env::set_var(
+            OTEL_BSP_MAX_EXPORT_BATCH_SIZE_NAME,
+            OTEL_BSP_MAX_EXPORT_BATCH_SIZE_JAEGER,
+        );
+    }
+}
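
The helper defaults OTEL_BSP_MAX_EXPORT_BATCH_SIZE to 64, presumably to keep each exported span batch small enough for the Jaeger agent, while leaving any operator-provided value untouched. A self-contained sketch of the only-default-when-unset behaviour (mirroring the function above, runnable on its own):

    const OTEL_BSP_MAX_EXPORT_BATCH_SIZE_NAME: &str = "OTEL_BSP_MAX_EXPORT_BATCH_SIZE";
    const OTEL_BSP_MAX_EXPORT_BATCH_SIZE_JAEGER: &str = "64";

    fn set_jaeger_env() {
        // std::env::var returns Err when the variable is unset (or not valid
        // unicode); only then do we fall back to the Jaeger-friendly value.
        if std::env::var(OTEL_BSP_MAX_EXPORT_BATCH_SIZE_NAME).is_err() {
            std::env::set_var(
                OTEL_BSP_MAX_EXPORT_BATCH_SIZE_NAME,
                OTEL_BSP_MAX_EXPORT_BATCH_SIZE_JAEGER,
            );
        }
    }

    fn main() {
        // Call before initialising the tracer so the batch span processor
        // reads the value at start-up.
        set_jaeger_env();
        assert_eq!(
            std::env::var(OTEL_BSP_MAX_EXPORT_BATCH_SIZE_NAME).unwrap(),
            "64"
        );
    }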

common/src/store/etcd_keep_alive.rs

Lines changed: 1 addition & 1 deletion
@@ -507,7 +507,7 @@ impl LeaseLockKeeperClocking<Locked> for EtcdSingletonLock {
 
 #[async_trait::async_trait]
 impl LeaseLockKeeperClocking<KeepAlive> for EtcdSingletonLock {
-    #[tracing::instrument(skip(self, state), err)]
+    #[tracing::instrument(level = "trace", skip(self, state), err)]
     async fn clock(&mut self, mut state: KeepAlive) -> LockStatesResult {
         state
             .keeper
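
#[tracing::instrument] records spans at INFO by default; adding level = "trace" demotes the per-tick keep-alive span so it only appears when trace-level logging is enabled for this module. A small stand-alone illustration (the tick function and filter are hypothetical, not from this commit):

    use tracing::Level;

    // With level = "trace", this span is only recorded when the subscriber's
    // filter enables TRACE for this target; at an INFO filter it is skipped.
    #[tracing::instrument(level = "trace")]
    fn tick(count: u32) {
        tracing::trace!("keep-alive tick {}", count);
    }

    fn main() {
        tracing_subscriber::fmt()
            .with_max_level(Level::INFO) // INFO filter: `tick` spans suppressed
            .init();
        tick(1); // emits nothing at this filter level
    }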

common/src/types/v0/message_bus/replica.rs

Lines changed: 1 addition & 0 deletions
@@ -158,6 +158,7 @@ pub struct CreateReplica {
 #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq)]
 pub struct ReplicaOwners {
     volume: Option<VolumeId>,
+    #[serde(skip)]
     nexuses: Vec<NexusId>,
 }
 impl ReplicaOwners {
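
With #[serde(skip)] the nexuses field is excluded from both serialization and deserialization: it never reaches the wire, and it is rebuilt as Vec::default() (empty) when the struct is read back. A self-contained illustration, with placeholder String ids standing in for VolumeId/NexusId:

    use serde::{Deserialize, Serialize};

    #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq)]
    struct ReplicaOwners {
        volume: Option<String>,
        #[serde(skip)]
        nexuses: Vec<String>,
    }

    fn main() {
        let owners = ReplicaOwners {
            volume: Some("vol-1".into()),
            nexuses: vec!["nexus-1".into()],
        };
        let json = serde_json::to_string(&owners).unwrap();
        // The skipped field never reaches the wire:
        assert_eq!(json, r#"{"volume":"vol-1"}"#);
        // ...and comes back as its Default (an empty Vec) on deserialize:
        let back: ReplicaOwners = serde_json::from_str(&json).unwrap();
        assert!(back.nexuses.is_empty());
    }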

common/src/types/v0/store/nexus_persistence.rs

Lines changed: 4 additions & 0 deletions
@@ -26,6 +26,10 @@ impl NexusInfo {
             None => false,
         }
     }
+    /// Check if no replica is healthy
+    pub fn no_healthy_replicas(&self) -> bool {
+        self.children.iter().all(|c| !c.healthy) || self.children.is_empty()
+    }
 }
 
 /// Definition of the child information that gets saved in the persistent
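
Note that all() on an empty iterator is vacuously true, so the is_empty() clause is already covered by the first condition and reads as belt-and-braces. A stand-alone sketch of the predicate's behaviour, with a minimal stand-in for the persisted child type:

    struct ChildInfo {
        healthy: bool,
    }

    struct NexusInfo {
        children: Vec<ChildInfo>,
    }

    impl NexusInfo {
        fn no_healthy_replicas(&self) -> bool {
            // Mirrors the commit: vacuously true for an empty child list.
            self.children.iter().all(|c| !c.healthy) || self.children.is_empty()
        }
    }

    fn main() {
        let none = NexusInfo { children: vec![] };
        let sick = NexusInfo { children: vec![ChildInfo { healthy: false }] };
        let ok = NexusInfo { children: vec![ChildInfo { healthy: true }] };
        assert!(none.no_healthy_replicas());
        assert!(sick.no_healthy_replicas());
        assert!(!ok.no_healthy_replicas());
    }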

control-plane/agents/common/src/errors.rs

Lines changed: 8 additions & 0 deletions
@@ -186,6 +186,8 @@ pub enum SvcError {
     ReplicaCreateNumber { id: String },
     #[snafu(display("No online replicas are available for Volume '{}'", id))]
     NoOnlineReplicas { id: String },
+    #[snafu(display("No healthy replicas are available for Volume '{}'", id))]
+    NoHealthyReplicas { id: String },
     #[snafu(display("Entry with key '{}' not found in the persistent store.", key))]
     StoreMissingEntry { key: String },
     #[snafu(display("The uuid '{}' for kind '{}' is not valid.", uuid, kind.to_string()))]
@@ -514,6 +516,12 @@ impl From<SvcError> for ReplyError {
                 source: desc.to_string(),
                 extra: error.full_string(),
             },
+            SvcError::NoHealthyReplicas { .. } => ReplyError {
+                kind: ReplyErrorKind::VolumeNoReplicas,
+                resource: ResourceKind::Volume,
+                source: desc.to_string(),
+                extra: error.full_string(),
+            },
             SvcError::ReplicaCreateNumber { .. } => ReplyError {
                 kind: ReplyErrorKind::ReplicaCreateNumber,
                 resource: ResourceKind::Volume,
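
The new variant distinguishes "no healthy replicas" from "no online replicas" while mapping to the same ReplyErrorKind::VolumeNoReplicas on the wire. A minimal sketch of where a caller might return it (the helper function is hypothetical; only the variant comes from this commit):

    #[derive(Debug)]
    enum SvcError {
        NoHealthyReplicas { id: String },
    }

    // Hypothetical selection step: fail with the typed error when the
    // volume has no healthy replicas left to pick from.
    fn pick_replica(volume_id: &str, healthy: &[&str]) -> Result<String, SvcError> {
        healthy
            .first()
            .map(|r| r.to_string())
            .ok_or_else(|| SvcError::NoHealthyReplicas {
                id: volume_id.to_string(),
            })
    }

    fn main() {
        let err = pick_replica("vol-1", &[]).unwrap_err();
        // The From<SvcError> impl above would map this to VolumeNoReplicas.
        println!("{:?}", err);
    }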

control-plane/agents/core/src/core/grpc.rs

Lines changed: 48 additions & 9 deletions
@@ -56,13 +56,27 @@ impl GrpcContext {
             comms_timeouts: comms_timeouts.clone(),
         })
     }
-    pub(crate) async fn lock(&self) -> tokio::sync::OwnedMutexGuard<()> {
+    /// Override the timeout config in the context for the given request
+    fn override_timeout<R: MessageIdTimeout>(&mut self, request: Option<R>) {
+        let timeout = request
+            .map(|r| r.timeout(self.comms_timeouts.request(), &bus()))
+            .unwrap_or_else(|| self.comms_timeouts.request());
+
+        self.endpoint = self
+            .endpoint
+            .clone()
+            .connect_timeout(self.comms_timeouts.connect() + Duration::from_millis(500))
+            .timeout(timeout);
+    }
+    pub(crate) async fn lock(&self) -> GrpcLockGuard {
         self.lock.clone().lock_owned().await
     }
     pub(crate) async fn connect(&self) -> Result<GrpcClient, SvcError> {
         GrpcClient::new(self).await
     }
-    pub(crate) async fn connect_locked(&self) -> Result<GrpcClientLocked, SvcError> {
+    pub(crate) async fn connect_locked(
+        &self,
+    ) -> Result<GrpcClientLocked, (GrpcLockGuard, SvcError)> {
         GrpcClientLocked::new(self).await
     }
 }
@@ -72,7 +86,7 @@ impl GrpcContext {
 pub(crate) struct GrpcClient {
     context: GrpcContext,
     /// gRPC Mayastor Client
-    pub(crate) client: MayaClient,
+    pub(crate) mayastor: MayaClient,
 }
 pub(crate) type MayaClient = MayastorClient<Channel>;
 impl GrpcClient {
@@ -96,23 +110,48 @@ impl GrpcClient {
 
         Ok(Self {
             context: context.clone(),
-            client,
+            mayastor: client,
         })
     }
 }
 
-/// Wrapper over all gRPC Clients types with implicit locking for serialization
+/// Async Lock guard for gRPC operations.
+/// It's used by the GrpcClientLocked to ensure there's only one operation in progress
+/// at a time while still allowing for multiple gRPC clients.
+type GrpcLockGuard = tokio::sync::OwnedMutexGuard<()>;
+
+/// Wrapper over all gRPC Clients types with implicit locking for serialization.
 pub(crate) struct GrpcClientLocked {
     /// gRPC auto CRUD guard lock
-    _lock: tokio::sync::OwnedMutexGuard<()>,
+    _lock: GrpcLockGuard,
     client: GrpcClient,
 }
 impl GrpcClientLocked {
-    pub(crate) async fn new(context: &GrpcContext) -> Result<Self, SvcError> {
-        let client = GrpcClient::new(context).await?;
+    /// Create new locked client from the given context
+    /// A connection is established with the timeouts specified from the context.
+    /// Only one `Self` is allowed at a time by making use of a lock guard.
+    pub(crate) async fn new(context: &GrpcContext) -> Result<Self, (GrpcLockGuard, SvcError)> {
+        let _lock = context.lock().await;
+
+        let client = match GrpcClient::new(context).await {
+            Ok(client) => client,
+            Err(error) => return Err((_lock, error)),
+        };
+
+        Ok(Self { _lock, client })
+    }
+    /// Reconnect the client to use for the given request
+    /// This is useful when we want to issue the next gRPC using a different timeout
+    /// todo: tower should allow us to handle this better by keeping the same "backend" client
+    /// but modifying the timeout layer?
+    pub(crate) async fn reconnect<R: MessageIdTimeout>(self, request: R) -> Result<Self, SvcError> {
+        let mut context = self.context.clone();
+        context.override_timeout(Some(request));
+
+        let client = GrpcClient::new(&context).await?;
 
         Ok(Self {
-            _lock: context.lock().await,
+            _lock: self._lock,
             client,
         })
     }
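
The reworked GrpcClientLocked::new acquires the node lock before dialing and hands the guard back alongside the error on failure, so the caller keeps exclusivity and can decide how to recover. A stripped-down sketch of that pattern (all names here are simplified stand-ins, not the real agent API):

    use std::sync::Arc;
    use tokio::sync::{Mutex, OwnedMutexGuard};

    type LockGuard = OwnedMutexGuard<()>;

    struct Client; // stand-in for the connected gRPC client

    // Acquire the per-node lock first, then connect; on failure return the
    // guard together with the error so exclusivity is not silently dropped.
    async fn connect_locked(
        lock: Arc<Mutex<()>>,
    ) -> Result<(LockGuard, Client), (LockGuard, std::io::Error)> {
        let guard = lock.lock_owned().await;
        match connect().await {
            Ok(client) => Ok((guard, client)),
            Err(error) => Err((guard, error)),
        }
    }

    async fn connect() -> Result<Client, std::io::Error> {
        Ok(Client) // pretend the dial succeeded
    }

    #[tokio::main]
    async fn main() {
        let lock = Arc::new(Mutex::new(()));
        let (_guard, _client) = connect_locked(lock).await.unwrap();
        // _guard lives as long as the client, serialising gRPC operations.
    }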
