Skip to content

RDR changes for 4.19 #12073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 49 additions & 9 deletions ocs_ci/deployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
from ocs_ci.framework.logger_helper import log_step
from ocs_ci.helpers.dr_helpers import (
configure_drcluster_for_fencing,
create_service_exporter,
validate_storage_cluster_peer_state,
verify_volsync,
)
from ocs_ci.ocs import constants, ocp, defaults, registry
from ocs_ci.ocs.cluster import (
Expand Down Expand Up @@ -1691,6 +1694,20 @@ def deploy_ocs_via_operator(self, image=None):
merge_dict(
cluster_data, {"metadata": {"annotations": rdr_bluestore_annotation}}
)
if (
version.get_semantic_ocs_version_from_config() >= version.VERSION_4_19
and config.MULTICLUSTER.get("multicluster_mode") == "regional-dr"
):
api_server_exported_address_annotation = {
"ocs.openshift.io/api-server-exported-address": (
f'{config.ENV_DATA["cluster_name"]}.'
f"ocs-provider-server.openshift-storage.svc.clusterset.local:50051"
)
}
merge_dict(
cluster_data,
{"metadata": {"annotations": api_server_exported_address_annotation}},
)
if config.ENV_DATA.get("noobaa_external_pgsql"):
log_step(
"Creating external pgsql DB for NooBaa and correct StorageCluster data"
Expand Down Expand Up @@ -2953,25 +2970,39 @@ def deploy(self):

@retry(ResourceWrongStatusException, tries=10, delay=5)
def configure_rbd(self):
    """
    Verify that the RBD resources needed for mirroring are in the expected state

    For ODF versions >= 4.19 the phase of every cephblockpoolradosnamespaces
    resource in the cluster namespace must be ``constants.STATUS_READY``.
    For older versions ``spec.mirroring.enabled`` of the CephBlockPool owned
    by the StorageCluster must be "true".

    The query is executed through ``run_cmd_multicluster`` with the indexes
    returned by ``get_all_acm_and_recovery_indexes()`` skipped. The method is
    wrapped in ``@retry`` for ``ResourceWrongStatusException``
    (tries=10, delay=5).

    Raises:
        ResourceWrongStatusException: if a queried cluster returns no value
            or a value different from the expected state.

    """
    namespace = config.ENV_DATA["cluster_namespace"]
    odf_running_version = version.get_semantic_ocs_version_from_config()
    if odf_running_version >= version.VERSION_4_19:
        cmd = (
            f"oc get cephblockpoolradosnamespaces -n {namespace}"
            " -o=jsonpath='{.items[*].status.phase}'"
        )
        resource_name = constants.CEPHBLOCKPOOLRADOSNS
        expected_state = constants.STATUS_READY
    else:
        st_string = '{.items[?(@.metadata.ownerReferences[*].kind=="StorageCluster")].spec.mirroring.enabled}'
        cmd = f"oc get CephBlockPool -n {namespace}" f" -o=jsonpath='{st_string}'"
        resource_name = constants.CEPHBLOCKPOOL
        expected_state = "true"

    out_list = run_cmd_multicluster(
        cmd, skip_index=get_all_acm_and_recovery_indexes()
    )
    # NOTE(review): assumes out_list is positionally aligned with
    # config.clusters and holds a falsy placeholder for every skipped
    # cluster - TODO confirm against run_cmd_multicluster.
    # enumerate() keeps the index in step with the position even when an
    # entry is skipped (the previous ``index = +1`` always assigned 1).
    for index, out in enumerate(out_list):
        if not out:
            continue
        output = out.stdout.decode()
        logger.info(output)
        # The jsonpath query prints one value per matching item separated by
        # whitespace, so every value has to match, and an empty answer is
        # treated as "not in the expected state".
        states = output.split()
        if not states or any(state != expected_state for state in states):
            logger.error(
                f"On cluster {config.clusters[index].ENV_DATA['cluster_name']}"
            )
            raise ResourceWrongStatusException(
                resource_or_name=resource_name,
                expected=expected_state,
                got=output,
            )

Expand Down Expand Up @@ -3798,14 +3829,23 @@ def deploy(self):
# Enable MCO console plugin
enable_mco_console_plugin()
config.switch_acm_ctx()
odf_running_version = version.get_semantic_ocs_version_from_config()
if odf_running_version >= version.VERSION_4_19:
# create service exporter
create_service_exporter()

# RBD specific dr deployment
if self.rbd:
rbddops = RBDDRDeployOps()
self.configure_mirror_peer()
rbddops.deploy()
self.enable_acm_observability()

self.deploy_dr_policy()
update_volsync_channel()
if odf_running_version >= version.VERSION_4_19:
# validate storage cluster peer state
validate_storage_cluster_peer_state()
verify_volsync()

# Enable cluster backup on both ACMs
for i in acm_indexes:
Expand Down
88 changes: 88 additions & 0 deletions ocs_ci/helpers/dr_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2097,3 +2097,91 @@ def wait_for_vrg_state(
)
logger.info(error_msg)
raise TimeoutExpiredError(error_msg)


def validate_storage_cluster_peer_state():
    """
    Validate Storage cluster peer state on every non-ACM cluster

    Switches the context to each cluster returned by
    ``get_non_acm_cluster_config()`` and polls
    ``check_storage_cluster_peer_state`` every 5 seconds for up to 300
    seconds. The context that was active on entry is restored afterwards,
    also when the validation fails.

    Raises:
        TimeoutExpiredError: in case the storage cluster peer does not reach
            the 'Peered' state within the time limit.

    """
    restore_index = config.cur_index
    try:
        for cluster in get_non_acm_cluster_config():
            config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
            logger.info("Validating Storage Cluster Peer status")
            sample = TimeoutSampler(
                timeout=300,
                sleep=5,
                func=check_storage_cluster_peer_state,
            )
            if not sample.wait_for_func_status(result=True):
                error_msg = (
                    "Storage cluster peer status does not have expected values within the time "
                    f"limit on cluster {cluster.ENV_DATA['cluster_name']}"
                )
                logger.error(error_msg)
                raise TimeoutExpiredError(error_msg)
    finally:
        # Do not leave the caller on a managed cluster context when the
        # validation raises.
        config.switch_ctx(restore_index)


def check_storage_cluster_peer_state():
    """
    Check the Storage cluster peer state on the current cluster context

    This is a single, non-waiting check. It is kept separate from
    ``validate_storage_cluster_peer_state`` because that function passes it
    as the ``func`` polled by ``TimeoutSampler`` on each cluster.

    Returns:
        bool: True if the first StorageClusterPeer in the cluster namespace
            reports state 'Peered', otherwise False. False is also returned
            when no StorageClusterPeer exists yet or it has no status.

    """
    storage_cluster_peer = ocp.OCP(
        kind=constants.STORAGECLUSTERPEER,
        namespace=config.ENV_DATA["cluster_namespace"],
    )
    peers = storage_cluster_peer.get().get("items") or []
    if not peers:
        # The resource may not have been created yet; report "not peered"
        # instead of failing with IndexError.
        logger.warning("No StorageClusterPeer resource found")
        return False
    # The status section can be absent until the resource is reconciled.
    storage_cluster_peer_status = (peers[0].get("status") or {}).get("state")
    if storage_cluster_peer_status == constants.STATUS_PEERED:
        return True
    logger.warning(f"storage cluster peer state is {storage_cluster_peer_status}")
    return False


def create_service_exporter():
    """
    Create Service exporter on every non-ACM cluster

    Switches the context to each cluster returned by
    ``get_non_acm_cluster_config()`` and runs ``oc create -f`` with the
    manifest at ``constants.DR_SERVICE_EXPORTER``. The context that was
    active on entry is restored afterwards, also when the command fails.
    """
    restore_index = config.cur_index
    try:
        for cluster in get_non_acm_cluster_config():
            config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
            logger.info("Creating Service exporter")
            run_cmd(f"oc create -f {constants.DR_SERVICE_EXPORTER}")
    finally:
        # Do not leave the caller on a managed cluster context when the
        # command raises.
        config.switch_ctx(restore_index)


def verify_volsync():
    """
    Verify volsync pod is created in volsync-system namespace

    On each cluster returned by ``get_non_acm_cluster_config()`` waits up to
    600 seconds for one pod matching ``constants.VOLSYNC_LABEL`` to be in
    'Running' condition in ``constants.VOLSYNC_SYSTEM_NAMESPACE``. The
    context that was active on entry is restored afterwards, also when the
    verification fails.

    Raises:
        AssertionError: if ``wait_for_resource`` returns a falsy result.

    """
    restore_index = config.cur_index
    try:
        for cluster in get_non_acm_cluster_config():
            config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
            logger.info(
                f"Verifying volsync pod in namespace {constants.VOLSYNC_SYSTEM_NAMESPACE}"
            )
            pod = ocp.OCP(
                kind=constants.POD, namespace=constants.VOLSYNC_SYSTEM_NAMESPACE
            )
            assert pod.wait_for_resource(
                condition="Running",
                selector=constants.VOLSYNC_LABEL,
                resource_count=1,
                timeout=600,
            ), (
                f"volsync pod is not Running in namespace "
                f"{constants.VOLSYNC_SYSTEM_NAMESPACE} "
                f"on cluster {cluster.ENV_DATA['cluster_name']}"
            )
    finally:
        # Do not leave the caller on a managed cluster context when the
        # verification raises.
        config.switch_ctx(restore_index)
config.switch_ctx(restore_index)
5 changes: 5 additions & 0 deletions ocs_ci/ocs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
STATUS_READY = "Ready"
STATUS_PROGRESSING = "Progressing"
PEER_READY = "Peer ready"
STATUS_PEERED = "Peered"
STATUS_PENDING = "Pending"
STATUS_CONTAINER_CREATING = "ContainerCreating"
STATUS_AVAILABLE = "Available"
Expand Down Expand Up @@ -174,6 +175,7 @@
STORAGECLASS = "StorageClass"
DEVICECLASS = "deviceClass"
STORAGESYSTEM = "StorageSystem"
STORAGECLUSTERPEER = "StorageClusterPeer"
PV = "PersistentVolume"
PVC = "PersistentVolumeClaim"
POD = "Pod"
Expand Down Expand Up @@ -293,6 +295,7 @@
TEST_FILES_BUCKET = "ocsci-test-files"
ROOK_REPOSITORY = "https://github.com/rook/rook.git"
OPENSHIFT_STORAGE_NAMESPACE = "openshift-storage"
VOLSYNC_SYSTEM_NAMESPACE = "volsync-system"
OPENSHIFT_NAMESPACE = "openshift"
OPENSHIFT_STORAGE_CLIENT_NAMESPACE = "openshift-storage-client"
OPENSHIFT_STORAGE_EXTENDED_NAMESPACE = "openshift-storage-extended"
Expand Down Expand Up @@ -660,6 +663,7 @@
NOOBAA_CNPG_POD_LABEL = "app.kubernetes.io/name=cloudnative-pg"
ROOK_CEPH_DETECT_VERSION_LABEL = "app=rook-ceph-detect-version"
CEPH_FILE_CONTROLLER_DETECT_VERSION_LABEL = "app=ceph-file-controller-detect-version"
VOLSYNC_LABEL = "app.kubernetes.io/name=volsync"
CONTROLLER_DETECT_VERSION_NAME = "controller-detect-version"
OSD_KEY_ROTATION_POD_NAME = "rook-ceph-osd-key-rotation"
ROOK_CEPH_DETECT_VERSION_POD_NAME = "rook-ceph-detect-version"
Expand Down Expand Up @@ -1283,6 +1287,7 @@
TEMPLATE_MULTICLUSTER_DIR, "openshift_dr_system_operatorgroup.yaml"
)
ACM_DPA = os.path.join(TEMPLATE_MULTICLUSTER_DIR, "dpa_acm.yaml")
DR_SERVICE_EXPORTER = os.path.join(TEMPLATE_MULTICLUSTER_DIR, "service_exporter.yaml")
DR_POLICY_ACM_HUB = os.path.join(TEMPLATE_MULTICLUSTER_DIR, "dr_policy_acm_hub.yaml")
ODR_S3_SECRET_YAML = os.path.join(TEMPLATE_MULTICLUSTER_DIR, "odr_s3_secret.yaml")
OPENSHIFT_DR_SYSTEM_NAMESPACE = "openshift-dr-system"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# ServiceExport for the ocs-provider-server service in the
# openshift-storage namespace.
apiVersion: multicluster.x-k8s.io/v1alpha1
kind: ServiceExport
metadata:
  name: ocs-provider-server
  namespace: openshift-storage