Skip to content

Commit 1b6fd2a

Browse files
committed
cerbuerus chagnes
Signed-off-by: Paige Patton <prubenda@redhat.com>
1 parent 83d99bb commit 1b6fd2a

23 files changed

+383
-470
lines changed

krkn/cerberus/setup.py

Lines changed: 44 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,33 @@
22
import requests
33
import sys
44
import json
5+
from krkn_lib.utils.functions import get_yaml_item_value
56

7+
check_application_routes = ""
8+
cerberus_url = None
9+
exit_on_failure = False
10+
cerberus_enabled = False
611

7-
def get_status(config, start_time, end_time):
12+
def set_url(config):
13+
global exit_on_failure
14+
exit_on_failure = get_yaml_item_value(config["kraken"], "exit_on_failure", False)
15+
global cerberus_enabled
16+
cerberus_enabled = get_yaml_item_value(config["cerberus"],"cerberus_enabled", False)
17+
if cerberus_enabled:
18+
global cerberus_url
19+
cerberus_url = get_yaml_item_value(config["cerberus"],"cerberus_url", "")
20+
global check_application_routes
21+
check_application_routes = \
22+
get_yaml_item_value(config["cerberus"],"check_applicaton_routes","")
23+
24+
def get_status(start_time, end_time):
825
"""
926
Get cerberus status
1027
"""
1128
cerberus_status = True
1229
check_application_routes = False
1330
application_routes_status = True
14-
if config["cerberus"]["cerberus_enabled"]:
15-
cerberus_url = config["cerberus"]["cerberus_url"]
16-
check_application_routes = \
17-
config["cerberus"]["check_applicaton_routes"]
31+
if cerberus_enabled:
1832
if not cerberus_url:
1933
logging.error(
2034
"url where Cerberus publishes True/False signal "
@@ -61,40 +75,38 @@ def get_status(config, start_time, end_time):
6175
return cerberus_status
6276

6377

64-
def publish_kraken_status(config, failed_post_scenarios, start_time, end_time):
78+
def publish_kraken_status( start_time, end_time):
6579
"""
6680
Publish kraken status to cerberus
6781
"""
68-
cerberus_status = get_status(config, start_time, end_time)
82+
cerberus_status = get_status(start_time, end_time)
6983
if not cerberus_status:
70-
if failed_post_scenarios:
71-
if config["kraken"]["exit_on_failure"]:
72-
logging.info(
73-
"Cerberus status is not healthy and post action scenarios "
74-
"are still failing, exiting kraken run"
75-
)
76-
sys.exit(1)
77-
else:
78-
logging.info(
79-
"Cerberus status is not healthy and post action scenarios "
80-
"are still failing"
81-
)
84+
if exit_on_failure:
85+
logging.info(
86+
"Cerberus status is not healthy and post action scenarios "
87+
"are still failing, exiting kraken run"
88+
)
89+
sys.exit(1)
90+
else:
91+
logging.info(
92+
"Cerberus status is not healthy and post action scenarios "
93+
"are still failing"
94+
)
8295
else:
83-
if failed_post_scenarios:
84-
if config["kraken"]["exit_on_failure"]:
85-
logging.info(
86-
"Cerberus status is healthy but post action scenarios "
87-
"are still failing, exiting kraken run"
88-
)
89-
sys.exit(1)
90-
else:
91-
logging.info(
92-
"Cerberus status is healthy but post action scenarios "
93-
"are still failing"
94-
)
96+
if exit_on_failure:
97+
logging.info(
98+
"Cerberus status is healthy but post action scenarios "
99+
"are still failing, exiting kraken run"
100+
)
101+
sys.exit(1)
102+
else:
103+
logging.info(
104+
"Cerberus status is healthy but post action scenarios "
105+
"are still failing"
106+
)
95107

96108

97-
def application_status(cerberus_url, start_time, end_time):
109+
def application_status( start_time, end_time):
98110
"""
99111
Check application availability
100112
"""

krkn/scenario_plugins/abstract_scenario_plugin.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from krkn_lib.models.telemetry import ScenarioTelemetry
55
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
66

7-
from krkn import utils
7+
from krkn import utils, cerberus
88

99

1010
class AbstractScenarioPlugin(ABC):
@@ -13,7 +13,6 @@ def run(
1313
self,
1414
run_uuid: str,
1515
scenario: str,
16-
krkn_config: dict[str, any],
1716
lib_telemetry: KrknTelemetryOpenshift,
1817
scenario_telemetry: ScenarioTelemetry,
1918
) -> int:
@@ -78,10 +77,10 @@ def run_scenarios(
7877
logging.info(
7978
f"Running {self.__class__.__name__}: {self.get_scenario_types()} -> {scenario_config}"
8079
)
80+
start_time = int(time.time())
8181
return_value = self.run(
8282
run_uuid,
8383
scenario_config,
84-
krkn_config,
8584
telemetry,
8685
scenario_telemetry,
8786
)
@@ -114,6 +113,9 @@ def run_scenarios(
114113
if scenario_telemetry.exit_status != 0:
115114
failed_scenarios.append(scenario_config)
116115
scenario_telemetries.append(scenario_telemetry)
116+
end_time = int(time.time())
117+
cerberus.publish_kraken_status(start_time, end_time)
117118
logging.info(f"wating {wait_duration} before running the next scenario")
118119
time.sleep(wait_duration)
120+
119121
return failed_scenarios, scenario_telemetries

krkn/scenario_plugins/application_outage/application_outage_scenario_plugin.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,9 @@ def run(
1414
self,
1515
run_uuid: str,
1616
scenario: str,
17-
krkn_config: dict[str, any],
1817
lib_telemetry: KrknTelemetryOpenshift,
1918
scenario_telemetry: ScenarioTelemetry,
2019
) -> int:
21-
wait_duration = krkn_config["tunings"]["wait_duration"]
2220
try:
2321
with open(scenario, "r") as f:
2422
app_outage_config_yaml = yaml.full_load(f)
@@ -73,14 +71,8 @@ def run(
7371
policy_name, namespace
7472
)
7573

76-
logging.info(
77-
"End of scenario. Waiting for the specified duration: %s"
78-
% wait_duration
79-
)
80-
time.sleep(wait_duration)
81-
8274
end_time = int(time.time())
83-
cerberus.publish_kraken_status(krkn_config, [], start_time, end_time)
75+
8476
except Exception as e:
8577
logging.error(
8678
"ApplicationOutageScenarioPlugin exiting due to Exception %s" % e
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: networking.k8s.io/v1
2+
kind: NetworkPolicy
3+
metadata:
4+
name: krkn-deny-1
5+
spec:
6+
podSelector:
7+
matchLabels: {alertmanager: main}
8+
policyTypes: [Ingress, Egress]
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
import logging
2+
import os
3+
from pathlib import Path
4+
import arcaflow
5+
import yaml
6+
from krkn_lib.models.telemetry import ScenarioTelemetry
7+
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
8+
from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin
9+
from krkn.scenario_plugins.arcaflow.context_auth import ContextAuth
10+
11+
12+
class ArcaflowScenarioPlugin(AbstractScenarioPlugin):
13+
14+
def run(
15+
self,
16+
run_uuid: str,
17+
scenario: str,
18+
lib_telemetry: KrknTelemetryOpenshift,
19+
scenario_telemetry: ScenarioTelemetry,
20+
) -> int:
21+
try:
22+
engine_args = self.build_args(scenario)
23+
status_code = self.run_workflow(
24+
engine_args, lib_telemetry.get_lib_kubernetes().get_kubeconfig_path()
25+
)
26+
return status_code
27+
except Exception as e:
28+
logging.error("ArcaflowScenarioPlugin exiting due to Exception %s" % e)
29+
return 1
30+
31+
def get_scenario_types(self) -> [str]:
32+
return ["hog_scenarios", "arcaflow_scenario"]
33+
34+
def run_workflow(
35+
self, engine_args: arcaflow.EngineArgs, kubeconfig_path: str
36+
) -> int:
37+
self.set_arca_kubeconfig(engine_args, kubeconfig_path)
38+
exit_status = arcaflow.run(engine_args)
39+
return exit_status
40+
41+
def build_args(self, input_file: str) -> arcaflow.EngineArgs:
42+
"""sets the kubeconfig parsed by setArcaKubeConfig as an input to the arcaflow workflow"""
43+
current_path = Path().resolve()
44+
context = f"{current_path}/{Path(input_file).parent}"
45+
workflow = f"{context}/workflow.yaml"
46+
config = f"{context}/config.yaml"
47+
if not os.path.exists(context):
48+
raise Exception(
49+
"context folder for arcaflow workflow not found: {}".format(context)
50+
)
51+
if not os.path.exists(input_file):
52+
raise Exception(
53+
"input file for arcaflow workflow not found: {}".format(input_file)
54+
)
55+
if not os.path.exists(workflow):
56+
raise Exception(
57+
"workflow file for arcaflow workflow not found: {}".format(workflow)
58+
)
59+
if not os.path.exists(config):
60+
raise Exception(
61+
"configuration file for arcaflow workflow not found: {}".format(config)
62+
)
63+
64+
engine_args = arcaflow.EngineArgs()
65+
engine_args.context = context
66+
engine_args.config = config
67+
engine_args.workflow = workflow
68+
engine_args.input = f"{current_path}/{input_file}"
69+
return engine_args
70+
71+
def set_arca_kubeconfig(
72+
self, engine_args: arcaflow.EngineArgs, kubeconfig_path: str
73+
):
74+
75+
context_auth = ContextAuth()
76+
if not os.path.exists(kubeconfig_path):
77+
raise Exception("kubeconfig not found in {}".format(kubeconfig_path))
78+
79+
with open(kubeconfig_path, "r") as stream:
80+
try:
81+
kubeconfig = yaml.safe_load(stream)
82+
context_auth.fetch_auth_data(kubeconfig)
83+
except Exception as e:
84+
logging.error(
85+
"impossible to read kubeconfig file in: {}".format(kubeconfig_path)
86+
)
87+
raise e
88+
89+
kubeconfig_str = self.set_kubeconfig_auth(kubeconfig, context_auth)
90+
91+
with open(engine_args.input, "r") as stream:
92+
input_file = yaml.safe_load(stream)
93+
if "input_list" in input_file and isinstance(
94+
input_file["input_list"], list
95+
):
96+
for index, _ in enumerate(input_file["input_list"]):
97+
if isinstance(input_file["input_list"][index], dict):
98+
input_file["input_list"][index]["kubeconfig"] = kubeconfig_str
99+
else:
100+
input_file["kubeconfig"] = kubeconfig_str
101+
stream.close()
102+
with open(engine_args.input, "w") as stream:
103+
yaml.safe_dump(input_file, stream)
104+
105+
with open(engine_args.config, "r") as stream:
106+
config_file = yaml.safe_load(stream)
107+
if config_file["deployers"]["image"]["deployer_name"] == "kubernetes":
108+
kube_connection = self.set_kubernetes_deployer_auth(
109+
config_file["deployers"]["image"]["connection"], context_auth
110+
)
111+
config_file["deployers"]["image"]["connection"] = kube_connection
112+
with open(engine_args.config, "w") as stream:
113+
yaml.safe_dump(config_file, stream, explicit_start=True, width=4096)
114+
115+
def set_kubernetes_deployer_auth(
116+
self, deployer: any, context_auth: ContextAuth
117+
) -> any:
118+
if context_auth.clusterHost is not None:
119+
deployer["host"] = context_auth.clusterHost
120+
if context_auth.clientCertificateData is not None:
121+
deployer["cert"] = context_auth.clientCertificateData
122+
if context_auth.clientKeyData is not None:
123+
deployer["key"] = context_auth.clientKeyData
124+
if context_auth.clusterCertificateData is not None:
125+
deployer["cacert"] = context_auth.clusterCertificateData
126+
if context_auth.username is not None:
127+
deployer["username"] = context_auth.username
128+
if context_auth.password is not None:
129+
deployer["password"] = context_auth.password
130+
if context_auth.bearerToken is not None:
131+
deployer["bearerToken"] = context_auth.bearerToken
132+
return deployer
133+
134+
def set_kubeconfig_auth(self, kubeconfig: any, context_auth: ContextAuth) -> str:
135+
"""
136+
Builds an arcaflow-compatible kubeconfig representation and returns it as a string.
137+
In order to run arcaflow plugins in kubernetes/openshift the kubeconfig must contain client certificate/key
138+
and server certificate base64 encoded within the kubeconfig file itself in *-data fields. That is not always the
139+
case, infact kubeconfig may contain filesystem paths to those files, this function builds an arcaflow-compatible
140+
kubeconfig file and returns it as a string that can be safely included in input.yaml
141+
"""
142+
143+
if "current-context" not in kubeconfig.keys():
144+
raise Exception(
145+
"invalid kubeconfig file, impossible to determine current-context"
146+
)
147+
user_id = None
148+
cluster_id = None
149+
user_name = None
150+
cluster_name = None
151+
current_context = kubeconfig["current-context"]
152+
for context in kubeconfig["contexts"]:
153+
if context["name"] == current_context:
154+
user_name = context["context"]["user"]
155+
cluster_name = context["context"]["cluster"]
156+
if user_name is None:
157+
raise Exception(
158+
"user not set for context {} in kubeconfig file".format(current_context)
159+
)
160+
if cluster_name is None:
161+
raise Exception(
162+
"cluster not set for context {} in kubeconfig file".format(
163+
current_context
164+
)
165+
)
166+
167+
for index, user in enumerate(kubeconfig["users"]):
168+
if user["name"] == user_name:
169+
user_id = index
170+
for index, cluster in enumerate(kubeconfig["clusters"]):
171+
if cluster["name"] == cluster_name:
172+
cluster_id = index
173+
174+
if cluster_id is None:
175+
raise Exception(
176+
"no cluster {} found in kubeconfig users".format(cluster_name)
177+
)
178+
if "client-certificate" in kubeconfig["users"][user_id]["user"]:
179+
kubeconfig["users"][user_id]["user"][
180+
"client-certificate-data"
181+
] = context_auth.clientCertificateDataBase64
182+
del kubeconfig["users"][user_id]["user"]["client-certificate"]
183+
184+
if "client-key" in kubeconfig["users"][user_id]["user"]:
185+
kubeconfig["users"][user_id]["user"][
186+
"client-key-data"
187+
] = context_auth.clientKeyDataBase64
188+
del kubeconfig["users"][user_id]["user"]["client-key"]
189+
190+
if "certificate-authority" in kubeconfig["clusters"][cluster_id]["cluster"]:
191+
kubeconfig["clusters"][cluster_id]["cluster"][
192+
"certificate-authority-data"
193+
] = context_auth.clusterCertificateDataBase64
194+
del kubeconfig["clusters"][cluster_id]["cluster"]["certificate-authority"]
195+
kubeconfig_str = yaml.dump(kubeconfig)
196+
return kubeconfig_str

krkn/scenario_plugins/container/container_scenario_plugin.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ def run(
1818
self,
1919
run_uuid: str,
2020
scenario: str,
21-
krkn_config: dict[str, any],
2221
lib_telemetry: KrknTelemetryOpenshift,
2322
scenario_telemetry: ScenarioTelemetry,
2423
) -> int:
@@ -44,7 +43,6 @@ def run(
4443
return 1
4544
scenario_telemetry.affected_pods = result
4645

47-
# publish cerberus status
4846
except (RuntimeError, Exception):
4947
logging.error("ContainerScenarioPlugin exiting due to Exception %s")
5048
return 1

0 commit comments

Comments
 (0)