diff --git a/README.md b/README.md index 930d6256..e9e9f089 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ Scenario type | Kubernetes [Network_Chaos](docs/network_chaos.md) | :heavy_check_mark: | [ManagedCluster Scenarios](docs/managedcluster_scenarios.md) | :heavy_check_mark: | [Service Hijacking Scenarios](docs/service_hijacking_scenarios.md) | :heavy_check_mark: | +[SYN Flood Scenarios](docs/syn_flood_scenarios.md) | :heavy_check_mark: | ### Kraken scenario pass/fail criteria and report diff --git a/config/config.yaml b/config/config.yaml index cedb1aee..3e918f7e 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -44,6 +44,8 @@ kraken: - scenarios/openshift/network_chaos.yaml - service_hijacking: - scenarios/kube/service_hijacking.yaml + - syn_flood: + - scenarios/kube/syn_flood.yaml cerberus: cerberus_enabled: False # Enable it when cerberus is previously installed diff --git a/docs/syn_flood_scenarios.md b/docs/syn_flood_scenarios.md new file mode 100644 index 00000000..15fe779d --- /dev/null +++ b/docs/syn_flood_scenarios.md @@ -0,0 +1,33 @@ +### SYN Flood Scenarios + +This scenario generates a substantial amount of TCP traffic directed at one or more Kubernetes services within +the cluster to test the server's resiliency under extreme traffic conditions. +It can also target hosts outside the cluster by specifying a reachable IP address or hostname. +This scenario leverages the distributed nature of Kubernetes clusters to instantiate multiple instances +of the same pod against a single host, significantly increasing the effectiveness of the attack. +The configuration also allows for the specification of multiple node selectors, enabling Kubernetes to schedule +the attacker pods on a user-defined subset of nodes to make the test more realistic. 
+ + ```yaml +packet-size: 120 # hping3 packet size +window-size: 64 # hping3 TCP window size +duration: 10 # chaos scenario duration +namespace: default # namespace where the target service(s) are deployed +target-service: target-svc # target service name (if set target-service-label must be empty) +target-port: 80 # target service TCP port +target-service-label : "" # target service label, can be used to target multiple targets at the same time + # if they have the same label set (if set target-service must be empty) +number-of-pods: 2 # number of attacker pods instantiated per each target +image: quay.io/krkn-chaos/krkn-syn-flood # syn flood attacker container image +attacker-nodes: # this will set the node affinity to schedule the attacker pods. Per each node label selector + # can be specified multiple values in this way the kube scheduler will schedule the attacker pods + # in the best way possible based on the provided labels. Multiple labels can be specified + kubernetes.io/hostname: + - host_1 + - host_2 + kubernetes.io/os: + - linux + + ``` + +The attacker container source code is available [here](https://github.com/krkn-chaos/krkn-syn-flood). 
import logging
import os.path
import time
from typing import Any

import krkn_lib.utils
import yaml
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.telemetry import ScenarioTelemetry
from krkn_lib.telemetry.k8s import KrknTelemetryKubernetes

# Scenario parameters that must be present AND non-empty in the config file.
_REQUIRED_VALUES = [
    "packet-size",
    "window-size",
    "duration",
    "namespace",
    "number-of-pods",
    "target-port",
    "image",
]
# Parameters that must be present but may be empty: exactly one of the two
# must be set, which is validated separately below.
_REQUIRED_KEYS = ["target-service", "target-service-label"]


def run(scenarios_list: list[str],
        krkn_kubernetes: KrknKubernetes,
        telemetry: KrknTelemetryKubernetes) -> tuple[list[str], list[ScenarioTelemetry]]:
    """
    Run the SYN flood chaos scenarios.

    For each scenario file, deploys ``number-of-pods`` attacker pods per
    target service and then polls until every attacker pod has stopped
    running (the attack duration is enforced inside the attacker pod).

    :param scenarios_list: paths of the scenario yaml files to run
    :param krkn_kubernetes: client used to deploy and monitor the attacker pods
    :param telemetry: telemetry collector for the scenario results
    :return: tuple of (failed scenario files, collected scenario telemetries)
    """
    scenario_telemetries: list[ScenarioTelemetry] = []
    failed_post_scenarios: list[str] = []
    for scenario in scenarios_list:
        scenario_telemetry = ScenarioTelemetry()
        scenario_telemetry.scenario = scenario
        scenario_telemetry.start_timestamp = time.time()
        telemetry.set_parameters_base64(scenario_telemetry, scenario)

        try:
            pod_names: list[str] = []
            config = parse_config(scenario)
            if config["target-service-label"]:
                # A label may match several services; attack all of them.
                target_services = krkn_kubernetes.select_service_by_label(
                    config["namespace"], config["target-service-label"])
            else:
                target_services = [config["target-service"]]

            for target in target_services:
                if not krkn_kubernetes.service_exists(target, config["namespace"]):
                    raise Exception(f"{target} service not found")
                for _ in range(config["number-of-pods"]):
                    pod_name = "syn-flood-" + krkn_lib.utils.get_random_string(10)
                    krkn_kubernetes.deploy_syn_flood(pod_name,
                                                     config["namespace"],
                                                     config["image"],
                                                     target,
                                                     config["target-port"],
                                                     config["packet-size"],
                                                     config["window-size"],
                                                     config["duration"],
                                                     config["attacker-nodes"]
                                                     )
                    pod_names.append(pod_name)

            logging.info("waiting all the attackers to finish:")
            # Poll once per second until every attacker pod has terminated.
            finished_pods: set[str] = set()
            while finished_pods != set(pod_names):
                for pod_name in pod_names:
                    if not krkn_kubernetes.is_pod_running(pod_name, config["namespace"]):
                        finished_pods.add(pod_name)
                time.sleep(1)

        except Exception as e:
            logging.error(f"Failed to run syn flood scenario {scenario}: {e}")
            failed_post_scenarios.append(scenario)
            scenario_telemetry.exit_status = 1
        else:
            scenario_telemetry.exit_status = 0
        scenario_telemetry.end_timestamp = time.time()
        scenario_telemetries.append(scenario_telemetry)
    return failed_post_scenarios, scenario_telemetries


def parse_config(scenario_file: str) -> dict[str, Any]:
    """
    Load and validate a SYN flood scenario yaml file.

    :param scenario_file: path of the scenario yaml file
    :return: the parsed, validated scenario config
    :raises Exception: if the file is missing, is not valid yaml, or fails
        any of the parameter validations
    """
    if not os.path.exists(scenario_file):
        raise Exception(f"failed to load scenario file {scenario_file}")

    try:
        with open(scenario_file) as stream:
            config = yaml.safe_load(stream)
    except Exception as e:
        # Chain the cause so the original yaml error is not lost.
        raise Exception(f"{scenario_file} is not a valid yaml file") from e

    missing = [key for key in _REQUIRED_VALUES if not check_key_value(config, key)]
    missing.extend(key for key in _REQUIRED_KEYS if key not in config)
    if missing:
        raise Exception(f"{','.join(missing)} parameter(s) are missing")

    if not config["target-service"] and not config["target-service-label"]:
        raise Exception("you have either to set a target service or a label")
    if config["target-service"] and config["target-service-label"]:
        raise Exception("you cannot select both target-service and target-service-label")

    # BUG FIX: the original tested the literal string 'attacker-nodes'
    # (always truthy) instead of checking the key, so a config that omitted
    # attacker-nodes crashed with a KeyError.  The setting is optional:
    # validate it only when it is present and non-null, and default it so
    # run() can always read config["attacker-nodes"].
    if config.get("attacker-nodes") is not None and not is_node_affinity_correct(config["attacker-nodes"]):
        raise Exception("attacker-nodes format is not correct")
    config.setdefault("attacker-nodes", None)
    return config


def check_key_value(dictionary: dict[str, Any], key: str) -> bool:
    """Return True if `key` is present with a non-None, non-empty-string value."""
    value = dictionary.get(key)
    return value is not None and value != ""


def is_node_affinity_correct(obj: Any) -> bool:
    """Return True if `obj` is a dict mapping string label names to lists of values."""
    if not isinstance(obj, dict):
        return False
    return all(isinstance(key, str) and isinstance(value, list)
               for key, value in obj.items())
chaos_telemetry.scenarios.extend(scenario_telemetries) # Check for critical alerts when enabled post_critical_alerts = 0 diff --git a/scenarios/kube/syn_flood.yaml b/scenarios/kube/syn_flood.yaml new file mode 100644 index 00000000..2cb9c1bc --- /dev/null +++ b/scenarios/kube/syn_flood.yaml @@ -0,0 +1,16 @@ +packet-size: 120 # hping3 packet size +window-size: 64 # hping3 TCP window size +duration: 10 # chaos scenario duration +namespace: default # namespace where the target service(s) are deployed +target-service: elasticsearch # target service name (if set target-service-label must be empty) +target-port: 9200 # target service TCP port +target-service-label : "" # target service label, can be used to target multiple targets at the same time + # if they have the same label set (if set target-service must be empty) +number-of-pods: 2 # number of attacker pods instantiated per each target +image: quay.io/krkn-chaos/krkn-syn-flood:v1.0.0 # syn flood attacker container image +attacker-nodes: # this will set the node affinity to schedule the attacker pods. Per each node label selector + node-role.kubernetes.io/worker: # can be specified multiple values in this way the kube scheduler will schedule the attacker pods + - "" # in the best way possible based on the provided labels. Multiple labels can be specified + # set empty value `attacker-nodes: {}` to let kubernetes schedule the pods + +