forked from krkn-chaos/krkn
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Service hijacking scenario (krkn-chaos#617)
* WIP: service hijacking scenario Signed-off-by: Tullio Sebastiani <[email protected]> * wip Signed-off-by: Tullio Sebastiani <[email protected]> * error handling Signed-off-by: Tullio Sebastiani <[email protected]> adapted run_raken.py Signed-off-by: Tullio Sebastiani <[email protected]> * restored config.yaml Signed-off-by: Tullio Sebastiani <[email protected]> * added funtest Signed-off-by: Tullio Sebastiani <[email protected]> test fix Signed-off-by: Tullio Sebastiani <[email protected]> fix Signed-off-by: Tullio Sebastiani <[email protected]> fixed test Signed-off-by: Tullio Sebastiani <[email protected]> fix Signed-off-by: Tullio Sebastiani <[email protected]> fix test Signed-off-by: Tullio Sebastiani <[email protected]> fixed funtest Signed-off-by: Tullio Sebastiani <[email protected]> funtest fix Signed-off-by: Tullio Sebastiani <[email protected]> minor nit Signed-off-by: Tullio Sebastiani <[email protected]> added explicit curl method Signed-off-by: Tullio Sebastiani <[email protected]> push Signed-off-by: Tullio Sebastiani <[email protected]> fix Signed-off-by: Tullio Sebastiani <[email protected]> restored all funtests Signed-off-by: Tullio Sebastiani <[email protected]> added mime type test Signed-off-by: Tullio Sebastiani <[email protected]> fixed pipeline Signed-off-by: Tullio Sebastiani <[email protected]> commented unit Signed-off-by: Tullio Sebastiani <[email protected]> utf-8 Signed-off-by: Tullio Sebastiani <[email protected]> test restored Signed-off-by: Tullio Sebastiani <[email protected]> fix test pipeline Signed-off-by: Tullio Sebastiani <[email protected]> * documentation Signed-off-by: Tullio Sebastiani <[email protected]> * krkn-lib 2.1.3 Signed-off-by: Tullio Sebastiani <[email protected]> * added other funtests to main merge to collect coverage Signed-off-by: Tullio Sebastiani <[email protected]> --------- Signed-off-by: Tullio Sebastiani <[email protected]>
- Loading branch information
1 parent
2610a7a
commit a142f6e
Showing
12 changed files
with
388 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Pod running a single nginx container; its label is what the Service below selects.
apiVersion: v1 | ||
kind: Pod | ||
metadata: | ||
name: nginx | ||
labels: | ||
# Label matched by the nginx-service selector.
app.kubernetes.io/name: proxy | ||
spec: | ||
containers: | ||
- name: nginx | ||
image: nginx:stable | ||
ports: | ||
# Container port 80 is exposed under the name http-web-svc so it can be referenced by name.
- containerPort: 80 | ||
name: http-web-svc | ||
|
||
--- | ||
# NodePort Service routing TCP traffic on port 80 to the named port of the pods selected below.
# NOTE(review): presumably the Service hijacked by the service hijacking functional test - confirm.
apiVersion: v1 | ||
kind: Service | ||
metadata: | ||
name: nginx-service | ||
spec: | ||
selector: | ||
app.kubernetes.io/name: proxy | ||
type: NodePort | ||
ports: | ||
- name: name-of-service-port | ||
protocol: TCP | ||
port: 80 | ||
# Targets the pod port by its name rather than by number.
targetPort: http-web-svc | ||
# Fixed node port; presumably the one mapped to the host in the kind configuration - confirm.
nodePort: 30036 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
set -xeEo pipefail

source CI/tests/common.sh

trap error ERR
trap finish EXIT
# port mapping has been configured in kind-config.yml
SERVICE_URL=http://localhost:8888

# Expected payload and status code for each method in the first step of the plan
PAYLOAD_GET_1="{ \
\"status\":\"internal server error\" \
}"
STATUS_CODE_GET_1=500

PAYLOAD_PATCH_1="resource patched"
STATUS_CODE_PATCH_1=201

PAYLOAD_POST_1="{ \
\"status\": \"unauthorized\" \
}"
STATUS_CODE_POST_1=401

# Expected payload and status code for each method in the second step of the plan
PAYLOAD_GET_2="{ \
\"status\":\"resource created\" \
}"
STATUS_CODE_GET_2=201

PAYLOAD_PATCH_2="bad request"
STATUS_CODE_PATCH_2=400

PAYLOAD_POST_2="not found"
STATUS_CODE_POST_2=404

JSON_MIME="application/json"
TEXT_MIME="text/plain; charset=utf-8"

# Compares two strings: prints the success message when they are equal,
# otherwise prints the failure message and returns 1.
#   $1 expected value   $2 actual value   $3 success message   $4 failure message
function assert_equal {
    local expected="$1"
    local actual="$2"
    local ok_message="$3"
    local fail_message="$4"

    if [ "$expected" == "$actual" ]; then
        echo "$ok_message"
    else
        echo "$fail_message"
        return 1
    fi
}

# Runs the service hijacking scenario in background and verifies, for both steps
# of the plan, that payload, status code and MIME type returned by the hijacked
# service match the expected values; finally checks that nginx is served again.
# Returns 1 on the first failed check (the explicit "|| return 1" makes the test
# stop even if errexit is not active).
function functional_test_service_hijacking {

    export scenario_type="service_hijacking"
    export scenario_file="scenarios/kube/service_hijacking.yaml"
    export post_config=""
    envsubst < CI/config/common_test_config.yaml > CI/config/service_hijacking.yaml
    python3 -m coverage run -a run_kraken.py -c CI/config/service_hijacking.yaml > /dev/null 2>&1 &
    PID=$!
    # Waiting for the hijacking to take effect: keep polling while the resource still answers 404.
    # -I fetches the headers only, -X keeps the request method explicit.
    while [ "$(curl -X GET -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL/list/index.php")" == 404 ]; do echo "waiting scenario to kick in."; sleep 1; done

    # Checking Step 1 GET on /list/index.php
    OUT_GET="$(curl -X GET -s "$SERVICE_URL/list/index.php")"
    OUT_CONTENT="$(curl -X GET -s -o /dev/null -I -w "%{content_type}" "$SERVICE_URL/list/index.php")"
    OUT_STATUS_CODE="$(curl -X GET -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL/list/index.php")"
    # payloads are compared after stripping tabs, newlines, carriage returns and spaces
    assert_equal "${PAYLOAD_GET_1//[$'\t\r\n ']}" "${OUT_GET//[$'\t\r\n ']}" "Step 1 GET Payload OK" "Payload did not match. Test failed." || return 1
    assert_equal "$STATUS_CODE_GET_1" "$OUT_STATUS_CODE" "Step 1 GET Status Code OK" " Step 1 GET status code did not match. Test failed." || return 1
    assert_equal "$JSON_MIME" "$OUT_CONTENT" "Step 1 GET MIME OK" " Step 1 GET MIME did not match. Test failed." || return 1

    # Checking Step 1 POST on /list/index.php
    OUT_POST="$(curl -s -X POST "$SERVICE_URL/list/index.php")"
    OUT_STATUS_CODE="$(curl -X POST -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL/list/index.php")"
    OUT_CONTENT="$(curl -X POST -s -o /dev/null -I -w "%{content_type}" "$SERVICE_URL/list/index.php")"
    assert_equal "${PAYLOAD_POST_1//[$'\t\r\n ']}" "${OUT_POST//[$'\t\r\n ']}" "Step 1 POST Payload OK" "Payload did not match. Test failed." || return 1
    assert_equal "$STATUS_CODE_POST_1" "$OUT_STATUS_CODE" "Step 1 POST Status Code OK" "Step 1 POST status code did not match. Test failed." || return 1
    assert_equal "$JSON_MIME" "$OUT_CONTENT" "Step 1 POST MIME OK" " Step 1 POST MIME did not match. Test failed." || return 1

    # Checking Step 1 PATCH on /patch
    OUT_PATCH="$(curl -s -X PATCH "$SERVICE_URL/patch")"
    OUT_STATUS_CODE="$(curl -X PATCH -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL/patch")"
    OUT_CONTENT="$(curl -X PATCH -s -o /dev/null -I -w "%{content_type}" "$SERVICE_URL/patch")"
    assert_equal "${PAYLOAD_PATCH_1//[$'\t\r\n ']}" "${OUT_PATCH//[$'\t\r\n ']}" "Step 1 PATCH Payload OK" "Payload did not match. Test failed." || return 1
    assert_equal "$STATUS_CODE_PATCH_1" "$OUT_STATUS_CODE" "Step 1 PATCH Status Code OK" "Step 1 PATCH status code did not match. Test failed." || return 1
    assert_equal "$TEXT_MIME" "$OUT_CONTENT" "Step 1 PATCH MIME OK" " Step 1 PATCH MIME did not match. Test failed." || return 1
    # wait for the next step
    sleep 16

    # Checking Step 2 GET on /list/index.php
    OUT_GET="$(curl -X GET -s "$SERVICE_URL/list/index.php")"
    OUT_CONTENT="$(curl -X GET -s -o /dev/null -I -w "%{content_type}" "$SERVICE_URL/list/index.php")"
    OUT_STATUS_CODE="$(curl -X GET -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL/list/index.php")"
    assert_equal "${PAYLOAD_GET_2//[$'\t\r\n ']}" "${OUT_GET//[$'\t\r\n ']}" "Step 2 GET Payload OK" "Step 2 GET Payload did not match. Test failed." || return 1
    assert_equal "$STATUS_CODE_GET_2" "$OUT_STATUS_CODE" "Step 2 GET Status Code OK" "Step 2 GET status code did not match. Test failed." || return 1
    assert_equal "$JSON_MIME" "$OUT_CONTENT" "Step 2 GET MIME OK" " Step 2 GET MIME did not match. Test failed." || return 1

    # Checking Step 2 POST on /list/index.php
    OUT_POST="$(curl -s -X POST "$SERVICE_URL/list/index.php")"
    OUT_CONTENT="$(curl -X POST -s -o /dev/null -I -w "%{content_type}" "$SERVICE_URL/list/index.php")"
    OUT_STATUS_CODE="$(curl -X POST -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL/list/index.php")"
    assert_equal "${PAYLOAD_POST_2//[$'\t\r\n ']}" "${OUT_POST//[$'\t\r\n ']}" "Step 2 POST Payload OK" "Step 2 POST Payload did not match. Test failed." || return 1
    assert_equal "$STATUS_CODE_POST_2" "$OUT_STATUS_CODE" "Step 2 POST Status Code OK" "Step 2 POST status code did not match. Test failed." || return 1
    assert_equal "$TEXT_MIME" "$OUT_CONTENT" "Step 2 POST MIME OK" " Step 2 POST MIME did not match. Test failed." || return 1

    # Checking Step 2 PATCH on /patch
    OUT_PATCH="$(curl -s -X PATCH "$SERVICE_URL/patch")"
    OUT_CONTENT="$(curl -X PATCH -s -o /dev/null -I -w "%{content_type}" "$SERVICE_URL/patch")"
    OUT_STATUS_CODE="$(curl -X PATCH -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL/patch")"
    assert_equal "${PAYLOAD_PATCH_2//[$'\t\r\n ']}" "${OUT_PATCH//[$'\t\r\n ']}" "Step 2 PATCH Payload OK" "Step 2 PATCH Payload did not match. Test failed." || return 1
    assert_equal "$STATUS_CODE_PATCH_2" "$OUT_STATUS_CODE" "Step 2 PATCH Status Code OK" "Step 2 PATCH status code did not match. Test failed." || return 1
    assert_equal "$TEXT_MIME" "$OUT_CONTENT" "Step 2 PATCH MIME OK" " Step 2 PATCH MIME did not match. Test failed." || return 1
    # the scenario runs in background: wait for it to finish before checking the restored service
    wait "$PID"

    # now checking if the service has been restored correctly and nginx responds correctly
    if curl -s "$SERVICE_URL" | grep 'nginx!'; then
        echo "BODY: Service restored!"
    else
        echo "BODY: failed to restore service"
        return 1
    fi
    OUT_STATUS_CODE="$(curl -X GET -s -o /dev/null -I -w "%{http_code}" "$SERVICE_URL")"
    assert_equal "200" "$OUT_STATUS_CODE" "STATUS_CODE: Service restored!" "STATUS_CODE: failed to restore service" || return 1

    echo "Service Hijacking Chaos test: Success"
}


functional_test_service_hijacking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
### Service Hijacking Scenarios | ||
|
||
Service Hijacking Scenarios aim to simulate fake HTTP responses from a workload targeted by a | ||
`Service` already deployed in the cluster. | ||
This scenario is executed by deploying a custom-made web service and modifying the target `Service` | ||
selector to direct traffic to this web service for a specified duration. | ||
|
||
The web service will utilize a time-based test plan loaded from the scenario configuration file, | ||
which outlines the behavior of resources during the chaos scenario, defined as follows: | ||
|
||
```yaml | ||
service_target_port: http-web-svc # The port of the service to be hijacked (can be named or numeric, based on the workload and service configuration). | ||
service_name: nginx-service # The name of the service that will be hijacked. | ||
service_namespace: default # The namespace where the target service is located. | ||
image: quay.io/krkn-chaos/krkn-service-hijacking:v0.1.3 # Image of the krkn web service to be deployed to receive traffic. | ||
chaos_duration: 30 # Total duration of the chaos scenario in seconds. | ||
plan: | ||
- resource: "/list/index.php" # Specifies the resource or path to respond to in the scenario. For paths, both the path and query parameters are captured but ignored. For resources, only query parameters are captured. | ||
|
||
steps: # A time-based plan consisting of steps can be defined for each resource. | ||
GET: # One or more HTTP methods can be specified for each step. Note: Non-standard methods are supported for fully custom web services (e.g., using NONEXISTENT instead of POST). | ||
|
||
- duration: 15 # Duration in seconds for this step before moving to the next one, if defined. Otherwise, this step will continue until the chaos scenario ends. | ||
|
||
status: 500 # HTTP status code to be returned in this step. | ||
mime_type: "application/json" # MIME type of the response for this step. | ||
payload: | # The response payload for this step. | ||
{ | ||
"status":"internal server error" | ||
} | ||
- duration: 15 | ||
status: 201 | ||
mime_type: "application/json" | ||
payload: | | ||
{ | ||
"status":"resource created" | ||
} | ||
POST: | ||
- duration: 15 | ||
status: 401 | ||
mime_type: "application/json" | ||
payload: | | ||
{ | ||
"status": "unauthorized" | ||
} | ||
- duration: 15 | ||
status: 404 | ||
mime_type: "text/plain" | ||
payload: "not found" | ||
|
||
|
||
``` | ||
The scenario will focus on the `service_name` within the `service_namespace`, | ||
substituting the selector with a randomly generated one, which is added as a label in the mock service manifest. | ||
This allows multiple scenarios to be executed in the same namespace, each targeting different services without | ||
causing conflicts. | ||
|
||
The newly deployed mock web service will expose a `service_target_port`, | ||
which can be either a named or numeric port based on the service configuration. | ||
This ensures that the Service correctly routes HTTP traffic to the mock web service during the chaos run. | ||
|
||
Steps are executed one after another: the first step starts when the mock web service is deployed in the cluster, and each step lasts for `duration` seconds.
For each HTTP resource, defined as a top-level YAML property of the plan | ||
(it could be a specific resource, e.g., /list/index.php, or a path-based resource typical in MVC frameworks), | ||
one or more HTTP request methods can be specified. Both standard and custom request methods are supported. | ||
|
||
During this time frame, the web service will respond with: | ||
|
||
- `status`: The [HTTP status code](https://datatracker.ietf.org/doc/html/rfc7231#section-6) (can be standard or custom). | ||
- `mime_type`: The [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types) (can be standard or custom). | ||
- `payload`: The response body to be returned to the client. | ||
|
||
At the end of the step `duration`, the web service will proceed to the next step (if available) until | ||
the global `chaos_duration` concludes. At this point, the original service will be restored, | ||
and the custom web service and its resources will be undeployed. | ||
|
||
__NOTE__: Some clients (e.g., cURL, jQuery) may optimize queries using lightweight methods (like HEAD or OPTIONS) | ||
to probe API behavior. If these methods are not defined in the test plan, the web service may respond with | ||
a `405` or `404` status code. If you encounter unexpected behavior, consider this use case. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import logging | ||
import time | ||
|
||
import yaml | ||
from krkn_lib.k8s import KrknKubernetes | ||
from krkn_lib.models.telemetry import ScenarioTelemetry | ||
from krkn_lib.telemetry.k8s import KrknTelemetryKubernetes | ||
|
||
|
||
def run(scenarios_list: list[str], krkn_lib: KrknKubernetes, telemetry: KrknTelemetryKubernetes) -> tuple[list[str], list[ScenarioTelemetry]]:
    """Run the service hijacking scenarios listed in ``scenarios_list``.

    For each scenario file the target Service is checked, a mock web service is
    deployed with the configured image and test plan, and the Service selector
    is replaced with the mock's selector. After ``chaos_duration`` seconds the
    original selector is restored and the mock resources are undeployed.

    A missing Service, or a failure to patch or restore the selector, marks the
    scenario as failed and stops the loop. An exception raised while the chaos
    is running marks the scenario as failed and moves on to the next one.

    :param scenarios_list: paths of the scenario YAML files to run
    :param krkn_lib: client used to check and patch the Service and to
        deploy/undeploy the mock web service
    :param telemetry: helper used to attach the scenario parameters to the
        scenario telemetry
    :return: the list of failed scenario paths and the telemetry collected for
        every processed scenario
    """
    scenario_telemetries: list[ScenarioTelemetry] = []
    failed_post_scenarios = []
    for scenario in scenarios_list:
        scenario_telemetry = ScenarioTelemetry()
        scenario_telemetry.scenario = scenario
        scenario_telemetry.start_timestamp = time.time()
        telemetry.set_parameters_base64(scenario_telemetry, scenario)
        with open(scenario) as stream:
            scenario_config = yaml.safe_load(stream)

        service_name = scenario_config["service_name"]
        service_namespace = scenario_config["service_namespace"]
        plan = scenario_config["plan"]
        image = scenario_config["image"]
        target_port = scenario_config["service_target_port"]
        chaos_duration = scenario_config["chaos_duration"]

        logging.info(f"checking service {service_name} in namespace: {service_namespace}")
        if not krkn_lib.service_exists(service_name, service_namespace):
            logging.error(f"service: {service_name} not found in namespace: {service_namespace}, failed to run scenario.")
            fail(scenario_telemetry, scenario_telemetries)
            failed_post_scenarios.append(scenario)
            break
        try:
            logging.info(f"service: {service_name} found in namespace: {service_namespace}")
            logging.info("creating webservice and initializing test plan...")
            # both named ports and port numbers can be used
            if isinstance(target_port, int):
                logging.info(f"webservice will listen on port {target_port}")
                webservice = krkn_lib.deploy_service_hijacking(service_namespace, plan, image, port_number=target_port)
            else:
                logging.info(f"traffic will be redirected to named port: {target_port}")
                webservice = krkn_lib.deploy_service_hijacking(service_namespace, plan, image, port_name=target_port)
            logging.info(f"successfully deployed pod: {webservice.pod_name} "
                         f"in namespace:{service_namespace} with selector {webservice.selector}!"
                         )
            logging.info(f"patching service: {service_name} to hijack traffic towards: {webservice.pod_name}")
            original_service = krkn_lib.replace_service_selector([webservice.selector], service_name, service_namespace)
            if original_service is None:
                logging.error(f"failed to patch service: {service_name}, namespace: {service_namespace} with selector {webservice.selector}")
                # The mock web service is already deployed at this point and no
                # traffic will ever reach it: remove it so that a failed patch
                # does not leave orphaned resources in the namespace.
                try:
                    krkn_lib.undeploy_service_hijacking(webservice)
                except Exception as e:
                    logging.error(f"failed to undeploy service-hijacking resources: {e}")
                fail(scenario_telemetry, scenario_telemetries)
                failed_post_scenarios.append(scenario)
                break

            logging.info(f"service: {service_name} successfully patched!")
            logging.info(f"original service manifest:\n\n{yaml.dump(original_service)}")
            logging.info(f"waiting {chaos_duration} before restoring the service")
            time.sleep(chaos_duration)
            # rebuild the "key=value" selectors from the manifest returned by the first patch
            selectors = [f"{key}={value}" for key, value in original_service["spec"]["selector"].items()]
            logging.info(f"restoring the service selectors {selectors}")
            restored_service = krkn_lib.replace_service_selector(selectors, service_name, service_namespace)
            if restored_service is None:
                logging.error(f"failed to restore original service: {service_name}, namespace: {service_namespace} with selectors: {selectors}")
                # The Service still selects the mock web service here, so the
                # mock is intentionally left in place.
                fail(scenario_telemetry, scenario_telemetries)
                failed_post_scenarios.append(scenario)
                break
            logging.info("selectors successfully restored")
            logging.info("undeploying service-hijacking resources...")
            krkn_lib.undeploy_service_hijacking(webservice)
            scenario_telemetry.exit_status = 0
            scenario_telemetry.end_timestamp = time.time()
            scenario_telemetries.append(scenario_telemetry)
            logging.info("success")
        except Exception as e:
            logging.error(f"scenario {scenario} failed with exception: {e}")
            fail(scenario_telemetry, scenario_telemetries)
            failed_post_scenarios.append(scenario)

    return failed_post_scenarios, scenario_telemetries
|
||
|
||
def fail(scenario_telemetry: ScenarioTelemetry, scenario_telemetries: list[ScenarioTelemetry]):
    """Record a scenario as failed.

    Stamps ``scenario_telemetry`` with the current time as end timestamp and
    exit status 1, then adds it to ``scenario_telemetries``.

    :param scenario_telemetry: telemetry of the scenario that failed
    :param scenario_telemetries: list collecting the telemetry of every scenario
    """
    scenario_telemetry.end_timestamp = time.time()
    scenario_telemetry.exit_status = 1
    scenario_telemetries.append(scenario_telemetry)
|
Oops, something went wrong.