
Added Azure VM Chaos by running Powershell script based chaos #743


Open · wants to merge 1 commit into base: master
159 changes: 159 additions & 0 deletions chaoslib/litmus/azure-instance-runscript/azure-instance-runscript.go
@@ -0,0 +1,159 @@
package lib

import (
"context"
"fmt"
"os"
"os/signal"
"strings"
"syscall"
"time"

instanceRunScript "pkg/azure/instance-runscript"

experimentTypes "github.com/litmuschaos/litmus-go/pkg/azure/instance-runscript/types"
"github.com/litmuschaos/litmus-go/pkg/cerrors"
"github.com/litmuschaos/litmus-go/pkg/clients"
"github.com/litmuschaos/litmus-go/pkg/events"
"github.com/litmuschaos/litmus-go/pkg/log"
"github.com/litmuschaos/litmus-go/pkg/probe"
"github.com/litmuschaos/litmus-go/pkg/telemetry"
"github.com/litmuschaos/litmus-go/pkg/types"
"github.com/litmuschaos/litmus-go/pkg/utils/common"
"github.com/palantir/stacktrace"
"go.opentelemetry.io/otel"
)

var (
err error
inject, abort chan os.Signal
)

// PrepareAzureRunScript will initialize instanceNameList and start chaos injection based on sequence method selected
func PrepareAzureRunScript(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "PrepareAzureInstanceRunScriptFault")
defer span.End()

// inject channel is used to transmit signal notifications
inject = make(chan os.Signal, 1)
// Catch and relay certain signal(s) to inject channel
signal.Notify(inject, os.Interrupt, syscall.SIGTERM)

// abort channel is used to transmit signal notifications.
abort = make(chan os.Signal, 1)
signal.Notify(abort, os.Interrupt, syscall.SIGTERM)

// Waiting for the ramp time before chaos injection
if experimentsDetails.RampTime != 0 {
log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime)
common.WaitForDuration(experimentsDetails.RampTime)
}

// get the instance name or list of instance names
instanceNameList := strings.Split(experimentsDetails.AzureInstanceNames, ",")
if experimentsDetails.AzureInstanceNames == "" || len(instanceNameList) == 0 {
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no Azure instance name found to run the script on"}
}

// watch for the abort signal and revert the chaos
go abortWatcher(experimentsDetails, instanceNameList)

switch strings.ToLower(experimentsDetails.Sequence) {
case "serial":
if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
return stacktrace.Propagate(err, "could not run chaos in serial mode")
}
case "parallel":
if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
}
default:
return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
}

// Waiting for the ramp time after chaos injection
if experimentsDetails.RampTime != 0 {
log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime)
common.WaitForDuration(experimentsDetails.RampTime)
}
return nil
}

// injectChaosInSerialMode will run the script on the Azure instances in serial mode, that is, one after the other
func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceNameList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureInstanceRunScriptFaultInSerialMode")
defer span.End()

select {
case <-inject:
// skip the chaos injection, if the abort signal is received
os.Exit(0)
default:
// ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin
ChaosStartTimeStamp := time.Now()
duration := int(time.Since(ChaosStartTimeStamp).Seconds())

for duration < experimentsDetails.ChaosDuration {

log.Infof("[Info]: Target instanceName list, %v", instanceNameList)

if experimentsDetails.EngineName != "" {
msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on Azure instance"
types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine")
}

//Run the script on the target instances serially
for i, vmName := range instanceNameList {

//Run the start script on the Azure instance
log.Infof("[Chaos]: Running the start script on the Azure instance: %v", vmName)
if experimentsDetails.ScaleSet == "enable" {
// scale set instance run script not implemented yet
} else {
if err := instanceRunScript.AzureInstanceRunScript(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName, experimentsDetails.PowershellChaosStartBase64OrPsFilePath, experimentsDetails.IsBase64, experimentsDetails.PowershellChaosStartParamNames, experimentsDetails.PowershellChaosStartParamValues); err != nil {
return stacktrace.Propagate(err, "unable to run the start script on the Azure instance")
}
}

// Run the probes during chaos
// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
if len(resultDetails.ProbeDetails) != 0 && i == 0 {
if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
return stacktrace.Propagate(err, "failed to run probes")
}
}

// Wait for Chaos interval
log.Infof("[Wait]: Waiting for chaos interval of %vs", experimentsDetails.ChaosInterval)
common.WaitForDuration(experimentsDetails.ChaosInterval)

// Run the end PS script on the Azure instance to revert the chaos
log.Infof("[Chaos]: Running the end script on the Azure instance: %v", vmName)
if experimentsDetails.ScaleSet == "enable" {
// scale set instance run script not implemented yet
} else {
if err := instanceRunScript.AzureInstanceRunScript(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName, experimentsDetails.PowershellChaosEndBase64OrPsFilePath, experimentsDetails.IsBase64, experimentsDetails.PowershellChaosEndParamNames, experimentsDetails.PowershellChaosEndParamValues); err != nil {
return stacktrace.Propagate(err, "unable to run the end script on the Azure instance")
}
}
}
duration = int(time.Since(ChaosStartTimeStamp).Seconds())
}
}
return nil
}

// injectChaosInParallelMode will run the script on the Azure instances in parallel mode, that is, all at once
func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceNameList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "InjectAzureInstanceRunScriptFaultInParallelMode")
defer span.End()
// parallel mode run script is not implemented yet (a hedged sketch follows this file listing);
// return an explicit error so the function has a value on every path
return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: "parallel mode is not implemented for the azure instance run script fault"}
}

// abortWatcher watches for the abort signal and reverts the chaos
func abortWatcher(experimentsDetails *experimentTypes.ExperimentDetails, instanceNameList []string) {
<-abort

// reverting the chaos on abort (running the end script on the target instances) is not implemented yet;
// exit explicitly, since signal.Notify suppresses the default SIGTERM handling
log.Info("[Abort]: Chaos Revert Skipped, exiting")
os.Exit(1)
}
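
Parallel mode is left unimplemented in this PR. As a purely illustrative sketch (not the author's implementation), the variant below mirrors the serial loop above: it reuses only the imports, package globals, and instanceRunScript.AzureInstanceRunScript calls that already appear in this file, and simply runs the start and end scripts on every target instance back to back inside each chaos interval. The scale-set branch is omitted because, as noted above, scale-set run-script support is not implemented yet.

// Hedged sketch only — not part of this PR. It would live in the same package as the code above.
func injectChaosInParallelModeSketch(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, instanceNameList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
	// ChaosStartTimeStamp contains the start timestamp, when the chaos injection begins
	ChaosStartTimeStamp := time.Now()
	duration := int(time.Since(ChaosStartTimeStamp).Seconds())

	for duration < experimentsDetails.ChaosDuration {
		// run the start script on all target instances, one request after another
		for _, vmName := range instanceNameList {
			log.Infof("[Chaos]: Running the start script on the Azure instance: %v", vmName)
			if err := instanceRunScript.AzureInstanceRunScript(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName, experimentsDetails.PowershellChaosStartBase64OrPsFilePath, experimentsDetails.IsBase64, experimentsDetails.PowershellChaosStartParamNames, experimentsDetails.PowershellChaosStartParamValues); err != nil {
				return stacktrace.Propagate(err, "unable to run the start script on the Azure instance")
			}
		}

		// run the OnChaos probes while the chaos is active
		if len(resultDetails.ProbeDetails) != 0 {
			if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
				return stacktrace.Propagate(err, "failed to run probes")
			}
		}

		// wait for the chaos interval before reverting
		log.Infof("[Wait]: Waiting for chaos interval of %vs", experimentsDetails.ChaosInterval)
		common.WaitForDuration(experimentsDetails.ChaosInterval)

		// run the end script on all target instances to revert the chaos
		for _, vmName := range instanceNameList {
			log.Infof("[Chaos]: Running the end script on the Azure instance: %v", vmName)
			if err := instanceRunScript.AzureInstanceRunScript(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName, experimentsDetails.PowershellChaosEndBase64OrPsFilePath, experimentsDetails.IsBase64, experimentsDetails.PowershellChaosEndParamNames, experimentsDetails.PowershellChaosEndParamValues); err != nil {
				return stacktrace.Propagate(err, "unable to run the end script on the Azure instance")
			}
		}
		duration = int(time.Since(ChaosStartTimeStamp).Seconds())
	}
	return nil
}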
14 changes: 14 additions & 0 deletions experiments/azure/instance-runscript/README.md
@@ -0,0 +1,14 @@
## Experiment Metadata

<table>
<tr>
<th> Name </th>
<th> Description </th>
<th> Documentation Link </th>
</tr>
<tr>
<td> Azure Instance Run PS Script </td>
<td> This experiment runs a PowerShell script on the target Azure instance to inject chaos, and runs a second script to revert it once the chaos duration has elapsed. </td>
<td> <a href="https://litmuschaos.github.io/litmus/experiments/categories/azure/azure-instance-runscript/"> Here </a> </td>
</tr>
</table>
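
As a companion to the description above, here is a minimal Go sketch of producing the base64-encoded form of a chaos script, assuming the experiment accepts either a .ps1 file path or a standard base64-encoded script body (per the PowershellChaosStartBase64OrPsFilePath and IsBase64 fields used in the chaoslib of this PR). The file name in the sketch is hypothetical.

package main

import (
	"encoding/base64"
	"fmt"
	"os"
)

// Reads a local PowerShell start script and prints its base64 encoding, which could then be
// supplied to the experiment when IsBase64 is enabled. The exact ENV wiring is outside this
// diff, so treat this only as an illustration of the encoding step.
func main() {
	script, err := os.ReadFile("chaos-start.ps1") // hypothetical local script path
	if err != nil {
		fmt.Fprintln(os.Stderr, "unable to read script:", err)
		os.Exit(1)
	}
	fmt.Println(base64.StdEncoding.EncodeToString(script))
}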
@@ -0,0 +1,199 @@
package experiment

import (
"context"
"os"

"github.com/litmuschaos/chaos-operator/api/litmuschaos/v1alpha1"
litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/azure-instance-runscript/lib"
experimentEnv "github.com/litmuschaos/litmus-go/pkg/azure/instance-runscript/environment"
experimentTypes "github.com/litmuschaos/litmus-go/pkg/azure/instance-runscript/types"
"github.com/litmuschaos/litmus-go/pkg/clients"
azureCommon "github.com/litmuschaos/litmus-go/pkg/cloud/azure/common"
azureStatus "github.com/litmuschaos/litmus-go/pkg/cloud/azure/instance"

"github.com/litmuschaos/litmus-go/pkg/events"
"github.com/litmuschaos/litmus-go/pkg/log"
"github.com/litmuschaos/litmus-go/pkg/probe"
"github.com/litmuschaos/litmus-go/pkg/result"
"github.com/litmuschaos/litmus-go/pkg/types"
"github.com/litmuschaos/litmus-go/pkg/utils/common"
"github.com/sirupsen/logrus"
)

// AzureInstanceStop injects the Azure instance run-script chaos
func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) {

var err error
experimentsDetails := experimentTypes.ExperimentDetails{}
resultDetails := types.ResultDetails{}
eventsDetails := types.EventDetails{}
chaosDetails := types.ChaosDetails{}

//Fetching all the ENV passed from the runner pod
log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME"))
experimentEnv.GetENV(&experimentsDetails)

// Initialize the chaos attributes
types.InitialiseChaosVariables(&chaosDetails)

// Initialize Chaos Result Parameters
types.SetResultAttributes(&resultDetails, chaosDetails)

if experimentsDetails.EngineName != "" {
// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
log.Errorf("Unable to initialize the probes: %v", err)
return
}
}

//Updating the chaos result at the beginning of the experiment
log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName)
err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT")
if err != nil {
log.Errorf("Unable to create the chaosresult: %v", err)
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
return
}

// Set the chaos result uid
result.SetResultUID(&resultDetails, clients, &chaosDetails)

// Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result
go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails)

//DISPLAY THE APP INFORMATION
log.InfoWithValues("The instance information is as follows", logrus.Fields{
"Chaos Duration": experimentsDetails.ChaosDuration,
"Resource Group": experimentsDetails.ResourceGroup,
"Powershell Chaos Start Base64 Or Ps File Path": experimentsDetails.PowershellChaosStartBase64OrPsFilePath,
"Powershell Chaos End Base64 Or Ps File Path": experimentsDetails.PowershellChaosEndBase64OrPsFilePath,
"Instance Name": experimentsDetails.AzureInstanceNames,
"Sequence": experimentsDetails.Sequence,
})

// Setting up Azure Subscription ID
if experimentsDetails.SubscriptionID, err = azureCommon.GetSubscriptionID(); err != nil {
log.Errorf("Failed to get the subscription id: %v", err)
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
return
}

// generating the event in chaosresult to mark the verdict as awaited
msg := "experiment: " + experimentsDetails.ExperimentName + ", Result: Awaited"
types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails)
if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResults"); eventErr != nil {
log.Errorf("Failed to create %v event inside chaosresults", types.AwaitedVerdict)
}

if experimentsDetails.EngineName != "" {
// marking AUT as running, as we already checked the status of application under test
msg := "AUT: Running"

// run the probes in the pre-chaos check
if len(resultDetails.ProbeDetails) != 0 {

err = probe.RunProbes(ctx, &chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails)
if err != nil {
log.Errorf("Probe Failed: %v", err)
msg := "AUT: Running, Probes: Unsuccessful"
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
}
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
return
}
msg = "AUT: Running, Probes: Successful"
}
// generating the events for the pre-chaos check
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails)
if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
}
}

//Verify the azure target instance is running (pre-chaos)
if chaosDetails.DefaultHealthCheck {
if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil {
log.Errorf("Azure instance status check failed: %v", err)
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
return
}
log.Info("[Status]: Azure instance(s) is in running state (pre-chaos)")
}

chaosDetails.Phase = types.ChaosInjectPhase

if err = litmusLIB.PrepareAzureRunScript(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
log.Errorf("Chaos injection failed: %v", err)
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
return
}

log.Info("[Confirmation]: Azure Instance Run Script chaos has been injected successfully")
resultDetails.Verdict = v1alpha1.ResultVerdictPassed

chaosDetails.Phase = types.PostChaosPhase

//Verify the azure instance is running (post chaos)
if chaosDetails.DefaultHealthCheck {
if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil {
log.Errorf("Azure instance status check failed: %v", err)
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
return
}
log.Info("[Status]: Azure instance is in running state (post chaos)")
}

if experimentsDetails.EngineName != "" {
// marking AUT as running, as we already checked the status of application under test
msg := "AUT: Running"

// run the probes in the post-chaos check
if len(resultDetails.ProbeDetails) != 0 {
err = probe.RunProbes(ctx, &chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails)
if err != nil {
log.Errorf("Probes Failed: %v", err)
msg := "AUT: Running, Probes: Unsuccessful"
types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
}
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
return
}
msg = "AUT: Running, Probes: Successful"
}

// generating post chaos event
types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Normal", &chaosDetails)
if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
}
}

//Updating the chaosResult at the end of the experiment
log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
if err != nil {
log.Errorf("Unable to update the chaosresult: %v", err)
}

// generating the event in chaosresult to mark the verdict as pass/fail
msg = "experiment: " + experimentsDetails.ExperimentName + ", Result: " + string(resultDetails.Verdict)
reason, eventType := types.GetChaosResultVerdictEvent(resultDetails.Verdict)
types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails)
if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResults"); eventErr != nil {
log.Errorf("Failed to create %v event inside chaosresults", reason)
}

if experimentsDetails.EngineName != "" {
msg := experimentsDetails.ExperimentName + " experiment has been " + string(resultDetails.Verdict) + "ed"
types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails)
if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
log.Errorf("Failed to create %v event inside chaosengine", types.Summary)
}
}

}
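
The diff does not show how this experiment function gets invoked. Below is a hedged sketch of registering it in an experiment runner's main, following the switch-on-experiment-name pattern used elsewhere in litmus-go; the package import path, the case string, and the dispatch-by-environment-variable shown here are assumptions, not part of this PR.

package main

import (
	"context"
	"os"

	azureInstanceRunScript "github.com/litmuschaos/litmus-go/experiments/azure/instance-runscript/experiment" // assumed path
	"github.com/litmuschaos/litmus-go/pkg/clients"
	"github.com/litmuschaos/litmus-go/pkg/log"
)

func main() {
	ctx := context.Background()
	clients := clients.ClientSets{}

	// generate the Kubernetes client sets from in-cluster config or kubeconfig
	if err := clients.GenerateClientSetFromKubeConfig(); err != nil {
		log.Errorf("Unable to create the client sets: %v", err)
		return
	}

	// dispatch on the experiment name injected by the chaos runner (case string is an assumption)
	switch os.Getenv("EXPERIMENT_NAME") {
	case "azure-instance-runscript":
		azureInstanceRunScript.AzureInstanceStop(ctx, clients)
	default:
		log.Errorf("Unsupported experiment name: %v", os.Getenv("EXPERIMENT_NAME"))
	}
}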