Commit 498f584

feat: distributed tracing span error
Signed-off-by: Jaeyeon Park <[email protected]>
1 parent fc8ae3a commit 498f584

File tree

4 files changed (+130 -7 lines)
chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go

Lines changed: 17 additions & 0 deletions
@@ -17,6 +17,7 @@ import (
	"github.com/litmuschaos/litmus-go/pkg/utils/common"
	"github.com/palantir/stacktrace"
	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
)

// InjectChaosInSerialMode will inject the aws ssm chaos in serial mode that is one after other
@@ -51,6 +52,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
	ec2IDList := strings.Fields(ec2ID)
	commandId, err := ssm.SendSSMCommand(experimentsDetails, ec2IDList)
	if err != nil {
+		span.SetStatus(codes.Error, "failed to send ssm command")
+		span.RecordError(err)
		return stacktrace.Propagate(err, "failed to send ssm command")
	}
	//prepare commands for abort recovery
@@ -59,20 +62,26 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
	//wait for the ssm command to get in running state
	log.Info("[Wait]: Waiting for the ssm command to get in InProgress state")
	if err := ssm.WaitForCommandStatus("InProgress", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil {
+		span.SetStatus(codes.Error, "failed to start ssm command")
+		span.RecordError(err)
		return stacktrace.Propagate(err, "failed to start ssm command")
	}
	common.SetTargets(ec2ID, "injected", "EC2", chaosDetails)

	// run the probes during chaos
	if len(resultDetails.ProbeDetails) != 0 && i == 0 {
		if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "failed to run probes")
		}
	}

	//wait for the ssm command to get succeeded in the given chaos duration
	log.Info("[Wait]: Waiting for the ssm command to get completed")
	if err := ssm.WaitForCommandStatus("Success", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil {
+		span.SetStatus(codes.Error, "failed to send ssm command")
+		span.RecordError(err)
		return stacktrace.Propagate(err, "failed to send ssm command")
	}
	common.SetTargets(ec2ID, "reverted", "EC2", chaosDetails)
@@ -117,6 +126,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
	log.Info("[Chaos]: Starting the ssm command")
	commandId, err := ssm.SendSSMCommand(experimentsDetails, instanceIDList)
	if err != nil {
+		span.SetStatus(codes.Error, "failed to send ssm command")
+		span.RecordError(err)
		return stacktrace.Propagate(err, "failed to send ssm command")
	}
	//prepare commands for abort recovery
@@ -126,13 +137,17 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
		//wait for the ssm command to get in running state
		log.Info("[Wait]: Waiting for the ssm command to get in InProgress state")
		if err := ssm.WaitForCommandStatus("InProgress", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil {
+			span.SetStatus(codes.Error, "failed to start ssm command")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "failed to start ssm command")
		}
	}

	// run the probes during chaos
	if len(resultDetails.ProbeDetails) != 0 {
		if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "failed to run probes")
		}
	}
@@ -141,6 +156,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
		//wait for the ssm command to get succeeded in the given chaos duration
		log.Info("[Wait]: Waiting for the ssm command to get completed")
		if err := ssm.WaitForCommandStatus("Success", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil {
+			span.SetStatus(codes.Error, "failed to send ssm command")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "failed to send ssm command")
		}
	}

chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go

Lines changed: 35 additions & 2 deletions
@@ -24,6 +24,7 @@ import (
	"github.com/litmuschaos/litmus-go/pkg/utils/retry"
	"github.com/palantir/stacktrace"
	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
)

var (
@@ -55,11 +56,16 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
	//get the disk name or list of disk names
	diskNameList := strings.Split(experimentsDetails.VirtualDiskNames, ",")
	if experimentsDetails.VirtualDiskNames == "" || len(diskNameList) == 0 {
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no volume names found to detach"}
+		span.SetStatus(codes.Error, "no volume names found to detach")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no volume names found to detach"}
+		span.RecordError(err)
+		return err
	}
	instanceNamesWithDiskNames, err := diskStatus.GetInstanceNameForDisks(diskNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup)

	if err != nil {
+		span.SetStatus(codes.Error, "failed to get instance names for disks")
+		span.RecordError(err)
		return stacktrace.Propagate(err, "error fetching attached instances for disks")
	}

@@ -69,6 +75,8 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
	for instanceName := range instanceNamesWithDiskNames {
		attachedDisksWithInstance[instanceName], err = diskStatus.GetInstanceDiskList(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, experimentsDetails.ScaleSet, instanceName)
		if err != nil {
+			span.SetStatus(codes.Error, "failed to get attached disks")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "error fetching virtual disks")
		}
	}
@@ -85,14 +93,21 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
	switch strings.ToLower(experimentsDetails.Sequence) {
	case "serial":
		if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run chaos in serial mode")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "could not run chaos in serial mode")
		}
	case "parallel":
		if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run chaos in parallel mode")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
		}
	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
	}

	//Waiting for the ramp time after chaos injection
@@ -125,6 +140,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
	log.Info("[Chaos]: Detaching the virtual disks from the instances")
	for instanceName, diskNameList := range instanceNamesWithDiskNames {
		if err = diskStatus.DetachDisks(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameList); err != nil {
+			span.SetStatus(codes.Error, "failed to detach disks")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "failed to detach disks")
		}
	}
@@ -133,6 +150,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
		for _, diskName := range diskNameList {
			log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
			if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to detach disks")
+				span.RecordError(err)
				return stacktrace.Propagate(err, "disk detachment check failed")
			}
		}
@@ -147,6 +166,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
	// run the probes during chaos
	if len(resultDetails.ProbeDetails) != 0 {
		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "failed to run probes")
		}
	}
@@ -159,6 +180,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
	log.Info("[Chaos]: Attaching the Virtual disks back to the instances")
	for instanceName, diskNameList := range attachedDisksWithInstance {
		if err = diskStatus.AttachDisk(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameList); err != nil {
+			span.SetStatus(codes.Error, "virtual disk attachment failed")
+			span.RecordError(err)
			return stacktrace.Propagate(err, "virtual disk attachment failed")
		}

@@ -167,6 +190,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
		for _, diskName := range diskNameList {
			log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
			if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to attach disks")
+				span.RecordError(err)
				return stacktrace.Propagate(err, "disk attachment check failed")
			}
		}
@@ -209,12 +234,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
			// Detaching the virtual disks
			log.Infof("[Chaos]: Detaching %v from the instance", diskName)
			if err = diskStatus.DetachDisks(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameToList); err != nil {
+				span.SetStatus(codes.Error, "failed to detach disks")
+				span.RecordError(err)
				return stacktrace.Propagate(err, "failed to detach disks")
			}

			// Waiting for disk to be detached
			log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
			if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to detach disks")
+				span.RecordError(err)
				return stacktrace.Propagate(err, "disk detachment check failed")
			}

@@ -235,12 +264,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
			//Attaching the virtual disks to the instance
			log.Infof("[Chaos]: Attaching %v back to the instance", diskName)
			if err = diskStatus.AttachDisk(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, attachedDisksWithInstance[instanceName]); err != nil {
+				span.SetStatus(codes.Error, "disk attachment failed")
+				span.RecordError(err)
				return stacktrace.Propagate(err, "disk attachment failed")
			}

			// Waiting for disk to be attached
			log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
			if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to attach disks")
+				span.RecordError(err)
				return stacktrace.Propagate(err, "disk attachment check failed")
			}