Skip to content

Commit e0d751f

Browse files
committed
feat: distributed tracing span error
Signed-off-by: Jaeyeon Park <[email protected]>
1 parent 498f584 commit e0d751f

File tree

17 files changed

+405
-41
lines changed

17 files changed

+405
-41
lines changed

chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/litmuschaos/litmus-go/pkg/utils/common"
2020
"github.com/palantir/stacktrace"
2121
"go.opentelemetry.io/otel"
22+
"go.opentelemetry.io/otel/codes"
2223
)
2324

2425
var (
@@ -49,6 +50,8 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT
4950

5051
//create and upload the ssm document on the given aws service monitoring docs
5152
if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil {
53+
span.SetStatus(codes.Error, "could not create and upload the ssm document")
54+
span.RecordError(err)
5255
return stacktrace.Propagate(err, "could not create and upload the ssm document")
5356
}
5457
experimentsDetails.IsDocsUploaded = true
@@ -60,25 +63,37 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT
6063
//get the instance id or list of instance ids
6164
instanceIDList := strings.Split(experimentsDetails.EC2InstanceID, ",")
6265
if experimentsDetails.EC2InstanceID == "" || len(instanceIDList) == 0 {
63-
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
66+
span.SetStatus(codes.Error, "no instance id found for chaos injection")
67+
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
68+
span.RecordError(err)
69+
return err
6470
}
6571

6672
switch strings.ToLower(experimentsDetails.Sequence) {
6773
case "serial":
6874
if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
75+
span.SetStatus(codes.Error, "could not run chaos in serial mode")
76+
span.RecordError(err)
6977
return stacktrace.Propagate(err, "could not run chaos in serial mode")
7078
}
7179
case "parallel":
7280
if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
81+
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
82+
span.RecordError(err)
7383
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
7484
}
7585
default:
76-
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
86+
span.SetStatus(codes.Error, "sequence is not supported")
87+
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
88+
span.RecordError(err)
89+
return err
7790
}
7891

7992
//Delete the ssm document on the given aws service monitoring docs
8093
err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region)
8194
if err != nil {
95+
span.SetStatus(codes.Error, "failed to delete ssm doc")
96+
span.RecordError(err)
8297
return stacktrace.Propagate(err, "failed to delete ssm doc")
8398
}
8499

chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/litmuschaos/litmus-go/pkg/utils/common"
2020
"github.com/palantir/stacktrace"
2121
"go.opentelemetry.io/otel"
22+
"go.opentelemetry.io/otel/codes"
2223
)
2324

2425
// PrepareAWSSSMChaosByTag contains the prepration and injection steps for the experiment
@@ -44,6 +45,8 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment
4445

4546
//create and upload the ssm document on the given aws service monitoring docs
4647
if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil {
48+
span.SetStatus(codes.Error, "could not create and upload the ssm document")
49+
span.RecordError(err)
4750
return stacktrace.Propagate(err, "could not create and upload the ssm document")
4851
}
4952
experimentsDetails.IsDocsUploaded = true
@@ -55,25 +58,37 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment
5558
log.Infof("[Chaos]:Number of Instance targeted: %v", len(instanceIDList))
5659

5760
if len(instanceIDList) == 0 {
58-
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
61+
span.SetStatus(codes.Error, "no instance id found for chaos injection")
62+
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
63+
span.RecordError(err)
64+
return err
5965
}
6066

6167
switch strings.ToLower(experimentsDetails.Sequence) {
6268
case "serial":
6369
if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
70+
span.SetStatus(codes.Error, "could not run chaos in serial mode")
71+
span.RecordError(err)
6472
return stacktrace.Propagate(err, "could not run chaos in serial mode")
6573
}
6674
case "parallel":
6775
if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
76+
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
77+
span.RecordError(err)
6878
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
6979
}
7080
default:
71-
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
81+
span.SetStatus(codes.Error, "sequence is not supported")
82+
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
83+
span.RecordError(err)
84+
return err
7285
}
7386

7487
//Delete the ssm document on the given aws service monitoring docs
7588
err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region)
7689
if err != nil {
90+
span.SetStatus(codes.Error, "failed to delete ssm doc")
91+
span.RecordError(err)
7792
return stacktrace.Propagate(err, "failed to delete ssm doc")
7893
}
7994

chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
6464
instanceNamesWithDiskNames, err := diskStatus.GetInstanceNameForDisks(diskNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup)
6565

6666
if err != nil {
67-
span.SetStatus(codes.Error, "failed to get instance names for disks")
67+
span.SetStatus(codes.Error, "error fetching attached instances for disks")
6868
span.RecordError(err)
6969
return stacktrace.Propagate(err, "error fetching attached instances for disks")
7070
}
@@ -75,7 +75,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
7575
for instanceName := range instanceNamesWithDiskNames {
7676
attachedDisksWithInstance[instanceName], err = diskStatus.GetInstanceDiskList(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, experimentsDetails.ScaleSet, instanceName)
7777
if err != nil {
78-
span.SetStatus(codes.Error, "failed to get attached disks")
78+
span.SetStatus(codes.Error, "error fetching virtual disks")
7979
span.RecordError(err)
8080
return stacktrace.Propagate(err, "error fetching virtual disks")
8181
}
@@ -93,13 +93,13 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
9393
switch strings.ToLower(experimentsDetails.Sequence) {
9494
case "serial":
9595
if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
96-
span.SetStatus(codes.Error, "failed to run chaos in serial mode")
96+
span.SetStatus(codes.Error, "could not run chaos in serial mode")
9797
span.RecordError(err)
9898
return stacktrace.Propagate(err, "could not run chaos in serial mode")
9999
}
100100
case "parallel":
101101
if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
102-
span.SetStatus(codes.Error, "failed to run chaos in parallel mode")
102+
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
103103
span.RecordError(err)
104104
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
105105
}
@@ -150,7 +150,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
150150
for _, diskName := range diskNameList {
151151
log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
152152
if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
153-
span.SetStatus(codes.Error, "failed to detach disks")
153+
span.SetStatus(codes.Error, "disk detachment check failed")
154154
span.RecordError(err)
155155
return stacktrace.Propagate(err, "disk detachment check failed")
156156
}
@@ -190,7 +190,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
190190
for _, diskName := range diskNameList {
191191
log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
192192
if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
193-
span.SetStatus(codes.Error, "failed to attach disks")
193+
span.SetStatus(codes.Error, "disk attachment check failed")
194194
span.RecordError(err)
195195
return stacktrace.Propagate(err, "disk attachment check failed")
196196
}
@@ -242,7 +242,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
242242
// Waiting for disk to be detached
243243
log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
244244
if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
245-
span.SetStatus(codes.Error, "failed to detach disks")
245+
span.SetStatus(codes.Error, "disk detachment check failed")
246246
span.RecordError(err)
247247
return stacktrace.Propagate(err, "disk detachment check failed")
248248
}
@@ -253,6 +253,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
253253
// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
254254
if len(resultDetails.ProbeDetails) != 0 && i == 0 {
255255
if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
256+
span.SetStatus(codes.Error, "failed to run probes")
257+
span.RecordError(err)
256258
return stacktrace.Propagate(err, "failed to run probes")
257259
}
258260
}
@@ -272,7 +274,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
272274
// Waiting for disk to be attached
273275
log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
274276
if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
275-
span.SetStatus(codes.Error, "failed to attach disks")
277+
span.SetStatus(codes.Error, "disk attachment check failed")
276278
span.RecordError(err)
277279
return stacktrace.Propagate(err, "disk attachment check failed")
278280
}

chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,13 @@ func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.E
6262
switch strings.ToLower(experimentsDetails.Sequence) {
6363
case "serial":
6464
if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
65-
span.SetStatus(codes.Error, "failed to run chaos in serial mode")
65+
span.SetStatus(codes.Error, "could not run chaos in serial mode")
6666
span.RecordError(err)
6767
return stacktrace.Propagate(err, "could not run chaos in serial mode")
6868
}
6969
case "parallel":
7070
if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
71-
span.SetStatus(codes.Error, "failed to run chaos in parallel mode")
71+
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
7272
span.RecordError(err)
7373
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
7474
}
@@ -118,13 +118,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
118118
log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName)
119119
if experimentsDetails.ScaleSet == "enable" {
120120
if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
121-
span.SetStatus(codes.Error, "failed to stop the Azure instance")
121+
span.SetStatus(codes.Error, "unable to stop the Azure instance")
122122
span.RecordError(err)
123123
return stacktrace.Propagate(err, "unable to stop the Azure instance")
124124
}
125125
} else {
126126
if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
127-
span.SetStatus(codes.Error, "failed to stop the Azure instance")
127+
span.SetStatus(codes.Error, "unable to stop the Azure instance")
128128
span.RecordError(err)
129129
return stacktrace.Propagate(err, "unable to stop the Azure instance")
130130
}
@@ -133,7 +133,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
133133
// Wait for Azure instance to completely stop
134134
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName)
135135
if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
136-
span.SetStatus(codes.Error, "failed to check instance poweroff status")
136+
span.SetStatus(codes.Error, "instance poweroff status check failed")
137137
span.RecordError(err)
138138
return stacktrace.Propagate(err, "instance poweroff status check failed")
139139
}
@@ -156,13 +156,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
156156
log.Info("[Chaos]: Starting back the Azure instance")
157157
if experimentsDetails.ScaleSet == "enable" {
158158
if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
159-
span.SetStatus(codes.Error, "failed to start the Azure instance")
159+
span.SetStatus(codes.Error, "unable to start the Azure instance")
160160
span.RecordError(err)
161161
return stacktrace.Propagate(err, "unable to start the Azure instance")
162162
}
163163
} else {
164164
if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
165-
span.SetStatus(codes.Error, "failed to start the Azure instance")
165+
span.SetStatus(codes.Error, "unable to start the Azure instance")
166166
span.RecordError(err)
167167
return stacktrace.Propagate(err, "unable to start the Azure instance")
168168
}
@@ -171,7 +171,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
171171
// Wait for Azure instance to get in running state
172172
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName)
173173
if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
174-
span.SetStatus(codes.Error, "failed to check instance power on status")
174+
span.SetStatus(codes.Error, "instance power on status check failed")
175175
span.RecordError(err)
176176
return stacktrace.Propagate(err, "instance power on status check failed")
177177
}
@@ -212,13 +212,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
212212
log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName)
213213
if experimentsDetails.ScaleSet == "enable" {
214214
if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
215-
span.SetStatus(codes.Error, "failed to stop the Azure instance")
215+
span.SetStatus(codes.Error, "unable to stop Azure instance")
216216
span.RecordError(err)
217217
return stacktrace.Propagate(err, "unable to stop Azure instance")
218218
}
219219
} else {
220220
if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
221-
span.SetStatus(codes.Error, "failed to stop the Azure instance")
221+
span.SetStatus(codes.Error, "unable to stop Azure instance")
222222
span.RecordError(err)
223223
return stacktrace.Propagate(err, "unable to stop Azure instance")
224224
}
@@ -229,7 +229,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
229229
for _, vmName := range instanceNameList {
230230
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName)
231231
if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
232-
span.SetStatus(codes.Error, "failed to check instance poweroff status")
232+
span.SetStatus(codes.Error, "instance poweroff status check failed")
233233
span.RecordError(err)
234234
return stacktrace.Propagate(err, "instance poweroff status check failed")
235235
}
@@ -253,13 +253,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
253253
log.Infof("[Chaos]: Starting back the Azure instance: %v", vmName)
254254
if experimentsDetails.ScaleSet == "enable" {
255255
if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
256-
span.SetStatus(codes.Error, "failed to start the Azure instance")
256+
span.SetStatus(codes.Error, "unable to start the Azure instance")
257257
span.RecordError(err)
258258
return stacktrace.Propagate(err, "unable to start the Azure instance")
259259
}
260260
} else {
261261
if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
262-
span.SetStatus(codes.Error, "failed to start the Azure instance")
262+
span.SetStatus(codes.Error, "unable to start the Azure instancee")
263263
span.RecordError(err)
264264
return stacktrace.Propagate(err, "unable to start the Azure instance")
265265
}
@@ -270,7 +270,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
270270
for _, vmName := range instanceNameList {
271271
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName)
272272
if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
273-
span.SetStatus(codes.Error, "failed to check instance power on status")
273+
span.SetStatus(codes.Error, "instance power on status check failed")
274274
span.RecordError(err)
275275
return stacktrace.Propagate(err, "instance power on status check failed")
276276
}

chaoslib/litmus/container-kill/lib/container-kill.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
175175
// run the probes during chaos
176176
if len(resultDetails.ProbeDetails) != 0 {
177177
if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
178+
span.SetStatus(codes.Error, "failed to run probes")
179+
span.RecordError(err)
178180
return err
179181
}
180182
}

0 commit comments

Comments
 (0)