Skip to content

Commit 2a2ddea

Browse files
authored
Controller: emit lifecycle events when the VM gets restarted or deleted (#208)
* Controller: emit lifecycle events when the VM gets restarted or deleted
* vm_{scheduling,run}_time → vm_{scheduling,run}_duration for clarity
* Update VM endpoint: only update VM started time when zero
1 parent 1730eaf commit 2a2ddea

4 files changed

Lines changed: 50 additions & 0 deletions

File tree

internal/controller/api_vms.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package controller
22

33
import (
44
"errors"
5+
"github.com/cirruslabs/orchard/internal/controller/lifecycle"
56
storepkg "github.com/cirruslabs/orchard/internal/controller/store"
67
"github.com/cirruslabs/orchard/internal/responder"
78
"github.com/cirruslabs/orchard/internal/simplename"
@@ -130,6 +131,10 @@ func (controller *Controller) updateVM(ctx *gin.Context) responder.Responder {
130131
NewErrorResponse("cannot update status for a VM in a terminal state"))
131132
}
132133

134+
if userVM.Status == v1.VMStatusRunning && dbVM.StartedAt.IsZero() {
135+
dbVM.StartedAt = time.Now()
136+
}
137+
133138
dbVM.Status = userVM.Status
134139
dbVM.StatusMessage = userVM.StatusMessage
135140
dbVM.ImageFQN = userVM.ImageFQN
@@ -197,6 +202,8 @@ func (controller *Controller) deleteVM(ctx *gin.Context) responder.Responder {
197202
return responder.Error(err)
198203
}
199204

205+
lifecycle.Report(vm, "VM deleted", controller.logger)
206+
200207
return responder.Code(http.StatusOK)
201208
})
202209
}
Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
package lifecycle
2+
3+
import (
4+
v1 "github.com/cirruslabs/orchard/pkg/resource/v1"
5+
"go.uber.org/zap"
6+
"time"
7+
)
8+
9+
func Report(vm *v1.VM, message string, logger *zap.SugaredLogger) {
10+
args := []interface{}{
11+
"component", "lifecycle",
12+
"vm_uid", vm.UID,
13+
"vm_name", vm.Name,
14+
"vm_restart_count", vm.RestartCount,
15+
"vm_image", vm.Image,
16+
"vm_status", vm.Status,
17+
}
18+
19+
if vm.ScheduledAt.IsZero() {
20+
// VM was never scheduled
21+
args = append(args, "vm_scheduling_duration", time.Since(vm.CreatedAt))
22+
} else {
23+
args = append(args, "vm_scheduling_duration", vm.ScheduledAt.Sub(vm.CreatedAt))
24+
}
25+
26+
if vm.StartedAt.IsZero() {
27+
// VM was never started
28+
args = append(args, "vm_run_duration", time.Duration(0))
29+
} else {
30+
args = append(args, "vm_run_duration", time.Since(vm.StartedAt))
31+
}
32+
33+
logger.With(args...).Info(message)
34+
}

internal/controller/scheduler/scheduler.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package scheduler
22

33
import (
44
"context"
5+
"github.com/cirruslabs/orchard/internal/controller/lifecycle"
56
"github.com/cirruslabs/orchard/internal/controller/notifier"
67
storepkg "github.com/cirruslabs/orchard/internal/controller/store"
78
"github.com/cirruslabs/orchard/internal/opentelemetry"
@@ -141,6 +142,7 @@ func (scheduler *Scheduler) schedulingLoopIteration() error {
141142
time.Since(unscheduledVM.CreatedAt).Seconds())
142143

143144
unscheduledVM.Worker = worker.Name
145+
unscheduledVM.ScheduledAt = time.Now()
144146

145147
if err := txn.SetVM(unscheduledVM); err != nil {
146148
return err
@@ -240,11 +242,15 @@ func (scheduler *Scheduler) healthCheckVM(txn storepkg.Transaction, nameToWorker
240242
if needsRestart {
241243
logger.Debugf("restarting VM")
242244

245+
lifecycle.Report(&vm, "VM restarted", scheduler.logger)
246+
243247
vm.Status = v1.VMStatusPending
244248
vm.StatusMessage = ""
245249
vm.Worker = ""
246250
vm.RestartedAt = time.Now()
247251
vm.RestartCount++
252+
vm.ScheduledAt = time.Time{}
253+
vm.StartedAt = time.Time{}
248254

249255
return txn.SetVM(vm)
250256
}

pkg/resource/v1/v1.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ type VM struct {
5757
// by the worker using "tart fqn" command after it had pulled the image.
5858
ImageFQN string `json:"image_fqn,omitempty"`
5959

60+
ScheduledAt time.Time `json:"scheduled_at,omitempty"`
61+
StartedAt time.Time `json:"started_at,omitempty"`
62+
6063
Meta
6164
}
6265

0 commit comments

Comments
 (0)