Skip to content

Commit af6e05d

Browse files
committed
feat: add 'process-compose analyze critical-chain' subcommand
Adds a new 'analyze' top-level command with a 'critical-chain' subcommand that prints a tree of processes from top-level (nothing depends on them) down through their dependencies, annotated with startup timings -- in the spirit of 'systemd-analyze critical-chain'. For each process two times are shown: @<offset> Time after the project started that the process became ready. For processes without a readiness signal this is the time the process was launched. +<duration> Time the process spent between launch and becoming ready. Only shown for processes with a readiness probe / liveness probe / 'ready_log_line'. Example output: app (not started) [Pending] ├─migrations (not started) [Pending] │ ├─postgres @4min 30.148s +270.145s │ └─setup @4ms (not ready) ├─postgres @4min 30.148s +270.145s ├─redis @2min 10.446s +130.435s └─setup @4ms (not ready) To support this, three optional timestamp fields are added to ProcessState: - process_start_time: when the process first entered a launched/running state. - process_ready_time: when the process became ready (readiness probe succeeded, ready_log_line matched, or -- for processes without any readiness signal -- equal to process_start_time). - process_end_time: when the process ended (completed / errored / terminated / skipped). These fields are populated in src/app/process.go from the existing lifecycle hook points (run(), setProcHealth(), handleOutput() ready-log match, and onProcessEnd()), are surfaced through the existing GET /processes endpoint, and are JSON-omitempty so existing clients are unaffected. The critical-chain command itself is implemented purely on the client side -- it composes the existing /project/state, /processes, and /graph endpoints and walks the dependency graph (sorting siblings by descending ready-time, like systemd does), so no new server-side endpoints are introduced. If process names are passed as positional arguments, only those processes and their dependency sub-chains are printed; otherwise every top-level process (plus any isolated processes) is printed.
1 parent 4582f33 commit af6e05d

4 files changed

Lines changed: 346 additions & 1 deletion

File tree

src/app/process.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,20 @@ loop:
141141
return 1
142142
}
143143

144-
p.setStartTime(time.Now())
144+
now := time.Now()
145+
p.setStartTime(now)
145146
p.stateMtx.Lock()
146147
p.procState.Pid = p.command.Pid()
148+
if p.procState.ProcessStartTime == nil {
149+
startCopy := now
150+
p.procState.ProcessStartTime = &startCopy
151+
// Processes without any readiness signal are considered ready as
152+
// soon as they start. Probes/log-ready handlers override this later.
153+
if p.readyProber == nil && p.procConf.ReadyLogLine == "" {
154+
readyCopy := now
155+
p.procState.ProcessReadyTime = &readyCopy
156+
}
157+
}
147158
p.metricsProc, err = puproc.NewProcess(int32(p.procState.Pid))
148159
if err != nil {
149160
log.Err(err).Msgf("Could not find pid %d with name %s", p.procState.Pid, p.getName())
@@ -549,6 +560,13 @@ func (p *Process) onProcessEnd(state string) {
549560
p.setState(state)
550561
p.updateProcState()
551562

563+
p.stateMtx.Lock()
564+
if p.procState.ProcessEndTime == nil {
565+
now := time.Now()
566+
p.procState.ProcessEndTime = &now
567+
}
568+
p.stateMtx.Unlock()
569+
552570
p.Lock()
553571
p.done = true
554572
p.Unlock()
@@ -730,6 +748,10 @@ func (p *Process) handleOutput(pipe io.ReadCloser, output string, handler func(m
730748
}
731749
if p.procConf.ReadyLogLine != "" && p.procState.Health == types.ProcessHealthUnknown && strings.Contains(line, p.procConf.ReadyLogLine) {
732750
p.procState.Health = types.ProcessHealthReady
751+
if p.procState.ProcessReadyTime == nil {
752+
now := time.Now()
753+
p.procState.ProcessReadyTime = &now
754+
}
733755
p.cancelReadyLogFunc(nil)
734756
}
735757
p.checkElevatedProcOutput(line)
@@ -1063,6 +1085,10 @@ func (p *Process) setProcHealth(health string) {
10631085
p.stateMtx.Lock()
10641086
defer p.stateMtx.Unlock()
10651087
p.procState.Health = health
1088+
if health == types.ProcessHealthReady && p.procState.ProcessReadyTime == nil {
1089+
now := time.Now()
1090+
p.procState.ProcessReadyTime = &now
1091+
}
10661092
}
10671093

10681094
// set elevated process password

src/cmd/analyze.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package cmd
2+
3+
import (
4+
"os"
5+
6+
"github.com/spf13/cobra"
7+
)
8+
9+
// analyzeCmd represents the analyze command group.
10+
var analyzeCmd = &cobra.Command{
11+
Use: "analyze",
12+
Short: "Analyze startup timing and dependency information",
13+
Long: `Analyze inspects a running process-compose instance to provide insight
14+
into how long each process took to start up and how those times relate
15+
through the dependency graph.
16+
17+
Available subcommands:
18+
critical-chain Print the dependency chains ordered by startup time.`,
19+
Run: func(cmd *cobra.Command, args []string) {
20+
if len(args) == 0 {
21+
_ = cmd.Help()
22+
os.Exit(0)
23+
}
24+
},
25+
}
26+
27+
func init() {
28+
rootCmd.AddCommand(analyzeCmd)
29+
}

src/cmd/analyze_critical_chain.go

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
package cmd
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"sort"
7+
"time"
8+
9+
"github.com/f1bonacc1/process-compose/src/types"
10+
"github.com/fatih/color"
11+
"github.com/spf13/cobra"
12+
)
13+
14+
var analyzeCriticalChainCmd = &cobra.Command{
15+
Use: "critical-chain [process...]",
16+
Short: "Print the critical process startup chain",
17+
Long: `Print a tree of processes ordered from the processes that nothing
18+
depends on (top-level) down through their dependencies, annotated with
19+
startup timings -- similar to 'systemd-analyze critical-chain'.
20+
21+
For each process two times are printed:
22+
23+
@<offset> Time after the project started that the process became ready.
24+
(For processes without a readiness probe this is the time the
25+
process was launched.)
26+
+<duration> Time the process spent between launch and becoming ready.
27+
Only shown for processes with a readiness signal (readiness
28+
probe, liveness probe, or 'ready_log_line').
29+
30+
If process names are given as arguments, only those processes (and their
31+
dependency sub-chains) are printed; otherwise every top-level process is
32+
printed.`,
33+
Run: runAnalyzeCriticalChain,
34+
}
35+
36+
func init() {
37+
analyzeCmd.AddCommand(analyzeCriticalChainCmd)
38+
}
39+
40+
func runAnalyzeCriticalChain(cmd *cobra.Command, args []string) {
41+
c := getClient()
42+
43+
projectState, err := c.GetProjectState(false)
44+
if err != nil {
45+
fmt.Fprintf(os.Stderr, "failed to get project state: %v\n", err)
46+
os.Exit(1)
47+
}
48+
49+
procStates, err := c.GetRemoteProcessesState()
50+
if err != nil {
51+
fmt.Fprintf(os.Stderr, "failed to get process states: %v\n", err)
52+
os.Exit(1)
53+
}
54+
55+
graph, err := c.GetDependencyGraph()
56+
if err != nil {
57+
fmt.Fprintf(os.Stderr, "failed to get dependency graph: %v\n", err)
58+
os.Exit(1)
59+
}
60+
graph.RebuildInternalIndices()
61+
62+
stateByName := make(map[string]*types.ProcessState, len(procStates.States))
63+
for i := range procStates.States {
64+
s := &procStates.States[i]
65+
stateByName[s.Name] = s
66+
}
67+
68+
// Determine the starting set of nodes.
69+
var roots []*types.DependencyNode
70+
if len(args) > 0 {
71+
for _, name := range args {
72+
node, ok := graph.AllNodes[name]
73+
if !ok {
74+
fmt.Fprintf(os.Stderr, "unknown process (or process has no dependencies): %s\n", name)
75+
os.Exit(1)
76+
}
77+
roots = append(roots, node)
78+
}
79+
} else {
80+
// Top-level = processes that nothing depends on. graph.Nodes already
81+
// contains the leaves (see BuildDependencyGraph).
82+
for _, n := range graph.Nodes {
83+
roots = append(roots, n)
84+
}
85+
// Also include processes from the state that aren't in the graph at
86+
// all (isolated) so the user can see their timings too.
87+
inGraph := make(map[string]bool, len(graph.AllNodes))
88+
for name := range graph.AllNodes {
89+
inGraph[name] = true
90+
}
91+
for _, s := range procStates.States {
92+
if !inGraph[s.Name] {
93+
roots = append(roots, &types.DependencyNode{Name: s.Name})
94+
}
95+
}
96+
}
97+
98+
sort.Slice(roots, func(i, j int) bool {
99+
ai := readyOffsetForSort(stateByName[roots[i].Name], projectState.StartTime)
100+
aj := readyOffsetForSort(stateByName[roots[j].Name], projectState.StartTime)
101+
if ai == aj {
102+
return roots[i].Name < roots[j].Name
103+
}
104+
return ai > aj
105+
})
106+
107+
// Header
108+
green := color.New(color.FgGreen).SprintFunc()
109+
fmt.Println("The time when unit became ready is printed after the \"@\" character.")
110+
fmt.Println("The time the unit took to become ready is printed after the \"+\" character.")
111+
fmt.Println()
112+
fmt.Printf("%s: %s\n", green("Project"), projectState.ProjectName)
113+
fmt.Printf("%s: %s\n", green("Started"), projectState.StartTime.Format(time.RFC3339))
114+
fmt.Printf("%s: %s\n", green("Up time"), projectState.UpTime.Round(time.Millisecond))
115+
fmt.Println()
116+
117+
for i, root := range roots {
118+
last := i == len(roots)-1
119+
printCriticalChain(root, stateByName, projectState.StartTime, "", true, last)
120+
}
121+
}
122+
123+
// readyOffsetForSort returns the duration from project start until the process
124+
// became ready. Processes that never became ready sort to the top (return
125+
// max duration) so the "slowest" chain is printed first.
126+
func readyOffsetForSort(s *types.ProcessState, projectStart time.Time) time.Duration {
127+
if s == nil {
128+
return time.Duration(1<<62 - 1)
129+
}
130+
if s.ProcessReadyTime != nil {
131+
return s.ProcessReadyTime.Sub(projectStart)
132+
}
133+
if s.ProcessStartTime != nil {
134+
return s.ProcessStartTime.Sub(projectStart)
135+
}
136+
return time.Duration(1<<62 - 1)
137+
}
138+
139+
func printCriticalChain(
140+
node *types.DependencyNode,
141+
stateByName map[string]*types.ProcessState,
142+
projectStart time.Time,
143+
prefix string,
144+
isRoot bool,
145+
isLast bool,
146+
) {
147+
// Tree branch characters.
148+
branch := ""
149+
nextPrefix := prefix
150+
if !isRoot {
151+
if isLast {
152+
branch = "└─"
153+
nextPrefix = prefix + " "
154+
} else {
155+
branch = "├─"
156+
nextPrefix = prefix + "│ "
157+
}
158+
}
159+
160+
line := formatNodeLine(node, stateByName[node.Name], projectStart)
161+
fmt.Printf("%s%s%s\n", prefix, branch, line)
162+
163+
// Sort dependencies by when they became ready (latest first), mirroring
164+
// systemd-analyze critical-chain.
165+
deps := make([]*types.DependencyNode, 0, len(node.DependsOn))
166+
for _, link := range node.DependsOn {
167+
if link.DependencyNode != nil {
168+
deps = append(deps, link.DependencyNode)
169+
}
170+
}
171+
sort.Slice(deps, func(i, j int) bool {
172+
ai := readyOffsetForSort(stateByName[deps[i].Name], projectStart)
173+
aj := readyOffsetForSort(stateByName[deps[j].Name], projectStart)
174+
if ai == aj {
175+
return deps[i].Name < deps[j].Name
176+
}
177+
return ai > aj
178+
})
179+
180+
for i, dep := range deps {
181+
printCriticalChain(dep, stateByName, projectStart, nextPrefix, false, i == len(deps)-1)
182+
}
183+
}
184+
185+
func formatNodeLine(
186+
node *types.DependencyNode,
187+
state *types.ProcessState,
188+
projectStart time.Time,
189+
) string {
190+
cyan := color.New(color.FgCyan).SprintFunc()
191+
yellow := color.New(color.FgYellow).SprintFunc()
192+
red := color.New(color.FgRed).SprintFunc()
193+
faint := color.New(color.Faint).SprintFunc()
194+
195+
name := cyan(node.Name)
196+
197+
if state == nil {
198+
return fmt.Sprintf("%s %s", name, faint("(no state)"))
199+
}
200+
201+
var parts []string
202+
parts = append(parts, name)
203+
204+
if state.ProcessReadyTime != nil {
205+
offset := state.ProcessReadyTime.Sub(projectStart)
206+
parts = append(parts, fmt.Sprintf("@%s", yellow(formatOffset(offset))))
207+
} else if state.ProcessStartTime != nil {
208+
offset := state.ProcessStartTime.Sub(projectStart)
209+
parts = append(parts,
210+
fmt.Sprintf("@%s", yellow(formatOffset(offset))),
211+
faint("(not ready)"))
212+
} else {
213+
parts = append(parts, faint("(not started)"))
214+
}
215+
216+
// +<duration> only meaningful when there is a gap between start and ready.
217+
if state.ProcessStartTime != nil && state.ProcessReadyTime != nil {
218+
ready := state.ProcessReadyTime.Sub(*state.ProcessStartTime)
219+
if ready > 0 {
220+
parts = append(parts, fmt.Sprintf("+%s", yellow(formatDuration(ready))))
221+
}
222+
}
223+
224+
// Status annotation if anything looks off.
225+
switch state.Status {
226+
case types.ProcessStateError, types.ProcessStateSkipped:
227+
parts = append(parts, red(fmt.Sprintf("[%s]", state.Status)))
228+
case types.ProcessStateCompleted, types.ProcessStateRunning:
229+
// expected -- no extra annotation
230+
default:
231+
if state.Status != "" {
232+
parts = append(parts, faint(fmt.Sprintf("[%s]", state.Status)))
233+
}
234+
}
235+
236+
// Join with spaces.
237+
out := ""
238+
for i, p := range parts {
239+
if i > 0 {
240+
out += " "
241+
}
242+
out += p
243+
}
244+
return out
245+
}
246+
247+
// formatOffset formats a duration representing "time since project start".
248+
// Uses a more compact form for shorter durations and a minute-aware form for
249+
// longer ones, mirroring `systemd-analyze`.
250+
func formatOffset(d time.Duration) string {
251+
if d < 0 {
252+
return "?"
253+
}
254+
if d < time.Minute {
255+
return formatDuration(d)
256+
}
257+
mins := int(d / time.Minute)
258+
rem := d - time.Duration(mins)*time.Minute
259+
return fmt.Sprintf("%dmin %s", mins, formatDuration(rem))
260+
}
261+
262+
// formatDuration prints a short human-friendly duration such as "5ms",
263+
// "1.234s", or "2.500s".
264+
func formatDuration(d time.Duration) string {
265+
if d < 0 {
266+
return "?"
267+
}
268+
if d < time.Microsecond {
269+
return fmt.Sprintf("%dns", d.Nanoseconds())
270+
}
271+
if d < time.Millisecond {
272+
return fmt.Sprintf("%dus", d.Microseconds())
273+
}
274+
if d < time.Second {
275+
return fmt.Sprintf("%dms", d.Milliseconds())
276+
}
277+
secs := float64(d) / float64(time.Second)
278+
return fmt.Sprintf("%.3fs", secs)
279+
}

src/types/process.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,17 @@ type ProcessState struct {
249249
CPU float64 `json:"cpu"`
250250
IsRunning bool `json:"is_running"`
251251
NextRunTime *time.Time `json:"next_run_time,omitempty"`
252+
// ProcessStartTime is the wall-clock time the process (first) entered a
253+
// running/launched state. Used by `process-compose analyze critical-chain`.
254+
ProcessStartTime *time.Time `json:"process_start_time,omitempty"`
255+
// ProcessReadyTime is the wall-clock time the process became ready. For
256+
// processes with a readiness probe or a `ready_log_line`, this is set when
257+
// the probe succeeds / the line is observed. For processes without any
258+
// readiness probe, it equals ProcessStartTime.
259+
ProcessReadyTime *time.Time `json:"process_ready_time,omitempty"`
260+
// ProcessEndTime is the wall-clock time the process ended (completed,
261+
// errored, terminated, or was skipped).
262+
ProcessEndTime *time.Time `json:"process_end_time,omitempty"`
252263
}
253264

254265
type ProcessPorts struct {

0 commit comments

Comments
 (0)