Skip to content

Commit 77451ed

Browse files
authored
Add agent command for reporting metrics (#8)
1 parent 26cfdae commit 77451ed

File tree

7 files changed

+458
-2
lines changed

7 files changed

+458
-2
lines changed

.golangci.yml

-1
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,4 @@ issues:
2525
- vendor/
2626

2727
run:
28-
deadline: 5m
2928
tests: true

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ adheres to [Semantic Versioning](http://semver.org/).
99
### Added
1010

1111
- Made API endpoint configurable
12+
- Added [agent command](./README.md#agent-command)
1213

1314
## [0.1.0] - 2024-12-09
1415

README.md

+16
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@ Optional flags:
5959
- `-v, --revision`: Revision being deployed
6060
- `-u, --user`: Local username of the person deploying
6161

62+
### Agent Command
63+
64+
Start a metrics reporting agent that collects and sends system metrics to Honeybadger Insights:
65+
66+
```bash
67+
hb agent
68+
```
69+
70+
The agent collects and reports the following metrics:
71+
- CPU usage and load averages
72+
- Memory usage (total, used, free, available)
73+
- Disk usage for all mounted filesystems
74+
75+
Optional flags:
76+
- `-i, --interval`: Reporting interval in seconds (default: 60)
77+
6278
## Development
6379

6480
Pull requests are welcome. If you're adding a new feature, please [submit an issue](https://github.com/honeybadger-io/cli/issues/new) as a preliminary step; that way you can be (moderately) sure that your pull request will be accepted.

cmd/agent.go

+231
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
package cmd
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"math"
9+
"net/http"
10+
"os"
11+
"strings"
12+
"time"
13+
14+
"github.com/shirou/gopsutil/v3/cpu"
15+
"github.com/shirou/gopsutil/v3/disk"
16+
"github.com/shirou/gopsutil/v3/load"
17+
"github.com/shirou/gopsutil/v3/mem"
18+
"github.com/spf13/cobra"
19+
"github.com/spf13/viper"
20+
)
21+
22+
// interval is the reporting period of the agent command, in seconds.
// It is bound to the command's --interval / -i flag, which defaults to 60.
var interval int
25+
26+
// cpuPayload is the JSON event body sent for CPU metrics.
//
// reportMetrics fills Ts with an RFC 3339 UTC timestamp, Event with
// "report.system.cpu", and Host with the local hostname. UsedPercent is
// rounded to two decimal places; the LoadAvg fields carry the 1, 5 and 15
// minute load averages, and NumCPUs the CPU count (0 when it is unavailable).
type cpuPayload struct {
	Ts          string  `json:"ts"`
	Event       string  `json:"event_type"`
	Host        string  `json:"host"`
	UsedPercent float64 `json:"used_percent"`
	LoadAvg1    float64 `json:"load_avg_1"`
	LoadAvg5    float64 `json:"load_avg_5"`
	LoadAvg15   float64 `json:"load_avg_15"`
	NumCPUs     int     `json:"num_cpus"`
}
36+
37+
// memoryPayload is the JSON event body sent for memory metrics.
//
// reportMetrics fills Ts with an RFC 3339 UTC timestamp, Event with
// "report.system.memory", and Host with the local hostname. Total, Used,
// Free and Available are serialized under *_bytes keys; UsedPercent is
// rounded to two decimal places.
type memoryPayload struct {
	Ts          string  `json:"ts"`
	Event       string  `json:"event_type"`
	Host        string  `json:"host"`
	Total       uint64  `json:"total_bytes"`
	Used        uint64  `json:"used_bytes"`
	Free        uint64  `json:"free_bytes"`
	Available   uint64  `json:"available_bytes"`
	UsedPercent float64 `json:"used_percent"`
}
47+
48+
// diskPayload is the JSON event body sent for one disk partition.
//
// reportMetrics fills Ts with an RFC 3339 UTC timestamp, Event with
// "report.system.disk", and Host with the local hostname. Mountpoint, Device
// and Fstype identify the partition; Total, Used and Free are serialized
// under *_bytes keys; UsedPercent is rounded to two decimal places.
type diskPayload struct {
	Ts          string  `json:"ts"`
	Event       string  `json:"event_type"`
	Host        string  `json:"host"`
	Mountpoint  string  `json:"mountpoint"`
	Device      string  `json:"device"`
	Fstype      string  `json:"fstype"`
	Total       uint64  `json:"total_bytes"`
	Used        uint64  `json:"used_bytes"`
	Free        uint64  `json:"free_bytes"`
	UsedPercent float64 `json:"used_percent"`
}
60+
61+
// agentCmd represents the agent command
62+
var agentCmd = &cobra.Command{
63+
Use: "agent",
64+
Short: "Start a metrics reporting agent",
65+
Long: `Start a persistent process that periodically reports host metrics to Honeybadger's Insights API.
66+
This command collects and reports system metrics such as CPU usage, memory usage, disk usage, and load averages.
67+
Metrics are aggregated and reported at a configurable interval (default: 60 seconds).`,
68+
RunE: func(cmd *cobra.Command, args []string) error {
69+
// Check for API key before starting
70+
apiKey := viper.GetString("api_key")
71+
if apiKey == "" {
72+
return fmt.Errorf("API key not configured. Use --api-key flag or set HONEYBADGER_API_KEY environment variable")
73+
}
74+
75+
ctx := context.Background()
76+
ticker := time.NewTicker(time.Duration(interval) * time.Second)
77+
defer ticker.Stop()
78+
79+
hostname, err := os.Hostname()
80+
if err != nil {
81+
hostname = "unknown"
82+
}
83+
84+
fmt.Printf("Starting metrics agent, reporting every %d seconds...\n", interval)
85+
86+
for {
87+
select {
88+
case <-ctx.Done():
89+
return nil
90+
case <-ticker.C:
91+
if err := reportMetrics(hostname); err != nil {
92+
fmt.Fprintf(os.Stderr, "Error reporting metrics: %v\n", err)
93+
}
94+
}
95+
}
96+
},
97+
}
98+
99+
func init() {
100+
rootCmd.AddCommand(agentCmd)
101+
agentCmd.Flags().IntVarP(&interval, "interval", "i", 60, "Reporting interval in seconds")
102+
}
103+
104+
// sendMetric sends a single metric event to Honeybadger
105+
func sendMetric(payload interface{}) error {
106+
jsonData, err := json.Marshal(payload)
107+
if err != nil {
108+
return fmt.Errorf("error marshaling metrics: %w", err)
109+
}
110+
111+
req, err := http.NewRequest("POST", fmt.Sprintf("%s/v1/events", endpoint), strings.NewReader(string(jsonData)+"\n"))
112+
if err != nil {
113+
return fmt.Errorf("error creating request: %w", err)
114+
}
115+
116+
req.Header.Set("Content-Type", "application/json")
117+
req.Header.Set("X-API-Key", viper.GetString("api_key"))
118+
119+
client := &http.Client{Timeout: 10 * time.Second}
120+
resp, err := client.Do(req)
121+
if err != nil {
122+
return fmt.Errorf("error sending metrics: %w", err)
123+
}
124+
defer func() {
125+
if cerr := resp.Body.Close(); cerr != nil {
126+
fmt.Fprintf(os.Stderr, "error closing response body: %v\n", cerr)
127+
}
128+
}()
129+
130+
if resp.StatusCode >= 300 {
131+
body, _ := io.ReadAll(resp.Body)
132+
return fmt.Errorf("received error response: %s\n%s", resp.Status, body)
133+
}
134+
135+
return nil
136+
}
137+
138+
func reportMetrics(hostname string) error {
139+
timestamp := time.Now().UTC().Format(time.RFC3339)
140+
141+
// Collect and send CPU metrics
142+
cpuPercent, err := cpu.Percent(time.Second, false)
143+
if err != nil {
144+
return fmt.Errorf("error getting CPU metrics: %w", err)
145+
}
146+
147+
loadAvg, err := load.Avg()
148+
if err != nil {
149+
return fmt.Errorf("error getting load average: %w", err)
150+
}
151+
152+
numCPU, err := cpu.Counts(true)
153+
if err != nil {
154+
numCPU = 0 // fallback if we can't get the count
155+
}
156+
157+
cpuPayload := cpuPayload{
158+
Ts: timestamp,
159+
Event: "report.system.cpu",
160+
Host: hostname,
161+
UsedPercent: math.Round(cpuPercent[0]*100) / 100,
162+
LoadAvg1: loadAvg.Load1,
163+
LoadAvg5: loadAvg.Load5,
164+
LoadAvg15: loadAvg.Load15,
165+
NumCPUs: numCPU,
166+
}
167+
if err := sendMetric(cpuPayload); err != nil {
168+
return fmt.Errorf("error sending CPU metrics: %w", err)
169+
}
170+
171+
// Collect and send memory metrics
172+
virtualMem, err := mem.VirtualMemory()
173+
if err != nil {
174+
return fmt.Errorf("error getting memory metrics: %w", err)
175+
}
176+
177+
memoryPayload := memoryPayload{
178+
Ts: timestamp,
179+
Event: "report.system.memory",
180+
Host: hostname,
181+
Total: virtualMem.Total,
182+
Used: virtualMem.Used,
183+
Free: virtualMem.Free,
184+
Available: virtualMem.Available,
185+
UsedPercent: math.Round(virtualMem.UsedPercent*100) / 100,
186+
}
187+
if err := sendMetric(memoryPayload); err != nil {
188+
return fmt.Errorf("error sending memory metrics: %w", err)
189+
}
190+
191+
// Collect and send disk metrics
192+
parts, err := disk.Partitions(false)
193+
if err != nil {
194+
return fmt.Errorf("error getting disk partitions: %w", err)
195+
}
196+
197+
// Send metrics for each disk partition
198+
for _, part := range parts {
199+
// Skip pseudo filesystems
200+
if part.Fstype == "devfs" || part.Fstype == "autofs" || part.Fstype == "nullfs" ||
201+
strings.HasPrefix(part.Fstype, "fuse.") ||
202+
strings.Contains(part.Mountpoint, "/System/Volumes") {
203+
continue
204+
}
205+
206+
usage, err := disk.Usage(part.Mountpoint)
207+
if err != nil {
208+
// Log error but continue with other partitions
209+
fmt.Fprintf(os.Stderr, "Error getting disk usage for %s: %v\n", part.Mountpoint, err)
210+
continue
211+
}
212+
213+
diskPayload := diskPayload{
214+
Ts: timestamp,
215+
Event: "report.system.disk",
216+
Host: hostname,
217+
Mountpoint: part.Mountpoint,
218+
Device: part.Device,
219+
Fstype: part.Fstype,
220+
Total: usage.Total,
221+
Used: usage.Used,
222+
Free: usage.Free,
223+
UsedPercent: math.Round(usage.UsedPercent*100) / 100,
224+
}
225+
if err := sendMetric(diskPayload); err != nil {
226+
return fmt.Errorf("error sending disk metrics for %s: %w", part.Mountpoint, err)
227+
}
228+
}
229+
230+
return nil
231+
}

0 commit comments

Comments
 (0)