Skip to content

Commit c3ba8fb

Browse files
Ehco1996 and claude authored
web: admin revamp — operator's terminal + IA + bucketed metrics (Ehco1996#450)
* web: admin revamp — operator's terminal direction + IA shuffle + bucketed metrics Visual: replace Inter with JetBrains Mono everywhere; sharper geometry (rounded-md, 1px borders); rate-first home strip with ↓/↑ as the page's anchor; full-width host throughput chart; lowercase page titles; hover- only conn kill; logs zebra rows. IA: flatten the 8-page / 2-group sidebar to 6 flat items. Home subsumes the old standalone Node page (host charts embedded). Settings owns the Updates flow as an embedded panel. /xray/users → /users, /xray/conns → /conns. Legacy paths redirect. Metrics store: query API now accepts a `step` (seconds) param. Node metrics are bucket-averaged via GROUP BY (timestamp/step)*step; rule metrics keep the last sample per (label,remote,bucket) so the SPA's delta-on-consecutive-points trend math stays correct. Frontend picks a step per window so 30d windows return ≤360 points. New /api/v1/overview aggregate. XrayServer satisfies a new glue.XrayStatus interface and is wired post-construction so web/ doesn't need to import pkg/xray. Saves the home page three RTs per poll tick. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * web: fix y-axis truncation, enable metric_reader on darwin, simplify pass Visual fix: chart y-axis was clipping "12 MB/s" to "2 MB/s" because the default uPlot gutter is too narrow for 7-char byte labels. New bytesShort() helper renders compact "12M/s" / "830K/s" form for axes; keep verbose bytes() for tabular contexts. Darwin metric_reader: node_darwin.go was a no-op since 2025-11-08 with a stale "node_exporter has compatibility issues on macOS" comment. v1.10.2 ships working darwin collectors for cpu / meminfo / loadavg / filesystem / netdev — verified all the metrics ParseNodeMetrics expects are emitted. Wire the same NewNodeCollector path as linux, with --no-collector.thermal to silence "no CPU power status has been recorded" log noise. 
Simplify pass over the PR diff: - Extract zapLevelToSlogLevel to internal/metrics/log_level.go (was duplicated verbatim across node_linux.go and node_darwin.go). - QueryRuleMetric: deduplicate the column list and WHERE filters between bucketed and raw branches; share whereSQL/whereArgs. - OverviewResp: drop redundant NetRateIn/NetRateOut — they duplicated host.network_in/out. SPA reads host() directly. - Home page: memoize series() (was re-sorting + re-allocating every read across 3 charts); precompute topUsers scoring (was running recentBytes(u) O(n log n) times inside sort comparator); extract <ChartCard> + <ListHeader> components for the three near-identical card wrappers; delete section-divider comments and dead `void poll`. - Settings + UpdatesPanel: lowercase CardHeader titles for consistency with the rest of the revamped pages. - handler_api.go::Overview: hoist time.Now() to a local. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c8ab14e commit c3ba8fb

29 files changed

Lines changed: 815 additions & 690 deletions

internal/cli/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ func MustStartComponents(mainCtx context.Context, cfg *config.Config) {
127127
}
128128
if webS != nil {
129129
xrayS.RegisterRoutes(webS.APIGroup())
130+
webS.SetXrayStatus(xrayS)
130131
}
131132
if err := xrayS.Start(mainCtx); err != nil {
132133
cliLogger.Fatalf("Start XrayServer meet err=%v", err)

internal/cmgr/ms/handler.go

Lines changed: 63 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package ms
22

33
import (
44
"context"
5+
"database/sql"
56

67
"github.com/Ehco1996/ehco/pkg/metric_reader"
78
)
@@ -20,6 +21,10 @@ type QueryNodeMetricsReq struct {
2021
StartTimestamp int64
2122
EndTimestamp int64
2223
Num int64
24+
// Step buckets samples into N-second windows when > 1, averaging
25+
// every gauge field per bucket. Lets the SPA pull 7d/30d windows
26+
// without dragging back hundreds of thousands of raw points.
27+
Step int64
2328
}
2429

2530
type QueryNodeMetricsResp struct {
@@ -47,6 +52,11 @@ type QueryRuleMetricsReq struct {
4752
StartTimestamp int64
4853
EndTimestamp int64
4954
Num int64
55+
// Step keeps the last sample per (label, remote) within each
56+
// N-second bucket. Counter-style fields (transmit bytes) keep
57+
// monotonic semantics so the SPA's delta-on-consecutive-points
58+
// trend math still works after bucketing.
59+
Step int64
5060
}
5161

5262
type QueryRuleMetricsResp struct {
@@ -94,13 +104,33 @@ func (ms *MetricsStore) AddRuleMetric(ctx context.Context, rm *metric_reader.Rul
94104
}
95105

96106
func (ms *MetricsStore) QueryNodeMetric(ctx context.Context, req *QueryNodeMetricsReq) (*QueryNodeMetricsResp, error) {
97-
rows, err := ms.db.QueryContext(ctx, `
98-
SELECT timestamp, cpu_usage, memory_usage, disk_usage, network_in, network_out
99-
FROM node_metrics
100-
WHERE timestamp >= ? AND timestamp <= ?
101-
ORDER BY timestamp DESC
102-
LIMIT ?
103-
`, req.StartTimestamp, req.EndTimestamp, req.Num)
107+
var (
108+
rows *sql.Rows
109+
err error
110+
)
111+
if req.Step > 1 {
112+
// Floor each timestamp to a step-second bucket and average every
113+
// gauge field. Cheaper than rolling a separate downsample table
114+
// for the windows we care about (≤30d).
115+
rows, err = ms.db.QueryContext(ctx, `
116+
SELECT (timestamp/?)*? AS bucket_ts,
117+
AVG(cpu_usage), AVG(memory_usage), AVG(disk_usage),
118+
AVG(network_in), AVG(network_out)
119+
FROM node_metrics
120+
WHERE timestamp >= ? AND timestamp <= ?
121+
GROUP BY bucket_ts
122+
ORDER BY bucket_ts DESC
123+
LIMIT ?
124+
`, req.Step, req.Step, req.StartTimestamp, req.EndTimestamp, req.Num)
125+
} else {
126+
rows, err = ms.db.QueryContext(ctx, `
127+
SELECT timestamp, cpu_usage, memory_usage, disk_usage, network_in, network_out
128+
FROM node_metrics
129+
WHERE timestamp >= ? AND timestamp <= ?
130+
ORDER BY timestamp DESC
131+
LIMIT ?
132+
`, req.StartTimestamp, req.EndTimestamp, req.Num)
133+
}
104134
if err != nil {
105135
return nil, err
106136
}
@@ -119,29 +149,37 @@ func (ms *MetricsStore) QueryNodeMetric(ctx context.Context, req *QueryNodeMetri
119149
}
120150

121151
func (ms *MetricsStore) QueryRuleMetric(ctx context.Context, req *QueryRuleMetricsReq) (*QueryRuleMetricsResp, error) {
122-
query := `
123-
SELECT timestamp, label, remote, ping_latency,
124-
tcp_connection_count, tcp_handshake_duration, tcp_network_transmit_bytes,
125-
udp_connection_count, udp_handshake_duration, udp_network_transmit_bytes
126-
FROM rule_metrics
127-
WHERE timestamp >= ? AND timestamp <= ?
128-
`
129-
args := []interface{}{req.StartTimestamp, req.EndTimestamp}
130-
152+
// Bucketed mode keeps the last sample per (label, remote) inside each
153+
// step-second window. The bytes columns are monotonic counters, so
154+
// last-of-bucket preserves the deltas the SPA computes — averaging
155+
// would smear the curve.
156+
const cols = `timestamp, label, remote, ping_latency,
157+
tcp_connection_count, tcp_handshake_duration, tcp_network_transmit_bytes,
158+
udp_connection_count, udp_handshake_duration, udp_network_transmit_bytes`
159+
160+
whereSQL := "WHERE timestamp >= ? AND timestamp <= ?"
161+
whereArgs := []interface{}{req.StartTimestamp, req.EndTimestamp}
131162
if req.RuleLabel != "" {
132-
query += " AND label = ?"
133-
args = append(args, req.RuleLabel)
163+
whereSQL += " AND label = ?"
164+
whereArgs = append(whereArgs, req.RuleLabel)
134165
}
135166
if req.Remote != "" {
136-
query += " AND remote = ?"
137-
args = append(args, req.Remote)
167+
whereSQL += " AND remote = ?"
168+
whereArgs = append(whereArgs, req.Remote)
138169
}
139170

140-
query += `
141-
ORDER BY timestamp DESC
142-
LIMIT ?
143-
`
144-
args = append(args, req.Num)
171+
var query string
172+
var args []interface{}
173+
if req.Step > 1 {
174+
query = "SELECT " + cols + " FROM rule_metrics WHERE rowid IN (" +
175+
"SELECT MAX(rowid) FROM rule_metrics " + whereSQL +
176+
" GROUP BY (timestamp/?), label, remote) ORDER BY timestamp DESC LIMIT ?"
177+
args = append(append([]interface{}{}, whereArgs...), req.Step, req.Num)
178+
} else {
179+
query = "SELECT " + cols + " FROM rule_metrics " + whereSQL +
180+
" ORDER BY timestamp DESC LIMIT ?"
181+
args = append(whereArgs, req.Num)
182+
}
145183

146184
rows, err := ms.db.Query(query, args...)
147185
if err != nil {

internal/glue/interface.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,21 @@ type HealthChecker interface {
1212
// get relay by ID and check the connection health
1313
HealthCheck(ctx context.Context, RelayID string) (int64, error)
1414
}
15+
16+
// XrayStatus is the slice of XrayServer the web admin needs for its
17+
// aggregate /overview endpoint. Defined here so web/ doesn't need to
18+
// import pkg/xray.
19+
type XrayStatus interface {
20+
// Snapshot returns instantaneous counters scraped from the user
21+
// pool and conn tracker. Cheap — no DB hits.
22+
Snapshot() XraySnapshot
23+
}
24+
25+
// XraySnapshot is the value returned by XrayStatus.Snapshot. It is
// serialized to JSON using the field tags below.
//
// NOTE(review): the per-field meanings are inferred from the names only
// (connection count, user counts by state, cumulative upload/download
// totals); units and exact semantics should be confirmed against the
// XrayServer implementation.
type XraySnapshot struct {
26+
Conns int `json:"conns"`
27+
Users int `json:"users"`
28+
EnabledUsers int `json:"enabled_users"`
29+
RunningUsers int `json:"running_users"`
30+
UploadTotal int64 `json:"upload_total"`
31+
DownloadTotal int64 `json:"download_total"`
32+
}

internal/metrics/log_level.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package metrics
2+
3+
import (
4+
"log/slog"
5+
"strings"
6+
)
7+
8+
// zapLevelToSlogLevel maps a zap-style level name to the matching
// slog.Level. Matching is case-insensitive, "warn" and "warning" are
// treated as synonyms, and any unrecognized name (including the empty
// string) falls back to slog.LevelInfo.
func zapLevelToSlogLevel(zapLevel string) slog.Level {
9+
switch strings.ToLower(zapLevel) {
10+
case "debug":
11+
return slog.LevelDebug
12+
case "info":
13+
return slog.LevelInfo
14+
case "warn", "warning":
15+
return slog.LevelWarn
16+
case "error":
17+
return slog.LevelError
18+
default:
19+
// Unknown level names are not an error; default to info.
20+
return slog.LevelInfo
21+
}
}

internal/metrics/node_darwin.go

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,34 @@
33
package metrics
44

55
import (
6+
"fmt"
7+
"log/slog"
8+
"os"
9+
610
"github.com/Ehco1996/ehco/internal/config"
11+
"github.com/alecthomas/kingpin/v2"
12+
"github.com/prometheus/client_golang/prometheus"
13+
"github.com/prometheus/node_exporter/collector"
714
)
815

9-
// RegisterNodeExporterMetrics is a no-op on Darwin/macOS
10-
// node_exporter has compatibility issues on macOS, so we disable it
16+
// `thermal` collector logs an ERROR per scrape on most Macs with
17+
// "no CPU power status has been recorded". Disable it via kingpin so
18+
// logs stay quiet. Kept separate from node_linux.go because Linux
19+
// doesn't have the collector and shouldn't carry the flag noise.
1120
// RegisterNodeExporterMetrics builds a node_exporter collector and
// registers it with the default Prometheus registry. It returns an error
// if flag parsing or collector construction fails.
func RegisterNodeExporterMetrics(cfg *config.Config) error {
12-
// node_exporter is not supported on macOS, skip registration
21+
// The collector logs through slog to stdout, at the level derived from
// the application's configured log level.
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
22+
Level: zapLevelToSlogLevel(cfg.LogLeveL),
23+
}))
24+
25+
// Collector toggles are passed as flags on kingpin's global command
// line; the only flag set here disables the thermal collector.
if _, err := kingpin.CommandLine.Parse([]string{
26+
"--no-collector.thermal",
27+
}); err != nil {
28+
return err
29+
}
30+
nc, err := collector.NewNodeCollector(logger)
31+
if err != nil {
32+
return fmt.Errorf("couldn't create collector: %w", err)
33+
}
34+
// NOTE(review): MustRegister panics rather than returning an error
// (e.g. on duplicate registration) — confirm this is only called once.
prometheus.MustRegister(nc)
1335
return nil
1436
}

internal/metrics/node_linux.go

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,30 +6,13 @@ import (
66
"fmt"
77
"log/slog"
88
"os"
9-
"strings"
109

1110
"github.com/Ehco1996/ehco/internal/config"
1211
"github.com/alecthomas/kingpin/v2"
1312
"github.com/prometheus/client_golang/prometheus"
1413
"github.com/prometheus/node_exporter/collector"
1514
)
1615

17-
// zapLevelToSlogLevel converts zap log level string to slog.Level
18-
func zapLevelToSlogLevel(zapLevel string) slog.Level {
19-
switch strings.ToLower(zapLevel) {
20-
case "debug":
21-
return slog.LevelDebug
22-
case "info":
23-
return slog.LevelInfo
24-
case "warn", "warning":
25-
return slog.LevelWarn
26-
case "error":
27-
return slog.LevelError
28-
default:
29-
return slog.LevelInfo
30-
}
31-
}
32-
3316
func RegisterNodeExporterMetrics(cfg *config.Config) error {
3417
slogLevel := zapLevelToSlogLevel(cfg.LogLeveL)
3518
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{

internal/web/handler_api.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99

1010
"github.com/Ehco1996/ehco/internal/cmgr/ms"
11+
"github.com/Ehco1996/ehco/internal/glue"
1112
"github.com/labstack/echo/v4"
1213
)
1314

@@ -20,6 +21,7 @@ type queryParams struct {
2021
startTS int64
2122
endTS int64
2223
latest bool
24+
step int64
2325
}
2426

2527
func parseQueryParams(c echo.Context) (*queryParams, error) {
@@ -41,6 +43,10 @@ func parseQueryParams(c echo.Context) (*queryParams, error) {
4143
params.latest = latest
4244
}
4345

46+
if step, err := strconv.ParseInt(c.QueryParam("step"), 10, 64); err == nil && step > 0 {
47+
params.step = step
48+
}
49+
4450
if params.startTS >= params.endTS {
4551
return nil, fmt.Errorf(errInvalidParam, "time range")
4652
}
@@ -60,7 +66,7 @@ func (s *Server) GetNodeMetrics(c echo.Context) error {
6066
if err != nil {
6167
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
6268
}
63-
req := &ms.QueryNodeMetricsReq{StartTimestamp: params.startTS, EndTimestamp: params.endTS, Num: -1}
69+
req := &ms.QueryNodeMetricsReq{StartTimestamp: params.startTS, EndTimestamp: params.endTS, Num: -1, Step: params.step}
6470
if params.latest {
6571
req.Num = 1
6672
}
@@ -80,6 +86,7 @@ func (s *Server) GetRuleMetrics(c echo.Context) error {
8086
StartTimestamp: params.startTS,
8187
EndTimestamp: params.endTS,
8288
Num: -1,
89+
Step: params.step,
8390
RuleLabel: c.QueryParam("label"),
8491
Remote: c.QueryParam("remote"),
8592
}
@@ -134,6 +141,44 @@ func (s *Server) HandleReload(c echo.Context) error {
134141
return nil
135142
}
136143

144+
// OverviewResp bundles everything the SPA's home page polls — saves
145+
// the front-end the 3 parallel fetches it would otherwise need on
146+
// every refresh tick. Fields stay nil/zero when their subsystem is
147+
// disabled (xray-less deployments, no host sampler yet).
148+
type OverviewResp struct {
149+
// Xray is the snapshot taken from the wired XrayStatus; it is left nil
// (and omitted from the JSON) when no XrayStatus has been set.
Xray *glue.XraySnapshot `json:"xray,omitempty"`
150+
// Host is the first node-metrics row returned for the last five
// minutes; nil when the query fails or returns no rows.
Host *ms.NodeMetrics `json:"host,omitempty"`
151+
// Rules is the number of configured relay configs.
Rules int `json:"rules"`
152+
}
153+
154+
// Overview serves the aggregate overview payload as JSON with HTTP 200.
// Each section is filled in independently and best-effort: a missing
// config, unset xray status, missing connection manager, or failed
// metrics query leaves the corresponding field at its zero value rather
// than failing the request.
func (s *Server) Overview(c echo.Context) error {
155+
out := OverviewResp{}
156+
157+
if s.cfg != nil {
158+
out.Rules = len(s.cfg.RelayConfigs)
159+
}
160+
161+
// xrayStatus holds a pointer to an interface value, so both the
// pointer and the interface it points at are checked before use.
if p := s.xrayStatus.Load(); p != nil && *p != nil {
162+
snap := (*p).Snapshot()
163+
out.Xray = &snap
164+
}
165+
166+
if s.connMgr != nil {
167+
now := time.Now()
168+
// Ask for a single row from the last five minutes. Step is left at
// zero. NOTE(review): presumably this reaches the store's raw,
// newest-first query so the row is the latest sample — confirm.
req := &ms.QueryNodeMetricsReq{
169+
StartTimestamp: now.Add(-5 * time.Minute).Unix(),
170+
EndTimestamp: now.Unix(),
171+
Num: 1,
172+
}
173+
// A query error is intentionally not surfaced: Host simply stays nil.
if resp, err := s.connMgr.QueryNodeMetrics(c.Request().Context(), req); err == nil && len(resp.Data) > 0 {
174+
h := resp.Data[0]
175+
out.Host = &h
176+
}
177+
}
178+
179+
return c.JSON(http.StatusOK, out)
180+
}
181+
137182
func (s *Server) HandleHealthCheck(c echo.Context) error {
138183
relayLabel := c.QueryParam("relay_label")
139184
if relayLabel == "" {

internal/web/server.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,20 @@ type Server struct {
3434

3535
connMgr cmgr.Cmgr
3636
updateJob atomic.Pointer[JobStatus]
37+
38+
// xrayStatus is wired post-construction by cli boot once the
39+
// XrayServer exists. Always read via Load() — may be nil when
40+
// xray sync is disabled. Atomic pointer keeps it lock-free.
41+
xrayStatus atomic.Pointer[glue.XrayStatus]
42+
}
43+
44+
// SetXrayStatus is called by cli boot once the XrayServer is
45+
// constructed. The /overview handler picks it up via Load().
46+
func (s *Server) SetXrayStatus(p glue.XrayStatus) {
47+
// A nil interface is ignored so that Load() keeps returning nil.
// NOTE(review): an interface wrapping a typed-nil pointer is non-nil
// and would still be stored; callers must not pass one.
if p == nil {
48+
return
49+
}
50+
// Store the address of the local copy of the interface value;
// atomic.Pointer needs a *glue.XrayStatus.
s.xrayStatus.Store(&p)
3751
}
3852

3953
func NewServer(
@@ -112,6 +126,7 @@ func setupRoutes(s *Server) {
112126
api.GET("/health_check/", s.HandleHealthCheck)
113127
api.GET("/node_metrics/", s.GetNodeMetrics)
114128
api.GET("/rule_metrics/", s.GetRuleMetrics)
129+
api.GET("/overview", s.Overview)
115130
api.GET("/version", s.Version)
116131
api.GET("/update/check", s.UpdateCheck)
117132
api.POST("/update/apply", s.UpdateApply)

0 commit comments

Comments
 (0)