Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions eos/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,16 @@ func TestParseEOSServerVersion(t *testing.T) {
input: "EOS_INSTANCE=eosdev\n",
want: "",
},
{
name: "dash dash version output",
input: "EOS 5.4.2 (2026)\n\nDeveloped by the CERN IT Storage Group\n",
want: "5.4.2",
},
{
name: "dash dash version older output",
input: "EOS 5.4.0 (2020)\n\nDeveloped by the CERN IT storage group\n",
want: "5.4.0",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -1228,6 +1238,116 @@ func TestParseNamespaceAttrsNoEquals(t *testing.T) {
}
}

func TestParseMonitoringKeyValues(t *testing.T) {
input := []byte(`
uid=all gid=all is_master=true master_id=eospilot-ns-02.cern.ch:1094
uid=all gid=all ns.mgm.local=eospilot-ns-02.cern.ch:1094
uid=all gid=all ns.mgm.role=leader
uid=all gid=all ns.mgm.leader=eospilot-ns-02.cern.ch:1094
uid=all gid=all ns.mgm.followers=eospilot-ns-ip700.cern.ch:1094,eospilot-ns-01.cern.ch:1094
uid=all gid=all ns.qdb.leader=eospilot-ns-02.cern.ch:7777
uid=all gid=all ns.qdb.followers=eospilot-ns-01.cern.ch:7777,eospilot-ns-ip700.cern.ch:7777
`)
values := parseMonitoringKeyValues(input)

if got := values["ns.mgm.leader"]; got != "eospilot-ns-02.cern.ch:1094" {
t.Fatalf("expected ns.mgm.leader, got %q", got)
}
if got := values["ns.qdb.followers"]; got != "eospilot-ns-01.cern.ch:7777,eospilot-ns-ip700.cern.ch:7777" {
t.Fatalf("expected ns.qdb.followers, got %q", got)
}
if got := values["master_id"]; got != "eospilot-ns-02.cern.ch:1094" {
t.Fatalf("expected master_id, got %q", got)
}
}

func TestParseMGMsFromNSStatMonitoring(t *testing.T) {
input := []byte(`
uid=all gid=all ns.mgm.local=eospilot-ns-02.cern.ch:1094
uid=all gid=all ns.mgm.role=leader
uid=all gid=all ns.mgm.leader=eospilot-ns-02.cern.ch:1094
uid=all gid=all ns.mgm.followers=eospilot-ns-ip700.cern.ch:1094,eospilot-ns-01.cern.ch:1094
uid=all gid=all ns.qdb.leader=eospilot-ns-02.cern.ch:7777
uid=all gid=all ns.qdb.followers=eospilot-ns-01.cern.ch:7777,eospilot-ns-ip700.cern.ch:7777
`)

mgms, ok := parseMGMsFromNSStatMonitoring(input)
if !ok {
t.Fatal("expected structured ns stat monitoring output to parse")
}
if len(mgms) != 3 {
t.Fatalf("expected 3 combined records, got %d", len(mgms))
}

if mgms[0].Host != "eospilot-ns-02.cern.ch" || mgms[0].Port != 1094 || mgms[0].Role != "leader" || mgms[0].Status != "online" {
t.Fatalf("unexpected MGM leader record: %+v", mgms[0])
}
if mgms[0].QDBHost != "eospilot-ns-02.cern.ch" || mgms[0].QDBPort != 7777 || mgms[0].QDBRole != "leader" || mgms[0].QDBStatus != "online" {
t.Fatalf("unexpected QDB leader record: %+v", mgms[0])
}
if mgms[1].Host != "eospilot-ns-ip700.cern.ch" || mgms[1].Role != "follower" {
t.Fatalf("unexpected first follower record: %+v", mgms[1])
}
if mgms[1].QDBHost != "eospilot-ns-01.cern.ch" || mgms[1].QDBRole != "follower" {
t.Fatalf("unexpected first QDB follower record: %+v", mgms[1])
}
if mgms[2].Host != "eospilot-ns-01.cern.ch" || mgms[2].QDBHost != "eospilot-ns-ip700.cern.ch" {
t.Fatalf("unexpected combined record ordering: %+v", mgms[2])
}
}

func TestNodeStatsFromMonitoringValues(t *testing.T) {
values := parseMonitoringKeyValues([]byte(`
uid=all gid=all ns.total.files=23502173
uid=all gid=all ns.total.directories=383968
uid=all gid=all ns.current.fid=2590610131
uid=all gid=all ns.current.cid=3465125
uid=all gid=all ns.memory.virtual=27031158784
uid=all gid=all ns.memory.resident=13764378624
uid=all gid=all ns.memory.share=89653248
uid=all gid=all ns.memory.growth=23214104576
uid=all gid=all ns.stat.threads=666
uid=all gid=all ns.fds.all=866
uid=all gid=all ns.uptime=1523
`))

stats := nodeStatsFromMonitoringValues(values)

if stats.FileCount != 23502173 {
t.Fatalf("expected file count, got %d", stats.FileCount)
}
if stats.DirCount != 383968 {
t.Fatalf("expected dir count, got %d", stats.DirCount)
}
if stats.CurrentFID != 2590610131 || stats.CurrentCID != 3465125 {
t.Fatalf("unexpected current IDs: fid=%d cid=%d", stats.CurrentFID, stats.CurrentCID)
}
if stats.MemVirtual != 27031158784 || stats.MemResident != 13764378624 {
t.Fatalf("unexpected memory stats: virtual=%d resident=%d", stats.MemVirtual, stats.MemResident)
}
if stats.MemShared != 89653248 || stats.MemGrowth != 23214104576 {
t.Fatalf("unexpected shared/growth memory stats: shared=%d growth=%d", stats.MemShared, stats.MemGrowth)
}
if stats.ThreadCount != 666 || stats.FileDescs != 866 {
t.Fatalf("unexpected threads/fds: threads=%d fds=%d", stats.ThreadCount, stats.FileDescs)
}
if stats.Uptime.Seconds() != 1523 {
t.Fatalf("expected uptime 1523s, got %s", stats.Uptime)
}
}

func TestMGMPortFromMonitoringValues(t *testing.T) {
values := parseMonitoringKeyValues([]byte(`
uid=all gid=all master_id=eospilot-ns-02.cern.ch:1094
`))
if got := mgmPortFromMonitoringValues(values); got != "1094" {
t.Fatalf("expected 1094, got %q", got)
}
if got := mgmPortFromMonitoringValues(map[string]string{}); got != "1094" {
t.Fatalf("expected fallback port, got %q", got)
}
}

// --- entryFromCLI ---

func TestEntryFromCLIRootPath(t *testing.T) {
Expand Down
131 changes: 111 additions & 20 deletions eos/fetch_mgm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package eos

import (
"context"
"encoding/json"
"fmt"
"sort"
"strings"
Expand All @@ -11,6 +10,19 @@ import (
func (c *Client) MGMs(ctx context.Context) ([]MgmRecord, error) {
_ = ctx

if output, err := c.runCommand("eos", "-b", "ns", "stat", "-m"); err == nil {
values := parseMonitoringKeyValues(output)
if mgms, ok := parseMGMsFromMonitoringValues(values); ok {
return mgms, nil
}
return c.mgmsFromRaftInfo(mgmPortFromMonitoringValues(values))
}

return c.mgmsFromRaftInfo("")
}

func (c *Client) mgmsFromRaftInfo(mgmPort string) ([]MgmRecord, error) {

// Run redis-cli raft-info directly via runCommand.
// The SSH target (if set) is always the MGM or an MGM leader node,
// so we do not need a separate SSH hop.
Expand All @@ -33,10 +45,11 @@ func (c *Client) MGMs(ctx context.Context) ([]MgmRecord, error) {
return nil, fmt.Errorf("no MGM cluster info from raft-info")
}

// Fetch the MGM service port from `eos ns stat` via master_id
// (e.g. "eospilot-ns-02.cern.ch:1094"). The raft nodes use the QDB port
// (7777); the actual MGM port must be read from the namespace.
mgmPort := mgmPortFromNsStat(c)
// The raft nodes use the QDB port (7777); the actual MGM port is read from
// the namespace monitoring payload when available.
if mgmPort == "" {
mgmPort = "1094"
}

// Determine leader hostname (strip raft port :7777)
leaderHost := hostOnly(info.Leader)
Expand Down Expand Up @@ -92,7 +105,9 @@ func (c *Client) MGMs(ctx context.Context) ([]MgmRecord, error) {
QDBHost: qh,
QDBPort: qp,
Role: role,
QDBRole: role,
Status: status,
QDBStatus: status,
EOSVersion: version,
QDBVersion: version,
})
Expand All @@ -112,27 +127,103 @@ func (c *Client) MGMs(ctx context.Context) ([]MgmRecord, error) {
return mgms, nil
}

// mgmPortFromNsStat fetches the MGM service port by reading master_id from
// `eos ns stat`. master_id is of the form "hostname:port" (e.g.
// "eospilot-ns-02.cern.ch:1094"). Falls back to "1094" on any error.
func mgmPortFromNsStat(c *Client) string {
const fallback = "1094"
func parseMGMsFromNSStatMonitoring(output []byte) ([]MgmRecord, bool) {
return parseMGMsFromMonitoringValues(parseMonitoringKeyValues(output))
}

out, err := c.runCommand("eos", "-j", "-b", "ns", "stat")
if err != nil {
return fallback
func parseMGMsFromMonitoringValues(values map[string]string) ([]MgmRecord, bool) {
mgmLeader := strings.TrimSpace(values["ns.mgm.leader"])
qdbLeader := strings.TrimSpace(values["ns.qdb.leader"])
if mgmLeader == "" || qdbLeader == "" {
return nil, false
}

mgmNodes := append([]string{mgmLeader}, splitMonitoringList(values["ns.mgm.followers"])...)
qdbNodes := append([]string{qdbLeader}, splitMonitoringList(values["ns.qdb.followers"])...)
mgmNodes = uniqueEndpoints(mgmNodes)
qdbNodes = uniqueEndpoints(qdbNodes)

count := len(mgmNodes)
if len(qdbNodes) > count {
count = len(qdbNodes)
}
mgms := make([]MgmRecord, 0, count)
for i := 0; i < count; i++ {
var record MgmRecord
if i < len(mgmNodes) {
record.Host, record.Port = splitHostPort(mgmNodes[i])
record.Role = "follower"
if mgmNodes[i] == mgmLeader {
record.Role = "leader"
}
record.Status = "online"
}
if i < len(qdbNodes) {
record.QDBHost, record.QDBPort = splitHostPort(qdbNodes[i])
record.QDBRole = "follower"
if qdbNodes[i] == qdbLeader {
record.QDBRole = "leader"
}
record.QDBStatus = "online"
}
mgms = append(mgms, record)
}
return mgms, true
}

func parseMonitoringKeyValues(output []byte) map[string]string {
values := make(map[string]string)
for _, raw := range strings.Split(string(output), "\n") {
for _, field := range strings.Fields(strings.TrimSpace(raw)) {
key, value, ok := strings.Cut(field, "=")
if !ok || key == "" {
continue
}
values[key] = value
}
}
return values
}

var payload struct {
Result []struct {
Master string `json:"master_id"`
} `json:"result"`
func splitMonitoringList(raw string) []string {
raw = strings.TrimSpace(raw)
if raw == "" || raw == "none" {
return nil
}
if err := json.Unmarshal(stripEOSPreamble(out), &payload); err != nil || len(payload.Result) == 0 {
return fallback
parts := strings.Split(raw, ",")
out := make([]string, 0, len(parts))
for _, part := range parts {
part = strings.TrimSpace(part)
if part != "" && part != "none" {
out = append(out, part)
}
}
return out
}

func uniqueEndpoints(nodes []string) []string {
seen := make(map[string]struct{}, len(nodes))
out := make([]string, 0, len(nodes))
for _, node := range nodes {
node = strings.TrimSpace(node)
if node == "" {
continue
}
if _, ok := seen[node]; ok {
continue
}
seen[node] = struct{}{}
out = append(out, node)
}
return out
}

// mgmPortFromMonitoringValues extracts the MGM service port from the
// master_id key in `eos ns stat -m`. Falls back to the default MGM port.
func mgmPortFromMonitoringValues(values map[string]string) string {
const fallback = "1094"

masterID := payload.Result[0].Master // e.g. "eospilot-ns-02.cern.ch:1094"
masterID := strings.TrimSpace(values["master_id"])
if idx := strings.LastIndex(masterID, ":"); idx != -1 {
if port := masterID[idx+1:]; port != "" {
return port
Expand Down
Loading