@@ -2,7 +2,6 @@ package eos
22
33import (
44 "context"
5- "encoding/json"
65 "fmt"
76 "sort"
87 "strings"
@@ -11,6 +10,19 @@ import (
1110func (c * Client ) MGMs (ctx context.Context ) ([]MgmRecord , error ) {
1211 _ = ctx
1312
13+ if output , err := c .runCommand ("eos" , "-b" , "ns" , "stat" , "-m" ); err == nil {
14+ values := parseMonitoringKeyValues (output )
15+ if mgms , ok := parseMGMsFromMonitoringValues (values ); ok {
16+ return mgms , nil
17+ }
18+ return c .mgmsFromRaftInfo (mgmPortFromMonitoringValues (values ))
19+ }
20+
21+ return c .mgmsFromRaftInfo ("" )
22+ }
23+
24+ func (c * Client ) mgmsFromRaftInfo (mgmPort string ) ([]MgmRecord , error ) {
25+
1426 // Run redis-cli raft-info directly via runCommand.
1527 // The SSH target (if set) is always the MGM or an MGM leader node,
1628 // so we do not need a separate SSH hop.
@@ -33,10 +45,11 @@ func (c *Client) MGMs(ctx context.Context) ([]MgmRecord, error) {
3345 return nil , fmt .Errorf ("no MGM cluster info from raft-info" )
3446 }
3547
36- // Fetch the MGM service port from `eos ns stat` via master_id
37- // (e.g. "eospilot-ns-02.cern.ch:1094"). The raft nodes use the QDB port
38- // (7777); the actual MGM port must be read from the namespace.
39- mgmPort := mgmPortFromNsStat (c )
48+ // The raft nodes use the QDB port (7777); the actual MGM port is read from
49+ // the namespace monitoring payload when available.
50+ if mgmPort == "" {
51+ mgmPort = "1094"
52+ }
4053
4154 // Determine leader hostname (strip raft port :7777)
4255 leaderHost := hostOnly (info .Leader )
@@ -92,7 +105,9 @@ func (c *Client) MGMs(ctx context.Context) ([]MgmRecord, error) {
92105 QDBHost : qh ,
93106 QDBPort : qp ,
94107 Role : role ,
108+ QDBRole : role ,
95109 Status : status ,
110+ QDBStatus : status ,
96111 EOSVersion : version ,
97112 QDBVersion : version ,
98113 })
@@ -112,27 +127,103 @@ func (c *Client) MGMs(ctx context.Context) ([]MgmRecord, error) {
112127 return mgms , nil
113128}
114129
115- // mgmPortFromNsStat fetches the MGM service port by reading master_id from
116- // `eos ns stat`. master_id is of the form "hostname:port" (e.g.
117- // "eospilot-ns-02.cern.ch:1094"). Falls back to "1094" on any error.
118- func mgmPortFromNsStat (c * Client ) string {
119- const fallback = "1094"
130+ func parseMGMsFromNSStatMonitoring (output []byte ) ([]MgmRecord , bool ) {
131+ return parseMGMsFromMonitoringValues (parseMonitoringKeyValues (output ))
132+ }
120133
121- out , err := c .runCommand ("eos" , "-j" , "-b" , "ns" , "stat" )
122- if err != nil {
123- return fallback
134+ func parseMGMsFromMonitoringValues (values map [string ]string ) ([]MgmRecord , bool ) {
135+ mgmLeader := strings .TrimSpace (values ["ns.mgm.leader" ])
136+ qdbLeader := strings .TrimSpace (values ["ns.qdb.leader" ])
137+ if mgmLeader == "" || qdbLeader == "" {
138+ return nil , false
139+ }
140+
141+ mgmNodes := append ([]string {mgmLeader }, splitMonitoringList (values ["ns.mgm.followers" ])... )
142+ qdbNodes := append ([]string {qdbLeader }, splitMonitoringList (values ["ns.qdb.followers" ])... )
143+ mgmNodes = uniqueEndpoints (mgmNodes )
144+ qdbNodes = uniqueEndpoints (qdbNodes )
145+
146+ count := len (mgmNodes )
147+ if len (qdbNodes ) > count {
148+ count = len (qdbNodes )
149+ }
150+ mgms := make ([]MgmRecord , 0 , count )
151+ for i := 0 ; i < count ; i ++ {
152+ var record MgmRecord
153+ if i < len (mgmNodes ) {
154+ record .Host , record .Port = splitHostPort (mgmNodes [i ])
155+ record .Role = "follower"
156+ if mgmNodes [i ] == mgmLeader {
157+ record .Role = "leader"
158+ }
159+ record .Status = "online"
160+ }
161+ if i < len (qdbNodes ) {
162+ record .QDBHost , record .QDBPort = splitHostPort (qdbNodes [i ])
163+ record .QDBRole = "follower"
164+ if qdbNodes [i ] == qdbLeader {
165+ record .QDBRole = "leader"
166+ }
167+ record .QDBStatus = "online"
168+ }
169+ mgms = append (mgms , record )
170+ }
171+ return mgms , true
172+ }
173+
// parseMonitoringKeyValues parses whitespace-separated key=value tokens, as
// found in monitoring-style command output, into a map.
//
// Tokens may be separated by any whitespace, including newlines and carriage
// returns, so the payload is tokenized in a single pass rather than line by
// line. Each token is split at its first "="; tokens without "=" or with an
// empty key are ignored. When a key occurs more than once the last value
// wins. The returned map is never nil.
func parseMonitoringKeyValues(output []byte) map[string]string {
	values := make(map[string]string)
	// strings.Fields already treats "\n" and "\r" as separators, so no
	// separate per-line split or trim is needed.
	for _, field := range strings.Fields(string(output)) {
		key, value, ok := strings.Cut(field, "=")
		if !ok || key == "" {
			continue
		}
		values[key] = value
	}
	return values
}
125187
126- var payload struct {
127- Result [] struct {
128- Master string `json:"master_id"`
129- } `json:"result"`
// splitMonitoringList splits a comma-separated monitoring value into its
// trimmed entries, dropping empty entries and the placeholder "none".
//
// A value that is blank or exactly "none" (after trimming) yields nil; any
// other value yields a non-nil slice, which may be empty.
func splitMonitoringList(raw string) []string {
	switch raw = strings.TrimSpace(raw); raw {
	case "", "none":
		return nil
	}

	pieces := strings.Split(raw, ",")
	entries := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		entry := strings.TrimSpace(piece)
		if entry == "" || entry == "none" {
			continue
		}
		entries = append(entries, entry)
	}
	return entries
}
203+
// uniqueEndpoints returns the trimmed, non-empty entries of nodes with
// duplicates removed, keeping the first occurrence of each entry and the
// original order. The result is never nil.
func uniqueEndpoints(nodes []string) []string {
	known := make(map[string]struct{}, len(nodes))
	unique := make([]string, 0, len(nodes))
	for _, raw := range nodes {
		endpoint := strings.TrimSpace(raw)
		if endpoint == "" {
			continue
		}
		if _, dup := known[endpoint]; !dup {
			known[endpoint] = struct{}{}
			unique = append(unique, endpoint)
		}
	}
	return unique
}
220+
221+ // mgmPortFromMonitoringValues extracts the MGM service port from the
222+ // master_id key in `eos ns stat -m`. Falls back to the default MGM port.
223+ func mgmPortFromMonitoringValues (values map [string ]string ) string {
224+ const fallback = "1094"
134225
135- masterID := payload . Result [ 0 ]. Master // e.g. "eospilot-ns-02.cern.ch:1094"
226+ masterID := strings . TrimSpace ( values [ "master_id" ])
136227 if idx := strings .LastIndex (masterID , ":" ); idx != - 1 {
137228 if port := masterID [idx + 1 :]; port != "" {
138229 return port
0 commit comments