1+ import json
12import random
23import string
34import time
45import traceback
6+ from json import JSONDecodeError
57
68from ceph .ceph import CommandFailed
79from tests .cephfs .cephfs_utilsV1 import FsUtils
810from utility .log import Log
11+ from utility .retry import retry
912
1013log = Log (__name__ )
1114
@@ -24,6 +27,109 @@ class Metrics_Value_Not_Matching(Exception):
2427 pass
2528
2629
@retry((JSONDecodeError, CommandFailed), tries=5, delay=5)
def get_client_id(client, rank=0, mounted_dir="", fs_name="cephfs"):
    """
    Resolve the CephFS client session ID for a given mount directory.

    Queries the MDS that holds ``rank`` in ``fs_name`` and scans its session
    list for an entry whose ``client_metadata.mount_point`` contains
    ``mounted_dir``.

    Args:
        client: node object exposing ``exec_command`` for running ceph CLI.
        rank: MDS rank whose session list is consulted.
        mounted_dir: substring of the client's mount point to match.
        fs_name: CephFS file system name.

    Returns:
        tuple: ``(client_id, rank)`` — the matching session id string and
        the rank that was queried.

    Raises:
        CommandFailed: if no session matches ``mounted_dir`` (retried by
        the decorator, together with JSONDecodeError).
    """
    mds_name_cmd = (
        f"ceph fs status {fs_name} -f json | "
        f"jq '.mdsmap[] | select(.rank == {rank}) | .name'"
    )
    ranked_mds, _ = client.exec_command(sudo=True, cmd=mds_name_cmd)
    log.info("Executing MDS name with rank command: %s", ranked_mds)
    # jq emits the name quoted with a trailing newline — strip both.
    ranked_mds = ranked_mds.replace('"', "").replace("\n", "")

    session_cmd = (
        f"ceph tell mds.{ranked_mds} session ls | jq '.[] | select(.client_metadata.mount_point"
        f' != null and (.client_metadata.mount_point | contains("{mounted_dir}"))) | .id\''
    )
    log.info("Executing Client ID Command : %s", session_cmd)
    client_id, _ = client.exec_command(sudo=True, cmd=session_cmd)
    client_id = client_id.replace('"', "").replace("\n", "")

    if not client_id:
        log.error("Client not found for Mounted Directory : %s", mounted_dir)
        raise CommandFailed(f"Client not found for Mounted Directory : {mounted_dir}")
    log.info("Client ID :[%s] for Mounted Directory : [%s]", client_id, mounted_dir)
    return client_id, rank
50+
51+
@retry((JSONDecodeError, CommandFailed), tries=5, delay=10)
def get_mds_metrics_for_client(
    client, client_id, rank, mds_rank=0, mounted_dir="", fs_name="cephfs"
):
    """
    Fetch per-client MDS performance counters for a known client session.

    Looks up the MDS holding ``mds_rank`` in ``fs_name``, dumps its labeled
    counters, and filters the ``mds_client_metrics`` entries down to the one
    matching ``client_id`` and (client) ``rank``.

    Args:
        client: node object exposing ``exec_command`` for running ceph CLI.
        client_id: client session id (as returned by ``get_client_id``).
        rank: rank label recorded in the client's metrics entry.
        mds_rank: MDS rank whose counter dump is queried.
        mounted_dir: mount directory, used only for log/error context.
        fs_name: CephFS file system name.

    Returns:
        dict: parsed metrics entry for the client.

    Raises:
        CommandFailed: if no metrics entry is found for the client on that
            MDS (retried by the decorator, together with JSONDecodeError).
    """
    ranked_mds, _ = client.exec_command(
        sudo=True,
        cmd=f"ceph fs status {fs_name} -f json | jq '.mdsmap[] | select(.rank == {mds_rank}) | .name'",
    )
    log.info(f"Executing MDS name with rank command: {ranked_mds}")
    # jq emits the name quoted with a trailing newline — strip both.
    ranked_mds = ranked_mds.replace('"', "").replace("\n", "")
    log.info(f"Client ID :[{client_id}] for Mounted Directory : [{mounted_dir}]")
    cmd = f"""ceph tell mds.{ranked_mds} counter dump 2>/dev/null | \
jq -r '. | to_entries | map(select(.key | match("mds_client_metrics"))) | \
.[].value[] | select(.labels.client != null and (.labels.client | contains("{client_id}"))
and (.labels.rank == "{rank}"))'
"""
    metrics_out, _ = client.exec_command(sudo=True, cmd=cmd)
    log.info(
        f"Metrics for MDS : {ranked_mds} Mounted Directory: {mounted_dir} and Client : {client_id} is {metrics_out}"
    )
    if metrics_out == "":
        log.error(f"Metrics not found for MDS : {ranked_mds}")
        # BUGFIX: previously raised "Client not found for Mounted Directory"
        # here, which misreported a missing-metrics condition as a
        # missing-client one. Report what actually failed.
        raise CommandFailed(
            f"Metrics not found for MDS : {ranked_mds} "
            f"(client {client_id}, Mounted Directory : {mounted_dir})"
        )
    # json.loads accepts str or bytes directly; wrapping in str() would
    # corrupt a bytes payload into "b'...'" and break parsing.
    return json.loads(metrics_out)
77+
78+
def get_mds_metrics_from_ranks(ranks, fs_util, client, mount_dir, cephfs):
    """
    Try fetching MDS metrics for the given client and mount_dir from the list of ranks.

    First resolves the client session id by asking each rank in turn, then
    probes every rank for that client's metrics until one responds.

    Args:
        ranks: iterable of MDS ranks to probe.
        fs_util: FsUtils instance — kept for interface compatibility;
            not used by this function (NOTE(review): candidate for removal
            once all callers are confirmed).
        client: node object used to run ceph commands.
        mount_dir: mount directory identifying the client session.
        cephfs: CephFS file system name.

    Returns:
        dict: MDS metrics if found.

    Raises:
        CommandFailed: If no metrics are found from any rank.
    """
    client_id, client_rank = None, None

    # Step 1: Get client_id from one of the MDS ranks
    for rank in ranks:
        try:
            client_id, client_rank = get_client_id(client, rank, mount_dir, cephfs)
        except Exception as e:
            log.warning(
                f"Rank {rank}: Failed to get client ID for mount {mount_dir}: {e}"
            )
            continue
        if client_id:
            log.info(f"Found client ID '{client_id}' from rank {client_rank}")
            break

    if not client_id:
        raise CommandFailed(f"Client not found in any MDS ranks for mount {mount_dir}")

    # Step 2: Use client_id and try to collect metrics from all MDS ranks
    for rank in ranks:
        try:
            metrics = get_mds_metrics_for_client(
                client,
                client_id,
                client_rank,
                mds_rank=rank,
                mounted_dir=mount_dir,
                fs_name=cephfs,
            )
        except Exception as e:
            log.warning(
                f"Rank {rank}: Failed to fetch metrics for client {client_id}: {e}"
            )
            continue
        # A return of 1 is treated as a sentinel for "no data" upstream.
        if metrics and metrics != 1:
            log.info(f"Successfully got MDS metrics from rank {rank}")
            return metrics

    raise CommandFailed(
        f"Metrics not found for client {client_id} in any of the MDS ranks for mount {mount_dir}"
    )
131+
132+
27133def run (ceph_cluster , ** kw ):
28134 try :
29135 tc = "CEPH-83588355"
@@ -104,17 +210,18 @@ def run(ceph_cluster, **kw):
104210 fs_util .fuse_mount ([client4 ], fuse_mounting_dir_4 )
105211 # Get initial MDS metrics
106212 # pdb.set_trace()
107- mds_metric_client1 = fs_util .get_mds_metrics (
108- client1 , 0 , fuse_mounting_dir_1 , cephfs
213+
214+ mds_metric_client1 = get_mds_metrics_from_ranks (
215+ [0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
109216 )
110- mds_metric_client2 = fs_util . get_mds_metrics (
111- client2 , 0 , fuse_mounting_dir_2 , cephfs
217+ mds_metric_client2 = get_mds_metrics_from_ranks (
218+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
112219 )
113- mds_metric_client3 = fs_util . get_mds_metrics (
114- client3 , 0 , fuse_mounting_dir_3 , cephfs
220+ mds_metric_client3 = get_mds_metrics_from_ranks (
221+ [ 0 , 1 , 2 ], fs_util , client3 , fuse_mounting_dir_3 , cephfs
115222 )
116- mds_metric_client4 = fs_util . get_mds_metrics (
117- client4 , 0 , fuse_mounting_dir_4 , cephfs
223+ mds_metric_client4 = get_mds_metrics_from_ranks (
224+ [ 0 , 1 , 2 ], fs_util , client4 , fuse_mounting_dir_4 , cephfs
118225 )
119226
120227 log .info (f"mds_metric_client1: { mds_metric_client1 } " )
@@ -132,11 +239,11 @@ def run(ceph_cluster, **kw):
132239 inode_list = ["opened_inodes" , "pinned_icaps" , "total_inodes" ]
133240
134241 # Get initial inode metrics for client1 and client3
135- client1_pre_inode_metrics = fs_util . get_mds_metrics (
136- client1 , 0 , fuse_mounting_dir_1 , cephfs
242+ client1_pre_inode_metrics = get_mds_metrics_from_ranks (
243+ [ 0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
137244 )
138- client3_pre_inode_metrics = fs_util . get_mds_metrics (
139- client3 , 0 , fuse_mounting_dir_3 , cephfs
245+ client3_pre_inode_metrics = get_mds_metrics_from_ranks (
246+ [ 0 , 1 , 2 ], fs_util , client3 , fuse_mounting_dir_3 , cephfs
140247 )
141248 log .info (f"client1_pre_inode_metrics: { client1_pre_inode_metrics } " )
142249 log .info (f"client3_pre_inode_metrics: { client3_pre_inode_metrics } " )
@@ -165,11 +272,11 @@ def run(ceph_cluster, **kw):
165272
166273 log .info ("Writing files is done for client1 and client3" )
167274 log .info ("Get metrics only for client1 and client3" )
168- client1_post_inode_metrics = fs_util . get_mds_metrics (
169- client1 , 0 , fuse_mounting_dir_1 , cephfs
275+ client1_post_inode_metrics = get_mds_metrics_from_ranks (
276+ [ 0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
170277 )
171- client3_post_inode_metrics = fs_util . get_mds_metrics (
172- client3 , 0 , fuse_mounting_dir_3 , cephfs
278+ client3_post_inode_metrics = get_mds_metrics_from_ranks (
279+ [ 0 , 1 , 2 ], fs_util , client3 , fuse_mounting_dir_3 , cephfs
173280 )
174281 log .info (f"client1_post_inode_metrics: { client1_post_inode_metrics } " )
175282 log .info (f"client3_post_inode_metrics: { client3_post_inode_metrics } " )
@@ -197,11 +304,11 @@ def run(ceph_cluster, **kw):
197304 )
198305 file_paths_client2 = []
199306 file_paths_client4 = []
200- pre_opened_files_client2 = fs_util . get_mds_metrics (
201- client2 , 0 , fuse_mounting_dir_2 , cephfs
307+ pre_opened_files_client2 = get_mds_metrics_from_ranks (
308+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
202309 )["counters" ]["opened_files" ]
203- pre_opened_files_client4 = fs_util . get_mds_metrics (
204- client4 , 0 , fuse_mounting_dir_4 , cephfs
310+ pre_opened_files_client4 = get_mds_metrics_from_ranks (
311+ [ 0 , 1 , 2 ], fs_util , client4 , fuse_mounting_dir_4 , cephfs
205312 )["counters" ]["opened_files" ]
206313 log .info (f"pre_opened_files_client2: { pre_opened_files_client2 } " )
207314 log .info (f"pre_opened_files_client4: { pre_opened_files_client4 } " )
@@ -240,11 +347,11 @@ def run(ceph_cluster, **kw):
240347 log .info (f"Number of PID4s from opening files: { pids4 } " )
241348 time .sleep (10 )
242349 log .info ("Get final MDS metrics after opening files" )
243- client2_post_opened_files = fs_util . get_mds_metrics (
244- client2 , 0 , fuse_mounting_dir_2 , cephfs
350+ client2_post_opened_files = get_mds_metrics_from_ranks (
351+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
245352 )["counters" ]["opened_files" ]
246- client4_post_opened_files = fs_util . get_mds_metrics (
247- client4 , 0 , fuse_mounting_dir_4 , cephfs
353+ client4_post_opened_files = get_mds_metrics_from_ranks (
354+ [ 0 , 1 , 2 ], fs_util , client4 , fuse_mounting_dir_4 , cephfs
248355 )["counters" ]["opened_files" ]
249356 log .info (f"client2_post_opened_files: { client2_post_opened_files } " )
250357 log .info (f"client4_post_opened_files: { client4_post_opened_files } " )
@@ -268,11 +375,11 @@ def run(ceph_cluster, **kw):
268375 log .error (f"Failed to kill tail processes: { e } " )
269376 time .sleep (5 )
270377 log .info ("Get final MDS metrics after killing the PIDs" )
271- post_opened_files_client2 = fs_util . get_mds_metrics (
272- client2 , 0 , fuse_mounting_dir_2 , cephfs
378+ post_opened_files_client2 = get_mds_metrics_from_ranks (
379+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
273380 )["counters" ]["opened_files" ]
274- post_opened_files_client4 = fs_util . get_mds_metrics (
275- client4 , 0 , fuse_mounting_dir_4 , cephfs
381+ post_opened_files_client4 = get_mds_metrics_from_ranks (
382+ [ 0 , 1 , 2 ], fs_util , client4 , fuse_mounting_dir_4 , cephfs
276383 )["counters" ]["opened_files" ]
277384 log .info (f"post_opened_files_client2: { post_opened_files_client2 } " )
278385 log .info (f"post_opened_files_client4: { post_opened_files_client4 } " )
@@ -288,11 +395,11 @@ def run(ceph_cluster, **kw):
288395 "Failed to verify opened_files for client4"
289396 )
290397 log .info ("Verify if other clients opened_files metrics remain same" )
291- post_opened_files_client1 = fs_util . get_mds_metrics (
292- client1 , 0 , fuse_mounting_dir_1 , cephfs
398+ post_opened_files_client1 = get_mds_metrics_from_ranks (
399+ [ 0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
293400 )["counters" ]["opened_files" ]
294- post_opened_files_client3 = fs_util . get_mds_metrics (
295- client3 , 0 , fuse_mounting_dir_3 , cephfs
401+ post_opened_files_client3 = get_mds_metrics_from_ranks (
402+ [ 0 , 1 , 2 ], fs_util , client3 , fuse_mounting_dir_3 , cephfs
296403 )["counters" ]["opened_files" ]
297404 log .info (f"post_opened_files_client1: { post_opened_files_client1 } " )
298405 log .info (f"post_opened_files_client3: { post_opened_files_client3 } " )
@@ -315,17 +422,17 @@ def run(ceph_cluster, **kw):
315422 log .info (
316423 "Increase only Client2 and Client4 dentry metrics and other clients should remain same"
317424 )
318- pre_dentry_client1 = fs_util . get_mds_metrics (
319- client1 , 0 , fuse_mounting_dir_1 , cephfs
425+ pre_dentry_client1 = get_mds_metrics_from_ranks (
426+ [ 0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
320427 )["counters" ]
321- pre_dentry_client2 = fs_util . get_mds_metrics (
322- client2 , 0 , fuse_mounting_dir_2 , cephfs
428+ pre_dentry_client2 = get_mds_metrics_from_ranks (
429+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
323430 )["counters" ]
324- pre_dentry_client3 = fs_util . get_mds_metrics (
325- client3 , 0 , fuse_mounting_dir_3 , cephfs
431+ pre_dentry_client3 = get_mds_metrics_from_ranks (
432+ [ 0 , 1 , 2 ], fs_util , client3 , fuse_mounting_dir_3 , cephfs
326433 )["counters" ]
327- pre_dentry_client4 = fs_util . get_mds_metrics (
328- client4 , 0 , fuse_mounting_dir_4 , cephfs
434+ pre_dentry_client4 = get_mds_metrics_from_ranks (
435+ [ 0 , 1 , 2 ], fs_util , client4 , fuse_mounting_dir_4 , cephfs
329436 )["counters" ]
330437 log .info (f"pre_dentry_client2: { pre_dentry_client2 } " )
331438 log .info (f"pre_dentry_client4: { pre_dentry_client4 } " )
@@ -368,11 +475,11 @@ def run(ceph_cluster, **kw):
368475 client4 .exec_command (sudo = True , cmd = f"ls { fuse_mounting_dir_4 } /{ dir } /" )
369476 time .sleep (5 )
370477 log .info ("Get final MDS metrics after creating directories and files" )
371- post_dentry_client2 = fs_util . get_mds_metrics (
372- client2 , 0 , fuse_mounting_dir_2 , cephfs
478+ post_dentry_client2 = get_mds_metrics_from_ranks (
479+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
373480 )["counters" ]
374- post_dentry_client4 = fs_util . get_mds_metrics (
375- client4 , 0 , fuse_mounting_dir_4 , cephfs
481+ post_dentry_client4 = get_mds_metrics_from_ranks (
482+ [ 0 , 1 , 2 ], fs_util , client4 , fuse_mounting_dir_4 , cephfs
376483 )["counters" ]
377484 log .info (f"post_dentry_client2: { post_dentry_client2 } " )
378485 log .info (f"post_dentry_client4: { post_dentry_client4 } " )
@@ -388,11 +495,11 @@ def run(ceph_cluster, **kw):
388495 f"Failed to verify { dentry } for client4"
389496 )
390497 log .info ("Verify if other clients dentry metrics remain same" )
391- post_dentry_client1 = fs_util . get_mds_metrics (
392- client1 , 0 , fuse_mounting_dir_1 , cephfs
498+ post_dentry_client1 = get_mds_metrics_from_ranks (
499+ [ 0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
393500 )["counters" ]
394- post_dentry_client3 = fs_util . get_mds_metrics (
395- client3 , 0 , fuse_mounting_dir_3 , cephfs
501+ post_dentry_client3 = get_mds_metrics_from_ranks (
502+ [ 0 , 1 , 2 ], fs_util , client3 , fuse_mounting_dir_3 , cephfs
396503 )["counters" ]
397504 log .info (f"post_dentry_client1: { post_dentry_client1 } " )
398505 log .info (f"post_dentry_client3: { post_dentry_client3 } " )
@@ -415,11 +522,11 @@ def run(ceph_cluster, **kw):
415522 )
416523 # Using scp from client1 to client2, it will increase total_read_ops and total_read_size in client1
417524 # In client2, it will increase total_write_ops and total_write_size
418- pre_read_ops_client1 = fs_util . get_mds_metrics (
419- client1 , 0 , fuse_mounting_dir_1 , cephfs
525+ pre_read_ops_client1 = get_mds_metrics_from_ranks (
526+ [ 0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
420527 )["counters" ]
421- pre_write_ops_client2 = fs_util . get_mds_metrics (
422- client2 , 0 , fuse_mounting_dir_2 , cephfs
528+ pre_write_ops_client2 = get_mds_metrics_from_ranks (
529+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
423530 )["counters" ]
424531 log .info (f"pre_read_ops_client1: { pre_read_ops_client1 } " )
425532 log .info (f"pre_write_ops_client2: { pre_write_ops_client2 } " )
@@ -456,11 +563,11 @@ def run(ceph_cluster, **kw):
456563 "total_write_size" ,
457564 ]
458565 log .info ("Get final MDS metrics after copying a file" )
459- post_read_ops_client1 = fs_util . get_mds_metrics (
460- client1 , 0 , fuse_mounting_dir_1 , cephfs
566+ post_read_ops_client1 = get_mds_metrics_from_ranks (
567+ [ 0 , 1 , 2 ], fs_util , client1 , fuse_mounting_dir_1 , cephfs
461568 )["counters" ]
462- post_write_ops_client2 = fs_util . get_mds_metrics (
463- client2 , 0 , fuse_mounting_dir_2 , cephfs
569+ post_write_ops_client2 = get_mds_metrics_from_ranks (
570+ [ 0 , 1 , 2 ], fs_util , client2 , fuse_mounting_dir_2 , cephfs
464571 )["counters" ]
465572 log .info (f"post_read_ops_client1: { post_read_ops_client1 } " )
466573 log .info (f"post_write_ops_client2: { post_write_ops_client2 } " )
0 commit comments