Skip to content

Commit 15b7c36

Browse files
authored
Add SLURM 'comment' field to RSC job metrics scuba table
Differential Revision: D103691035
Pull Request resolved: #147
1 parent dbd8a9c commit 15b7c36

11 files changed

Lines changed: 66 additions & 12 deletions

shelper/local.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ type SlurmMetadata struct {
2020
NumNodes string `json:"NumNodes"`
2121
ArrayJobID string `json:"ArrayJobID"`
2222
ArrayTaskID string `json:"ArrayTaskID"`
23+
Comment string `json:"Comment"`
2324
}
2425

2526
// SlurmMetadataList is a struct that contains metadata about a slurm job
@@ -33,6 +34,7 @@ type SlurmMetadataList struct {
3334
NumNodes []string
3435
ArrayJobID []string
3536
ArrayTaskID []string
37+
Comment []string
3638
}
3739

3840
func parseNewLineToList(input string) []string {

shelper/slurm_helpers.go

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ func parseGRES(gresOut string) []string {
2222
}
2323

2424
indexString := strings.SplitN(indicesKey[1], ":", 2)[1]
25-
indices := strings.Split(indexString, ",")
25+
indices := strings.SplitSeq(indexString, ",")
2626

27-
for _, index := range indices {
27+
for index := range indices {
2828
if strings.Contains(index, "-") {
2929
indexRange := strings.Split(index, "-")
3030
st, err1 := strconv.Atoi(indexRange[0])
@@ -74,6 +74,7 @@ func GetGPUData(GPUToSlurm map[string]SlurmMetadata) SlurmMetadataList {
7474
allPartition := make(map[string]bool)
7575
allAccount := make(map[string]bool)
7676
allNumNodes := make(map[string]bool)
77+
allComment := make(map[string]bool)
7778
for _, value := range GPUToSlurm {
7879
allJobID[value.JobID] = true
7980
allJobName[value.JobName] = true
@@ -84,6 +85,7 @@ func GetGPUData(GPUToSlurm map[string]SlurmMetadata) SlurmMetadataList {
8485
allPartition[value.Partition] = true
8586
allAccount[value.Account] = true
8687
allNumNodes[value.NumNodes] = true
88+
allComment[value.Comment] = true
8789
}
8890
return SlurmMetadataList{
8991
JobID: setToSlice(allJobID),
@@ -95,6 +97,7 @@ func GetGPUData(GPUToSlurm map[string]SlurmMetadata) SlurmMetadataList {
9597
Partition: setToSlice(allPartition),
9698
Account: setToSlice(allAccount),
9799
NumNodes: setToSlice(allNumNodes),
100+
Comment: setToSlice(allComment),
98101
}
99102
}
100103

@@ -118,12 +121,13 @@ func AttributeGPU2SlurmMetadata(jobMetadata []string, hostname string, GPU2Slurm
118121
allAccount := make(map[string]bool)
119122
allPartition := make(map[string]bool)
120123
allNumNodes := make(map[string]bool)
124+
allComment := make(map[string]bool)
121125

122-
lines := strings.Split(jm, "\n")
123-
for _, line := range lines {
124-
field := strings.Fields(line)
126+
lines := strings.SplitSeq(jm, "\n")
127+
for line := range lines {
128+
field := strings.FieldsSeq(line)
125129

126-
for _, data := range field {
130+
for data := range field {
127131
parts := strings.SplitN(data, "=", 2)
128132
if parts[0] == "UserId" {
129133
end := strings.Index(parts[1], "(")
@@ -161,6 +165,9 @@ func AttributeGPU2SlurmMetadata(jobMetadata []string, hostname string, GPU2Slurm
161165
if parts[0] == "NumNodes" {
162166
allNumNodes[parts[1]] = true
163167
}
168+
if parts[0] == "Comment" {
169+
allComment[parts[1]] = true
170+
}
164171
if parts[0] == "GRES" {
165172
gresIndex = append(gresIndex, parseGRES(parts[1])...)
166173
}
@@ -177,6 +184,7 @@ func AttributeGPU2SlurmMetadata(jobMetadata []string, hostname string, GPU2Slurm
177184
Account: stringifySet(allAccount),
178185
Partition: stringifySet(allPartition),
179186
NumNodes: stringifySet(allNumNodes),
187+
Comment: stringifySet(allComment),
180188
}
181189

182190
for _, gpu := range gresIndex {
@@ -187,11 +195,11 @@ func AttributeGPU2SlurmMetadata(jobMetadata []string, hostname string, GPU2Slurm
187195

188196
// GetHostList takes a slurm job metadata string and returns the hostlist
189197
func GetHostList(jobMetadata string) string {
190-
lines := strings.Split(jobMetadata, "\n")
198+
lines := strings.SplitSeq(jobMetadata, "\n")
191199

192-
for _, line := range lines {
193-
field := strings.Fields(line)
194-
for _, data := range field {
200+
for line := range lines {
201+
field := strings.FieldsSeq(line)
202+
for data := range field {
195203
parts := strings.SplitN(data, "=", 2)
196204
if parts[0] == "NodeList" {
197205
return parts[1]
@@ -259,8 +267,8 @@ func hostnameMatchesGroup(hostname string, group string) bool {
259267
return false
260268
}
261269

262-
ranges := strings.Split(rangesStr, ",")
263-
for _, r := range ranges {
270+
ranges := strings.SplitSeq(rangesStr, ",")
271+
for r := range ranges {
264272
if strings.Contains(r, "-") {
265273
bounds := strings.Split(r, "-")
266274
start, err := strconv.Atoi(bounds[0])

shelper/slurm_helpers_test.go

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
2929
Account: "test_account",
3030
Partition: "learn",
3131
NumNodes: "1",
32+
Comment: "test_comment",
3233
},
3334
"1": {
3435
User: "test_username",
@@ -40,6 +41,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
4041
Account: "test_account",
4142
Partition: "learn",
4243
NumNodes: "1",
44+
Comment: "test_comment",
4345
},
4446
"2": {
4547
User: "test_username",
@@ -51,6 +53,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
5153
Account: "test_account",
5254
Partition: "learn",
5355
NumNodes: "1",
56+
Comment: "test_comment",
5457
},
5558
"3": {
5659
User: "test_username",
@@ -62,6 +65,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
6265
Account: "test_account",
6366
Partition: "learn",
6467
NumNodes: "1",
68+
Comment: "test_comment",
6569
},
6670
"4": {
6771
User: "test_username",
@@ -73,6 +77,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
7377
Account: "test_account",
7478
Partition: "learn",
7579
NumNodes: "1",
80+
Comment: "test_comment",
7681
},
7782
"5": {
7883
User: "test_username",
@@ -84,6 +89,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
8489
Account: "test_account",
8590
Partition: "learn",
8691
NumNodes: "1",
92+
Comment: "test_comment",
8793
},
8894
"6": {
8995
User: "test_username",
@@ -95,6 +101,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
95101
Account: "test_account",
96102
Partition: "learn",
97103
NumNodes: "1",
104+
Comment: "test_comment",
98105
},
99106
"7": {
100107
User: "test_username",
@@ -106,6 +113,7 @@ func TestGetSlurmDataFromSlurmLineAllGpus(t *testing.T) {
106113
Account: "test_account",
107114
Partition: "learn",
108115
NumNodes: "1",
116+
Comment: "test_comment",
109117
},
110118
}
111119

@@ -133,6 +141,7 @@ func TestGetSlurmDataFromSlurmLineSomeGpus(t *testing.T) {
133141
Account: "test_account",
134142
Partition: "learn",
135143
NumNodes: "1",
144+
Comment: "test_comment",
136145
},
137146
"1": {
138147
User: "test_username",
@@ -144,6 +153,7 @@ func TestGetSlurmDataFromSlurmLineSomeGpus(t *testing.T) {
144153
Account: "test_account",
145154
Partition: "learn",
146155
NumNodes: "1",
156+
Comment: "test_comment",
147157
},
148158
"2": {
149159
User: "test_username",
@@ -155,6 +165,7 @@ func TestGetSlurmDataFromSlurmLineSomeGpus(t *testing.T) {
155165
Account: "test_account",
156166
Partition: "learn",
157167
NumNodes: "1",
168+
Comment: "test_comment",
158169
},
159170
"3": {
160171
User: "test_username",
@@ -166,6 +177,7 @@ func TestGetSlurmDataFromSlurmLineSomeGpus(t *testing.T) {
166177
Account: "test_account",
167178
Partition: "learn",
168179
NumNodes: "1",
180+
Comment: "test_comment",
169181
},
170182
}
171183

@@ -208,6 +220,7 @@ func TestGetSlurmDataFromSlurmLineUniqueEntries(t *testing.T) {
208220
Account: "test2_account",
209221
Partition: "learn",
210222
NumNodes: "1",
223+
Comment: "test_comment",
211224
},
212225
"1": {
213226
User: "test_username",
@@ -219,6 +232,7 @@ func TestGetSlurmDataFromSlurmLineUniqueEntries(t *testing.T) {
219232
Account: "test2_account",
220233
Partition: "learn",
221234
NumNodes: "1",
235+
Comment: "test_comment",
222236
},
223237
"2": {
224238
User: "test_username",
@@ -230,6 +244,7 @@ func TestGetSlurmDataFromSlurmLineUniqueEntries(t *testing.T) {
230244
Account: "test2_account",
231245
Partition: "learn",
232246
NumNodes: "1",
247+
Comment: "test_comment",
233248
},
234249
"3": {
235250
User: "test_username",
@@ -241,6 +256,7 @@ func TestGetSlurmDataFromSlurmLineUniqueEntries(t *testing.T) {
241256
Account: "test2_account",
242257
Partition: "learn",
243258
NumNodes: "1",
259+
Comment: "test_comment",
244260
},
245261
"5": {
246262
User: "test_username_2",
@@ -252,6 +268,7 @@ func TestGetSlurmDataFromSlurmLineUniqueEntries(t *testing.T) {
252268
Account: "test_account",
253269
Partition: "test",
254270
NumNodes: "3",
271+
Comment: "test_comment_2",
255272
},
256273
"6": {
257274
User: "test_username_2",
@@ -263,6 +280,7 @@ func TestGetSlurmDataFromSlurmLineUniqueEntries(t *testing.T) {
263280
Account: "test_account",
264281
Partition: "test",
265282
NumNodes: "3",
283+
Comment: "test_comment_2",
266284
},
267285
"7": {
268286
User: "test_username_2",
@@ -274,6 +292,7 @@ func TestGetSlurmDataFromSlurmLineUniqueEntries(t *testing.T) {
274292
Account: "test_account",
275293
Partition: "test",
276294
NumNodes: "3",
295+
Comment: "test_comment_2",
277296
},
278297
}
279298
AttributeGPU2SlurmMetadata(blocks, "node1751", GPU2Slurm)
@@ -300,6 +319,7 @@ func TestGetSlurmDataFromSlurmLineMainArrayJob(t *testing.T) {
300319
Account: "test2_account",
301320
Partition: "learn",
302321
NumNodes: "1",
322+
Comment: "test_comment",
303323
},
304324
"1": {
305325
User: "test_username",
@@ -311,6 +331,7 @@ func TestGetSlurmDataFromSlurmLineMainArrayJob(t *testing.T) {
311331
Account: "test2_account",
312332
Partition: "learn",
313333
NumNodes: "1",
334+
Comment: "test_comment",
314335
},
315336
"2": {
316337
User: "test_username",
@@ -322,6 +343,7 @@ func TestGetSlurmDataFromSlurmLineMainArrayJob(t *testing.T) {
322343
Account: "test2_account",
323344
Partition: "learn",
324345
NumNodes: "1",
346+
Comment: "test_comment",
325347
},
326348
"3": {
327349
User: "test_username",
@@ -333,6 +355,7 @@ func TestGetSlurmDataFromSlurmLineMainArrayJob(t *testing.T) {
333355
Account: "test2_account",
334356
Partition: "learn",
335357
NumNodes: "1",
358+
Comment: "test_comment",
336359
},
337360
"5": {
338361
User: "test_username_2",
@@ -344,6 +367,7 @@ func TestGetSlurmDataFromSlurmLineMainArrayJob(t *testing.T) {
344367
Account: "test_account",
345368
Partition: "test",
346369
NumNodes: "3",
370+
Comment: "test_comment_2",
347371
},
348372
"6": {
349373
User: "test_username_2",
@@ -355,6 +379,7 @@ func TestGetSlurmDataFromSlurmLineMainArrayJob(t *testing.T) {
355379
Account: "test_account",
356380
Partition: "test",
357381
NumNodes: "3",
382+
Comment: "test_comment_2",
358383
},
359384
"7": {
360385
User: "test_username_2",
@@ -366,6 +391,7 @@ func TestGetSlurmDataFromSlurmLineMainArrayJob(t *testing.T) {
366391
Account: "test_account",
367392
Partition: "test",
368393
NumNodes: "3",
394+
Comment: "test_comment_2",
369395
},
370396
}
371397
AttributeGPU2SlurmMetadata(blocks, "node1751", GPU2Slurm)
@@ -555,6 +581,7 @@ func TestGetGPUData(t *testing.T) {
555581
NumNodes: "1",
556582
ArrayJobID: "0",
557583
ArrayTaskID: "0",
584+
Comment: "test_comment",
558585
},
559586
"1": {
560587
JobID: "1234",
@@ -566,6 +593,7 @@ func TestGetGPUData(t *testing.T) {
566593
NumNodes: "1",
567594
ArrayJobID: "10",
568595
ArrayTaskID: "10",
596+
Comment: "test_comment",
569597
},
570598
}
571599
expectedMetadata := SlurmMetadataList{
@@ -578,6 +606,7 @@ func TestGetGPUData(t *testing.T) {
578606
NumNodes: []string{"1"},
579607
ArrayJobID: []string{"0", "10"},
580608
ArrayTaskID: []string{"0", "10"},
609+
Comment: []string{"test_comment"},
581610
}
582611

583612
metadata := GetGPUData(GPUToSlurm)

shelper/testdata/scontrol_out_all_gpus.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
JobId=30214 ArrayJobId=30185 ArrayTaskId=28 ArrayTaskThrottle=100 JobName=demo_ods
22
UserId=test_username(1472000020) GroupId=test_username(1472000020) MCS_label=N/A
33
Priority=1000000 Nice=0 Account=test_account QOS=normal
4+
Comment=test_comment
45
JobState=RUNNING Reason=None Dependency=(null)
56
Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
67
DerivedExitCode=0:0

shelper/testdata/scontrol_out_main_array_job.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
JobId=30214 ArrayJobId=30185 ArrayTaskId=28-59%100 ArrayTaskThrottle=100 JobName=demo_ods
22
UserId=test_username(1472000020) GroupId=test_username(1472000020) MCS_label=N/A
33
Priority=1000000 Nice=0 Account=test2_account QOS=normal
4+
Comment=test_comment
45
JobState=RUNNING Reason=None Dependency=(null)
56
Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
67
DerivedExitCode=0:0
@@ -34,6 +35,7 @@ JobId=30214 ArrayJobId=30185 ArrayTaskId=28-59%100 ArrayTaskThrottle=100 JobName
3435
JobId=31214 ArrayJobId=31185 ArrayTaskId=128 ArrayTaskThrottle=100 JobName=demo_ods2
3536
UserId=test_username_2(1472000020) GroupId=test_username_2(1472000020) MCS_label=N/A
3637
Priority=1000000 Nice=0 Account=test_account QOS=dev
38+
Comment=test_comment_2
3739
JobState=RUNNING Reason=None Dependency=(null)
3840
Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
3941
DerivedExitCode=0:0

shelper/testdata/scontrol_out_multi_node.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
JobId=8401413 JobName=slurm_job_name
22
UserId=test_username(1472000020) GroupId=test_username(1472000020) MCS_label=N/A
33
Priority=1002612 Nice=0 Account=test_account QOS=test_qos
4+
Comment=test_comment
45
JobState=RUNNING Reason=None Dependency=(null)
56
Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
67
DerivedExitCode=0:0

shelper/testdata/scontrol_out_no_gpus.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
JobId=30214 ArrayJobId=30185 ArrayTaskId=28 ArrayTaskThrottle=100 JobName=demo_ods
22
UserId=test_username(1472000020) GroupId=test_username(1472000020) MCS_label=N/A
33
Priority=1000000 Nice=0 Account=test_account QOS=normal
4+
Comment=test_comment
45
JobState=RUNNING Reason=None Dependency=(null)
56
Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
67
DerivedExitCode=0:0

0 commit comments

Comments
 (0)