Skip to content

Commit 96db7e2

Browse files
authored
Merge pull request open-mpi#13104 from lrbison/tuned_json
coll/tuned dynamic file in json format
2 parents 120b71b + 1217a95 commit 96db7e2

29 files changed

+1376
-292
lines changed
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) 2024-2025 Amazon.com, Inc. or its affiliates.
4+
# All Rights reserved.
5+
# $COPYRIGHT$
6+
#
7+
# Additional copyrights may follow
8+
9+
import re
10+
import json
11+
from collections import OrderedDict
12+
13+
coll_dict = {
14+
'allgather' : 0,
15+
'allgatherv' : 1,
16+
'allreduce' : 2,
17+
'alltoall' : 3,
18+
'alltoallv' : 4,
19+
'alltoallw' : 5,
20+
'barrier' : 6,
21+
'bcast' : 7,
22+
'exscan' : 8,
23+
'gather' : 9,
24+
'gatherv' : 10,
25+
'reduce' : 11,
26+
'reducescatter' : 12,
27+
'reducescatterblock' : 13,
28+
'scan' : 14,
29+
'scatter' : 15,
30+
'scatterv' : 16,
31+
'neighbor_allgather' : 17,
32+
'neighbor_allgatherv' : 18,
33+
'neighbor_alltoall' : 19,
34+
'neighbor_alltoallv' : 20,
35+
'neighbor_alltoallw' : 21 }
36+
coll_dict_rev = { v:k for k,v in coll_dict.items() }
37+
38+
han_component_dict = {
39+
"self" : 0,
40+
"basic" : 1,
41+
"libnbc" : 2,
42+
"tuned" : 3,
43+
"sm" : 4,
44+
"adapt" : 5,
45+
"han" : 6,
46+
}
47+
48+
han_topo_level_dict = {
49+
'intra_node' : 0,
50+
'inter_node' : 1,
51+
'global_communicator' : 2,
52+
}
53+
54+
55+
def strip_comments(line):
56+
return re.sub(r"#.*","",line).strip()
57+
58+
class GenericOpenMPIRuleReader():
59+
def __init__(self, fp, fname_for_prints=""):
60+
self.fp = fp
61+
# The 1-indexed line number which corresponds to the next byte of fp read.
62+
self.jline = 1
63+
self.line_start = 0
64+
def get_next_line(self):
65+
while True:
66+
self.line_start = self.fp.tell()
67+
line = self.fp.readline()
68+
if not line: return None
69+
self.jline += 1
70+
if strip_comments(line):
71+
return line
72+
73+
def isnext_digit(self):
74+
# ompi_coll_base_file_peek_next_char_isdigit
75+
tell = self.fp.tell()
76+
while True:
77+
next = self.fp.read(1)
78+
if next in ' \t':
79+
tell += 1
80+
continue
81+
self.fp.seek(tell)
82+
return next in '0123456789'
83+
84+
def get_next(self):
85+
# (ompi_coll_base_file_getnext_long)
86+
while True:
87+
line = self.get_next_line()
88+
if not line: return None
89+
UNK = -1
90+
jnum_start = UNK
91+
jnum_end = UNK
92+
for jc in range(len(line)):
93+
if line[jc] in "#":
94+
break
95+
if line[jc] in '0123456789':
96+
if jnum_start == UNK:
97+
jnum_start = jc
98+
jnum_end = jc
99+
else:
100+
if jnum_end != UNK:
101+
break
102+
if jnum_end != UNK:
103+
self.fp.seek(self.line_start+jnum_end+1)
104+
# decrement the line number, the next read will continue on this line.
105+
self.jline -= 1
106+
return int(line[jnum_start:jnum_end+1])
107+
108+
def read_header(self):
109+
line = self.get_next_line()
110+
match = re.match("rule-file-version-([0-9])", line)
111+
if match:
112+
return int(match.group(1))
113+
else:
114+
self.jline -= 1
115+
self.fp.seek(self.line_start)
116+
return 1
117+
118+
class TunedRuleReader(GenericOpenMPIRuleReader):
119+
def load_rulefile(self):
120+
json_root = OrderedDict()
121+
file_ver = self.read_header()
122+
json_root['rule_file_version'] = 3
123+
json_root['module'] = 'tuned'
124+
json_root['collectives'] = OrderedDict()
125+
126+
ncollectives = self.get_next()
127+
for jcol in range(ncollectives):
128+
coll_id = self.get_next()
129+
coll_name = coll_dict_rev[coll_id]
130+
comm_rules = []
131+
ncomm_sizes = self.get_next()
132+
for jcomm_size in range(ncomm_sizes):
133+
comm_size = self.get_next()
134+
nmsg_sizes = self.get_next()
135+
comm_rule = OrderedDict()
136+
comm_rule['comm_size_min'] = 0
137+
if jcomm_size+1 < ncomm_sizes:
138+
comm_rule['comm_size_max'] = max(comm_size-1, 0)
139+
if jcomm_size > 0:
140+
comm_rule['comm_size_min'] = comm_rules[jcomm_size-1]['comm_size_max'] + 1
141+
msg_rules = []
142+
for jmsg in range(nmsg_sizes):
143+
msg_size = self.get_next()
144+
result_alg = self.get_next()
145+
result_topo_faninout = self.get_next()
146+
result_segsize = self.get_next()
147+
rule = OrderedDict()
148+
rule['msg_size_min'] = msg_size
149+
if jmsg < nmsg_sizes - 1:
150+
rule['msg_size_max'] = 'Inf'
151+
if jmsg > 0:
152+
msg_rules[jmsg-1]['msg_size_max'] = msg_size - 1
153+
rule['alg'] = result_alg
154+
if result_topo_faninout != 0:
155+
rule['faninout'] = result_topo_faninout
156+
if result_segsize != 0:
157+
rule['segsize'] = result_segsize
158+
result_maxreq = 0
159+
if file_ver > 1 and self.isnext_digit():
160+
result_maxreq = self.get_next()
161+
if result_maxreq != 0:
162+
rule['reqs'] = result_maxreq
163+
msg_rules.append(rule)
164+
comm_rule['rules'] = msg_rules
165+
comm_rules.append(comm_rule)
166+
json_root['collectives'][coll_name] = comm_rules
167+
return json_root
168+
169+
class TunedRuleWriter():
170+
def __init__(self):
171+
pass
172+
def to_file(json_rules):
173+
for coll in coll_dict.keys():
174+
if coll in json_rules['collectives']:
175+
pass
176+
177+
if __name__ == '__main__':
178+
import argparse
179+
parser = argparse.ArgumentParser()
180+
parser.add_argument("--input","-i", type=argparse.FileType('r'), required=True)
181+
# parser.add_argument("--output","-o",type=argparse.FileType('w'), required=True)
182+
183+
args = parser.parse_args()
184+
reader = TunedRuleReader(args.input)
185+
print(json.dumps(reader.load_rulefile(), indent=4))

docs/tuning-apps/coll-tuned.rst

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,77 @@ after.
9292
.. code-block:: sh
9393
9494
shell$ mpirun ... --mca coll_tuned_use_dynamic_rules 1 \
95-
--mca coll_tuned_dynamic_rules_filename /path/to/my_rules.conf ...
95+
--mca coll_tuned_dynamic_rules_filename /path/to/my_rules.json ...
9696
9797
The loaded set of rules then are used to select the algorithm
9898
to use based on the collective, the communicator size, and the message size.
9999
Collectives for which rules have not be specified in the file will make use of
100100
the *fixed decision* rules as usual.
101101

102-
Dynamic tuning files are organized in this format:
102+
Starting with Open MPI 6.0, dynamic tuning files can be specified in JSON
103+
format, although the classic format will still be accepted. A converter script
104+
is also available to transfer classic format files into JSON.
105+
106+
The JSON format can be checked using the schema in
107+
`docs/tuning-apps/tuned_dynamic_file_schema.json`. If your editor supports it,
108+
this schema may provide validation of your file along with helpful tooltips for
109+
each variable.
110+
111+
An example file is shown here:
112+
113+
.. code-block:: json
114+
115+
{
116+
"$schema": "tuned_schema.json",
117+
"rule_file_version" : 3,
118+
"module" : "tuned",
119+
"collectives" : {
120+
"allreduce" :
121+
[
122+
{
123+
"comm_size_min" : 64,
124+
"comm_size_max" : 128,
125+
"rules" : [
126+
{
127+
"msg_size_min" : 512,
128+
"msg_size_max" : 511999,
129+
"alg" : 2,
130+
},
131+
{
132+
"msg_size_min" : 512000,
133+
"msg_size_max" : "inf",
134+
"alg" : "recursive_doubling",
135+
"reqs" : 8
136+
}
137+
]
138+
}
139+
]
140+
}
141+
}
142+
143+
In this toy example the MPI_Allreduce collective (indicated by the `allreduce`
144+
field) has two algorithms that will only be used on communicators with between
145+
64 and 128 ranks. Additionally, those rules only apply to certain message
146+
sizes. All others communicator sizes or message sizes fall back to the default
147+
set of rules, and collectives other than MPI_Allreduce are not affected.
148+
149+
Unlike in the classic file format, there is no need to specify a default rule or
150+
specify rules in increasing order. Overlapping message sizes or communicator
151+
sizes are allowed, and won't emit warnings.
152+
153+
The process for selecting the matching rule is a simple first-match principle.
154+
During communicator creation, the first set of communicator-rules which
155+
satisfies the requirements (`comm_size_min`/`comm_size_max`) is selected. Then,
156+
during each collective call, the message size is used to find the first matching
157+
entry in the "rules" list.
158+
159+
The algorithm selected is indicated by the `alg` field. It may be either an
160+
integer mapping to the classic file format, or a string. In both cases, the
161+
value is checked against the appropriate coll_tuned_<collectived>_algorithm MCA
162+
parameter, and un-recognized values will cause the rule to be ignored.
163+
164+
165+
Classic file format:
103166

104167
.. code-block:: sh
105168
:linenos:
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2019-09/schema#",
3+
"title": "OpenMPITunedRules",
4+
"description": "Defines configuration for the Open MPI Tuned module to select which collective algorithms will be used depending on comm size, message size, etc.",
5+
"type": "object",
6+
"required": ["rule_file_version","module","collectives"],
7+
"additionalProperties" : false,
8+
"properties": {
9+
"rule_file_version": {
10+
"description": "The version of this configuration file",
11+
"type": "number"
12+
},
13+
"module": {
14+
"description": "The collective module intended to use these rules (tuned)",
15+
"type": "string"
16+
},
17+
"$schema": {
18+
"description": "The schema used for validation",
19+
"type": "string"
20+
},
21+
"collectives" : {
22+
"description": "The collectives, each with their own rules. Each collective is indicated by a lowercase property such as \"allgather\"",
23+
"type": "object",
24+
"additionalProperties" : false,
25+
"patternProperties": {
26+
"^(allgather|allreduce|alltoall|alltoallv|alltoallw|barrier)$": {
27+
"type" : "array",
28+
"items": { "$ref" : "#/$defs/comm_size_rule" }
29+
},
30+
"^(bcast|exscan|gather|gatherv|reduce|reducescatter|reducescatterblock)$": {
31+
"type" : "array",
32+
"items": { "$ref" : "#/$defs/comm_size_rule" }
33+
},
34+
"^(scan|scatter|scatterv|neighbor_allgather|neighbor_allgatherv)$": {
35+
"type" : "array",
36+
"items": { "$ref" : "#/$defs/comm_size_rule" }
37+
},
38+
"^(neighbor_alltoall|neighbor_alltoallv|neighbor_alltoallw)$": {
39+
"type" : "array",
40+
"items": { "$ref" : "#/$defs/comm_size_rule" }
41+
}
42+
}
43+
}
44+
},
45+
46+
"$defs": {
47+
"msg_size_rule": {
48+
"type": "object",
49+
"required": ["alg"],
50+
"additionalProperties" : false,
51+
"properties" : {
52+
"msg_size_min" : {
53+
"description" : "The smallest message size in bytes this rule applies to",
54+
"anyOf" : { "$ref" : "#/$defs/int_or_inf" }
55+
},
56+
"msg_size_max" : {
57+
"description" : "The largest message size (inclusive) in bytes this rule applies to",
58+
"anyOf" : { "$ref" : "#/$defs/int_or_inf" }
59+
},
60+
"alg" : {
61+
"description" : "The algorithm to use for this collective. Integer or name, see coll_tuned_<collective>_algorithm for options.",
62+
"type" : [ "string", "integer"]
63+
},
64+
"reqs" : {
65+
"description" : "Algorithm parameter: Use this many requests. Some algorithms may ignore this option.",
66+
"type" : [ "integer"]
67+
},
68+
"faninout" : {
69+
"description" : "Algorithm parameter: Fan in and/or out by this much. Some algorithms may ignore this option.",
70+
"type" : [ "integer"]
71+
}
72+
}
73+
},
74+
75+
"comm_size_rule": {
76+
"type": "object",
77+
"required": ["rules"],
78+
"additionalProperties" : false,
79+
"properties" : {
80+
"comm_size_min" : {
81+
"description" : "The smallest size communicator these rules apply to",
82+
"anyOf" : { "$ref" : "#/$defs/int_or_inf" }
83+
},
84+
"comm_size_max" : {
85+
"description" : "The largest (inclusive) size communicator these rules apply to",
86+
"anyOf" : { "$ref" : "#/$defs/int_or_inf" }
87+
},
88+
"comm_rank_distribution" : {
89+
"description" : "A description of how the ranks are distributed within the communicator",
90+
"enum" : ["any", "one-per-node", "single-node"]
91+
},
92+
93+
"rules" : {
94+
"description" : "A list of rules. The first matching rule is selected. If no match is found, defaults are used.",
95+
"type" : "array",
96+
"items": { "$ref" : "#/$defs/msg_size_rule" }
97+
}
98+
}
99+
},
100+
"collective_identifier": {
101+
"enum" : [
102+
"allgather",
103+
"allreduce",
104+
"alltoall",
105+
"alltoallv",
106+
"alltoallw",
107+
"barrier",
108+
"bcast",
109+
"exscan",
110+
"gather",
111+
"gatherv",
112+
"reduce",
113+
"reducescatter",
114+
"reducescatterblock",
115+
"scan",
116+
"scatter",
117+
"scatterv",
118+
"neighbor_allgather",
119+
"neighbor_allgatherv",
120+
"neighbor_alltoall",
121+
"neighbor_alltoallv",
122+
"neighbor_alltoallw"
123+
]
124+
},
125+
"int_or_inf": [
126+
{ "type" : "integer" },
127+
{ "enum": ["inf","INF","Inf"] }
128+
]
129+
}
130+
}

0 commit comments

Comments
 (0)