Skip to content

Commit 7f49190

Browse files
authored
YARN-7327: Enable asynchronous scheduling by default for capacity scheduler (#7138)
1 parent 5fe4f13 commit 7f49190

File tree

9 files changed

+311
-14
lines changed

9 files changed

+311
-14
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
299299
DEFAULT_SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS = 100;
300300

301301
@Private
302-
public static final boolean DEFAULT_SCHEDULE_ASYNCHRONOUSLY_ENABLE = false;
302+
public static final boolean DEFAULT_SCHEDULE_ASYNCHRONOUSLY_ENABLE = true;
303303

304304
@Private
305305
public static final String QUEUE_MAPPING = PREFIX + "queue-mappings";

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSYarnSiteConverter.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public void convertSiteProperties(Configuration conf,
4949
FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_ENABLED,
5050
FairSchedulerConfiguration.DEFAULT_CONTINUOUS_SCHEDULING_ENABLED)) {
5151
yarnSiteConfig.setBoolean(
52-
CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, true);
52+
CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, enableAsyncScheduler);
5353
int interval = conf.getInt(
5454
FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_SLEEP_MS,
5555
FairSchedulerConfiguration.DEFAULT_CONTINUOUS_SCHEDULING_SLEEP_MS);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -929,7 +929,11 @@ public void testReleaseOutdatedReservedContainer() throws Exception {
929929
* First proposal should be accepted, second proposal should be rejected
930930
because it tries to release an outdated reserved container
931931
*/
932-
MockRM rm1 = new MockRM();
932+
// disable async-scheduling to simulate a complex scenario
933+
Configuration disableAsyncConf = new Configuration(conf);
934+
disableAsyncConf.setBoolean(
935+
CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, false);
936+
MockRM rm1 = new MockRM(disableAsyncConf);
933937
rm1.getRMContext().setNodeLabelManager(mgr);
934938
rm1.start();
935939
MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/TestFSYarnSiteConverter.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public void testSiteContinuousSchedulingConversion() {
6060
FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_SLEEP_MS, 666);
6161

6262
converter.convertSiteProperties(yarnConfig, yarnConvertedConfig, false,
63-
false, false, null);
63+
true, false, null);
6464

6565
assertTrue("Cont. scheduling", yarnConvertedConfig.getBoolean(
6666
CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, false));
@@ -224,7 +224,7 @@ public void testAsyncSchedulingDisabledConversion() {
224224

225225
assertFalse("Asynchronous scheduling", yarnConvertedConfig.getBoolean(
226226
CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE,
227-
CapacitySchedulerConfiguration.DEFAULT_SCHEDULE_ASYNCHRONOUSLY_ENABLE));
227+
false));
228228
}
229229

230230
@Test

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/weightconversion/TestWeightToPercentageConverter.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public void testNoChildQueueConversion() {
6666
FSQueue root = createFSQueues();
6767
converter.convertWeightsForChildQueues(root, csConfig);
6868

69-
assertEquals("Converted items", 19,
69+
assertEquals("Converted items", 20,
7070
csConfig.getPropsWithPrefix(PREFIX).size());
7171
}
7272

@@ -76,7 +76,7 @@ public void testMultiWeightConversion() {
7676

7777
converter.convertWeightsForChildQueues(root, csConfig);
7878

79-
assertEquals("Number of properties", 22,
79+
assertEquals("Number of properties", 23,
8080
csConfig.getPropsWithPrefix(PREFIX).size());
8181
// this is no fixing - it's the result of BigDecimal rounding
8282
assertEquals("root.a capacity", 16.667f,
@@ -95,7 +95,7 @@ public void testMultiWeightConversionWhenOfThemIsZero() {
9595

9696
assertFalse("Capacity zerosum allowed",
9797
csConfig.getAllowZeroCapacitySum(ROOT));
98-
assertEquals("Number of properties", 22,
98+
assertEquals("Number of properties", 23,
9999
csConfig.getPropsWithPrefix(PREFIX).size());
100100
assertEquals("root.a capacity", 0.000f,
101101
csConfig.getNonLabeledQueueCapacity(ROOT_A), 0.0f);
@@ -111,7 +111,7 @@ public void testMultiWeightConversionWhenAllOfThemAreZero() {
111111

112112
converter.convertWeightsForChildQueues(root, csConfig);
113113

114-
assertEquals("Number of properties", 23,
114+
assertEquals("Number of properties", 24,
115115
csConfig.getPropsWithPrefix(PREFIX).size());
116116
assertTrue("Capacity zerosum allowed",
117117
csConfig.getAllowZeroCapacitySum(ROOT));
@@ -129,7 +129,7 @@ public void testCapacityFixingWithThreeQueues() {
129129

130130
converter.convertWeightsForChildQueues(root, csConfig);
131131

132-
assertEquals("Number of properties", 22,
132+
assertEquals("Number of properties", 23,
133133
csConfig.getPropsWithPrefix(PREFIX).size());
134134
assertEquals("root.a capacity", 33.334f,
135135
csConfig.getNonLabeledQueueCapacity(ROOT_A), 0.0f);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/weightconversion/TestWeightToWeightConverter.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public void testNoChildQueueConversion() {
5454

5555
assertEquals("root weight", 1.0f,
5656
csConfig.getNonLabeledQueueWeight(ROOT), 0.0f);
57-
assertEquals("Converted items", 21,
57+
assertEquals("Converted items", 22,
5858
csConfig.getPropsWithPrefix(PREFIX).size());
5959
}
6060

@@ -67,7 +67,7 @@ public void testSingleWeightConversion() {
6767
csConfig.getNonLabeledQueueWeight(ROOT), 0.0f);
6868
assertEquals("root.a weight", 1.0f,
6969
csConfig.getNonLabeledQueueWeight(ROOT_A), 0.0f);
70-
assertEquals("Number of properties", 22,
70+
assertEquals("Number of properties", 23,
7171
csConfig.getPropsWithPrefix(PREFIX).size());
7272
}
7373

@@ -77,7 +77,7 @@ public void testMultiWeightConversion() {
7777

7878
converter.convertWeightsForChildQueues(root, csConfig);
7979

80-
assertEquals("Number of properties", 24,
80+
assertEquals("Number of properties", 25,
8181
csConfig.getPropsWithPrefix(PREFIX).size());
8282
assertEquals("root weight", 1.0f,
8383
csConfig.getNonLabeledQueueWeight(ROOT), 0.0f);
@@ -103,7 +103,7 @@ public void testAutoCreateV2FlagOnParentWithoutChildren() {
103103
FSQueue root = createParent(new ArrayList<>());
104104
converter.convertWeightsForChildQueues(root, csConfig);
105105

106-
assertEquals("Number of properties", 21,
106+
assertEquals("Number of properties", 22,
107107
csConfig.getPropsWithPrefix(PREFIX).size());
108108
assertTrue("root autocreate v2 enabled",
109109
csConfig.isAutoQueueCreationV2Enabled(ROOT));

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java

+1
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ private Configuration createConfig() {
167167
conf.set("yarn.scheduler.capacity.root.a.max-parallel-app", "42");
168168
conf.set("yarn.scheduler.capacity.root.b.capacity", "50");
169169
conf.set("yarn.scheduler.capacity.root.c.capacity", "37.5");
170+
conf.set("yarn.scheduler.capacity.schedule-asynchronously.enable", "false");
170171
return conf;
171172
}
172173
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
<!--
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License. See accompanying LICENSE file.
13+
-->
14+
<configuration>
15+
16+
<property>
17+
<name>yarn.scheduler.capacity.maximum-applications</name>
18+
<value>10000</value>
19+
<description>
20+
Maximum number of applications that can be pending and running.
21+
</description>
22+
</property>
23+
24+
<property>
25+
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
26+
<value>0.1</value>
27+
<description>
28+
Maximum percent of resources in the cluster which can be used to run
29+
application masters i.e. controls number of concurrent running
30+
applications.
31+
</description>
32+
</property>
33+
34+
<property>
35+
<name>yarn.scheduler.capacity.resource-calculator</name>
36+
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
37+
<description>
38+
The ResourceCalculator implementation to be used to compare
39+
Resources in the scheduler.
40+
The default i.e. DefaultResourceCalculator only uses Memory while
41+
DominantResourceCalculator uses dominant-resource to compare
42+
multi-dimensional resources such as Memory, CPU etc.
43+
</description>
44+
</property>
45+
46+
<property>
47+
<name>yarn.scheduler.capacity.root.queues</name>
48+
<value>default</value>
49+
<description>
50+
The queues at this level (root is the root queue).
51+
</description>
52+
</property>
53+
54+
<property>
55+
<name>yarn.scheduler.capacity.root.default.capacity</name>
56+
<value>100</value>
57+
<description>Default queue target capacity.</description>
58+
</property>
59+
60+
<property>
61+
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
62+
<value>1</value>
63+
<description>
64+
Default queue user limit a percentage from 0.0 to 1.0.
65+
</description>
66+
</property>
67+
68+
<property>
69+
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
70+
<value>100</value>
71+
<description>
72+
The maximum capacity of the default queue.
73+
</description>
74+
</property>
75+
76+
<property>
77+
<name>yarn.scheduler.capacity.root.default.state</name>
78+
<value>RUNNING</value>
79+
<description>
80+
The state of the default queue. State can be one of RUNNING or STOPPED.
81+
</description>
82+
</property>
83+
84+
<property>
85+
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
86+
<value>*</value>
87+
<description>
88+
The ACL of who can submit jobs to the default queue.
89+
</description>
90+
</property>
91+
92+
<property>
93+
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
94+
<value>*</value>
95+
<description>
96+
The ACL of who can administer jobs on the default queue.
97+
</description>
98+
</property>
99+
100+
<property>
101+
<name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>
102+
<value>*</value>
103+
<description>
104+
The ACL of who can submit applications with configured priority.
105+
For example, [user={name} group={name} max_priority={priority} default_priority={priority}]
106+
</description>
107+
</property>
108+
109+
<property>
110+
<name>yarn.scheduler.capacity.root.default.maximum-application-lifetime
111+
</name>
112+
<value>-1</value>
113+
<description>
114+
Maximum lifetime of an application which is submitted to a queue
115+
in seconds. Any value less than or equal to zero will be considered as
116+
disabled.
117+
This will be a hard time limit for all applications in this
118+
queue. If positive value is configured then any application submitted
119+
to this queue will be killed after it exceeds the configured lifetime.
120+
User can also specify lifetime per application basis in
121+
application submission context. But user lifetime will be
122+
overridden if it exceeds queue maximum lifetime. It is point-in-time
123+
configuration.
124+
Note: Configuring too low a value will result in killing applications
125+
sooner. This feature is applicable only for leaf queue.
126+
</description>
127+
</property>
128+
129+
<property>
130+
<name>yarn.scheduler.capacity.root.default.default-application-lifetime
131+
</name>
132+
<value>-1</value>
133+
<description>
134+
Default lifetime of an application which is submitted to a queue
135+
in seconds. Any value less than or equal to zero will be considered as
136+
disabled.
137+
If the user has not submitted application with lifetime value then this
138+
value will be taken. It is point-in-time configuration.
139+
Note : Default lifetime can't exceed maximum lifetime. This feature is
140+
applicable only for leaf queue.
141+
</description>
142+
</property>
143+
144+
<property>
145+
<name>yarn.scheduler.capacity.node-locality-delay</name>
146+
<value>40</value>
147+
<description>
148+
Number of missed scheduling opportunities after which the CapacityScheduler
149+
attempts to schedule rack-local containers.
150+
When setting this parameter, the size of the cluster should be taken into account.
151+
We use 40 as the default value, which is approximately the number of nodes in one rack.
152+
Note, if this value is -1, the locality constraint in the container request
153+
will be ignored, which disables the delay scheduling.
154+
</description>
155+
</property>
156+
157+
<property>
158+
<name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
159+
<value>-1</value>
160+
<description>
161+
Number of additional missed scheduling opportunities over the node-locality-delay
162+
ones, after which the CapacityScheduler attempts to schedule off-switch containers,
163+
instead of rack-local ones.
164+
Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
165+
attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
166+
after 40+20=60 missed opportunities.
167+
When setting this parameter, the size of the cluster should be taken into account.
168+
We use -1 as the default value, which disables this feature. In this case, the number
169+
of missed opportunities for assigning off-switch containers is calculated based on
170+
the number of containers and unique locations specified in the resource request,
171+
as well as the size of the cluster.
172+
</description>
173+
</property>
174+
175+
<property>
176+
<name>yarn.scheduler.capacity.queue-mappings</name>
177+
<value></value>
178+
<description>
179+
A list of mappings that will be used to assign jobs to queues
180+
The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
181+
Typically this list will be used to map users to queues,
182+
for example, u:%user:%user maps all users to queues with the same name
183+
as the user.
184+
</description>
185+
</property>
186+
187+
<property>
188+
<name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
189+
<value>false</value>
190+
<description>
191+
If a queue mapping is present, will it override the value specified
192+
by the user? This can be used by administrators to place jobs in queues
193+
that are different than the one specified by the user.
194+
The default is false.
195+
</description>
196+
</property>
197+
198+
<property>
199+
<name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
200+
<value>1</value>
201+
<description>
202+
Controls the number of OFF_SWITCH assignments allowed
203+
during a node's heartbeat. Increasing this value can improve
204+
scheduling rate for OFF_SWITCH containers. Lower values reduce
205+
"clumping" of applications on particular nodes. The default is 1.
206+
Legal values are 1-MAX_INT. This config is refreshable.
207+
</description>
208+
</property>
209+
210+
211+
<property>
212+
<name>yarn.scheduler.capacity.application.fail-fast</name>
213+
<value>false</value>
214+
<description>
215+
Whether RM should fail during recovery if previous applications'
216+
queue is no longer valid.
217+
</description>
218+
</property>
219+
220+
<property>
221+
<name>yarn.scheduler.capacity.workflow-priority-mappings</name>
222+
<value></value>
223+
<description>
224+
A list of mappings that will be used to override application priority.
225+
The syntax for this list is
226+
[workflowId]:[full_queue_name]:[priority][,next mapping]*
227+
where an application submitted (or mapped to) queue "full_queue_name"
228+
and workflowId "workflowId" (as specified in application submission
229+
context) will be given priority "priority".
230+
</description>
231+
</property>
232+
233+
<property>
234+
<name>yarn.scheduler.capacity.workflow-priority-mappings-override.enable</name>
235+
<value>false</value>
236+
<description>
237+
If a priority mapping is present, will it override the value specified
238+
by the user? This can be used by administrators to give applications a
239+
priority that is different than the one specified by the user.
240+
The default is false.
241+
</description>
242+
</property>
243+
244+
<!-- Asynchronous scheduling is enabled by default, but for unit testing it -->
245+
<!-- is disabled here to give more control over container scheduling while -->
246+
<!-- simulating complex tests. -->
247+
<property>
248+
<name>yarn.scheduler.capacity.schedule-asynchronously.enable</name>
249+
<value>false</value>
250+
<description>
251+
Whether to enable asynchronous scheduling.
252+
</description>
253+
</property>
254+
255+
</configuration>

0 commit comments

Comments
 (0)