Skip to content

Commit 0a1c567

Browse files
committed
HDFS-17751. [ARR] Add unit tests using asynchronous router rpc for all in org.apache.hadoop.hdfs.server.federation.router.
1 parent f0430f2 commit 0a1c567

File tree

3 files changed

+353
-3
lines changed

3 files changed

+353
-3
lines changed

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,10 +1991,11 @@ private static boolean isReadCall(Method method) {
19911991
* Checks and sets last refresh time for a namespace's stateId.
19921992
* Returns true if refresh time is newer than threshold.
19931993
* Otherwise, return false and call should be handled by active namenode.
1994-
* @param nsId namespaceID
1994+
* @param nsId namespaceID.
1995+
* @return true if refresh time is newer than threshold. Otherwise, return false.
19951996
*/
19961997
@VisibleForTesting
1997-
boolean isNamespaceStateIdFresh(String nsId) {
1998+
public boolean isNamespaceStateIdFresh(String nsId) {
19981999
if (activeNNStateIdRefreshPeriodMs < 0) {
19992000
return true;
20002001
}

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterStateIdContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public class RouterStateIdContext implements AlignmentContext {
6464
/** Nameservice specific overrides of the default setting for enabling observer reads. */
6565
private HashSet<String> observerReadEnabledOverrides = new HashSet<>();
6666

67-
RouterStateIdContext(Configuration conf) {
67+
public RouterStateIdContext(Configuration conf) {
6868
this.coordinatedMethods = new HashSet<>();
6969
// For now, only ClientProtocol methods can be coordinated, so only checking
7070
// against ClientProtocol.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hdfs.server.federation.router.async;
19+
20+
import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.simulateSlowNamenode;
21+
import static org.apache.hadoop.hdfs.server.federation.router.async.TestDisableNameservicesExample.cluster;
22+
import static org.apache.hadoop.hdfs.server.federation.router.async.TestDisableNameservicesExample.routerContext;
23+
import static org.apache.hadoop.hdfs.server.federation.router.async.TestDisableNameservicesExample.setUp;
24+
import static org.apache.hadoop.util.Time.monotonicNow;
25+
import static org.junit.Assert.assertEquals;
26+
import static org.junit.Assert.assertTrue;
27+
28+
import java.io.IOException;
29+
import java.lang.reflect.Method;
30+
import java.util.Iterator;
31+
import java.util.Map;
32+
import java.util.Set;
33+
import java.util.TreeMap;
34+
import java.util.concurrent.TimeUnit;
35+
36+
import org.apache.hadoop.conf.Configuration;
37+
import org.apache.hadoop.fs.FileStatus;
38+
import org.apache.hadoop.fs.FileSystem;
39+
import org.apache.hadoop.fs.Path;
40+
import org.apache.hadoop.hdfs.MiniDFSCluster;
41+
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
42+
import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.NamenodeContext;
43+
import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext;
44+
import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
45+
import org.apache.hadoop.hdfs.server.federation.StateStoreDFSCluster;
46+
import org.apache.hadoop.hdfs.server.federation.metrics.RBFMetrics;
47+
import org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver;
48+
import org.apache.hadoop.hdfs.server.federation.resolver.MountTableManager;
49+
import org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver;
50+
import org.apache.hadoop.hdfs.server.federation.router.NameserviceManager;
51+
import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys;
52+
import org.apache.hadoop.hdfs.server.federation.router.Router;
53+
import org.apache.hadoop.hdfs.server.federation.router.RouterClient;
54+
import org.apache.hadoop.hdfs.server.federation.store.DisabledNameserviceStore;
55+
import org.apache.hadoop.hdfs.server.federation.store.StateStoreService;
56+
import org.apache.hadoop.hdfs.server.federation.store.protocol.AddMountTableEntryRequest;
57+
import org.apache.hadoop.hdfs.server.federation.store.protocol.DisableNameserviceRequest;
58+
import org.apache.hadoop.hdfs.server.federation.store.records.MountTable;
59+
import org.apache.hadoop.hdfs.server.namenode.NameNode;
60+
import org.codehaus.jettison.json.JSONObject;
61+
import org.junit.jupiter.api.Nested;
62+
import org.junit.jupiter.api.extension.AfterAllCallback;
63+
import org.junit.jupiter.api.extension.AfterEachCallback;
64+
import org.junit.jupiter.api.extension.BeforeEachCallback;
65+
import org.junit.jupiter.api.extension.ExtendWith;
66+
import org.junit.jupiter.api.extension.ExtensionContext;
67+
import org.junit.jupiter.params.ParameterizedTest;
68+
import org.junit.jupiter.params.provider.ValueSource;
69+
70+
/**
71+
* Test the behavior when disabling name services.
72+
*/
73+
@SuppressWarnings("checkstyle:VisibilityModifier")
74+
public class TestDisableNameservicesExample {
75+
76+
public static final String ASYNC_MODE = "ASYNC";
77+
public static final String SYNC_MODE = "SYNC";
78+
79+
static StateStoreDFSCluster cluster;
80+
static RouterContext routerContext;
81+
static RouterClient routerAdminClient;
82+
static ClientProtocol routerProtocol;
83+
84+
@Nested
85+
@ExtendWith(RouterServerHelper.class)
86+
class TestWithAsyncRouterRpc {
87+
88+
@ParameterizedTest
89+
@ValueSource(strings = {ASYNC_MODE})
90+
public void testMetricsAsync(String rpcMode) throws Exception {
91+
testMetrics();
92+
}
93+
94+
/* @ParameterizedTest
95+
@ValueSource(strings = {ASYNC_MODE)
96+
public void testDisablingAsync() throws Exception {
97+
testDisabling();
98+
}
99+
100+
@ParameterizedTest
101+
@ValueSource(strings = {ASYNC_MODE)
102+
public void testWithoutDisablingAsync() throws IOException {
103+
testWithoutDisabling();
104+
}*/
105+
}
106+
107+
@Nested
108+
@ExtendWith(RouterServerHelper.class)
109+
class TestWithSyncRouterRpc {
110+
111+
@ParameterizedTest
112+
@ValueSource(strings = {SYNC_MODE})
113+
public void testMetricsSync(String rpcMode) throws Exception {
114+
testMetrics();
115+
}
116+
117+
@ParameterizedTest
118+
@ValueSource(strings = {SYNC_MODE})
119+
public void testDisablingSync() throws Exception {
120+
testDisabling();
121+
}
122+
123+
@ParameterizedTest
124+
@ValueSource(strings = {SYNC_MODE})
125+
public void testWithoutDisablingSync() throws IOException {
126+
testWithoutDisabling();
127+
}
128+
}
129+
130+
public static void setUp(String rpcMode) throws Exception {
131+
// Build and start a federated cluster.
132+
cluster = new StateStoreDFSCluster(false, 2);
133+
Configuration routerConf = new RouterConfigBuilder()
134+
.stateStore()
135+
.metrics()
136+
.admin()
137+
.rpc()
138+
.build();
139+
// Reduce the number of RPC threads to saturate the Router easy.
140+
routerConf.setInt(RBFConfigKeys.DFS_ROUTER_HANDLER_COUNT_KEY, 8);
141+
routerConf.setInt(RBFConfigKeys.DFS_ROUTER_CLIENT_THREADS_SIZE, 4);
142+
143+
// Use async router rpc.
144+
if (rpcMode.equals("ASYNC")) {
145+
routerConf.setBoolean(RBFConfigKeys.DFS_ROUTER_ASYNC_RPC_ENABLE_KEY, true);
146+
}
147+
148+
// Set the DNs to belong to only one subcluster.
149+
cluster.setIndependentDNs();
150+
151+
cluster.addRouterOverrides(routerConf);
152+
// Override some settings for the client.
153+
cluster.startCluster();
154+
cluster.startRouters();
155+
cluster.waitClusterUp();
156+
157+
routerContext = cluster.getRandomRouter();
158+
routerProtocol = routerContext.getClient().getNamenode();
159+
routerAdminClient = routerContext.getAdminClient();
160+
161+
setupNamespace();
162+
163+
// Simulate one of the subclusters to be slow.
164+
MiniDFSCluster dfsCluster = cluster.getCluster();
165+
NameNode nn0 = dfsCluster.getNameNode(0);
166+
simulateSlowNamenode(nn0, 1);
167+
}
168+
169+
private static void setupNamespace() throws IOException {
170+
171+
// Setup a mount table to map to the two namespaces.
172+
MountTableManager mountTable = routerAdminClient.getMountTableManager();
173+
Map<String, String> destinations = new TreeMap<>();
174+
destinations.put("ns0", "/dirns0");
175+
MountTable newEntry = MountTable.newInstance("/dirns0", destinations);
176+
AddMountTableEntryRequest request =
177+
AddMountTableEntryRequest.newInstance(newEntry);
178+
mountTable.addMountTableEntry(request);
179+
180+
destinations = new TreeMap<>();
181+
destinations.put("ns1", "/dirns1");
182+
newEntry = MountTable.newInstance("/dirns1", destinations);
183+
request = AddMountTableEntryRequest.newInstance(newEntry);
184+
mountTable.addMountTableEntry(request);
185+
186+
// Refresh the cache in the Router.
187+
Router router = routerContext.getRouter();
188+
MountTableResolver mountTableResolver =
189+
(MountTableResolver) router.getSubclusterResolver();
190+
mountTableResolver.loadCache(true);
191+
192+
// Add a folder to each namespace.
193+
NamenodeContext nn0 = cluster.getNamenode("ns0", null);
194+
nn0.getFileSystem().mkdirs(new Path("/dirns0/0"));
195+
nn0.getFileSystem().mkdirs(new Path("/dir-ns"));
196+
NamenodeContext nn1 = cluster.getNamenode("ns1", null);
197+
nn1.getFileSystem().mkdirs(new Path("/dirns1/1"));
198+
}
199+
200+
public static void tearDown() {
201+
if (cluster != null) {
202+
cluster.stopRouter(routerContext);
203+
cluster.shutdown();
204+
cluster = null;
205+
}
206+
}
207+
208+
public void cleanup() throws IOException {
209+
Router router = routerContext.getRouter();
210+
StateStoreService stateStore = router.getStateStore();
211+
DisabledNameserviceStore store =
212+
stateStore.getRegisteredRecordStore(DisabledNameserviceStore.class);
213+
store.loadCache(true);
214+
215+
Set<String> disabled = store.getDisabledNameservices();
216+
for (String nsId : disabled) {
217+
store.enableNameservice(nsId);
218+
}
219+
store.loadCache(true);
220+
}
221+
222+
public void testWithoutDisabling() throws IOException {
223+
// ns0 is slow and renewLease should take a long time.
224+
long t0 = monotonicNow();
225+
routerProtocol.renewLease("client0", null);
226+
long t = monotonicNow() - t0;
227+
assertTrue("It took too little: " + t + "ms",
228+
t > TimeUnit.SECONDS.toMillis(1));
229+
// Return the results from all subclusters even if slow.
230+
FileSystem routerFs = routerContext.getFileSystem();
231+
FileStatus[] filesStatus = routerFs.listStatus(new Path("/"));
232+
assertEquals(3, filesStatus.length);
233+
assertEquals("dir-ns", filesStatus[0].getPath().getName());
234+
assertEquals("dirns0", filesStatus[1].getPath().getName());
235+
assertEquals("dirns1", filesStatus[2].getPath().getName());
236+
}
237+
238+
public void testDisabling() throws Exception {
239+
disableNameservice("ns0");
240+
241+
// RenewLease should be fast as we are skipping ns0.
242+
long t0 = monotonicNow();
243+
routerProtocol.renewLease("client0", null);
244+
long t = monotonicNow() - t0;
245+
assertTrue("It took too long: " + t + "ms",
246+
t < TimeUnit.SECONDS.toMillis(1));
247+
// We should not report anything from ns0.
248+
FileSystem routerFs = routerContext.getFileSystem();
249+
250+
FileStatus[] filesStatus = routerFs.listStatus(new Path("/"));
251+
assertEquals(2, filesStatus.length);
252+
assertEquals("dirns0", filesStatus[0].getPath().getName());
253+
assertEquals("dirns1", filesStatus[1].getPath().getName());
254+
255+
filesStatus = routerFs.listStatus(new Path("/dirns1"));
256+
assertEquals(1, filesStatus.length);
257+
assertEquals("1", filesStatus[0].getPath().getName());
258+
}
259+
260+
public void testMetrics() throws Exception {
261+
disableNameservice("ns0");
262+
263+
int numActive = 0;
264+
int numDisabled = 0;
265+
Router router = routerContext.getRouter();
266+
RBFMetrics metrics = router.getMetrics();
267+
String jsonString = metrics.getNameservices();
268+
JSONObject jsonObject = new JSONObject(jsonString);
269+
Iterator<?> keys = jsonObject.keys();
270+
while (keys.hasNext()) {
271+
String key = (String) keys.next();
272+
JSONObject json = jsonObject.getJSONObject(key);
273+
String nsId = json.getString("nameserviceId");
274+
String state = json.getString("state");
275+
if (nsId.equals("ns0")) {
276+
assertEquals("DISABLED", state);
277+
numDisabled++;
278+
} else {
279+
assertEquals("ACTIVE", state);
280+
numActive++;
281+
}
282+
}
283+
assertEquals(1, numActive);
284+
assertEquals(1, numDisabled);
285+
}
286+
287+
private static void disableNameservice(final String nsId)
288+
throws IOException {
289+
NameserviceManager nsManager = routerAdminClient.getNameserviceManager();
290+
DisableNameserviceRequest req =
291+
DisableNameserviceRequest.newInstance(nsId);
292+
nsManager.disableNameservice(req);
293+
294+
Router router = routerContext.getRouter();
295+
StateStoreService stateStore = router.getStateStore();
296+
DisabledNameserviceStore store =
297+
stateStore.getRegisteredRecordStore(DisabledNameserviceStore.class);
298+
store.loadCache(true);
299+
MembershipNamenodeResolver resolver =
300+
(MembershipNamenodeResolver) router.getNamenodeResolver();
301+
resolver.loadCache(true);
302+
}
303+
}
304+
305+
class RouterServerHelper implements BeforeEachCallback, AfterEachCallback, AfterAllCallback {
306+
307+
private static final ThreadLocal<RouterServerHelper> TEST_ROUTER_SERVER_TL =
308+
new InheritableThreadLocal<RouterServerHelper>();
309+
310+
@Override
311+
public void afterEach(ExtensionContext context) throws Exception {
312+
Router router = routerContext.getRouter();
313+
StateStoreService stateStore = router.getStateStore();
314+
DisabledNameserviceStore store =
315+
stateStore.getRegisteredRecordStore(DisabledNameserviceStore.class);
316+
store.loadCache(true);
317+
318+
Set<String> disabled = store.getDisabledNameservices();
319+
for (String nsId : disabled) {
320+
store.enableNameservice(nsId);
321+
}
322+
store.loadCache(true);
323+
}
324+
325+
@Override
326+
public void beforeEach(ExtensionContext context) throws Exception {
327+
Method testMethod = context.getRequiredTestMethod();
328+
ValueSource enumAnnotation = testMethod.getAnnotation(ValueSource.class);
329+
if (enumAnnotation != null) {
330+
String[] strings = enumAnnotation.strings();
331+
for (String rpcMode : strings) {
332+
if (TEST_ROUTER_SERVER_TL.get() == null) {
333+
setUp(rpcMode);
334+
}
335+
}
336+
}
337+
TEST_ROUTER_SERVER_TL.set(RouterServerHelper.this);
338+
}
339+
340+
@Override
341+
public void afterAll(ExtensionContext context) throws Exception {
342+
if (cluster != null) {
343+
cluster.stopRouter(routerContext);
344+
cluster.shutdown();
345+
cluster = null;
346+
}
347+
TEST_ROUTER_SERVER_TL.remove();
348+
}
349+
}

0 commit comments

Comments
 (0)