Skip to content

Commit e3fc1f2

Browse files
authored
Merge pull request #43 from vintmd/retrieve-retry
add the retrieve socket exception retry
2 parents 4211625 + d220ce3 commit e3fc1f2

File tree

4 files changed

+82
-26
lines changed

4 files changed

+82
-26
lines changed

pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>com.qcloud.cos</groupId>
88
<artifactId>hadoop-cos</artifactId>
9-
<version>8.0.2</version>
9+
<version>8.0.3</version>
1010
<packaging>jar</packaging>
1111

1212
<name>Apache Hadoop Tencent Qcloud COS Support</name>

src/main/java/org/apache/hadoop/fs/CosNConfigKeys.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
@InterfaceStability.Unstable
1313
public class CosNConfigKeys extends CommonConfigurationKeys {
1414
public static final String USER_AGENT = "fs.cosn.user.agent";
15-
public static final String DEFAULT_USER_AGENT = "cos-hadoop-plugin-v8.0.2";
15+
public static final String DEFAULT_USER_AGENT = "cos-hadoop-plugin-v8.0.3";
1616

1717
public static final String TENCENT_EMR_VERSION_KEY = "fs.emr.version";
1818

@@ -61,6 +61,8 @@ public class CosNConfigKeys extends CommonConfigurationKeys {
6161
public static final long DEFAULT_RETRY_INTERVAL = 3;
6262
public static final String CLIENT_MAX_RETRIES_KEY = "fs.cosn.client.maxRetries";
6363
public static final int DEFAULT_CLIENT_MAX_RETRIES = 5;
64+
public static final String CLIENT_SOCKET_ERROR_MAX_RETRIES = "fs.cosn.socket.error.maxRetries";
65+
public static final int DEFAULT_CLIENT_SOCKET_ERROR_MAX_RETRIES = 5;
6466

6567
public static final String UPLOAD_THREAD_POOL_SIZE_KEY = "fs.cosn.upload_thread_pool";
6668
public static final int DEFAULT_UPLOAD_THREAD_POOL_SIZE = 10;
@@ -87,7 +89,7 @@ public class CosNConfigKeys extends CommonConfigurationKeys {
8789

8890
public static final String CUSTOMER_DOMAIN = "fs.cosn.customer.domain";
8991
public static final String OPEN_CHECK_MERGE_BUCKET = "fs.cosn.check.merge.bucket";
90-
public static final boolean DEFAULT_CHECK_MERGE_BUCKET = true;
92+
public static final boolean DEFAULT_CHECK_MERGE_BUCKET = false;
9193
public static final String MERGE_BUCKET_MAX_LIST_NUM = "fs.cosn.merge.bucket.max.list.num";
9294
public static final int DEFAULT_MERGE_BUCKET_MAX_LIST_NUM = 5000;
9395
public static final String NORMAL_BUCKET_MAX_LIST_NUM = "fs.cosn.normal.bucket.max.list.num";

src/main/java/org/apache/hadoop/fs/CosNFSInputStream.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ public long getEnd() {
101101
private final int maxReadPartNumber;
102102
private byte[] buffer;
103103
private boolean closed = false;
104+
private final int socketErrMaxRetryTimes;
104105

105106
private final ExecutorService readAheadExecutorService;
106107
private final Queue<ReadBuffer> readBufferQueue;
@@ -136,6 +137,9 @@ public CosNFSInputStream(
136137
this.maxReadPartNumber = conf.getInt(
137138
CosNConfigKeys.READ_AHEAD_QUEUE_SIZE,
138139
CosNConfigKeys.DEFAULT_READ_AHEAD_QUEUE_SIZE);
140+
this.socketErrMaxRetryTimes = conf.getInt(
141+
CosNConfigKeys.CLIENT_SOCKET_ERROR_MAX_RETRIES,
142+
CosNConfigKeys.DEFAULT_CLIENT_SOCKET_ERROR_MAX_RETRIES);
139143
this.readAheadExecutorService = readAheadExecutorService;
140144
this.readBufferQueue =
141145
new ArrayDeque<ReadBuffer>(this.maxReadPartNumber);
@@ -203,8 +207,8 @@ private synchronized void reopen(long pos) throws IOException {
203207
readBuffer.setStatus(ReadBuffer.SUCCESS);
204208
} else {
205209
this.readAheadExecutorService.execute(
206-
new CosNFileReadTask(
207-
this.conf, this.key, this.store, readBuffer));
210+
new CosNFileReadTask(this.conf, this.key, this.store,
211+
readBuffer, this.socketErrMaxRetryTimes));
208212
}
209213

210214
this.readBufferQueue.add(readBuffer);

src/main/java/org/apache/hadoop/fs/CosNFileReadTask.java

+71-21
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@
77

88
import java.io.IOException;
99
import java.io.InputStream;
10+
import java.net.SocketException;
11+
import java.util.concurrent.ThreadLocalRandom;
1012

1113
public class CosNFileReadTask implements Runnable {
1214
static final Logger LOG = LoggerFactory.getLogger(CosNFileReadTask.class);
1315

16+
private final Configuration conf;
1417
private final String key;
1518
private final NativeFileSystemStore store;
1619
private final CosNFSInputStream.ReadBuffer readBuffer;
20+
private final int socketErrMaxRetryTimes;
1721

1822
/**
1923
* cos file read task
@@ -24,40 +28,86 @@ public class CosNFileReadTask implements Runnable {
2428
*/
2529
public CosNFileReadTask(Configuration conf, String key,
2630
NativeFileSystemStore store,
27-
CosNFSInputStream.ReadBuffer readBuffer) {
31+
CosNFSInputStream.ReadBuffer readBuffer,
32+
int socketErrMaxRetryTimes) {
33+
this.conf = conf;
2834
this.key = key;
2935
this.store = store;
3036
this.readBuffer = readBuffer;
37+
this.socketErrMaxRetryTimes = socketErrMaxRetryTimes;
3138
}
3239

3340
@Override
3441
public void run() {
3542
try {
3643
this.readBuffer.lock();
37-
try {
38-
InputStream inputStream = this.store.retrieveBlock(
39-
this.key, this.readBuffer.getStart(),
40-
this.readBuffer.getEnd());
41-
IOUtils.readFully(
42-
inputStream, this.readBuffer.getBuffer(), 0,
43-
readBuffer.getBuffer().length);
44-
int readEof = inputStream.read();
45-
if (readEof != -1) {
46-
LOG.error("Expect to read the eof, but the return is not -1. key: {}.", this.key);
44+
int retryIndex = 1;
45+
boolean needRetry = false;
46+
while (true) {
47+
try {
48+
this.retrieveBlock();
49+
needRetry = false;
50+
} catch (SocketException se) {
51+
// the stream was obtained successfully, but a socket exception occurred while reading from the COS input stream
52+
String errMsg = String.format("retrieve block sdk socket failed, " +
53+
"retryIndex: [%d / %d], key: %s, range: [%d , %d], exception: %s",
54+
retryIndex, this.socketErrMaxRetryTimes, this.key,
55+
this.readBuffer.getStart(), this.readBuffer.getEnd(), se.toString());
56+
if (retryIndex <= this.socketErrMaxRetryTimes) {
57+
LOG.info(errMsg, se);
58+
long sleepLeast = retryIndex * 300L;
59+
long sleepBound = retryIndex * 500L;
60+
try {
61+
Thread.sleep(ThreadLocalRandom.current().
62+
nextLong(sleepLeast, sleepBound));
63+
++retryIndex;
64+
needRetry = true;
65+
} catch (InterruptedException ie) {
66+
this.setFailResult(errMsg, new IOException(ie.toString()));
67+
break;
68+
}
69+
} else {
70+
this.setFailResult(errMsg, se);
71+
break;
72+
}
73+
} catch (IOException e) {
74+
String errMsg = String.format("retrieve block sdk socket failed, " +
75+
"retryIndex: [%d / %d], key: %s, range: [%d , %d], exception: %s",
76+
retryIndex, this.socketErrMaxRetryTimes, this.key,
77+
this.readBuffer.getStart(), this.readBuffer.getEnd(), e.toString());
78+
this.setFailResult(errMsg, e);
79+
break;
4780
}
48-
inputStream.close();
49-
this.readBuffer.setStatus(CosNFSInputStream.ReadBuffer.SUCCESS);
50-
} catch (IOException e) {
51-
this.readBuffer.setStatus(CosNFSInputStream.ReadBuffer.ERROR);
52-
this.readBuffer.setException(e);
53-
LOG.error("Exception occurs when retrieve the block range " +
54-
"start: "
55-
+ String.valueOf(this.readBuffer.getStart()) + " " +
56-
"end: " + this.readBuffer.getEnd(), e);
57-
}
81+
82+
if (!needRetry) {
83+
break;
84+
}
85+
} // end of retry
5886
this.readBuffer.signalAll();
5987
} finally {
6088
this.readBuffer.unLock();
6189
}
6290
}
91+
92+
public void setFailResult(String msg, IOException e) {
93+
this.readBuffer.setStatus(CosNFSInputStream.ReadBuffer.ERROR);
94+
this.readBuffer.setException(e);
95+
LOG.error(msg);
96+
}
97+
98+
// not thread safe
99+
public void retrieveBlock() throws IOException {
100+
InputStream inputStream = this.store.retrieveBlock(
101+
this.key, this.readBuffer.getStart(),
102+
this.readBuffer.getEnd());
103+
IOUtils.readFully(
104+
inputStream, this.readBuffer.getBuffer(), 0,
105+
readBuffer.getBuffer().length);
106+
int readEof = inputStream.read();
107+
if (readEof != -1) {
108+
LOG.error("Expect to read the eof, but the return is not -1. key: {}.", this.key);
109+
}
110+
inputStream.close();
111+
this.readBuffer.setStatus(CosNFSInputStream.ReadBuffer.SUCCESS);
112+
}
63113
}

0 commit comments

Comments
 (0)