@@ -93,7 +93,8 @@ public CachedDataInputStream(
93
93
* Retrieves the appropriate input stream for reading data. This method attempts to use the
94
94
* cached stream if it is available and valid. If the cached stream is not available, it falls
95
95
* back to the original stream. The method also handles the transition between cached and
96
- * original streams based on the current status of the stream.
96
+ * original streams based on the current status of the stream. The caller must
97
+ * release the cached stream after use.
97
98
*
98
99
* @return the input stream to be used for reading data
99
100
* @throws IOException if an I/O error occurs while accessing the stream
@@ -102,15 +103,18 @@ private FSDataInputStream getStream() throws IOException {
102
103
if (isFlinkThread ()) {
103
104
cacheEntry .touch ();
104
105
}
105
- FSDataInputStream stream = tryGetCacheStream ();
106
- if (stream != null ) {
107
- fileBasedCache .incHitCounter ();
108
- return stream ;
109
- }
110
-
111
- if (streamStatus == StreamStatus .CACHED_CLOSED
112
- || streamStatus == StreamStatus .CACHED_CLOSING ) {
106
+ int round = 0 ;
107
+ // Repeat at most 3 times. If fails, we will get the original stream for read.
108
+ while (round ++ < 3 ) {
109
+ // Firstly, we try to get cache stream
110
+ FSDataInputStream stream = tryGetCacheStream ();
111
+ if (stream != null ) {
112
+ fileBasedCache .incHitCounter ();
113
+ return stream ;
114
+ }
115
+ // No cache stream
113
116
if (streamStatus == StreamStatus .CACHED_CLOSING ) {
117
+ // if closing, update the position
114
118
try {
115
119
semaphore .acquire (1 );
116
120
} catch (InterruptedException e ) {
@@ -119,62 +123,78 @@ private FSDataInputStream getStream() throws IOException {
119
123
originalStream .seek (position );
120
124
position = -1 ;
121
125
LOG .trace (
122
- "Stream {} status from {} to {}" ,
126
+ "Cached Stream {} status from {} to {}" ,
123
127
cacheEntry .cachePath ,
124
128
streamStatus ,
125
129
StreamStatus .CACHED_CLOSED );
126
130
streamStatus = StreamStatus .CACHED_CLOSED ;
127
131
}
128
- // try reopen
129
- tryReopen ();
130
- stream = tryGetCacheStream ();
131
- if (stream != null ) {
132
- fileBasedCache .incHitCounter ();
133
- return stream ;
134
- }
135
- fileBasedCache .incMissCounter ();
136
- return originalStream ;
137
- } else if (streamStatus == StreamStatus .ORIGINAL ) {
138
- fileBasedCache .incMissCounter ();
139
- return originalStream ;
140
- } else {
141
- if (streamStatus == StreamStatus .CACHED_OPEN ) {
142
- stream = tryGetCacheStream ();
132
+ // if it is CACHED_CLOSED, we try to reopen it
133
+ if (streamStatus == StreamStatus .CACHED_CLOSED ) {
134
+ stream = tryReopenCachedStream ();
143
135
if (stream != null ) {
144
136
fileBasedCache .incHitCounter ();
145
137
return stream ;
146
138
}
139
+ fileBasedCache .incMissCounter ();
140
+ return originalStream ;
141
+ } else if (streamStatus == StreamStatus .ORIGINAL ) {
142
+ fileBasedCache .incMissCounter ();
143
+ return originalStream ;
144
+ } else {
145
+ // The stream is not closed, but we cannot get the cache stream.
146
+ // Meaning that it is in the process of closing, but the status has not been
147
+ // updated. Thus, we retry here until it reaches a stable state (CLOSING).
148
+ Thread .yield ();
147
149
}
148
- fileBasedCache .incMissCounter ();
149
- return originalStream ;
150
150
}
151
+ return originalStream ;
151
152
}
152
153
154
+ /**
155
+ * Attempts to retrieve the cached stream if it is open and the reference count is greater than
156
+ * zero. If successful, it retains the reference count and returns the cached stream. The
157
+ * invoker must ensure to release the stream after use.
158
+ *
159
+ * @return the cached stream if available, or null if not
160
+ */
153
161
private FSDataInputStream tryGetCacheStream () {
154
162
if (streamStatus == StreamStatus .CACHED_OPEN && cacheEntry .tryRetain () > 0 ) {
155
- return fsdis ;
163
+ // Double-check the status as it may change after retain.
164
+ if (streamStatus == StreamStatus .CACHED_OPEN ) {
165
+ return fsdis ;
166
+ }
156
167
}
157
168
return null ;
158
169
}
159
170
160
- private void tryReopen () {
171
+ /**
172
+ * Attempts to reopen the cached stream if it is closed and the current thread is a Flink
173
+ * thread. If successful, it updates the stream status and seeks to the original stream's
174
+ * position. Reference counting is retained, the invoked thread must dereference the stream
175
+ * after use.
176
+ *
177
+ * @return the reopened cached stream, or null if reopening fails
178
+ */
179
+ private FSDataInputStream tryReopenCachedStream () {
161
180
if (streamStatus == StreamStatus .CACHED_CLOSED && isFlinkThread ()) {
162
181
try {
163
182
fsdis = cacheEntry .getCacheStream ();
164
183
if (fsdis != null ) {
165
184
LOG .trace (
166
- "Stream {} status from {} to {}" ,
185
+ "Cached Stream {} status from {} to {}" ,
167
186
cacheEntry .cachePath ,
168
187
streamStatus ,
169
188
StreamStatus .CACHED_OPEN );
170
189
fsdis .seek (originalStream .getPos ());
171
190
streamStatus = StreamStatus .CACHED_OPEN ;
172
- cacheEntry . release () ;
191
+ return fsdis ;
173
192
}
174
193
} catch (IOException e ) {
175
194
LOG .warn ("Reopen stream error." , e );
176
195
}
177
196
}
197
+ return null ;
178
198
}
179
199
180
200
/**
@@ -196,72 +216,87 @@ synchronized void closeCachedStream() throws IOException {
196
216
}
197
217
}
198
218
199
- private void finish () {
200
- if (streamStatus == StreamStatus .CACHED_OPEN ) {
201
- cacheEntry .release ();
202
- }
203
- }
204
-
205
219
@ Override
206
220
public void seek (long desired ) throws IOException {
221
+ FSDataInputStream stream = getStream ();
207
222
try {
208
- getStream () .seek (desired );
223
+ stream .seek (desired );
209
224
} finally {
210
- finish ();
225
+ if (stream != originalStream ) {
226
+ cacheEntry .release ();
227
+ }
211
228
}
212
229
}
213
230
214
231
@ Override
215
232
public long getPos () throws IOException {
233
+ FSDataInputStream stream = getStream ();
216
234
try {
217
- return getStream () .getPos ();
235
+ return stream .getPos ();
218
236
} finally {
219
- finish ();
237
+ if (stream != originalStream ) {
238
+ cacheEntry .release ();
239
+ }
220
240
}
221
241
}
222
242
223
243
@ Override
224
244
public int read () throws IOException {
245
+ FSDataInputStream stream = getStream ();
225
246
try {
226
- return getStream () .read ();
247
+ return stream .read ();
227
248
} finally {
228
- finish ();
249
+ if (stream != originalStream ) {
250
+ cacheEntry .release ();
251
+ }
229
252
}
230
253
}
231
254
232
255
@ Override
233
256
public int read (byte [] b ) throws IOException {
257
+ FSDataInputStream stream = getStream ();
234
258
try {
235
- return getStream () .read (b );
259
+ return stream .read (b );
236
260
} finally {
237
- finish ();
261
+ if (stream != originalStream ) {
262
+ cacheEntry .release ();
263
+ }
238
264
}
239
265
}
240
266
241
267
@ Override
242
268
public int read (byte [] b , int off , int len ) throws IOException {
269
+ FSDataInputStream stream = getStream ();
243
270
try {
244
- return getStream () .read (b , off , len );
271
+ return stream .read (b , off , len );
245
272
} finally {
246
- finish ();
273
+ if (stream != originalStream ) {
274
+ cacheEntry .release ();
275
+ }
247
276
}
248
277
}
249
278
250
279
@ Override
251
280
public long skip (long n ) throws IOException {
281
+ FSDataInputStream stream = getStream ();
252
282
try {
253
- return getStream () .skip (n );
283
+ return stream .skip (n );
254
284
} finally {
255
- finish ();
285
+ if (stream != originalStream ) {
286
+ cacheEntry .release ();
287
+ }
256
288
}
257
289
}
258
290
259
291
@ Override
260
292
public int available () throws IOException {
293
+ FSDataInputStream stream = getStream ();
261
294
try {
262
- return getStream () .available ();
295
+ return stream .available ();
263
296
} finally {
264
- finish ();
297
+ if (stream != originalStream ) {
298
+ cacheEntry .release ();
299
+ }
265
300
}
266
301
}
267
302
@@ -281,32 +316,45 @@ public boolean isClosed() {
281
316
@ Override
282
317
public void mark (int readlimit ) {
283
318
try {
284
- getStream ().mark (readlimit );
319
+ FSDataInputStream stream = getStream ();
320
+ try {
321
+ stream .mark (readlimit );
322
+ } finally {
323
+ if (stream != originalStream ) {
324
+ cacheEntry .release ();
325
+ }
326
+ }
285
327
} catch (Exception e ) {
286
328
LOG .warn ("Mark error." , e );
287
- } finally {
288
- finish ();
289
329
}
290
330
}
291
331
292
332
@ Override
293
333
public void reset () throws IOException {
334
+ FSDataInputStream stream = getStream ();
294
335
try {
295
- getStream () .reset ();
336
+ stream .reset ();
296
337
} finally {
297
- finish ();
338
+ if (stream != originalStream ) {
339
+ cacheEntry .release ();
340
+ }
298
341
}
299
342
}
300
343
301
344
@ Override
302
345
public boolean markSupported () {
303
346
try {
304
- return getStream ().markSupported ();
347
+ FSDataInputStream stream = getStream ();
348
+ try {
349
+ return stream .markSupported ();
350
+ } finally {
351
+ if (stream != originalStream ) {
352
+ cacheEntry .release ();
353
+ }
354
+ }
305
355
} catch (IOException e ) {
306
356
LOG .warn ("MarkSupported error." , e );
307
357
return false ;
308
- } finally {
309
- finish ();
310
358
}
311
359
}
312
360
@@ -317,28 +365,32 @@ public int read(ByteBuffer bb) throws IOException {
317
365
} else if (bb .remaining () == 0 ) {
318
366
return 0 ;
319
367
}
368
+ FSDataInputStream stream = getStream ();
320
369
try {
321
- FSDataInputStream stream = getStream ();
322
370
return stream instanceof ByteBufferReadable
323
371
? ((ByteBufferReadable ) stream ).read (bb )
324
372
: readFullyFromFSDataInputStream (stream , bb );
325
373
} finally {
326
- finish ();
374
+ if (stream != originalStream ) {
375
+ cacheEntry .release ();
376
+ }
327
377
}
328
378
}
329
379
330
380
@ Override
331
381
public int read (long position , ByteBuffer bb ) throws IOException {
382
+ FSDataInputStream stream = getStream ();
332
383
try {
333
- FSDataInputStream stream = getStream ();
334
384
if (stream instanceof ByteBufferReadable ) {
335
385
return ((ByteBufferReadable ) stream ).read (position , bb );
336
386
} else {
337
387
stream .seek (position );
338
388
return readFullyFromFSDataInputStream (stream , bb );
339
389
}
340
390
} finally {
341
- finish ();
391
+ if (stream != originalStream ) {
392
+ cacheEntry .release ();
393
+ }
342
394
}
343
395
}
344
396
0 commit comments