package org.apache.flink.test.checkpointing;

import org.apache.flink.api.common.JobID;
+import org.apache.flink.api.common.functions.OpenContext;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
+import org.apache.flink.api.common.state.v2.StateFuture;
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.connector.sink2.Sink;
import org.apache.flink.api.connector.sink2.SinkWriter;
import org.apache.flink.api.connector.sink2.WriterInitContext;

import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.legacy.RichParallelSourceFunction;
+import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
import org.apache.flink.test.util.MiniClusterWithClientResource;
import org.apache.flink.testutils.junit.SharedObjects;
import org.apache.flink.testutils.junit.SharedReference;

import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;

import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Optional;

 * NotifyingDefiniteKeySource, SubtaskIndexFlatMapper and CollectionSink refer to RescalingITCase,
 * because the static fields in these classes can not be shared.
 */
+@RunWith(Parameterized.class)
public class RescaleCheckpointManuallyITCase extends TestLogger {

    private static final int NUM_TASK_MANAGERS = 2;
@@ -84,10 +94,24 @@ public class RescaleCheckpointManuallyITCase extends TestLogger {

    @ClassRule public static TemporaryFolder temporaryFolder = new TemporaryFolder();

+    @Parameterized.Parameter(0)
+    public String statebackendType;
+
+    @Parameterized.Parameter(1)
+    public boolean enableAsyncState;
+
+    @Parameterized.Parameters(name = "statebackend type ={0}, enableAsyncState={1}")
+    public static Collection<Object[]> parameter() {
+        return Arrays.asList(
+                new Object[][] {
+                    {"forst", true}, {"forst", false}, {"rocksdb", true}, {"rocksdb", false}
+                });
+    }
+
    @Before
    public void setup() throws Exception {
        Configuration config = new Configuration();
-        config.set(StateBackendOptions.STATE_BACKEND, "rocksdb");
+        config.set(StateBackendOptions.STATE_BACKEND, statebackendType);
        config.set(CheckpointingOptions.INCREMENTAL_CHECKPOINTS, true);

        cluster =
@@ -263,7 +287,7 @@ private JobGraph createJobGraphWithKeyedState(

        SharedReference<JobID> jobID = sharedObjects.add(new JobID());
        SharedReference<MiniCluster> miniClusterRef = sharedObjects.add(miniCluster);
-        DataStream<Integer> input =
+        KeyedStream<Integer, Integer> input =
                env.addSource(
                                new NotifyingDefiniteKeySource(
                                        numberKeys, numberElements, failAfterEmission) {
@@ -300,10 +324,18 @@ public Integer getKey(Integer value) {
                                        return value;
                                    }
                                });
-        DataStream<Tuple2<Integer, Integer>> result =
-                input.flatMap(new SubtaskIndexFlatMapper(numberElementsExpect));
+        if (enableAsyncState) {
+            input.enableAsyncState();
+            DataStream<Tuple2<Integer, Integer>> result =
+                    input.flatMap(new AsyncSubtaskIndexFlatMapper(numberElementsExpect));

-        result.sinkTo(new CollectionSink<>());
+            result.sinkTo(new CollectionSink<>());
+        } else {
+            DataStream<Tuple2<Integer, Integer>> result =
+                    input.flatMap(new SubtaskIndexFlatMapper(numberElementsExpect));
+
+            result.sinkTo(new CollectionSink<>());
+        }

        return env.getStreamGraph().getJobGraph(env.getClass().getClassLoader(), jobID.get());
    }
@@ -349,8 +381,9 @@ public void run(SourceContext<Integer> ctx) throws Exception {
                } else {
                    boolean newCheckpoint = false;
                    long waited = 0L;
+                    running = false;
                    // maximum wait 5min
-                    while (!newCheckpoint && waited < 30000L) {
+                    while (!newCheckpoint && waited < 300000L) {
                        synchronized (ctx.getCheckpointLock()) {
                            newCheckpoint = waitCheckpointCompleted();
                        }
@@ -423,6 +456,79 @@ public void initializeState(FunctionInitializationContext context) throws Except
        }
    }

+    private static class AsyncSubtaskIndexFlatMapper
+            extends RichFlatMapFunction<Integer, Tuple2<Integer, Integer>>
+            implements CheckpointedFunction {
+
+        private static final long serialVersionUID = 1L;
+
+        private transient org.apache.flink.api.common.state.v2.ValueState<Integer> counter;
+        private transient org.apache.flink.api.common.state.v2.ValueState<Integer> sum;
+
+        private final int numberElements;
+
+        public AsyncSubtaskIndexFlatMapper(int numberElements) {
+            this.numberElements = numberElements;
+        }
+
+        @Override
+        public void flatMap(Integer value, Collector<Tuple2<Integer, Integer>> out)
+                throws Exception {
+            StateFuture<Integer> counterFuture =
+                    counter.asyncValue()
+                            .thenCompose(
+                                    (Integer c) -> {
+                                        int updated = c == null ? 1 : c + 1;
+                                        return counter.asyncUpdate(updated)
+                                                .thenApply(nothing -> updated);
+                                    });
+            StateFuture<Integer> sumFuture =
+                    sum.asyncValue()
+                            .thenCompose(
+                                    (Integer s) -> {
+                                        int updated = s == null ? value : s + value;
+                                        return sum.asyncUpdate(updated)
+                                                .thenApply(nothing -> updated);
+                                    });
+
+            counterFuture.thenCombine(
+                    sumFuture,
+                    (c, s) -> {
+                        if (c == numberElements) {
+                            out.collect(
+                                    Tuple2.of(
+                                            getRuntimeContext()
+                                                    .getTaskInfo()
+                                                    .getIndexOfThisSubtask(),
+                                            s));
+                        }
+                        return null;
+                    });
+        }
+
+        @Override
+        public void snapshotState(FunctionSnapshotContext context) throws Exception {
+            // all managed, nothing to do.
+        }
+
+        @Override
+        public void initializeState(FunctionInitializationContext context) throws Exception {}
+
+        @Override
+        public void open(OpenContext openContext) throws Exception {
+            counter =
+                    ((StreamingRuntimeContext) getRuntimeContext())
+                            .getValueState(
+                                    new org.apache.flink.api.common.state.v2.ValueStateDescriptor<>(
+                                            "counter", BasicTypeInfo.INT_TYPE_INFO));
+            sum =
+                    ((StreamingRuntimeContext) getRuntimeContext())
+                            .getValueState(
+                                    new org.apache.flink.api.common.state.v2.ValueStateDescriptor<>(
+                                            "sum", BasicTypeInfo.INT_TYPE_INFO));
+        }
+    }
+
    private static class CollectionSink<IN> implements Sink<IN> {

        private static final ConcurrentHashMap<JobID, CollectionSinkWriter<?>> writers =