 import java.util.concurrent.TimeoutException;
 import java.util.function.Consumer;
 import java.util.function.Supplier;
-
+import jakarta.inject.Inject;
+import jakarta.inject.Named;
 import org.cloudfoundry.multiapps.common.SLException;
 import org.cloudfoundry.multiapps.controller.client.util.CheckedSupplier;
 import org.cloudfoundry.multiapps.controller.client.util.ResilientOperationExecutor;
@@ -35,17 +36,14 @@
 import org.springframework.http.HttpStatus;
 import org.springframework.http.ResponseEntity;
 
-import jakarta.inject.Inject;
-import jakarta.inject.Named;
-
 @Named
 public class ApplicationHealthCalculator {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(ApplicationHealthCalculator.class);
 
     private static final int UPDATE_HEALTH_CHECK_STATUS_PERIOD_IN_SECONDS = 10;
     private static final int SINGLE_TASK_TIMEOUT_IN_SECONDS = 70; // timeout is set to 70 so it is higher than the DB connection acquisition
-                                                                  // timeout
+                                                                   // timeout
    private static final int TOTAL_TASK_TIMEOUT_IN_SECONDS = 3 * SINGLE_TASK_TIMEOUT_IN_SECONDS;
 
     private final ObjectStoreFileStorage objectStoreFileStorage;
@@ -54,19 +52,20 @@ public class ApplicationHealthCalculator {
     private final DatabaseMonitoringService databaseMonitoringService;
     private final DatabaseWaitingLocksAnalyzer databaseWaitingLocksAnalyzer;
 
-    private final CachedObject<Boolean> objectStoreFileStorageHealthCache = new CachedObject<>(Duration.ofSeconds(TOTAL_TASK_TIMEOUT_IN_SECONDS));
+    private final CachedObject<Boolean> objectStoreFileStorageHealthCache = new CachedObject<>(
+        Duration.ofSeconds(TOTAL_TASK_TIMEOUT_IN_SECONDS));
     private final CachedObject<Boolean> dbHealthServiceCache = new CachedObject<>(Duration.ofSeconds(TOTAL_TASK_TIMEOUT_IN_SECONDS));
     private final CachedObject<Boolean> hasIncreasedLocksCache = new CachedObject<>(false,
                                                                                     Duration.ofSeconds(TOTAL_TASK_TIMEOUT_IN_SECONDS));
     private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
     private final ExecutorService taskExecutor = new ThreadPoolExecutor(3,
-                                                                        3,
+                                                                        9,
                                                                         0L,
                                                                         TimeUnit.MILLISECONDS,
                                                                         new SynchronousQueue<>(),
                                                                         new ThreadPoolExecutor.AbortPolicy());
     private final ExecutorService timeoutExecutor = new ThreadPoolExecutor(3,
-                                                                           3,
+                                                                           9,
                                                                            0L,
                                                                            TimeUnit.MILLISECONDS,
                                                                            new SynchronousQueue<>(),
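Note on the pool-size bump above: with a SynchronousQueue the executor never queues work, so raising the maximum pool size from 3 to 9 is what lets more health-check tasks run concurrently before AbortPolicy starts rejecting submissions. A minimal standalone sketch of that behavior, not part of this change (the class name, task, and counts are illustrative only):

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class PoolSizeSketch {

    public static void main(String[] args) throws Exception {
        // Same shape as the executors above: a SynchronousQueue hands each task
        // directly to a thread, so the pool grows towards its maximum instead of
        // queueing; once all 9 threads are busy, AbortPolicy rejects further submits.
        ExecutorService executor = new ThreadPoolExecutor(3, 9, 0L, TimeUnit.MILLISECONDS,
                                                          new SynchronousQueue<>(),
                                                          new ThreadPoolExecutor.AbortPolicy());
        Callable<Boolean> slowCheck = () -> {
            Thread.sleep(1_000); // simulate a health check that is still running
            return true;
        };
        for (int i = 0; i < 12; i++) {
            try {
                executor.submit(slowCheck);
            } catch (RejectedExecutionException e) {
                System.out.println("submit " + i + " rejected: all 9 threads are busy");
            }
        }
        executor.shutdown();
    }
}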
@@ -120,11 +119,13 @@ private void executeFuture(Future<Boolean> future, Consumer<Boolean> consumer, b
             Thread.currentThread()
                   .interrupt();
             LOGGER.error(Messages.THREAD_WAS_INTERRUPTED_WHILE_WAITING_FOR_THE_RESULT_OF_A_FUTURE, e);
+            future.cancel(true);
             consumer.accept(onErrorValue);
         } catch (Exception e) {
             LOGGER.error(MessageFormat.format(Messages.ERROR_OCCURRED_DURING_HEALTH_CHECKING_FOR_INSTANCE_0_MESSAGE_1,
                                               applicationConfiguration.getApplicationInstanceIndex(), errorMessage),
                          e);
+            future.cancel(true);
             consumer.accept(onErrorValue);
         }
     }
@@ -150,13 +151,16 @@ public ResponseEntity<ApplicationHealthResult> calculateApplicationHealth() {
         }
         boolean hasIncreasedDbLocks = hasIncreasedLocksCache.getOrRefresh(() -> true);
         if (hasIncreasedDbLocks) {
-            LOGGER.warn(MessageFormat.format(Messages.DETECTED_INCREASED_NUMBER_OF_PROCESSES_WAITING_FOR_LOCKS_FOR_INSTANCE_0_GETTING_THE_LOCKS,
-                                             applicationConfiguration.getApplicationInstanceIndex()));
-            long countOfProcessesWaitingForLocks = resilientOperationExecutor.execute((Supplier<Long>) () -> databaseMonitoringService.getProcessesWaitingForLocks(ApplicationInstanceNameUtil.buildApplicationInstanceTemplate(applicationConfiguration)));
+            LOGGER.warn(
+                MessageFormat.format(Messages.DETECTED_INCREASED_NUMBER_OF_PROCESSES_WAITING_FOR_LOCKS_FOR_INSTANCE_0_GETTING_THE_LOCKS,
+                                     applicationConfiguration.getApplicationInstanceIndex()));
+            long countOfProcessesWaitingForLocks = resilientOperationExecutor.execute(
+                (Supplier<Long>) () -> databaseMonitoringService.getProcessesWaitingForLocks(
+                    ApplicationInstanceNameUtil.buildApplicationInstanceTemplate(applicationConfiguration)));
             LOGGER.warn(MessageFormat.format(Messages.DETECTED_INCREASED_NUMBER_OF_PROCESSES_WAITING_FOR_LOCKS_FOR_INSTANCE,
                                              countOfProcessesWaitingForLocks, applicationConfiguration.getApplicationInstanceIndex()));
             return ResponseEntity.ok(ImmutableApplicationHealthResult.builder() // TODO: Make this return 503 instead of 200 when the
-                                                                                // detection is trustworthy
+                                                                                 // detection is trustworthy
                                      .status(ApplicationHealthResult.Status.DOWN)
                                      .hasIncreasedLocks(true)
                                      .countOfProcessesWaitingForLocks(countOfProcessesWaitingForLocks)
@@ -194,6 +198,7 @@ private boolean testObjectStoreConnectionWithTimeout() throws ExecutionException
             LOGGER.debug(Messages.CHECKING_OBJECT_STORE_HEALTH);
             return future.get(SINGLE_TASK_TIMEOUT_IN_SECONDS, TimeUnit.SECONDS);
         } catch (TimeoutException e) {
+            future.cancel(true);
             throw new SLException(e, Messages.TIMEOUT_WHILE_CHECKING_OBJECT_STORE_HEALTH);
         }
     }
@@ -219,6 +224,7 @@ private boolean testDatabaseConnectionWithTimeout() throws ExecutionException, I
             LOGGER.debug(Messages.CHECKING_DATABASE_HEALTH);
             return future.get(SINGLE_TASK_TIMEOUT_IN_SECONDS, TimeUnit.SECONDS);
         } catch (TimeoutException e) {
+            future.cancel(true);
             throw new SLException(e, Messages.TIMEOUT_WHILE_CHECKING_DATABASE_HEALTH);
         }
     }
@@ -229,6 +235,7 @@ private boolean checkForIncreasedLocksWithTimeout() throws ExecutionException, I
             LOGGER.debug(Messages.CHECKING_FOR_INCREASED_LOCKS);
             return future.get(SINGLE_TASK_TIMEOUT_IN_SECONDS, TimeUnit.SECONDS);
         } catch (TimeoutException e) {
+            future.cancel(true);
             throw new SLException(e, Messages.TIMEOUT_WHILE_CHECKING_FOR_INCREASED_LOCKS);
         }
     }
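The future.cancel(true) calls added in the catch blocks above all follow the same pattern: once get(...) gives up (timeout, interruption, or another failure), the still-running task is interrupted so it does not keep occupying one of the pool's threads. A minimal standalone sketch of that pattern, not part of this change (the class name, task, and timings are illustrative only):

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class FutureTimeoutSketch {

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        Callable<Boolean> slowCheck = () -> {
            Thread.sleep(5_000); // a check that takes longer than we are willing to wait
            return true;
        };
        Future<Boolean> future = executor.submit(slowCheck);
        try {
            System.out.println("Check finished: " + future.get(1, TimeUnit.SECONDS));
        } catch (TimeoutException e) {
            // Without this the task keeps running and holds its pool thread;
            // cancel(true) interrupts it so the thread is freed for the next check,
            // provided the blocked call responds to interruption.
            future.cancel(true);
            System.out.println("Check timed out and was cancelled");
        } finally {
            executor.shutdown();
        }
    }
}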