Skip to content

Commit 2219a8e

Browse files
fiskiwanowww
authored and committed
VMThermostat profiler
1 parent 9f2ebf8 commit 2219a8e

File tree

4 files changed

+407
-0
lines changed

4 files changed

+407
-0
lines changed

src/hotspot/share/logging/logTag.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ class outputStream;
209209
LOG_TAG(table) \
210210
LOG_TAG(task) \
211211
DEBUG_ONLY(LOG_TAG(test)) \
212+
LOG_TAG(thermostat) \
212213
LOG_TAG(thread) \
213214
LOG_TAG(throttle) \
214215
LOG_TAG(timer) \

src/hotspot/share/runtime/threads.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
#include "runtime/trimNativeHeap.hpp"
104104
#include "runtime/vm_version.hpp"
105105
#include "runtime/vmOperations.hpp"
106+
#include "runtime/vmThermostat.hpp"
106107
#include "sanitizers/address.hpp"
107108
#include "services/attachListener.hpp"
108109
#include "services/management.hpp"
@@ -989,6 +990,9 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
989990
}
990991
}
991992

993+
// Start the VMThermostat thread
994+
VMThermostat::initialize();
995+
992996
return JNI_OK;
993997
}
994998

Lines changed: 352 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,352 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#include "logging/log.hpp"
26+
#include "runtime/handshake.hpp"
27+
#include "runtime/interfaceSupport.inline.hpp"
28+
#include "runtime/javaThread.hpp"
29+
#include "runtime/os.hpp"
30+
#include "runtime/threadSMR.inline.hpp"
31+
#include "runtime/vmThermostat.hpp"
32+
#include "utilities/ticks.hpp"
33+
34+
// Time between two consecutive samples, in nanoseconds.
static constexpr uint64_t sampling_interval_nanos = 100000; // 100 us
// Number of samples aggregated into one reported window.
static constexpr int samples_per_window = 1000; // 1000 samples * 100 us = 100 ms

// Sampled relative performance difference between tiers in a sample program.
// Values are fractions of the tier 4 figure, which is the 1.0 baseline.
static constexpr float tier0_relative_performance = 0.0318f;
static constexpr float tier1_relative_performance = 0.5f;
static constexpr float tier2_relative_performance = 0.4209f;
static constexpr float tier3_relative_performance = 0.2364f;
static constexpr float tier4_relative_performance = 1.0f;
43+
44+
// The thermostat thread instance; assigned in VMThermostat::initialize().
VMThermostat* VMThermostat::_thermostat;
45+
46+
void VMThermostat::initialize() {
47+
LogTarget(Info, thermostat) lt;
48+
if (!lt.is_enabled()) {
49+
return;
50+
}
51+
52+
HandleMark hm(JavaThread::current());
53+
EXCEPTION_MARK;
54+
55+
const char* name = "VM Thermostat Thread";
56+
Handle thread_oop = JavaThread::create_system_thread_object(name, CHECK);
57+
58+
_thermostat = new VMThermostat();
59+
JavaThread::vm_exit_on_osthread_failure(_thermostat);
60+
JavaThread::start_internal_daemon(THREAD, _thermostat, thread_oop, NearMaxPriority);
61+
}
62+
63+
// Builds the thread with thread_entry as its entry point. The tick
// bookkeeping starts at zero; wait_for_tick() fills in the start time
// on its first call.
VMThermostat::VMThermostat()
  : JavaThread(thread_entry), _start_nanos(0), _nticks(0) {}
68+
69+
// Entry point handed to the JavaThread constructor: forwards to the
// sampling loop of the thermostat instance.
void VMThermostat::thread_entry(JavaThread* thread, TRAPS) {
  VMThermostat* const self = static_cast<VMThermostat*>(thread);
  self->run_loop();
}
72+
73+
// Returns how many sampling windows were missed due to latency problems
74+
int VMThermostat::wait_for_tick() {
75+
if (_nticks++ == 0) {
76+
// First tick, set start time and record startup pause for premain
77+
const Ticks now = Ticks::now();
78+
_start_nanos = now.nanoseconds();
79+
return _start_nanos / sampling_interval_nanos;
80+
}
81+
82+
for (;;) {
83+
// We might wake up spuriously from wait, so always recalculate
84+
// the timeout after a wakeup to see if we need to wait again.
85+
const Ticks now = Ticks::now();
86+
const uint64_t now_nanos = now.nanoseconds();
87+
const uint64_t next_nanos = _start_nanos + (sampling_interval_nanos * _nticks);
88+
const int64_t timeout_nanos = next_nanos - now_nanos;
89+
90+
if (timeout_nanos > 0) {
91+
// Wait
92+
ThreadBlockInVM tbivm(_thermostat);
93+
if (timeout_nanos > NANOUNITS_PER_MILLIUNIT) {
94+
// More than a millisecond to sleep
95+
os::naked_short_sleep(timeout_nanos / NANOUNITS_PER_MILLIUNIT);
96+
} else {
97+
// Less than a millisecond to sleep
98+
os::naked_short_nanosleep(timeout_nanos);
99+
}
100+
} else {
101+
// Tick
102+
int overslept_ticks = 0;
103+
if (timeout_nanos < 0) {
104+
const uint64_t overslept = -timeout_nanos;
105+
if (overslept > sampling_interval_nanos) {
106+
// Missed one or more ticks. Bump _nticks accordingly to
107+
// avoid firing a string of immediate ticks to make up
108+
// for the ones we missed.
109+
overslept_ticks = overslept / sampling_interval_nanos;
110+
_nticks += overslept_ticks;
111+
}
112+
}
113+
114+
return overslept_ticks;
115+
}
116+
}
117+
}
118+
119+
class ThermostatHandshake : public HandshakeClosure {
120+
private:
121+
float _sampled_relative_performance;
122+
123+
public:
124+
ThermostatHandshake()
125+
: HandshakeClosure("ThermostatHandshake"),
126+
_sampled_relative_performance(1.0f) {}
127+
virtual void do_thread(Thread* thread) {
128+
JavaThread* jt = JavaThread::cast(thread);
129+
if (!jt->has_last_Java_frame()) {
130+
// No java frame, no action
131+
_sampled_relative_performance = 0.0f;
132+
return;
133+
}
134+
135+
// Sample top frame to see if the program is doing something
136+
// we know is slower than it could be
137+
frame f = jt->last_frame();
138+
139+
// Skip any stub frames etc
140+
RegisterMap map(jt,
141+
RegisterMap::UpdateMap::skip,
142+
RegisterMap::ProcessFrames::skip,
143+
RegisterMap::WalkContinuation::skip);
144+
if (f.is_safepoint_blob_frame() || f.is_runtime_frame()) {
145+
f = f.sender(&map);
146+
}
147+
148+
if (f.is_interpreted_frame()) {
149+
_sampled_relative_performance = tier0_relative_performance;
150+
return;
151+
}
152+
153+
if (f.is_compiled_frame()) {
154+
nmethod* nm = f.cb()->as_nmethod();
155+
156+
switch (nm->comp_level()) {
157+
case CompLevel_simple:
158+
_sampled_relative_performance = tier1_relative_performance;
159+
break;
160+
case CompLevel_limited_profile:
161+
_sampled_relative_performance = tier2_relative_performance;
162+
break;
163+
case CompLevel_full_profile:
164+
_sampled_relative_performance = tier3_relative_performance;
165+
break;
166+
case CompLevel_full_optimization:
167+
_sampled_relative_performance = tier4_relative_performance;
168+
break;
169+
default:
170+
// Assume we are not slowed down by default
171+
break;
172+
}
173+
}
174+
}
175+
176+
float sampled_relative_performance() const {
177+
return _sampled_relative_performance;
178+
}
179+
};
180+
181+
// Picks a random thread to sample from the given thread list, or returns
// nullptr when no thread qualifies. A thread qualifies when it is visible
// externally, can call Java, has a thread object, is not a daemon, and is
// neither in native code nor blocked.
//
// The 'current' argument is not used by the selection.
static JavaThread* select_target(ThreadsListHandle& tlh, JavaThread* current) {
  const int thread_count = tlh.length();
  GrowableArray<JavaThread*> candidates(thread_count);

  for (int idx = 0; idx < thread_count; idx++) {
    JavaThread* const jt = tlh.thread_at(idx);

    // Hidden threads and threads that cannot call Java are not interesting.
    if (jt->is_hidden_from_external_view() || !jt->can_call_java()) {
      continue;
    }

    // Threads without a thread object (e.g. exiting) and daemon threads
    // are not interesting.
    oop java_thread = jt->threadObj();
    if (java_thread == nullptr || java_lang_Thread::is_daemon(java_thread)) {
      continue;
    }

    // Threads that are potentially blocking are not interesting.
    const JavaThreadState state = jt->thread_state();
    if (state == _thread_in_native || state == _thread_blocked) {
      continue;
    }

    candidates.append(jt);
  }

  const int candidate_count = candidates.length();
  if (candidate_count == 0) {
    return nullptr;
  }

  // Pick a random candidate.
  return candidates.at(os::random() % candidate_count);
}
228+
229+
int VMThermostat::sample(int overslept, float& the_sample) {
230+
int idle_ticks = overslept;
231+
232+
ThermostatHandshake op;
233+
ThreadsListHandle tlh;
234+
JavaThread* target = select_target(tlh, _thermostat);
235+
236+
if (target == nullptr) {
237+
// No interesting threads running? Well there is no known slowdown
238+
// on any threads at the moment then
239+
the_sample = 1.0f;
240+
return idle_ticks;
241+
}
242+
243+
const Ticks before = Ticks::now();
244+
Handshake::execute(&op, &tlh, target);
245+
const Ticks after = Ticks::now();
246+
247+
Tickspan duration = after - before;
248+
idle_ticks += duration.nanoseconds() / sampling_interval_nanos;
249+
250+
the_sample = op.sampled_relative_performance();
251+
252+
return idle_ticks;
253+
}
254+
255+
// Sample a window
256+
void VMThermostat::sample_window(int& overflowed_idle_samples, float& overflow_sample, float* samples) {
257+
ResourceMark rm(JavaThread::current());
258+
259+
int current = 0;
260+
261+
// Idle samples "stand still"; relative performance is 0
262+
while (current < MIN2(overflowed_idle_samples, samples_per_window)) {
263+
samples[current++] = 0.0f;
264+
}
265+
266+
if (current == samples_per_window) {
267+
overflowed_idle_samples -= samples_per_window;
268+
return;
269+
}
270+
271+
if (overflow_sample > -0.5f) {
272+
// Fill in the overflow sample from last time
273+
samples[current++] = overflow_sample;
274+
if (current == samples_per_window) {
275+
overflow_sample = -1.0f;
276+
overflowed_idle_samples = 0;
277+
}
278+
}
279+
280+
while (current < samples_per_window) {
281+
int result = wait_for_tick();
282+
283+
float the_sample;
284+
int idle_ticks = sample(result, the_sample);
285+
int remaining_ticks = samples_per_window - current;
286+
287+
int consumed_idle_ticks = MIN2(idle_ticks, remaining_ticks);
288+
289+
// Count idle samples as standing still; relative performance is 0
290+
for (int i = 0; i < consumed_idle_ticks; ++i) {
291+
samples[current++] = 0.0f;
292+
}
293+
294+
if (current == samples_per_window) {
295+
overflow_sample = the_sample;
296+
overflowed_idle_samples = idle_ticks - consumed_idle_ticks;
297+
return;
298+
}
299+
300+
samples[current++] = the_sample;
301+
}
302+
303+
// No overflow to the next window
304+
overflowed_idle_samples = 0;
305+
overflow_sample = -1.0f;
306+
}
307+
308+
// qsort-style comparator that orders floats from largest to smallest:
// returns -1 when *v1 is greater, 1 when *v1 is smaller and 0 otherwise.
int order_floats(const float* v1, const float* v2) {
  const float lhs = *v1;
  const float rhs = *v2;
  return static_cast<int>(lhs < rhs) - static_cast<int>(lhs > rhs);
}
319+
320+
// Returns the mean of samples[from .. samples_per_window - 1].
// The divisor is samples_per_window - from, so 'from' must be smaller
// than samples_per_window for the result to be meaningful.
float calculate_average(float* samples, int from) {
  float total = 0.0f;
  const float* const end = samples + samples_per_window;
  for (const float* cursor = samples + from; cursor < end; ++cursor) {
    total += *cursor;
  }

  const int count = samples_per_window - from;
  return total / count;
}
328+
329+
float calculate_percentile(float* samples, int percentile) {
330+
int ignore = percentile * samples_per_window / 1000;
331+
return int((1.0f - calculate_average(samples, ignore)) * 100.0f);
332+
}
333+
334+
void VMThermostat::report_window(float* samples, int window_number) {
335+
qsort(samples, samples_per_window, sizeof(float), (_sort_Fn)order_floats);
336+
int p0 = calculate_percentile(samples, 0);
337+
int p50 = calculate_percentile(samples, 500);
338+
int p90 = calculate_percentile(samples, 900);
339+
log_info(thermostat)(UINT64_FORMAT "\t%d\t%d\t%d", uint64_t(window_number) * samples_per_window * sampling_interval_nanos / NANOUNITS_PER_MILLIUNIT, p0, p50, p90);
340+
}
341+
342+
void VMThermostat::run_loop() {
343+
log_info(thermostat)("Time ms\tP0\tP50\tP90");
344+
int overflowed_idle_samples = 0;
345+
float overflow_sample = -1.0f;
346+
float samples[samples_per_window];
347+
int window_number = 0;
348+
for (;;) {
349+
sample_window(overflowed_idle_samples, overflow_sample, samples);
350+
report_window(samples, window_number++);
351+
}
352+
}

0 commit comments

Comments
 (0)