Skip to content

Commit a919f6d

Browse files
stefankkstefanj
andcommitted
8356372: JVMTI heap sampling not working properly with outside TLAB allocations
Co-authored-by: Stefan Johansson <sjohanss@openjdk.org> Reviewed-by: sjohanss, sspitsyn
1 parent 8184ce3 commit a919f6d

15 files changed

Lines changed: 184 additions & 93 deletions

src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABAndFlushLogs : pu
5959
BarrierSet::barrier_set()->make_parsable((JavaThread*)thread);
6060
// Retire TLABs.
6161
if (UseTLAB) {
62-
thread->tlab().retire(&_tlab_stats);
62+
thread->retire_tlab(&_tlab_stats);
6363
}
6464
// Concatenate logs.
6565
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();

src/hotspot/share/gc/shared/collectedHeap.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,8 +517,8 @@ void CollectedHeap::ensure_parsability(bool retire_tlabs) {
517517
for (JavaThreadIteratorWithHandle jtiwh; JavaThread *thread = jtiwh.next();) {
518518
BarrierSet::barrier_set()->make_parsable(thread);
519519
if (UseTLAB) {
520-
if (retire_tlabs) {
521-
thread->tlab().retire(&stats);
520+
if (retire_tlabs || ZeroTLAB) {
521+
thread->retire_tlab(&stats);
522522
} else {
523523
thread->tlab().make_parsable();
524524
}

src/hotspot/share/gc/shared/memAllocator.cpp

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ class MemAllocator::Allocation: StackObj {
5151
bool _overhead_limit_exceeded;
5252
bool _allocated_outside_tlab;
5353
size_t _allocated_tlab_size;
54-
bool _tlab_end_reset_for_sample;
5554

5655
bool check_out_of_memory();
5756
void verify_before();
@@ -74,8 +73,7 @@ class MemAllocator::Allocation: StackObj {
7473
_obj_ptr(obj_ptr),
7574
_overhead_limit_exceeded(false),
7675
_allocated_outside_tlab(false),
77-
_allocated_tlab_size(0),
78-
_tlab_end_reset_for_sample(false)
76+
_allocated_tlab_size(0)
7977
{
8078
assert(Thread::current() == allocator._thread, "do not pass MemAllocator across threads");
8179
verify_before();
@@ -174,33 +172,31 @@ void MemAllocator::Allocation::notify_allocation_jvmti_sampler() {
174172
return;
175173
}
176174

177-
if (!_allocated_outside_tlab && _allocated_tlab_size == 0 && !_tlab_end_reset_for_sample) {
178-
// Sample if it's a non-TLAB allocation, or a TLAB allocation that either refills the TLAB
179-
// or expands it due to taking a sampler induced slow path.
180-
return;
181-
}
175+
ThreadHeapSampler& heap_sampler = _thread->heap_sampler();
176+
ThreadLocalAllocBuffer& tlab = _thread->tlab();
182177

183-
// If we want to be sampling, protect the allocated object with a Handle
184-
// before doing the callback. The callback is done in the destructor of
185-
// the JvmtiSampledObjectAllocEventCollector.
186-
size_t bytes_since_last = 0;
178+
// Log sample decision
179+
heap_sampler.log_sample_decision(tlab.top());
187180

188-
{
181+
if (heap_sampler.should_sample(tlab.top())) {
182+
// If we want to be sampling, protect the allocated object with a Handle
183+
// before doing the callback. The callback is done in the destructor of
184+
// the JvmtiSampledObjectAllocEventCollector.
189185
PreserveObj obj_h(_thread, _obj_ptr);
190186
JvmtiSampledObjectAllocEventCollector collector;
191-
size_t size_in_bytes = _allocator._word_size * HeapWordSize;
192-
ThreadLocalAllocBuffer& tlab = _thread->tlab();
193187

194-
if (!_allocated_outside_tlab) {
195-
bytes_since_last = tlab.bytes_since_last_sample_point();
196-
}
188+
// Perform the sampling
189+
heap_sampler.sample(obj_h(), tlab.top());
197190

198-
_thread->heap_sampler().check_for_sampling(obj_h(), size_in_bytes, bytes_since_last);
191+
// Note that after this point all the TLAB can have been retired, and agent
192+
// code can run and allocate, don't rely on earlier calculations involving
193+
// the TLAB.
199194
}
200195

201-
if (_tlab_end_reset_for_sample || _allocated_tlab_size != 0) {
202-
// Tell tlab to forget bytes_since_last if we passed it to the heap sampler.
203-
_thread->tlab().set_sample_end(bytes_since_last != 0);
196+
// Set a new sampling point in the TLAB if it fits in the current TLAB
197+
const size_t words_until_sample = heap_sampler.bytes_until_sample(tlab.top()) / HeapWordSize;
198+
if (words_until_sample <= tlab.free()) {
199+
tlab.set_sampling_point(tlab.top() + words_until_sample);
204200
}
205201
}
206202

@@ -249,6 +245,7 @@ HeapWord* MemAllocator::mem_allocate_outside_tlab(Allocation& allocation) const
249245

250246
size_t size_in_bytes = _word_size * HeapWordSize;
251247
_thread->incr_allocated_bytes(size_in_bytes);
248+
_thread->heap_sampler().inc_outside_tlab_bytes(size_in_bytes);
252249

253250
return mem;
254251
}
@@ -262,12 +259,16 @@ HeapWord* MemAllocator::mem_allocate_inside_tlab_slow(Allocation& allocation) co
262259
ThreadLocalAllocBuffer& tlab = _thread->tlab();
263260

264261
if (JvmtiExport::should_post_sampled_object_alloc()) {
262+
// When sampling we artificially set the TLAB end to the sample point.
263+
// When we hit that point it looks like the TLAB is full, but it's
264+
// not necessarily the case. Set the real end and retry the allocation.
265+
266+
// Undo previous adjustment of end.
267+
// Note that notify_allocation_jvmti_sampler will set a new sample point.
265268
tlab.set_back_allocation_end();
266-
mem = tlab.allocate(_word_size);
267269

268-
// We set back the allocation sample point to try to allocate this, reset it
269-
// when done.
270-
allocation._tlab_end_reset_for_sample = true;
270+
// Retry the TLAB allocation with the proper end
271+
mem = tlab.allocate(_word_size);
271272

272273
if (mem != nullptr) {
273274
return mem;
@@ -282,11 +283,16 @@ HeapWord* MemAllocator::mem_allocate_inside_tlab_slow(Allocation& allocation) co
282283
}
283284

284285
// Discard tlab and allocate a new one.
286+
287+
// Record the amount wasted
288+
tlab.record_refill_waste();
289+
290+
// Retire the current TLAB
291+
_thread->retire_tlab();
292+
285293
// To minimize fragmentation, the last TLAB may be smaller than the rest.
286294
size_t new_tlab_size = tlab.compute_size(_word_size);
287295

288-
tlab.retire_before_allocation();
289-
290296
if (new_tlab_size == 0) {
291297
return nullptr;
292298
}
@@ -317,7 +323,8 @@ HeapWord* MemAllocator::mem_allocate_inside_tlab_slow(Allocation& allocation) co
317323
Copy::fill_to_words(mem + hdr_size, allocation._allocated_tlab_size - hdr_size, badHeapWordVal);
318324
}
319325

320-
tlab.fill(mem, mem + _word_size, allocation._allocated_tlab_size);
326+
_thread->fill_tlab(mem, _word_size, allocation._allocated_tlab_size);
327+
321328
return mem;
322329
}
323330

src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() :
4949
_desired_size(0),
5050
_refill_waste_limit(0),
5151
_allocated_before_last_gc(0),
52-
_bytes_since_last_sample_point(0),
5352
_number_of_refills(0),
5453
_refill_waste(0),
5554
_gc_waste(0),
@@ -125,11 +124,7 @@ void ThreadLocalAllocBuffer::insert_filler() {
125124
void ThreadLocalAllocBuffer::make_parsable() {
126125
if (end() != nullptr) {
127126
invariants();
128-
if (ZeroTLAB) {
129-
retire();
130-
} else {
131-
insert_filler();
132-
}
127+
insert_filler();
133128
}
134129
}
135130

@@ -140,15 +135,13 @@ void ThreadLocalAllocBuffer::retire(ThreadLocalAllocStats* stats) {
140135

141136
if (end() != nullptr) {
142137
invariants();
143-
thread()->incr_allocated_bytes(used_bytes());
144138
insert_filler();
145139
initialize(nullptr, nullptr, nullptr);
146140
}
147141
}
148142

149-
void ThreadLocalAllocBuffer::retire_before_allocation() {
143+
void ThreadLocalAllocBuffer::record_refill_waste() {
150144
_refill_waste += (unsigned int)remaining();
151-
retire();
152145
}
153146

154147
void ThreadLocalAllocBuffer::resize() {
@@ -312,24 +305,6 @@ void ThreadLocalAllocBuffer::print_stats(const char* tag) {
312305
_refill_waste * HeapWordSize);
313306
}
314307

315-
void ThreadLocalAllocBuffer::set_sample_end(bool reset_byte_accumulation) {
316-
size_t heap_words_remaining = pointer_delta(_end, _top);
317-
size_t bytes_until_sample = thread()->heap_sampler().bytes_until_sample();
318-
size_t words_until_sample = bytes_until_sample / HeapWordSize;
319-
320-
if (reset_byte_accumulation) {
321-
_bytes_since_last_sample_point = 0;
322-
}
323-
324-
if (heap_words_remaining > words_until_sample) {
325-
HeapWord* new_end = _top + words_until_sample;
326-
set_end(new_end);
327-
_bytes_since_last_sample_point += bytes_until_sample;
328-
} else {
329-
_bytes_since_last_sample_point += heap_words_remaining * HeapWordSize;
330-
}
331-
}
332-
333308
Thread* ThreadLocalAllocBuffer::thread() {
334309
return (Thread*)(((char*)this) + in_bytes(start_offset()) - in_bytes(Thread::tlab_start_offset()));
335310
}
@@ -338,6 +313,14 @@ void ThreadLocalAllocBuffer::set_back_allocation_end() {
338313
_end = _allocation_end;
339314
}
340315

316+
void ThreadLocalAllocBuffer::set_sampling_point(HeapWord* sampling_point) {
317+
precond(sampling_point >= _top);
318+
precond(sampling_point <= _allocation_end);
319+
320+
// This will trigger a slow-path, which in turn might take a sample.
321+
_end = sampling_point;
322+
}
323+
341324
HeapWord* ThreadLocalAllocBuffer::hard_end() {
342325
return _allocation_end + alignment_reserve();
343326
}

src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
5656
size_t _desired_size; // desired size (including alignment_reserve)
5757
size_t _refill_waste_limit; // hold onto tlab if free() is larger than this
5858
size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
59-
size_t _bytes_since_last_sample_point; // bytes since last sample point.
6059

6160
static size_t _max_size; // maximum size of any TLAB
6261
static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB
@@ -124,7 +123,6 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
124123
size_t free() const { return pointer_delta(end(), top()); }
125124
// Don't discard tlab if remaining space is larger than this.
126125
size_t refill_waste_limit() const { return _refill_waste_limit; }
127-
size_t bytes_since_last_sample_point() const { return _bytes_since_last_sample_point; }
128126

129127
// For external inspection.
130128
const HeapWord* start_relaxed() const;
@@ -158,17 +156,18 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
158156
// Retire an in-use tlab and optionally collect statistics.
159157
void retire(ThreadLocalAllocStats* stats = nullptr);
160158

161-
// Retire in-use tlab before allocation of a new tlab
162-
void retire_before_allocation();
159+
// Record refill waste before allocating (refilling) with a new TLAB.
160+
void record_refill_waste();
163161

164162
// Resize based on amount of allocation, etc.
165163
void resize();
166164

167165
void fill(HeapWord* start, HeapWord* top, size_t new_size);
168166
void initialize();
169167

168+
// Support for TLAB sampling
170169
void set_back_allocation_end();
171-
void set_sample_end(bool reset_byte_accumulation);
170+
void set_sampling_point(HeapWord* sampling_point);
172171

173172
static size_t refill_waste_limit_increment();
174173

src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,9 @@ void ShenandoahHeap::labs_make_parsable() {
14991499
for (JavaThreadIteratorWithHandle jtiwh; JavaThread *t = jtiwh.next(); ) {
15001500
ThreadLocalAllocBuffer& tlab = t->tlab();
15011501
tlab.make_parsable();
1502+
if (ZeroTLAB) {
1503+
t->retire_tlab();
1504+
}
15021505
cl.do_thread(t);
15031506
}
15041507

@@ -1516,10 +1519,9 @@ void ShenandoahHeap::tlabs_retire(bool resize) {
15161519
ThreadLocalAllocStats stats;
15171520

15181521
for (JavaThreadIteratorWithHandle jtiwh; JavaThread *t = jtiwh.next(); ) {
1519-
ThreadLocalAllocBuffer& tlab = t->tlab();
1520-
tlab.retire(&stats);
1522+
t->retire_tlab(&stats);
15211523
if (resize) {
1522-
tlab.resize();
1524+
t->tlab().resize();
15231525
}
15241526
}
15251527

src/hotspot/share/gc/shenandoah/shenandoahStackWatermark.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ void ShenandoahStackWatermark::retire_tlab() {
119119
// Retire TLAB
120120
if (UseTLAB) {
121121
_stats.reset();
122-
_jt->tlab().retire(&_stats);
122+
_jt->retire_tlab(&_stats);
123123
if (ResizeTLAB) {
124124
_jt->tlab().resize();
125125
}

src/hotspot/share/gc/z/zThreadLocalAllocBuffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ void ZThreadLocalAllocBuffer::publish_statistics() {
6565
void ZThreadLocalAllocBuffer::retire(JavaThread* thread, ThreadLocalAllocStats* stats) {
6666
if (UseTLAB) {
6767
stats->reset();
68-
thread->tlab().retire(stats);
68+
thread->retire_tlab(stats);
6969
if (ResizeTLAB) {
7070
thread->tlab().resize();
7171
}

src/hotspot/share/runtime/javaThread.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -968,7 +968,7 @@ void JavaThread::exit(bool destroy_vm, ExitType exit_type) {
968968
_stack_overflow_state.remove_stack_guard_pages();
969969

970970
if (UseTLAB) {
971-
tlab().retire();
971+
retire_tlab();
972972
}
973973

974974
if (JvmtiEnv::environments_might_exist()) {
@@ -1042,7 +1042,7 @@ void JavaThread::cleanup_failed_attach_current_thread(bool is_daemon) {
10421042
_stack_overflow_state.remove_stack_guard_pages();
10431043

10441044
if (UseTLAB) {
1045-
tlab().retire();
1045+
retire_tlab();
10461046
}
10471047

10481048
Threads::remove(this, is_daemon);

src/hotspot/share/runtime/thread.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,25 @@ void Thread::initialize_tlab() {
154154
}
155155
}
156156

157+
void Thread::retire_tlab(ThreadLocalAllocStats* stats) {
158+
// Sampling and serviceability support
159+
if (tlab().end() != nullptr) {
160+
incr_allocated_bytes(tlab().used_bytes());
161+
heap_sampler().retire_tlab(tlab().top());
162+
}
163+
164+
// Retire the TLAB
165+
tlab().retire(stats);
166+
}
167+
168+
void Thread::fill_tlab(HeapWord* start, size_t pre_reserved, size_t new_size) {
169+
// Thread allocation sampling support
170+
heap_sampler().set_tlab_top_at_sample_start(start);
171+
172+
// Fill the TLAB
173+
tlab().fill(start, start + pre_reserved, new_size);
174+
}
175+
157176
void Thread::initialize_thread_current() {
158177
assert(_thr_current == nullptr, "Thread::current already initialized");
159178
_thr_current = this;

0 commit comments

Comments
 (0)