@@ -53,7 +53,7 @@ void MMTkVMCompanionThread::run() {
53
53
MutexLockerEx locker (_lock, Mutex::_no_safepoint_check_flag);
54
54
assert (_reached_state == _threads_resumed, " Threads should be running at this moment." );
55
55
while (_desired_state != _threads_suspended) {
56
- _lock->wait (true );
56
+ _lock->wait (Mutex::_no_safepoint_check_flag );
57
57
}
58
58
assert (_reached_state == _threads_resumed, " Threads should still be running at this moment." );
59
59
}
@@ -63,25 +63,9 @@ void MMTkVMCompanionThread::run() {
63
63
VM_MMTkSTWOperation op (this );
64
64
// VMThread::execute() is blocking. The companion thread will be blocked
65
65
// here waiting for the VM thread to execute op, and the VM thread will
66
- // be blocked in reach_suspended_and_wait_for_resume () until a GC thread
66
+ // be blocked in do_mmtk_stw_operation () until a GC thread
67
67
// calls request(_threads_resumed).
68
68
VMThread::execute (&op);
69
-
70
- // Tell the waiter thread that the world has resumed.
71
- log_trace (gc)(" MMTkVMCompanionThread: Notifying threads resumption..." );
72
- {
73
- MutexLockerEx locker (_lock, Mutex::_no_safepoint_check_flag);
74
- assert (_desired_state == _threads_resumed, " start-the-world should be requested." );
75
- assert (_reached_state == _threads_suspended, " Threads should still be suspended at this moment." );
76
- _reached_state = _threads_resumed;
77
- _lock->notify_all ();
78
- }
79
- {
80
- MutexLocker x (Heap_lock);
81
- if (Universe::has_reference_pending_list ()) {
82
- Heap_lock->notify_all ();
83
- }
84
- }
85
69
}
86
70
}
87
71
@@ -107,7 +91,7 @@ void MMTkVMCompanionThread::request(stw_state desired_state, bool wait_until_rea
107
91
108
92
if (wait_until_reached) {
109
93
while (_reached_state != desired_state) {
110
- _lock->wait (true );
94
+ _lock->wait (Mutex::_no_safepoint_check_flag );
111
95
}
112
96
}
113
97
}
@@ -123,23 +107,70 @@ void MMTkVMCompanionThread::wait_for_reached(stw_state desired_state) {
123
107
assert (_desired_state == desired_state, " State %d not requested." , desired_state);
124
108
125
109
while (_reached_state != desired_state) {
126
- _lock->wait (true );
110
+ _lock->wait (Mutex::_no_safepoint_check_flag );
127
111
}
128
112
}
129
113
130
- // Called by the VM thread to indicate that all Java threads have stopped.
131
- // This method will block until the GC requests start-the-world.
132
- void MMTkVMCompanionThread::reach_suspended_and_wait_for_resume () {
133
- assert (Thread::current ()->is_VM_thread (), " reach_suspended_and_wait_for_resume can only be executed by the VM thread" );
114
+ // Called by the VM thread in `VM_MMTkSTWOperation`.
115
+ // This method notifies waiters that all Java threads have yielded, and will block the VM thread (thereby
116
+ // blocking Java threads) until the GC requests start-the-world.
117
+ void MMTkVMCompanionThread::do_mmtk_stw_operation () {
118
+ assert (Thread::current ()->is_VM_thread (), " do_mmtk_stw_operation can only be executed by the VM thread" );
134
119
135
- MutexLockerEx locker (_lock, Mutex::_no_safepoint_check_flag);
120
+ {
121
+ MutexLockerEx locker (_lock, Mutex::_no_safepoint_check_flag);
136
122
137
- // Tell the waiter thread that the world has stopped.
138
- _reached_state = _threads_suspended;
139
- _lock->notify_all ();
123
+ // Tell the waiter thread that Java threads have stopped at yieldpoints.
124
+ _reached_state = _threads_suspended;
125
+ log_trace (gc)(" do_mmtk_stw_operation: Reached _thread_suspended state. Notifying..." );
126
+ _lock->notify_all ();
127
+
128
+ // Wait until resume-the-world is requested
129
+ while (_desired_state != _threads_resumed) {
130
+ _lock->wait (Mutex::_no_safepoint_check_flag);
131
+ }
140
132
141
- // Wait until resume-the-world is requested
142
- while (_desired_state != _threads_resumed) {
143
- _lock->wait (true );
133
+ // Tell the waiter thread that Java threads will eventually resume from yieldpoints. This
134
+ // function will return, and, as soon as the VM thread stops executing safepoint VM operations,
135
+ // Java threads will resume from yieldpoints.
136
+ //
137
+ // Note: We have to notify *now* instead of after `VMThread::execute()`. For reasons unknown
138
+ // (likely a bug in OpenJDK 11), the VMThread fails to notify the companion thread after
139
+ // evaluating `VM_MMTkSTWOperation`, and continues to execute other VM operations (such as
140
+ // `RevokeBias`). This leaves the companion thread blocking on `VMThread::execute()` until the
141
+ // VM thread finishes executing the next batch of queued VM operations. If we notify after
142
+ // `VMThread::execute` in `run()`, it will cause a deadlock like the following:
143
+ //
144
+ // - The companion thread is blocked at `VMThread::execute()`, waiting for the next batch of VM
145
+ // operations to finish.
146
+ // - The VM thread is blocked in `SafepointSynchronize::begin()`, waiting for all mutators to
147
+ // reach safepoints.
148
+ // - One mutator is allocating too fast and triggers a GC, which requires the `WorkerMonitor`
149
+ // lock in mmtk-core.
150
+ // - A GC worker is still executing `mmtk_resume_mutator`, holding the `WorkerMonitor` (as the
151
+ // last parked GC worker). It is asking the companion thread to resume mutators, and is still
152
+ // waiting for the companion thread to reach the `_threads_resumed` state. As noted above,
153
+ // the companion thread is waiting, too.
154
+ //
155
+ // By notifying now, we let the companion thread stop waiting, thereby allowing the last
156
+ // parked GC worker to finish `resume_mutators`, breaking the deadlock. When the next GC
157
+ // starts, the GC worker running `mmtk_stop_all_mutators` will need to wait a little longer (as
158
+ // it always should) until the VM thread finishes executing other VM operations and the
159
+ // companion thread is ready to respond to another request from GC workers.
160
+ //
161
+ // Also note that OpenJDK 17 changed the way the VM thread executes VM operations. The same
162
+ // problem may not manifest in OpenJDK 17 or 21.
163
+ assert (_desired_state == _threads_resumed, " start-the-world should be requested." );
164
+ assert (_reached_state == _threads_suspended, " Threads should still be suspended at this moment." );
165
+ _reached_state = _threads_resumed;
166
+ log_trace (gc)(" do_mmtk_stw_operation: Reached _thread_resumed state. Notifying..." );
167
+ _lock->notify_all ();
168
+ }
169
+
170
+ {
171
+ MutexLockerEx x (Heap_lock, Mutex::_no_safepoint_check_flag);
172
+ if (Universe::has_reference_pending_list ()) {
173
+ Heap_lock->notify_all ();
174
+ }
144
175
}
145
176
}
0 commit comments