Skip to content

Commit e38717d

Browse files
authored
Trigger GC for actors when they tell the cycle detector they're blocked (#3278)
Prior to this commit, if an actor blocked, it did not run GC to free any memory it no longer needed. This would result in blocked actors holding on to (potentially lots of) memory unnecessarily. This commit causes GC to be triggered when the cycle detector (CD) asks an actor if it is blocked and the actor responds telling the cycle detector that it is blocked. This should result in memory held by blocked actors being freed more quickly even if the cycle detector doesn't end up detecting a cycle and reaping the actors. This will force a GC for an actor based on the following three things: * The actor processed at least one message since its last GC (i.e. it did some work [GC acquire/release message or app message]) * The actor's heap is greater than 0 (i.e. it has memory that could potentially be freed) * The actor is blocked and is about to tell the cycle detector that it is blocked (i.e. it thinks it has no more work to do at the moment) The sequence of events for GC'ing when sending a block message to the CD is: 1. actor gets a message from another actor 2. gets rescheduled because it processed an application message 3. next run has an empty queue (and the actor gets marked internally as blocked but doesn't send a block message to the CD) 4. some time passes and the CD eventually asks the actor if it is blocked 5. the actor garbage collects because of this change before sending the block message to the CD (to prevent race conditions) 6. the actor responds to the CD by sending a block message This shouldn't be a performance hit because step 4 is based on how often the CD runs (not very often) along with the fact that the CD doesn't ask all actors it knows about if they're blocked on every run; it asks them in batches instead, so step 4 will not occur very frequently for any actor even if steps 1 - 3 happen regularly.
1 parent 83ef004 commit e38717d

File tree

1 file changed

+72
-55
lines changed

1 file changed

+72
-55
lines changed

src/libponyrt/actor/actor.c

Lines changed: 72 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -241,13 +241,80 @@ static bool well_formed_msg_chain(pony_msg_t* first, pony_msg_t* last)
241241
}
242242
#endif
243243

244+
static void try_gc(pony_ctx_t* ctx, pony_actor_t* actor)
245+
{
246+
if(!ponyint_heap_startgc(&actor->heap
247+
#ifdef USE_RUNTIMESTATS
248+
, actor))
249+
#else
250+
))
251+
#endif
252+
return;
253+
254+
#ifdef USE_RUNTIMESTATS
255+
uint64_t used_cpu = ponyint_sched_cpu_used(ctx);
256+
ctx->schedulerstats.misc_cpu += used_cpu;
257+
#endif
258+
259+
DTRACE1(GC_START, (uintptr_t)ctx->scheduler);
260+
261+
ponyint_gc_mark(ctx);
262+
263+
if(actor->type->trace != NULL)
264+
actor->type->trace(ctx, actor);
265+
266+
ponyint_mark_done(ctx);
267+
268+
#ifdef USE_RUNTIMESTATS
269+
used_cpu = ponyint_sched_cpu_used(ctx);
270+
ctx->schedulerstats.actor_gc_mark_cpu += used_cpu;
271+
actor->actorstats.gc_mark_cpu += used_cpu;
272+
#endif
273+
274+
ponyint_heap_endgc(&actor->heap
275+
#ifdef USE_RUNTIMESTATS
276+
, actor);
277+
#else
278+
);
279+
#endif
280+
281+
DTRACE1(GC_END, (uintptr_t)ctx->scheduler);
282+
283+
#ifdef USE_RUNTIMESTATS
284+
used_cpu = ponyint_sched_cpu_used(ctx);
285+
ctx->schedulerstats.actor_gc_sweep_cpu += used_cpu;
286+
actor->actorstats.gc_sweep_cpu += used_cpu;
287+
#endif
288+
}
289+
244290
static void send_unblock(pony_actor_t* actor)
245291
{
246292
// Send unblock before continuing.
247293
unset_internal_flag(actor, FLAG_BLOCKED | FLAG_BLOCKED_SENT);
248294
ponyint_cycle_unblock(actor);
249295
}
250296

297+
static void send_block(pony_ctx_t* ctx, pony_actor_t* actor)
298+
{
299+
pony_assert(ctx->current == actor);
300+
301+
// Try and run GC because we're blocked and sending a block message
302+
// to the CD. This will try and free any memory the actor has in its
303+
// heap that wouldn't get freed otherwise until the actor is
304+
// destroyed or happens to receive more work via application messages
305+
// that eventually trigger a GC which may not happen for a long time
306+
// (or ever). Do this BEFORE sending the message or else we might be
307+
// GCing while the CD destroys us.
308+
pony_triggergc(ctx);
309+
try_gc(ctx, actor);
310+
311+
312+
// We're blocked, send block message.
313+
set_internal_flag(actor, FLAG_BLOCKED_SENT);
314+
set_internal_flag(actor, FLAG_CD_CONTACTED);
315+
ponyint_cycle_block(actor, &actor->gc);
316+
}
317+
251318
static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
252319
pony_msg_t* msg)
253320
{
@@ -366,9 +433,7 @@ static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
366433
//
367434
// Sending multiple "i'm blocked" messages to the cycle detector
368435
// will result in actor potentially being freed more than once.
369-
set_internal_flag(actor, FLAG_BLOCKED_SENT);
370-
pony_assert(ctx->current == actor);
371-
ponyint_cycle_block(actor, &actor->gc);
436+
send_block(ctx, actor);
372437
}
373438

374439
return false;
@@ -444,52 +509,6 @@ static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
444509
}
445510
}
446511

447-
static void try_gc(pony_ctx_t* ctx, pony_actor_t* actor)
448-
{
449-
if(!ponyint_heap_startgc(&actor->heap
450-
#ifdef USE_RUNTIMESTATS
451-
, actor))
452-
#else
453-
))
454-
#endif
455-
return;
456-
457-
#ifdef USE_RUNTIMESTATS
458-
uint64_t used_cpu = ponyint_sched_cpu_used(ctx);
459-
ctx->schedulerstats.misc_cpu += used_cpu;
460-
#endif
461-
462-
DTRACE1(GC_START, (uintptr_t)ctx->scheduler);
463-
464-
ponyint_gc_mark(ctx);
465-
466-
if(actor->type->trace != NULL)
467-
actor->type->trace(ctx, actor);
468-
469-
ponyint_mark_done(ctx);
470-
471-
#ifdef USE_RUNTIMESTATS
472-
used_cpu = ponyint_sched_cpu_used(ctx);
473-
ctx->schedulerstats.actor_gc_mark_cpu += used_cpu;
474-
actor->actorstats.gc_mark_cpu += used_cpu;
475-
#endif
476-
477-
ponyint_heap_endgc(&actor->heap
478-
#ifdef USE_RUNTIMESTATS
479-
, actor);
480-
#else
481-
);
482-
#endif
483-
484-
DTRACE1(GC_END, (uintptr_t)ctx->scheduler);
485-
486-
#ifdef USE_RUNTIMESTATS
487-
used_cpu = ponyint_sched_cpu_used(ctx);
488-
ctx->schedulerstats.actor_gc_sweep_cpu += used_cpu;
489-
actor->actorstats.gc_sweep_cpu += used_cpu;
490-
#endif
491-
}
492-
493512
// return true if mute occurs
494513
static bool maybe_should_mute(pony_actor_t* actor)
495514
{
@@ -631,13 +650,12 @@ bool ponyint_actor_run(pony_ctx_t* ctx, pony_actor_t* actor, bool polling)
631650
)
632651
{
633652
// The cycle detector (CD) doesn't know we exist so it won't try
634-
// and reach out to us even though we're blocked, so send block message //// and set flag that the CD knows we exist now so that when we block
653+
// and reach out to us even though we're blocked, so send block message
654+
// and set flag that the CD knows we exist now so that when we block
635655
// in the future we will wait for the CD to reach out and ask
636656
// if we're blocked or not.
637657
// But, only if gc.rc > 0 because if gc.rc == 0 we are a zombie.
638-
set_internal_flag(actor, FLAG_BLOCKED_SENT);
639-
set_internal_flag(actor, FLAG_CD_CONTACTED);
640-
ponyint_cycle_block(actor, &actor->gc);
658+
send_block(ctx, actor);
641659
}
642660

643661
}
@@ -748,8 +766,7 @@ bool ponyint_actor_run(pony_ctx_t* ctx, pony_actor_t* actor, bool polling)
748766
// unblocked (which would create a race condition) and we've also
749767
// ensured that the cycle detector will not send this actor any more
750768
// messages (which would also create a race condition).
751-
set_internal_flag(actor, FLAG_BLOCKED_SENT);
752-
ponyint_cycle_block(actor, &actor->gc);
769+
send_block(ctx, actor);
753770

754771
// mark the queue as empty or else destroy will hang
755772
bool empty = ponyint_messageq_markempty(&actor->q);

0 commit comments

Comments
 (0)