Skip to content

Commit 767b301

Browse files
committed
Make sure allocated buffers live through GC
When allocating an `RMatch` instance, make sure the buffers allocated for `registers.{beg,end}` fields are properly rooted so that they live through GC if allocating `RMatch` triggers a GC.
1 parent 83e1407 commit 767b301

File tree

2 files changed

+61
-4
lines changed

2 files changed

+61
-4
lines changed

Diff for: re.c

+44
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,16 @@ match_init_copy(VALUE obj, VALUE orig)
11221122
RB_OBJ_WRITE(obj, &RMATCH(obj)->regexp, RMATCH(orig)->regexp);
11231123

11241124
rm = RMATCH_EXT(obj);
1125+
1126+
#if USE_MMTK
1127+
if (rb_mmtk_enabled_p()) {
1128+
// The rb_reg_region_copy below may write to `obj` (`rm->registers.{beg,end}`).
1129+
// We apply write barrier here. It's probably not necessary because the `RB_OBJ_WRITE`
1130+
// above executes the same object-remembering operation.
1131+
rb_gc_writebarrier_remember(obj);
1132+
}
1133+
#endif
1134+
11251135
if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
11261136
rb_memerror();
11271137

@@ -1501,6 +1511,15 @@ match_set_string(VALUE m, VALUE string, long pos, long len)
15011511

15021512
RB_OBJ_WRITE(match, &RMATCH(match)->str, string);
15031513
RB_OBJ_WRITE(match, &RMATCH(match)->regexp, Qnil);
1514+
1515+
#if USE_MMTK
1516+
if (rb_mmtk_enabled_p()) {
1517+
// The onig_region_resize below may write to `m` (`rmatch->registers.{beg,end}`).
1518+
// We apply write barrier here. It's probably not necessary because the `RB_OBJ_WRITE`
1519+
// above executes the same object-remembering operation.
1520+
rb_gc_writebarrier_remember(m);
1521+
}
1522+
#endif
15041523
int err = onig_region_resize(&rmatch->regs, 1);
15051524
if (err) rb_memerror();
15061525
rmatch->regs.beg[0] = pos;
@@ -1825,10 +1844,35 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
18251844
return ONIG_MISMATCH;
18261845
}
18271846

1847+
#if USE_MMTK
1848+
VALUE root_beg;
1849+
VALUE root_end;
1850+
if (rb_mmtk_enabled_p()) {
1851+
// When using MMTk, the `beg` and `end` fields of `re_registers` point to heap objects,
1852+
// but are interior pointers. The conservative stack scanner will not recognize interior
1853+
// pointers as object references. We compute the pointers to the beginning of those
1854+
// objects and use RB_GC_GUARD to keep them on the stack so that even if the `match_alloc`
1855+
// invocation triggers GC, the `beg` and `end` will still be kept alive.
1856+
root_beg = (VALUE)rb_mmtk_chars_to_strbuf((char*)args.regs.beg);
1857+
root_end = (VALUE)rb_mmtk_chars_to_strbuf((char*)args.regs.end);
1858+
} else {
1859+
root_beg = root_end = Qnil;
1860+
}
1861+
#endif
1862+
1863+
// MMTk note: `match_alloc` may trigger GC.
18281864
VALUE match = match_alloc(rb_cMatch);
18291865
rb_matchext_t *rm = RMATCH_EXT(match);
18301866
rm->regs = args.regs;
18311867

1868+
#if USE_MMTK
1869+
// Guard `root_beg` and `root_end` until here. Now that `args.regs` has been assigned to a
1870+
// field of `match`, the conservative stack scanner will pick up the `match` variable, and
1871+
// `gc_mark_children` will take care of the interior pointers when scanning the `T_MATCH`.
1872+
RB_GC_GUARD(root_beg);
1873+
RB_GC_GUARD(root_end);
1874+
#endif
1875+
18321876
if (set_backref_str) {
18331877
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
18341878
}

Diff for: regexec.c

+17-4
Original file line numberDiff line numberDiff line change
@@ -889,9 +889,10 @@ onig_region_clear(OnigRegion* region)
889889

890890
#if USE_MMTK
891891
static OnigPosition*
892-
rb_mmtk_onig_position_array_alloc(size_t len)
892+
rb_mmtk_onig_position_array_alloc(size_t len, VALUE *root_var)
893893
{
894894
rb_mmtk_strbuf_t *strbuf = rb_mmtk_new_strbuf(len * sizeof(OnigPosition));
895+
*root_var = (VALUE)strbuf;
895896
OnigPosition *result = (OnigPosition*)rb_mmtk_strbuf_to_chars(strbuf);
896897
return result;
897898
}
@@ -931,13 +932,25 @@ onig_region_resize(OnigRegion* region, int n)
931932
}
932933
#if USE_MMTK
933934
} else {
934-
OnigPosition *new_beg = rb_mmtk_onig_position_array_alloc(n);
935-
OnigPosition *new_end = rb_mmtk_onig_position_array_alloc(n);
936-
// TODO: Use write barrier when writing these fields.
935+
VALUE root_beg;
936+
VALUE root_end;
937+
OnigPosition *new_beg = rb_mmtk_onig_position_array_alloc(n, &root_beg); // May trigger GC.
938+
OnigPosition *new_end = rb_mmtk_onig_position_array_alloc(n, &root_end); // May trigger GC.
939+
940+
// About write barriers: The following assignments may write to fields of `RMatch` or local
941+
// variables on the stack, depending on the call site. This file `regexec.c` knows nothing
942+
// about the Ruby-level `RMatch` object, so we can't apply write barriers here. The caller
943+
// should apply `rb_gc_writebarrier_remember` to the `RMatch` object if applicable.
937944
region->beg = new_beg;
938945
region->end = new_end;
946+
939947
// Currently MMTk panicks when out of memory. We can customize the OOM handling in the
940948
// `mmtk-ruby` binding so that it returns NULL instead.
949+
950+
// Prevent new_beg and `new_end from being collected. Actually we only need to guard
951+
// `root_beg`, but we do the same for `root_end` just for consistency.
952+
RB_GC_GUARD(root_beg);
953+
RB_GC_GUARD(root_end);
941954
}
942955
#endif
943956

0 commit comments

Comments
 (0)