Skip to content

Commit 45d6f28

Browse files
authored
C: Implement Tracking Allocator and add --leak-check to analyze (#1304)
This pull request adds a new `hb_allocator_T` implementation that tracks allocations and checks that every allocation is also freed again. We also expose a new `--leak-check` CLI flag for the `analyze` subcommand so we can easily check whether any file in a whole project leaks. **`herb analyze ../rubyevents --leak-check`** <img width="1900" height="882" alt="CleanShot 2026-03-05 at 08 40 16@2x" src="https://github.com/user-attachments/assets/3ed26685-dbf9-4661-8e55-2379a967779a" /> **With leaks:** <img width="2844" height="1762" alt="CleanShot 2026-03-05 at 08 39 40@2x" src="https://github.com/user-attachments/assets/6b95f474-2479-4f8a-be1b-514847fbc86a" /> **Calling `dealloc` on a pointer that wasn't allocated using our allocator:** <img width="2812" height="1722" alt="CleanShot 2026-03-05 at 08 40 36@2x" src="https://github.com/user-attachments/assets/2190fefc-023b-4398-9d24-8193d6381646" />
1 parent a780551 commit 45d6f28

File tree

5 files changed

+385
-2
lines changed

5 files changed

+385
-2
lines changed

ext/herb/extension.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,98 @@ static VALUE Herb_arena_stats(int argc, VALUE* argv, VALUE self) {
315315
return hash;
316316
}
317317

318+
/*
 * Builds a Ruby Hash describing the given tracking-allocator statistics.
 *
 * Keys (all Symbols):
 *   :allocations, :deallocations, :bytes_allocated, :bytes_deallocated,
 *   :untracked_deallocations - the corresponding counters from `stats`.
 *   :leaks                   - Array with the `size` of every bucket whose
 *                              pointer is neither NULL nor (void*) 1.
 *   :untracked_pointers      - Array of "%p"-formatted strings, one per entry
 *                              in `stats->untracked_pointers`.
 *
 * `stats` is dereferenced unconditionally and must not be NULL.
 */
static VALUE make_tracking_hash(hb_allocator_tracking_stats_T* stats) {
  VALUE summary = rb_hash_new();

  /* Scalar counters, inserted in the order Ruby callers will see them. */
  const struct {
    const char* key;
    size_t value;
  } counters[] = {
    { "allocations", stats->allocation_count },
    { "deallocations", stats->deallocation_count },
    { "bytes_allocated", stats->bytes_allocated },
    { "bytes_deallocated", stats->bytes_deallocated },
    { "untracked_deallocations", stats->untracked_deallocation_count },
  };

  for (size_t index = 0; index < sizeof(counters) / sizeof(counters[0]); index++) {
    rb_hash_aset(summary, ID2SYM(rb_intern(counters[index].key)), SIZET2NUM(counters[index].value));
  }

  /* Every bucket still holding a real pointer is reported as a leak. */
  VALUE leaked_sizes = rb_ary_new();
  for (size_t index = 0; index < stats->buckets_capacity; index++) {
    const hb_allocator_tracking_entry_T* slot = &stats->buckets[index];

    /* NOTE(review): NULL / (void*) 1 look like "empty" / "tombstone" markers — confirm in the allocator. */
    if (slot->pointer == NULL || slot->pointer == (void*) 1) { continue; }

    rb_ary_push(leaked_sizes, SIZET2NUM(slot->size));
  }
  rb_hash_aset(summary, ID2SYM(rb_intern("leaks")), leaked_sizes);

  /* Pointers handed to dealloc that the allocator did not know about. */
  size_t untracked_total = stats->untracked_pointers_size;
  VALUE untracked_list = rb_ary_new_capa((long) untracked_total);
  for (size_t index = 0; index < untracked_total; index++) {
    rb_ary_push(untracked_list, rb_sprintf("%p", stats->untracked_pointers[index]));
  }
  rb_hash_aset(summary, ID2SYM(rb_intern("untracked_pointers")), untracked_list);

  return summary;
}
342+
343+
/*
 * Herb.leak_check(source) -> Hash or nil
 *
 * Runs lex, parse, extract_ruby and extract_html over `source`, each with its
 * own HB_ALLOCATOR_TRACKING allocator, and returns a Hash with the keys
 * :lex, :parse, :extract_ruby and :extract_html. Each value is the Hash built
 * by make_tracking_hash() from that run's allocator statistics.
 *
 * Returns nil when an output buffer or an allocator cannot be initialised.
 * On those paths every resource acquired so far in the current section is
 * released before returning.
 */
static VALUE Herb_leak_check(VALUE self, VALUE source) {
  char* string = (char*) check_string(source);
  VALUE result = rb_hash_new();

  /* lex */
  {
    hb_allocator_T allocator;
    if (!hb_allocator_init(&allocator, HB_ALLOCATOR_TRACKING)) { return Qnil; }

    hb_array_T* tokens = herb_lex(string, &allocator);
    if (tokens != NULL) { herb_free_tokens(&tokens, &allocator); }

    /* Stats are read after the free so anything still tracked counts as a leak. */
    hb_allocator_tracking_stats_T* stats = hb_allocator_tracking_stats(&allocator);
    rb_hash_aset(result, ID2SYM(rb_intern("lex")), make_tracking_hash(stats));

    hb_allocator_destroy(&allocator);
  }

  /* parse */
  {
    hb_allocator_T allocator;
    if (!hb_allocator_init(&allocator, HB_ALLOCATOR_TRACKING)) { return Qnil; }

    parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS;
    AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, &allocator);
    if (root != NULL) { ast_node_free((AST_NODE_T*) root, &allocator); }

    hb_allocator_tracking_stats_T* stats = hb_allocator_tracking_stats(&allocator);
    rb_hash_aset(result, ID2SYM(rb_intern("parse")), make_tracking_hash(stats));

    hb_allocator_destroy(&allocator);
  }

  /* extract_ruby */
  {
    hb_buffer_T output;
    if (!hb_buffer_init(&output, strlen(string))) { return Qnil; }

    hb_allocator_T allocator;
    if (!hb_allocator_init(&allocator, HB_ALLOCATOR_TRACKING)) {
      /* The output buffer is already allocated here; release it before bailing out. */
      free(output.value);
      return Qnil;
    }

    herb_extract_ruby_options_T extract_options = HERB_EXTRACT_RUBY_DEFAULT_OPTIONS;
    herb_extract_ruby_to_buffer_with_options(string, &output, &extract_options, &allocator);

    hb_allocator_tracking_stats_T* stats = hb_allocator_tracking_stats(&allocator);
    rb_hash_aset(result, ID2SYM(rb_intern("extract_ruby")), make_tracking_hash(stats));

    hb_allocator_destroy(&allocator);
    free(output.value);
  }

  /* extract_html */
  {
    hb_buffer_T output;
    if (!hb_buffer_init(&output, strlen(string))) { return Qnil; }

    hb_allocator_T allocator;
    if (!hb_allocator_init(&allocator, HB_ALLOCATOR_TRACKING)) {
      /* Same as above: do not leak the output buffer on allocator failure. */
      free(output.value);
      return Qnil;
    }

    herb_extract_html_to_buffer(string, &output, &allocator);

    hb_allocator_tracking_stats_T* stats = hb_allocator_tracking_stats(&allocator);
    rb_hash_aset(result, ID2SYM(rb_intern("extract_html")), make_tracking_hash(stats));

    hb_allocator_destroy(&allocator);
    free(output.value);
  }

  return result;
}
409+
318410
static VALUE Herb_version(VALUE self) {
319411
VALUE gem_version = rb_const_get(self, rb_intern("VERSION"));
320412
VALUE libherb_version = rb_utf8_str_new_cstr(herb_version());
@@ -345,5 +437,6 @@ __attribute__((__visibility__("default"))) void Init_herb(void) {
345437
rb_define_singleton_method(mHerb, "extract_ruby", Herb_extract_ruby, -1);
346438
rb_define_singleton_method(mHerb, "extract_html", Herb_extract_html, 1);
347439
rb_define_singleton_method(mHerb, "arena_stats", Herb_arena_stats, -1);
440+
rb_define_singleton_method(mHerb, "leak_check", Herb_leak_check, 1);
348441
rb_define_singleton_method(mHerb, "version", Herb_version, 0);
349442
}

lib/herb/cli.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
class Herb::CLI
99
include Herb::Colors
1010

11-
attr_accessor :json, :silent, :log_file, :no_timing, :local, :escape, :no_escape, :freeze, :debug, :tool, :strict, :analyze, :track_whitespace, :verbose, :isolate, :arena_stats
11+
attr_accessor :json, :silent, :log_file, :no_timing, :local, :escape, :no_escape, :freeze, :debug, :tool, :strict, :analyze, :track_whitespace, :verbose, :isolate, :arena_stats, :leak_check
1212

1313
def initialize(args)
1414
@args = args
@@ -150,6 +150,7 @@ def result
150150
project.isolate = isolate
151151
project.validate_ruby = true
152152
project.arena_stats = arena_stats
153+
project.leak_check = leak_check
153154
has_issues = project.analyze!
154155
exit(has_issues ? 1 : 0)
155156
when "report"
@@ -304,6 +305,10 @@ def option_parser
304305
parser.on("--arena-stats", "Print arena memory statistics (for lex/parse/analyze commands)") do
305306
self.arena_stats = true
306307
end
308+
309+
parser.on("--leak-check", "Check for memory leaks in lex/parse/extract operations (for analyze command)") do
310+
self.leak_check = true
311+
end
307312
end
308313
end
309314

lib/herb/project.rb

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module Herb
1212
class Project
1313
include Colors
1414

15-
attr_accessor :project_path, :output_file, :no_log_file, :no_timing, :silent, :verbose, :isolate, :validate_ruby, :file_paths, :arena_stats
15+
attr_accessor :project_path, :output_file, :no_log_file, :no_timing, :silent, :verbose, :isolate, :validate_ruby, :file_paths, :arena_stats, :leak_check
1616

1717
# Known error types that indicate issues in the user's template, not bugs in the parser.
1818
TEMPLATE_ERRORS = [
@@ -260,6 +260,10 @@ def analyze!
260260
print_arena_summary(file_results)
261261
end
262262

263+
if leak_check
264+
print_leak_check_summary(file_results)
265+
end
266+
263267
unless no_log_file
264268
puts "\n #{separator}"
265269
puts "\n #{dimmed("Results saved to #{output_file}")}"
@@ -347,6 +351,10 @@ def process_file_direct(file_path)
347351
result[:arena_stats] = capture_arena_stats(file_content)
348352
end
349353

354+
if leak_check
355+
result[:leak_check] = capture_leak_check(file_content)
356+
end
357+
350358
Timeout.timeout(1) do
351359
parse_result = Herb.parse(file_content)
352360

@@ -887,6 +895,90 @@ def format_duration(seconds)
887895
end
888896
end
889897

898+
# Runs Herb.leak_check on the given template source.
#
# file_content - String with the template source to check.
#
# Returns whatever Herb.leak_check returns. If it raises a StandardError,
# returns an all-zero result for the four operations (:lex, :parse,
# :extract_ruby, :extract_html) so one failing file does not abort the run.
# The fallback carries the same keys as a real result, including the empty
# :leaks / :untracked_pointers lists and a zero :untracked_deallocations.
def capture_leak_check(file_content)
  Herb.leak_check(file_content)
rescue StandardError
  [:lex, :parse, :extract_ruby, :extract_html].to_h { |operation|
    [
      operation,
      {
        allocations: 0,
        deallocations: 0,
        bytes_allocated: 0,
        bytes_deallocated: 0,
        untracked_deallocations: 0,
        leaks: [],
        untracked_pointers: [],
      }
    ]
  }
end
906+
907+
# Prints the "Leak check" section of the analyze output.
#
# file_results - Array of per-file result Hashes. Only entries with a
#                :leak_check value (operation => stats Hash) are considered.
#
# An operation is flagged when it has unfreed allocations (:leaks), when its
# :allocations and :deallocations counts differ, or when it recorded untracked
# deallocations. Every file with at least one flagged operation is listed with
# a per-operation status line, followed by suggested `leaks` / `valgrind`
# commands for each flagged operation.
#
# Returns nothing of interest; writes to stdout.
def print_leak_check_summary(file_results)
  # stdlib; quotes file paths so the suggested shell commands stay
  # copy-pasteable for paths with spaces or shell metacharacters.
  require "shellwords"

  leaky_files = file_results.filter_map { |result|
    ops = result[:leak_check]
    next unless ops

    leaks = ops.select { |_op, stats|
      stats[:leaks]&.any? || stats[:allocations] != stats[:deallocations] || stats[:untracked_deallocations]&.positive?
    }
    next if leaks.empty?

    { file: result[:file_path], leaks: leaks, all: ops }
  }

  puts "\n #{separator}"
  puts "\n"
  puts " #{bold("Leak check:")}"

  if leaky_files.empty?
    puts ""
    puts " #{bold(green("✓"))} #{green("No leaks detected across all files.")}"
    return
  end

  puts " #{red("#{leaky_files.size} #{pluralize(leaky_files.size, "file")} with potential leaks:")}"
  puts ""

  leaky_files.each do |entry|
    relative = relative_path(entry[:file])
    puts " #{cyan(relative)}:"

    entry[:all].each do |op, stats|
      leaks = stats[:leaks] || []
      untracked_count = stats[:untracked_deallocations] || 0
      untracked_ptrs = stats[:untracked_pointers] || []
      leaked_bytes = stats[:bytes_allocated] - stats[:bytes_deallocated]
      counts = "#{stats[:allocations]} allocs, #{stats[:deallocations]} deallocs"

      if leaks.any?
        puts " #{red("✗")} #{op}: #{counts} (#{bold(red("#{leaks.size} unfreed, #{format_bytes(leaked_bytes)}"))})"
        leaks.each_with_index do |size, i|
          puts " #{dimmed("#{i + 1}.")} #{format_bytes(size)}"
        end
      elsif untracked_count.positive?
        puts " #{yellow("~")} #{op}: #{counts}"
      elsif stats[:allocations] != stats[:deallocations]
        # A count mismatch alone is enough to flag the file above, so it must
        # not be rendered as a passing operation here.
        puts " #{red("✗")} #{op}: #{counts} (#{bold(red("allocation/deallocation count mismatch"))})"
      else
        puts " #{green("✓")} #{op}: #{counts}"
      end

      next unless untracked_count.positive?

      puts " #{yellow("#{untracked_count} untracked #{pluralize(untracked_count, "deallocation")}")} #{dimmed("(freed through allocator but not allocated through it)")}"
      untracked_ptrs.each_with_index do |ptr, i|
        puts " #{dimmed("#{i + 1}.")} #{ptr}"
      end
    end

    puts ""
  end

  # Maps the operation keys of the leak-check result to the CLI subcommand
  # names used in the suggested debug commands.
  op_to_command = { lex: "lex", parse: "parse", extract_ruby: "ruby", extract_html: "html" }

  commands = leaky_files.flat_map { |entry|
    entry[:leaks].keys.map { |op| { command: op_to_command[op] || op.to_s, file: Shellwords.escape(entry[:file]) } }
  }

  puts " #{dimmed("To debug, run the following from the herb repo root (build with `make` first):")}"
  puts ""
  puts " #{dimmed("# macOS")}"
  commands.each do |cmd|
    puts " leaks --atExit -- ./herb #{cmd[:command]} #{cmd[:file]}"
  end
  puts ""
  puts " #{dimmed("# Linux")}"
  commands.each do |cmd|
    puts " valgrind --leak-check=full ./herb #{cmd[:command]} #{cmd[:file]}"
  end
end
981+
890982
def capture_arena_stats(file_content)
891983
stats = Herb.arena_stats(file_content)
892984

src/include/util/hb_allocator.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,28 @@
1313
/*
 * Identifies the allocation strategy backing an hb_allocator_T.
 *
 * The explicit values match the implicit numbering of the declaration order.
 */
typedef enum {
  HB_ALLOCATOR_MALLOC = 0,
  HB_ALLOCATOR_ARENA = 1,
  /* Selected by Herb_leak_check to obtain per-run allocation statistics. */
  HB_ALLOCATOR_TRACKING = 2,
} hb_allocator_type_T;
1718

19+
/*
 * One slot of the tracking allocator's bucket table.
 *
 * Consumers of the stats treat a slot whose `pointer` is NULL or (void*) 1 as
 * not holding a live allocation.
 * NOTE(review): presumably "empty" and "tombstone" markers — confirm in the
 * allocator implementation.
 */
typedef struct {
  void* pointer; /* tracked pointer, or one of the marker values above */
  size_t size;   /* size recorded for `pointer`; reported as the leak size */
} hb_allocator_tracking_entry_T;

/*
 * Statistics exposed by a tracking allocator via hb_allocator_tracking_stats().
 */
typedef struct {
  /* Counters; exported to Ruby by make_tracking_hash(). */
  size_t allocation_count;
  size_t deallocation_count;
  size_t untracked_deallocation_count;
  size_t bytes_allocated;
  size_t bytes_deallocated;

  /* Bucket table: `buckets_capacity` slots in `buckets`. */
  hb_allocator_tracking_entry_T* buckets;
  size_t buckets_capacity;
  size_t buckets_used; /* NOTE(review): presumably the number of occupied slots — confirm */

  /* Recorded untracked pointers: `untracked_pointers_size` valid entries. */
  void** untracked_pointers;
  size_t untracked_pointers_size;
  size_t untracked_pointers_capacity; /* NOTE(review): presumably the allocated length of the array — confirm */
} hb_allocator_tracking_stats_T;
37+
1838
typedef struct hb_allocator {
1939
void* (*alloc)(struct hb_allocator* self, size_t size);
2040
void (*dealloc)(struct hb_allocator* self, void* pointer);
@@ -30,6 +50,9 @@ void hb_allocator_destroy(hb_allocator_T* allocator);
3050

3151
hb_allocator_T hb_allocator_with_malloc(void);
3252
hb_allocator_T hb_allocator_with_arena(hb_arena_T* arena);
53+
hb_allocator_T hb_allocator_with_tracking(void);
54+
55+
hb_allocator_tracking_stats_T* hb_allocator_tracking_stats(hb_allocator_T* allocator);
3356

3457
static inline void* hb_allocator_alloc(hb_allocator_T* allocator, size_t size) {
3558
return allocator->alloc(allocator, size);

0 commit comments

Comments
 (0)