Skip to content

Commit d128e57

Browse files
authored
Ruby CLI: Add --arena-stats flag to analyze subcommand (#1294)
This pull request updates the `analyze` subcommand in the Ruby CLI to also accept the `--arena-stats` flag. **`herb analyze --arena-stats`** <img width="2718" height="1792" alt="CleanShot 2026-03-04 at 11 14 44@2x" src="https://github.com/user-attachments/assets/4be6bab4-2b10-4b93-b1ef-f3a21c2b17ab" /> ... <img width="2676" height="1842" alt="CleanShot 2026-03-04 at 11 14 04@2x" src="https://github.com/user-attachments/assets/d8c79175-f193-4125-829e-597b0f19375f" />
1 parent 1bb0b5b commit d128e57

File tree

7 files changed

+189
-27
lines changed

7 files changed

+189
-27
lines changed

ext/herb/extension.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,50 @@ static VALUE Herb_extract_html(VALUE self, VALUE source) {
271271
return rb_ensure(buffer_to_string_body, (VALUE) &args, buffer_cleanup, (VALUE) &args);
272272
}
273273

274+
/**
 * Fetches the option called `name` from the `options` hash.
 *
 * The String key is tried first; when that lookup yields nil the Symbol key is
 * tried. Returns Qnil when neither lookup produces a non-nil value.
 */
static VALUE arena_stats_lookup_option(VALUE options, const char* name) {
  VALUE value = rb_hash_lookup(options, rb_utf8_str_new_cstr(name));

  if (NIL_P(value)) { value = rb_hash_lookup(options, ID2SYM(rb_intern(name))); }

  return value;
}

/**
 * Herb.arena_stats(source, **options) -> Hash or nil
 *
 * Parses `source` with a fresh arena allocator and reports how much arena
 * memory the parse consumed.
 *
 * Recognised options (String or Symbol keys):
 *   - track_whitespace: a truthy value enables whitespace tracking.
 *   - analyze:          an explicit falsy (non-nil) value disables analysis.
 *   - strict:           any non-nil value sets strict mode to its truthiness.
 *
 * Returns nil when the allocator cannot be initialised. Otherwise returns a
 * Hash with the Symbol keys :pages, :total_capacity, :total_used,
 * :total_available, :allocations, :fragmentation and :default_page_size.
 */
static VALUE Herb_arena_stats(int argc, VALUE* argv, VALUE self) {
  VALUE source, options;
  rb_scan_args(argc, argv, "1:", &source, &options);

  char* string = (char*) check_string(source);

  parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS;

  if (!NIL_P(options)) {
    // RTEST() is already false for nil, so no separate nil check is needed here.
    if (RTEST(arena_stats_lookup_option(options, "track_whitespace"))) { parser_options.track_whitespace = true; }

    // Only an explicitly supplied falsy value turns analysis off; a missing key keeps the default.
    VALUE analyze = arena_stats_lookup_option(options, "analyze");
    if (!NIL_P(analyze) && !RTEST(analyze)) { parser_options.analyze = false; }

    VALUE strict = arena_stats_lookup_option(options, "strict");
    if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); }
  }

  hb_allocator_T allocator;
  if (!hb_allocator_init(&allocator, HB_ALLOCATOR_ARENA)) { return Qnil; }

  AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, &allocator);

  // The statistics are copied by value here, before the allocator is destroyed below.
  hb_arena_stats_T stats = hb_arena_get_stats((hb_arena_T*) allocator.context);

  if (root != NULL) { ast_node_free((AST_NODE_T*) root, &allocator); }
  hb_allocator_destroy(&allocator);

  VALUE hash = rb_hash_new();
  rb_hash_aset(hash, ID2SYM(rb_intern("pages")), SIZET2NUM(stats.pages));
  rb_hash_aset(hash, ID2SYM(rb_intern("total_capacity")), SIZET2NUM(stats.total_capacity));
  rb_hash_aset(hash, ID2SYM(rb_intern("total_used")), SIZET2NUM(stats.total_used));
  rb_hash_aset(hash, ID2SYM(rb_intern("total_available")), SIZET2NUM(stats.total_available));
  rb_hash_aset(hash, ID2SYM(rb_intern("allocations")), SIZET2NUM(stats.allocations));
  rb_hash_aset(hash, ID2SYM(rb_intern("fragmentation")), SIZET2NUM(stats.fragmentation));
  rb_hash_aset(hash, ID2SYM(rb_intern("default_page_size")), SIZET2NUM(stats.default_page_size));

  return hash;
}
317+
274318
static VALUE Herb_version(VALUE self) {
275319
VALUE gem_version = rb_const_get(self, rb_intern("VERSION"));
276320
VALUE libherb_version = rb_utf8_str_new_cstr(herb_version());
@@ -300,5 +344,6 @@ __attribute__((__visibility__("default"))) void Init_herb(void) {
300344
rb_define_singleton_method(mHerb, "lex_file", Herb_lex_file, -1);
301345
rb_define_singleton_method(mHerb, "extract_ruby", Herb_extract_ruby, -1);
302346
rb_define_singleton_method(mHerb, "extract_html", Herb_extract_html, 1);
347+
rb_define_singleton_method(mHerb, "arena_stats", Herb_arena_stats, -1);
303348
rb_define_singleton_method(mHerb, "version", Herb_version, 0);
304349
}

lib/herb/cli.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ def result
149149
project.verbose = verbose || ci?
150150
project.isolate = isolate
151151
project.validate_ruby = true
152+
project.arena_stats = arena_stats
152153
has_issues = project.analyze!
153154
exit(has_issues ? 1 : 0)
154155
when "report"
@@ -300,7 +301,7 @@ def option_parser
300301
self.tool = t.to_sym
301302
end
302303

303-
parser.on("--arena-stats", "Print arena memory statistics (for lex/parse commands)") do
304+
parser.on("--arena-stats", "Print arena memory statistics (for lex/parse/analyze commands)") do
304305
self.arena_stats = true
305306
end
306307
end

lib/herb/project.rb

Lines changed: 92 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module Herb
1212
class Project
1313
include Colors
1414

15-
attr_accessor :project_path, :output_file, :no_log_file, :no_timing, :silent, :verbose, :isolate, :validate_ruby, :file_paths
15+
attr_accessor :project_path, :output_file, :no_log_file, :no_timing, :silent, :verbose, :isolate, :validate_ruby, :file_paths, :arena_stats
1616

1717
# Known error types that indicate issues in the user's template, not bugs in the parser.
1818
TEMPLATE_ERRORS = [
@@ -256,6 +256,10 @@ def analyze!
256256

257257
log_problem_file_details(results, log)
258258

259+
if arena_stats
260+
print_arena_summary(file_results)
261+
end
262+
259263
unless no_log_file
260264
puts "\n #{separator}"
261265
puts "\n #{dimmed("Results saved to #{output_file}")}"
@@ -339,6 +343,10 @@ def process_file_direct(file_path)
339343
file_content = File.read(file_path)
340344
result = { file_path: file_path }
341345

346+
if arena_stats
347+
result[:arena_stats] = capture_arena_stats(file_content)
348+
end
349+
342350
Timeout.timeout(1) do
343351
parse_result = Herb.parse(file_content)
344352

@@ -353,17 +361,17 @@ def process_file_direct(file_path)
353361

354362
result
355363
rescue Timeout::Error
356-
{ file_path: file_path, status: :timeout, file_content: file_content,
357-
log: "⏱️ Parsing #{file_path} timed out after 1 second" }
364+
result.merge(status: :timeout, file_content: file_content,
365+
log: "⏱️ Parsing #{file_path} timed out after 1 second")
358366
rescue StandardError => e
359367
file_content ||= begin
360368
File.read(file_path)
361369
rescue StandardError
362370
nil
363371
end
364372

365-
{ file_path: file_path, status: :failed, file_content: file_content,
366-
log: "⚠️ Error processing #{file_path}: #{e.message}" }
373+
result.merge(status: :failed, file_content: file_content,
374+
log: "⚠️ Error processing #{file_path}: #{e.message}")
367375
end
368376

369377
def process_file_isolated(file_path)
@@ -878,5 +886,84 @@ def format_duration(seconds)
878886
"#{minutes}m #{remaining_seconds.round(2)}s"
879887
end
880888
end
889+
890+
# Runs Herb.arena_stats on +file_content+ and condenses the result into the
# fields the arena summary table needs: page count, bytes used, allocation
# count, plus the line count and byte size of the content itself.
#
# Any StandardError raised along the way yields an all-zero entry instead.
def capture_arena_stats(file_content)
  arena = Herb.arena_stats(file_content)
  line_count = file_content.count("\n") + 1
  byte_length = file_content.bytesize

  {
    pages: arena[:pages],
    bytes: arena[:total_used],
    allocations: arena[:allocations],
    lines: line_count,
    length: byte_length,
  }
rescue StandardError
  { pages: 0, bytes: 0, allocations: 0, lines: 0, length: 0 }
end
903+
904+
# Prints the "Arena memory usage" section for an analyze run.
#
# Only results that carry :arena_stats with a positive :bytes value are listed.
# They are shown largest-first in a table (lines, size, pages, used, allocs,
# file), followed by the total, the largest file, and how many files exceed
# each size threshold. Nothing is printed when no result qualifies.
def print_arena_summary(file_results)
  rows = file_results.filter_map do |entry|
    arena = entry[:arena_stats]
    next unless arena && arena[:bytes].positive?

    { file: entry[:file_path], **arena }
  end

  return if rows.empty?

  rows.sort_by! { |row| -row[:bytes] }

  puts "\n #{separator}"
  puts "\n"
  puts " #{bold("Arena memory usage:")}"
  puts ""

  paths = rows.map { |row| relative_path(row[:file]) }
  used_labels = rows.map { |row| format_bytes(row[:bytes]) }
  size_labels = rows.map { |row| format_bytes(row[:length]) }

  # Each column is at least as wide as its header text.
  used_width = [used_labels.map(&:length).max, 4].max
  pages_width = [rows.map { |row| row[:pages] }.max.to_s.length, 5].max
  allocs_width = [rows.map { |row| row[:allocations] }.max.to_s.length, 6].max
  lines_width = [rows.map { |row| row[:lines] }.max.to_s.length, 5].max
  length_width = [size_labels.map(&:length).max, 4].max
  table_width = pages_width + used_width + allocs_width + lines_width + length_width + 11

  header = format(" %#{lines_width}s %#{length_width}s %#{pages_width}s %#{used_width}s %#{allocs_width}s %s", "Lines", "Size", "Pages", "Used", "Allocs", "File")
  puts header
  puts " #{"-" * (table_width + paths.map(&:length).max)}"

  rows.zip(paths, used_labels, size_labels).each do |row, path, used, size|
    color = if row[:pages] > 1
              :yellow
            else
              :green
            end
    colored_used = send(color, used)

    # The colored string may be longer than the plain one (presumably due to
    # escape sequences added by the color helper); widen the field by that
    # difference so the column still lines up.
    padding = colored_used.length - used.length

    puts format(" %#{lines_width}d %#{length_width}s %#{pages_width}d %#{used_width + padding}s %#{allocs_width}d %s", row[:lines], size, row[:pages], colored_used, row[:allocations], path)
  end

  total_bytes = rows.sum { |row| row[:bytes] }
  # Rows are sorted by descending byte count, so the first one is the largest.
  largest = rows.first

  puts ""
  puts " #{label("Total")} #{cyan(format_bytes(total_bytes))} across #{cyan("#{rows.size} #{pluralize(rows.size, "file")}")}"
  puts " #{label("Largest")} #{cyan(relative_path(largest[:file]))} (#{cyan(format_bytes(largest[:bytes]))}, #{cyan("#{largest[:pages]} #{pluralize(largest[:pages], "page")}")})"

  thresholds = { "16 KB" => 16 * 1024, "64 KB" => 64 * 1024, "128 KB" => 128 * 1024, "256 KB" => 256 * 1024, "512 KB" => 512 * 1024 }

  puts ""
  thresholds.each do |label_text, threshold|
    count = rows.count { |row| row[:bytes] > threshold }
    puts " #{label(" > #{label_text}")} #{count} #{pluralize(count, "file")}"
  end
end
958+
959+
# Formats a byte count as a short human-readable string.
#
# Below 1024 the raw count is shown ("512 B"). Below one megabyte the value is
# shown in whole kilobytes ("2 KB"). Everything else is shown in megabytes with
# one decimal place ("1.5 MB").
def format_bytes(bytes)
  megabyte = 1024 * 1024

  return "#{bytes} B" if bytes < 1024

  kilobytes = (bytes / 1024.0).round

  # Counts just under one megabyte round up to 1024 KB; show those as MB
  # instead of printing "1024 KB".
  return "#{kilobytes} KB" if bytes < megabyte && kilobytes < 1024

  "#{(bytes / megabyte.to_f).round(1)} MB"
end
881968
end
882969
end

src/include/util/hb_arena.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ typedef struct HB_ARENA_STRUCT {
2020
size_t allocation_count;
2121
} hb_arena_T;
2222

23+
/*
 * Loop header that visits every page of an arena: starts at `head` and follows
 * `next` until NULL. `page_var` is declared by the macro as a mutable
 * `hb_arena_page_T*` scoped to the loop.
 */
#define hb_arena_for_each_page(arena_ptr, page_var) \
  for (hb_arena_page_T* page_var = (arena_ptr)->head; page_var != NULL; page_var = page_var->next)
25+
26+
/*
 * Read-only variant of the page loop: starts at `head` and follows `next`
 * until NULL. `page_var` is declared by the macro as a
 * `const hb_arena_page_T*` scoped to the loop.
 */
#define hb_arena_for_each_page_const(arena_ptr, page_var) \
  for (const hb_arena_page_T* page_var = (arena_ptr)->head; page_var != NULL; page_var = page_var->next)
28+
2329
bool hb_arena_init(hb_arena_T* allocator, size_t initial_size);
2430
void* hb_arena_alloc(hb_arena_T* allocator, size_t size);
2531
size_t hb_arena_position(hb_arena_T* allocator);

src/include/util/hb_arena_debug.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@
33

44
#include "hb_arena.h"
55

6-
void hb_arena_print_stats(const hb_arena_T* allocator);
6+
/*
 * Value-type snapshot of an arena's usage, as produced by hb_arena_get_stats().
 */
typedef struct {
  size_t pages;             /* number of pages in the arena */
  size_t total_capacity;    /* sum of every page's capacity */
  size_t total_used;        /* sum of every page's position */
  size_t total_available;   /* total_capacity - total_used */
  size_t allocations;       /* the arena's allocation_count */
  size_t fragmentation;     /* unused space in pages other than the tail */
  size_t default_page_size; /* the arena's default_page_size */
} hb_arena_stats_T;
15+
16+
hb_arena_stats_T hb_arena_get_stats(const hb_arena_T* arena);
17+
void hb_arena_print_stats(const hb_arena_T* arena);
718

819
#endif

src/util/hb_arena.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@ static void hb_arena_free_page(void* pointer, size_t size) {
3434
#endif
3535
}
3636

37-
#define hb_arena_for_each_page(allocator, page) \
38-
for (hb_arena_page_T* page = (allocator)->head; page != NULL; page = page->next)
39-
4037
static inline size_t hb_arena_align_size(size_t size, size_t alignment) {
4138
assert(size <= SIZE_MAX - (alignment - 1));
4239

src/util/hb_arena_debug.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <string.h>
44

55
#include "../include/util/hb_arena.h"
6+
#include "../include/util/hb_arena_debug.h"
67

78
#define ANSI_COLOR_GREEN "\033[32m"
89
#define ANSI_COLOR_YELLOW "\033[33m"
@@ -117,6 +118,29 @@ static void print_box_line_with_bullet_and_color(const char* color, const char*
117118
printf("║%s%*s║\n", line, BOX_WIDTH - visual_length, "");
118119
}
119120

121+
hb_arena_stats_T hb_arena_get_stats(const hb_arena_T* arena) {
122+
hb_arena_stats_T stats = { 0 };
123+
124+
if (arena->head == NULL) { return stats; }
125+
126+
stats.default_page_size = arena->default_page_size;
127+
stats.allocations = arena->allocation_count;
128+
129+
hb_arena_for_each_page_const(arena, page) {
130+
stats.pages++;
131+
stats.total_capacity += page->capacity;
132+
stats.total_used += page->position;
133+
134+
if (page != arena->tail && page->position < page->capacity) {
135+
stats.fragmentation += (page->capacity - page->position);
136+
}
137+
}
138+
139+
stats.total_available = stats.total_capacity - stats.total_used;
140+
141+
return stats;
142+
}
143+
120144
void hb_arena_print_stats(const hb_arena_T* allocator) {
121145
if (allocator->head == NULL) {
122146
print_box_top();
@@ -128,22 +152,13 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {
128152
return;
129153
}
130154

131-
size_t num_pages = 0;
132-
size_t total_capacity = 0;
133-
size_t total_used = 0;
134-
size_t fragmentation = 0;
155+
hb_arena_stats_T stats = hb_arena_get_stats(allocator);
135156

136-
for (const hb_arena_page_T* page = allocator->head; page != NULL; page = page->next) {
137-
num_pages++;
138-
total_capacity += page->capacity;
139-
total_used += page->position;
140-
141-
if (page != allocator->tail && page->position < page->capacity) {
142-
fragmentation += (page->capacity - page->position);
143-
}
144-
}
157+
size_t total_capacity = stats.total_capacity;
158+
size_t total_used = stats.total_used;
159+
size_t total_available = stats.total_available;
160+
size_t fragmentation = stats.fragmentation;
145161

146-
size_t total_available = total_capacity - total_used;
147162
double usage_percentage = (double) total_used / (double) total_capacity * 100.0;
148163
double fragmentation_percentage = (double) fragmentation / (double) total_capacity * 100.0;
149164
const char* overall_color = get_usage_color(usage_percentage);
@@ -160,7 +175,7 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {
160175
print_box_separator();
161176
print_box_line(" Statistics:");
162177

163-
print_box_line_with_bullet(" • Pages: %zu", num_pages);
178+
print_box_line_with_bullet(" • Pages: %zu", stats.pages);
164179
print_box_line_with_bullet(" • Default Page Size: %s", default_size_string);
165180
print_box_line_with_bullet(" • Total Capacity: %s", capacity_string);
166181
print_box_line_with_bullet_and_color(
@@ -178,7 +193,7 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {
178193
usage_percentage,
179194
ANSI_COLOR_RESET
180195
);
181-
print_box_line_with_bullet(" • Allocations: %zu", allocator->allocation_count);
196+
print_box_line_with_bullet(" • Allocations: %zu", stats.allocations);
182197
print_box_line_with_bullet(" • Fragmentation: %s", fragmentation_string);
183198

184199
if (fragmentation > 0) { print_box_line(" (%.1f%% skipped in non-tail pages)", fragmentation_percentage); }
@@ -187,7 +202,7 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {
187202

188203
size_t page_number = 0;
189204

190-
for (const hb_arena_page_T* page = allocator->head; page != NULL; page = page->next) {
205+
hb_arena_for_each_page_const(allocator, page) {
191206
double page_usage = (double) page->position / (double) page->capacity * 100.0;
192207
const char* page_color = get_usage_color(page_usage);
193208

0 commit comments

Comments
 (0)