Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions ext/herb/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,50 @@ static VALUE Herb_extract_html(VALUE self, VALUE source) {
return rb_ensure(buffer_to_string_body, (VALUE) &args, buffer_cleanup, (VALUE) &args);
}

/*
 * Fetches an option from the keyword hash, trying the String key first and
 * falling back to the Symbol key when the String lookup yields nil.
 */
static VALUE arena_stats_lookup_option(VALUE options, const char* name) {
  VALUE value = rb_hash_lookup(options, rb_utf8_str_new_cstr(name));

  if (NIL_P(value)) { value = rb_hash_lookup(options, ID2SYM(rb_intern(name))); }

  return value;
}

/*
 * Herb.arena_stats(source, **options) -> Hash or nil
 *
 * Parses `source` with a dedicated arena allocator, snapshots the arena
 * statistics once parsing has finished, releases the parse tree and the
 * allocator, and returns the snapshot as a Hash keyed by Symbols
 * (:pages, :total_capacity, :total_used, :total_available, :allocations,
 * :fragmentation, :default_page_size).
 *
 * Recognized options (String or Symbol keys): track_whitespace, analyze, strict.
 * Returns nil when the allocator cannot be initialized.
 */
static VALUE Herb_arena_stats(int argc, VALUE* argv, VALUE self) {
  VALUE source;
  VALUE options;
  rb_scan_args(argc, argv, "1:", &source, &options);

  char* input = (char*) check_string(source);
  parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS;

  if (!NIL_P(options)) {
    /* Only a truthy value switches whitespace tracking on. */
    if (RTEST(arena_stats_lookup_option(options, "track_whitespace"))) { parser_options.track_whitespace = true; }

    /* Only an explicit `false` switches analysis off; nil/absent keeps the default. */
    if (arena_stats_lookup_option(options, "analyze") == Qfalse) { parser_options.analyze = false; }

    /* Any non-nil value overrides the default strictness with its truthiness. */
    VALUE strict_option = arena_stats_lookup_option(options, "strict");
    if (!NIL_P(strict_option)) { parser_options.strict = RTEST(strict_option); }
  }

  hb_allocator_T allocator;
  if (!hb_allocator_init(&allocator, HB_ALLOCATOR_ARENA)) { return Qnil; }

  AST_DOCUMENT_NODE_T* document = herb_parse(input, &parser_options, &allocator);

  /* Snapshot before teardown: the numbers are read from the live arena. */
  hb_arena_stats_T stats = hb_arena_get_stats((hb_arena_T*) allocator.context);

  if (document != NULL) { ast_node_free((AST_NODE_T*) document, &allocator); }
  hb_allocator_destroy(&allocator);

  const struct {
    const char* name;
    size_t value;
  } fields[] = {
    { "pages", stats.pages },
    { "total_capacity", stats.total_capacity },
    { "total_used", stats.total_used },
    { "total_available", stats.total_available },
    { "allocations", stats.allocations },
    { "fragmentation", stats.fragmentation },
    { "default_page_size", stats.default_page_size },
  };

  VALUE result = rb_hash_new();

  for (size_t index = 0; index < sizeof(fields) / sizeof(fields[0]); index++) {
    rb_hash_aset(result, ID2SYM(rb_intern(fields[index].name)), SIZET2NUM(fields[index].value));
  }

  return result;
}

static VALUE Herb_version(VALUE self) {
VALUE gem_version = rb_const_get(self, rb_intern("VERSION"));
VALUE libherb_version = rb_utf8_str_new_cstr(herb_version());
Expand Down Expand Up @@ -300,5 +344,6 @@ __attribute__((__visibility__("default"))) void Init_herb(void) {
rb_define_singleton_method(mHerb, "lex_file", Herb_lex_file, -1);
rb_define_singleton_method(mHerb, "extract_ruby", Herb_extract_ruby, -1);
rb_define_singleton_method(mHerb, "extract_html", Herb_extract_html, 1);
rb_define_singleton_method(mHerb, "arena_stats", Herb_arena_stats, -1);
rb_define_singleton_method(mHerb, "version", Herb_version, 0);
}
3 changes: 2 additions & 1 deletion lib/herb/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def result
project.verbose = verbose || ci?
project.isolate = isolate
project.validate_ruby = true
project.arena_stats = arena_stats
has_issues = project.analyze!
exit(has_issues ? 1 : 0)
when "report"
Expand Down Expand Up @@ -300,7 +301,7 @@ def option_parser
self.tool = t.to_sym
end

parser.on("--arena-stats", "Print arena memory statistics (for lex/parse commands)") do
parser.on("--arena-stats", "Print arena memory statistics (for lex/parse/analyze commands)") do
self.arena_stats = true
end
end
Expand Down
97 changes: 92 additions & 5 deletions lib/herb/project.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ module Herb
class Project
include Colors

attr_accessor :project_path, :output_file, :no_log_file, :no_timing, :silent, :verbose, :isolate, :validate_ruby, :file_paths
attr_accessor :project_path, :output_file, :no_log_file, :no_timing, :silent, :verbose, :isolate, :validate_ruby, :file_paths, :arena_stats

# Known error types that indicate issues in the user's template, not bugs in the parser.
TEMPLATE_ERRORS = [
Expand Down Expand Up @@ -256,6 +256,10 @@ def analyze!

log_problem_file_details(results, log)

if arena_stats
print_arena_summary(file_results)
end

unless no_log_file
puts "\n #{separator}"
puts "\n #{dimmed("Results saved to #{output_file}")}"
Expand Down Expand Up @@ -339,6 +343,10 @@ def process_file_direct(file_path)
file_content = File.read(file_path)
result = { file_path: file_path }

if arena_stats
result[:arena_stats] = capture_arena_stats(file_content)
end

Timeout.timeout(1) do
parse_result = Herb.parse(file_content)

Expand All @@ -353,17 +361,17 @@ def process_file_direct(file_path)

result
rescue Timeout::Error
{ file_path: file_path, status: :timeout, file_content: file_content,
log: "⏱️ Parsing #{file_path} timed out after 1 second" }
result.merge(status: :timeout, file_content: file_content,
log: "⏱️ Parsing #{file_path} timed out after 1 second")
rescue StandardError => e
file_content ||= begin
File.read(file_path)
rescue StandardError
nil
end

{ file_path: file_path, status: :failed, file_content: file_content,
log: "⚠️ Error processing #{file_path}: #{e.message}" }
result.merge(status: :failed, file_content: file_content,
log: "⚠️ Error processing #{file_path}: #{e.message}")
end

def process_file_isolated(file_path)
Expand Down Expand Up @@ -878,5 +886,84 @@ def format_duration(seconds)
"#{minutes}m #{remaining_seconds.round(2)}s"
end
end

# Collects arena usage figures for a single template.
#
# Asks Herb.arena_stats for the arena statistics of +file_content+ and
# returns a Hash with :pages, :bytes (the arena's total_used), :allocations,
# :lines (newline count plus one) and :length (byte size of the content).
#
# Best effort: any StandardError raised along the way yields an all-zero Hash.
def capture_arena_stats(file_content)
  begin
    arena = Herb.arena_stats(file_content)

    summary = {}
    summary[:pages] = arena[:pages]
    summary[:bytes] = arena[:total_used]
    summary[:allocations] = arena[:allocations]
    summary[:lines] = file_content.count("\n") + 1
    summary[:length] = file_content.bytesize
    summary
  rescue StandardError
    { pages: 0, bytes: 0, allocations: 0, lines: 0, length: 0 }
  end
end

# Prints a per-file table of arena memory usage followed by totals.
#
# Only results that carry :arena_stats with a positive :bytes value are
# listed; when none qualify nothing is printed. Rows are ordered by bytes
# used, largest first. Rows spanning more than one arena page are shown in
# yellow, single-page rows in green. After the table come the grand total,
# the largest file, and how many files exceed each size threshold.
def print_arena_summary(file_results)
  rows = file_results
    .select { |result| result[:arena_stats] && result[:arena_stats][:bytes].positive? }
    .map { |result| { file: result[:file_path], **result[:arena_stats] } }

  return if rows.empty?

  rows = rows.sort_by { |row| -row[:bytes] }

  puts "\n #{separator}"
  puts "\n"
  puts " #{bold("Arena memory usage:")}"
  puts ""

  relatives = rows.map { |row| relative_path(row[:file]) }
  used_strings = rows.map { |row| format_bytes(row[:bytes]) }
  length_strings = rows.map { |row| format_bytes(row[:length]) }

  # Each column is at least as wide as its header text.
  used_width = [used_strings.map(&:length).max, 4].max
  pages_width = [rows.map { |row| row[:pages] }.max.to_s.length, 5].max
  allocs_width = [rows.map { |row| row[:allocations] }.max.to_s.length, 6].max
  lines_width = [rows.map { |row| row[:lines] }.max.to_s.length, 5].max
  length_width = [length_strings.map(&:length).max, 4].max
  total_width = pages_width + used_width + allocs_width + lines_width + length_width + 11

  puts format(" %#{lines_width}s %#{length_width}s %#{pages_width}s %#{used_width}s %#{allocs_width}s %s", "Lines", "Size", "Pages", "Used", "Allocs", "File")
  puts " #{"-" * (total_width + relatives.map(&:length).max)}"

  rows.zip(relatives, used_strings, length_strings).each do |row, relative, used, size|
    colored_used = row[:pages] > 1 ? yellow(used) : green(used)

    # The color wrapper lengthens the string; widen the field by the same
    # amount so the visible text stays aligned.
    extra = colored_used.length - used.length
    puts format(" %#{lines_width}d %#{length_width}s %#{pages_width}d %#{used_width + extra}s %#{allocs_width}d %s", row[:lines], size, row[:pages], colored_used, row[:allocations], relative)
  end

  total_bytes = rows.sum { |row| row[:bytes] }
  largest = rows.first

  puts ""
  puts " #{label("Total")} #{cyan(format_bytes(total_bytes))} across #{cyan("#{rows.size} #{pluralize(rows.size, "file")}")}"
  puts " #{label("Largest")} #{cyan(relative_path(largest[:file]))} (#{cyan(format_bytes(largest[:bytes]))}, #{cyan("#{largest[:pages]} #{pluralize(largest[:pages], "page")}")})"

  thresholds = [
    ["16 KB", 16 * 1024],
    ["64 KB", 64 * 1024],
    ["128 KB", 128 * 1024],
    ["256 KB", 256 * 1024],
    ["512 KB", 512 * 1024],
  ]

  puts ""
  thresholds.each do |label_text, threshold|
    count = rows.count { |row| row[:bytes] > threshold }
    puts " #{label(" > #{label_text}")} #{count} #{pluralize(count, "file")}"
  end
end

# Formats a byte count for display.
#
# Below 1024 the exact count is shown ("512 B"). From 1024 up to one
# mebibyte the value is shown in whole kibibytes ("12 KB"). From one
# mebibyte the value is shown in mebibytes with one decimal ("2.5 MB").
#
# Values just under one mebibyte would round up to "1024 KB"; those are
# promoted to the MB form ("1.0 MB") so the KB figure never reaches 1024.
def format_bytes(bytes)
  return "#{bytes} B" if bytes < 1024

  kilobytes = (bytes / 1024.0).round
  return "#{kilobytes} KB" if kilobytes < 1024

  "#{(bytes / (1024.0 * 1024.0)).round(1)} MB"
end
end
end
6 changes: 6 additions & 0 deletions src/include/util/hb_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ typedef struct HB_ARENA_STRUCT {
size_t allocation_count;
} hb_arena_T;

/*
 * Loop header that visits every page of `arena`, starting at `(arena)->head`
 * and following each page's `next` pointer until it is NULL.
 *
 * `page` is the NAME of the loop variable: the macro declares it as a
 * `hb_arena_page_T*` scoped to the loop. Follow the macro with the loop body.
 */
#define hb_arena_for_each_page(arena, page) \
for (hb_arena_page_T* page = (arena)->head; page != NULL; page = page->next)

/*
 * Same traversal as hb_arena_for_each_page, but the declared loop variable is
 * a `const hb_arena_page_T*`, so it can be used on a `const` arena for
 * read-only inspection of the pages.
 */
#define hb_arena_for_each_page_const(arena, page) \
for (const hb_arena_page_T* page = (arena)->head; page != NULL; page = page->next)

bool hb_arena_init(hb_arena_T* allocator, size_t initial_size);
void* hb_arena_alloc(hb_arena_T* allocator, size_t size);
size_t hb_arena_position(hb_arena_T* allocator);
Expand Down
13 changes: 12 additions & 1 deletion src/include/util/hb_arena_debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@

#include "hb_arena.h"

void hb_arena_print_stats(const hb_arena_T* allocator);
/*
 * Snapshot of an arena's usage figures, as produced by hb_arena_get_stats().
 * Every field is zero when the arena has no pages.
 * NOTE(review): sizes are presumably in bytes — confirm against hb_arena_page_T.
 */
typedef struct {
// Number of pages in the arena's page list.
size_t pages;
// Sum of `capacity` over all pages.
size_t total_capacity;
// Sum of `position` over all pages.
size_t total_used;
// total_capacity minus total_used.
size_t total_available;
// The arena's `allocation_count` at the time of the snapshot.
size_t allocations;
// Unused space (capacity - position) summed over every page except the tail.
size_t fragmentation;
// The arena's `default_page_size`.
size_t default_page_size;
} hb_arena_stats_T;

hb_arena_stats_T hb_arena_get_stats(const hb_arena_T* arena);
void hb_arena_print_stats(const hb_arena_T* arena);

#endif
3 changes: 0 additions & 3 deletions src/util/hb_arena.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ static void hb_arena_free_page(void* pointer, size_t size) {
#endif
}

#define hb_arena_for_each_page(allocator, page) \
for (hb_arena_page_T* page = (allocator)->head; page != NULL; page = page->next)

static inline size_t hb_arena_align_size(size_t size, size_t alignment) {
assert(size <= SIZE_MAX - (alignment - 1));

Expand Down
49 changes: 32 additions & 17 deletions src/util/hb_arena_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <string.h>

#include "../include/util/hb_arena.h"
#include "../include/util/hb_arena_debug.h"

#define ANSI_COLOR_GREEN "\033[32m"
#define ANSI_COLOR_YELLOW "\033[33m"
Expand Down Expand Up @@ -117,6 +118,29 @@ static void print_box_line_with_bullet_and_color(const char* color, const char*
printf("║%s%*s║\n", line, BOX_WIDTH - visual_length, "");
}

/*
 * Builds a usage snapshot for `arena` by walking its page list.
 *
 * An arena without a head page yields an all-zero result. Otherwise the
 * result carries the page count, the summed capacity and used position of
 * all pages, the remaining space (capacity - used), the arena's allocation
 * count and default page size, and the "fragmentation": unused space left
 * behind in every page other than the tail.
 */
hb_arena_stats_T hb_arena_get_stats(const hb_arena_T* arena) {
  hb_arena_stats_T result = { 0 };

  if (arena->head != NULL) {
    result.allocations = arena->allocation_count;
    result.default_page_size = arena->default_page_size;

    hb_arena_for_each_page_const(arena, page) {
      result.pages += 1;
      result.total_capacity += page->capacity;
      result.total_used += page->position;

      /* Free space in the tail page is not counted as fragmentation. */
      if (page == arena->tail) { continue; }
      if (page->position >= page->capacity) { continue; }

      result.fragmentation += page->capacity - page->position;
    }

    result.total_available = result.total_capacity - result.total_used;
  }

  return result;
}

void hb_arena_print_stats(const hb_arena_T* allocator) {
if (allocator->head == NULL) {
print_box_top();
Expand All @@ -128,22 +152,13 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {
return;
}

size_t num_pages = 0;
size_t total_capacity = 0;
size_t total_used = 0;
size_t fragmentation = 0;
hb_arena_stats_T stats = hb_arena_get_stats(allocator);

for (const hb_arena_page_T* page = allocator->head; page != NULL; page = page->next) {
num_pages++;
total_capacity += page->capacity;
total_used += page->position;

if (page != allocator->tail && page->position < page->capacity) {
fragmentation += (page->capacity - page->position);
}
}
size_t total_capacity = stats.total_capacity;
size_t total_used = stats.total_used;
size_t total_available = stats.total_available;
size_t fragmentation = stats.fragmentation;

size_t total_available = total_capacity - total_used;
double usage_percentage = (double) total_used / (double) total_capacity * 100.0;
double fragmentation_percentage = (double) fragmentation / (double) total_capacity * 100.0;
const char* overall_color = get_usage_color(usage_percentage);
Expand All @@ -160,7 +175,7 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {
print_box_separator();
print_box_line(" Statistics:");

print_box_line_with_bullet(" • Pages: %zu", num_pages);
print_box_line_with_bullet(" • Pages: %zu", stats.pages);
print_box_line_with_bullet(" • Default Page Size: %s", default_size_string);
print_box_line_with_bullet(" • Total Capacity: %s", capacity_string);
print_box_line_with_bullet_and_color(
Expand All @@ -178,7 +193,7 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {
usage_percentage,
ANSI_COLOR_RESET
);
print_box_line_with_bullet(" • Allocations: %zu", allocator->allocation_count);
print_box_line_with_bullet(" • Allocations: %zu", stats.allocations);
print_box_line_with_bullet(" • Fragmentation: %s", fragmentation_string);

if (fragmentation > 0) { print_box_line(" (%.1f%% skipped in non-tail pages)", fragmentation_percentage); }
Expand All @@ -187,7 +202,7 @@ void hb_arena_print_stats(const hb_arena_T* allocator) {

size_t page_number = 0;

for (const hb_arena_page_T* page = allocator->head; page != NULL; page = page->next) {
hb_arena_for_each_page_const(allocator, page) {
double page_usage = (double) page->position / (double) page->capacity * 100.0;
const char* page_color = get_usage_color(page_usage);

Expand Down
Loading