Skip to content

Commit bed5ae2

Browse files
committed
Metric changes
1 parent 06a1612 commit bed5ae2

File tree

5 files changed

+44
-7
lines changed

5 files changed

+44
-7
lines changed

src/slop_code/metrics/checkpoint/extractors.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,12 @@ def _build_metrics_from_snapshot(
202202
"branches_concentration": functions["branches_concentration"],
203203
"control_mean": functions["control_mean"],
204204
"control_concentration": functions["control_concentration"],
205+
# Size concentration (Gini for LOC and statements across functions)
206+
"lines_concentration": functions.get("lines_concentration", 0.0),
207+
"statements_mean": functions.get("statements_mean", 0.0),
208+
"statements_concentration": functions.get(
209+
"statements_concentration", 0.0
210+
),
205211
# AST-grep
206212
"ast_grep_violations": ast_grep["violations"],
207213
# Source file tracking

src/slop_code/metrics/driver.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,7 @@ def _compute_function_stats(
209209
cc_values: list[int],
210210
depth_values: list[int],
211211
lines_values: list[int],
212+
statements_values: list[int],
212213
control_blocks_values: list[int],
213214
branches_values: list[int],
214215
comparisons_values: list[int],
@@ -241,9 +242,17 @@ def _compute_function_stats(
241242
# Depth stats
242243
depth_max = max(depth_values) if depth_values else 0
243244

244-
# Lines stats
245+
# Lines stats (LOC per function)
245246
lines_sum = sum(lines_values)
246247
lines_mean = statistics.mean(lines_values) if lines_values else 0.0
248+
lines_concentration = _concentration_score(lines_values)
249+
250+
# Statements stats (statements per function)
251+
statements_sum = sum(statements_values)
252+
statements_mean = (
253+
statistics.mean(statements_values) if statements_values else 0.0
254+
)
255+
statements_concentration = _concentration_score(statements_values)
247256

248257
# Control blocks
249258
control_blocks_sum = sum(control_blocks_values)
@@ -280,6 +289,10 @@ def _dist_stats(
280289
depth_max=depth_max,
281290
lines_sum=lines_sum,
282291
lines_mean=lines_mean,
292+
lines_concentration=lines_concentration,
293+
statements_sum=statements_sum,
294+
statements_mean=statements_mean,
295+
statements_concentration=statements_concentration,
283296
control_blocks_sum=control_blocks_sum,
284297
# Distribution stats
285298
nesting_mean=nesting_mean,
@@ -392,6 +405,7 @@ def compute_aggregates(
392405
func_cc: list[int] = []
393406
func_depth: list[int] = []
394407
func_lines: list[int] = []
408+
func_statements: list[int] = []
395409
func_control_blocks: list[int] = []
396410
func_branches: list[int] = []
397411
func_comparisons: list[int] = []
@@ -412,6 +426,7 @@ def compute_aggregates(
412426
func_cc.append(sym.complexity)
413427
func_depth.append(sym.max_nesting_depth)
414428
func_lines.append(sym.lines)
429+
func_statements.append(sym.statements)
415430
func_control_blocks.append(sym.control_blocks)
416431
func_branches.append(sym.branches)
417432
func_comparisons.append(sym.comparisons)
@@ -426,6 +441,7 @@ def compute_aggregates(
426441
func_cc,
427442
func_depth,
428443
func_lines,
444+
func_statements,
429445
func_control_blocks,
430446
func_branches,
431447
func_comparisons,

src/slop_code/metrics/models.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -342,9 +342,14 @@ class FunctionStats(BaseModel):
342342
cc_concentration: float = 0.0
343343
# Depth stats
344344
depth_max: int = 0
345-
# Lines stats
345+
# Lines stats (LOC per function)
346346
lines_sum: int = 0
347347
lines_mean: float = 0.0
348+
lines_concentration: float = 0.0
349+
# Statements stats (statements per function)
350+
statements_sum: int = 0
351+
statements_mean: float = 0.0
352+
statements_concentration: float = 0.0
348353
# Control blocks
349354
control_blocks_sum: int = 0
350355

src/slop_code/visualization/chart_builders.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -288,13 +288,15 @@ class ProgressLineChartConfig:
288288
title: Chart title
289289
width: Figure width
290290
height: Figure height
291+
num_bins: Number of progress bins (default 5 = 20% bins)
291292
"""
292293

293294
metric_col: str
294295
y_title: str
295296
title: str
296297
width: int = GRAPH_WIDTH
297298
height: int = GRAPH_HEIGHT
299+
num_bins: int = 5
298300

299301

300302
class ProgressLineChartBuilder:
@@ -325,7 +327,9 @@ def build(
325327
color_map = MODEL_COLORS
326328

327329
high = filter_high_thinking_checkpoints(checkpoints)
328-
progress_data = compute_progress_metric(high, self.config.metric_col)
330+
progress_data = compute_progress_metric(
331+
high, self.config.metric_col, num_bins=self.config.num_bins
332+
)
329333

330334
fig = go.Figure()
331335

src/slop_code/visualization/data_transforms.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -146,16 +146,19 @@ def compute_progress_bins(df: pd.DataFrame) -> pd.DataFrame:
146146
return df
147147

148148

149-
def compute_progress_metric(df: pd.DataFrame, metric_col: str) -> pd.DataFrame:
149+
def compute_progress_metric(
150+
df: pd.DataFrame, metric_col: str, num_bins: int = 5
151+
) -> pd.DataFrame:
150152
"""Compute progress % and aggregate metric by model.
151153
152154
Progress = checkpoint_idx / total_checkpoints for that problem.
153-
Bins into 5 buckets (0.2, 0.4, 0.6, 0.8, 1.0) and averages.
155+
Bins into buckets and averages.
154156
If multiple checkpoints from same problem fall in same bin, take the later one.
155157
156158
Args:
157159
df: Filtered checkpoints DataFrame with 'cp_num' column
158160
metric_col: Column name to aggregate
161+
num_bins: Number of progress bins (default 5 = 20% bins, use 10 for 10% bins)
159162
160163
Returns:
161164
DataFrame with columns: model, progress_bin, {metric_col}
@@ -164,8 +167,11 @@ def compute_progress_metric(df: pd.DataFrame, metric_col: str) -> pd.DataFrame:
164167
df = df.copy()
165168
df["max_cp"] = df["problem"].map(max_cp)
166169
df["progress"] = df["cp_num"] / df["max_cp"]
167-
df["progress_bin"] = np.ceil(df["progress"] * 5) / 5 # 20% bins
168-
df.loc[df["progress_bin"] == 0, "progress_bin"] = 0.2 # Move 0 to first bin
170+
bin_size = 1.0 / num_bins
171+
df["progress_bin"] = np.ceil(df["progress"] * num_bins) / num_bins
172+
df.loc[df["progress_bin"] == 0, "progress_bin"] = (
173+
bin_size # Move 0 to first bin
174+
)
169175

170176
# If multiple checkpoints from same problem in same bin, take the later one
171177
df = df.sort_values("cp_num", ascending=False)

0 commit comments

Comments
 (0)