-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluation.py
More file actions
82 lines (69 loc) · 2.87 KB
/
Copy pathevaluation.py
File metadata and controls
82 lines (69 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import subprocess
import json
import numpy as np
import os
import sys
# How many times the whole experiment is repeated (adjust as needed).
NUM_RUNS = 15
# Child scripts are launched with the interpreter that is running this file.
PYTHON_EXE = sys.executable
# Per-model metric series; "time" collects the inference time of each run.
results = {
    model: {metric: [] for metric in ("f1", "precision", "recall", "time")}
    for model in ("ASTNN", "CodeBERT")
}
def run_script(script_path, work_dir, model_name):
    """Run a training script as a child process and return its reported metrics.

    The script is started with the interpreter stored in ``PYTHON_EXE``
    (unbuffered, with ``--lang java``) using ``work_dir`` as its working
    directory. Each stdout line is echoed as it arrives, prefixed with
    ``[model_name]``. A line containing ``__DATA_JSON__`` is expected to carry
    a JSON document after the marker; the last such line that parses is kept.

    Args:
        script_path: Script to execute; passed to the interpreter unchanged.
        work_dir: Working directory for the child process.
        model_name: Label used in the progress and error messages.

    Returns:
        The decoded metrics dict when it contains an ``"f1"`` entry,
        otherwise ``None`` (after printing the child's stderr).
    """
    import threading  # Local import: keeps this block independent of file-level edits.

    marker = "__DATA_JSON__"
    print(f"Ejecutando {model_name}...")

    output_json = None
    stderr_chunks = []

    # The context manager closes both pipes and reaps the child on exit.
    with subprocess.Popen(
        [PYTHON_EXE, "-u", script_path, "--lang", "java"],
        cwd=work_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        errors="replace",  # undecodable bytes must not abort the benchmark
    ) as process:

        def _drain_stderr():
            stderr_chunks.append(process.stderr.read())

        # stderr has to be consumed while stdout is being read: otherwise a
        # child that logs heavily to stderr blocks once the pipe buffer is
        # full, and this loop would wait on stdout forever.
        stderr_reader = threading.Thread(target=_drain_stderr, daemon=True)
        stderr_reader.start()

        for line in process.stdout:
            print(f" [{model_name}] {line.strip()}")
            if marker not in line:
                continue
            json_str = line.split(marker, 1)[1].strip()
            try:
                output_json = json.loads(json_str)
            except json.JSONDecodeError as exc:
                # Keep any previously parsed payload; just report this one.
                print(f" [{model_name}] Aviso: JSON inválido tras {marker}: {exc}")

        # stdout hit EOF; wait for the stderr reader before the pipes close.
        stderr_reader.join()

    # Only a dict exposing "f1" is usable by the summary line below.
    if isinstance(output_json, dict) and "f1" in output_json:
        print(f"Resultados: F1={output_json['f1']:.4f}")
        return output_json

    print(f"Error: No se encontraron métricas finales para {model_name}")
    print("".join(stderr_chunks))
    return None
print(f"--- INICIANDO BENCHMARK DE {NUM_RUNS} EJECUCIONES ---")

# (model label, training script, working directory), in execution order.
_BENCHMARK_TARGETS = (
    ("ASTNN", "train.py", "astnn"),
    ("CodeBERT", "train_codebert.py", "codebert"),
)
# (key in `results`, key in the metrics payload returned by run_script).
_METRIC_SOURCES = (
    ("f1", "f1"),
    ("precision", "precision"),
    ("recall", "recall"),
    ("time", "avg_inference_time"),
)

for run_number in range(1, NUM_RUNS + 1):
    print(f"\n=== VUELTA {run_number}/{NUM_RUNS} ===")
    for label, script, folder in _BENCHMARK_TARGETS:
        metrics = run_script(script, folder, label)
        if not metrics:
            # Nothing is recorded for this model in this run.
            continue
        for result_key, payload_key in _METRIC_SOURCES:
            results[label][result_key].append(metrics[payload_key])
# Print mean and standard deviation of F1 and inference time for each model.
print("\n\n=== INFORME FINAL DE RESULTADOS ===")
for model in ("ASTNN", "CodeBERT"):
    print(f"\n🔹 Modelo: {model}")
    f1_scores = results[model]["f1"]
    if not f1_scores:
        print("No hay datos disponibles.")
        continue

    inference_times = results[model]["time"]
    f1_avg, f1_dev = np.mean(f1_scores), np.std(f1_scores)
    time_avg, time_dev = np.mean(inference_times), np.std(inference_times)

    print(f" F1-Score : {f1_avg:.4f} ± {f1_dev:.4f}")
    print(f" Inferencia (s): {time_avg:.6f}s ± {time_dev:.6f}s")
    print(f" Inferencia (ms): {time_avg * 1000:.2f}ms")

# Persist the raw per-run values as the textual form of the results dict.
with open("resultados_benchmarking.txt", "w") as report_file:
    report_file.write(str(results))