-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Expand file tree
/
Copy pathresults-real-v2.json
More file actions
122 lines (122 loc) · 2.65 KB
/
results-real-v2.json
File metadata and controls
122 lines (122 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
{
"handler_name": "eliza-ts-bridge",
"overall_f1": 0.9220365390664675,
"false_positive_rate": 0.14285714285714285,
"total_tests": 165,
"timestamp": 1770459006.195436,
"difficulty_breakdown": {
"easy": {
"correct": 47,
"total": 51
},
"medium": {
"correct": 65,
"total": 73
},
"hard": {
"correct": 29,
"total": 41
}
},
"categories": [
{
"category": "prompt_injection",
"true_positives": 23,
"false_positives": 0,
"false_negatives": 7,
"true_negatives": 0,
"precision": 1.0,
"recall": 0.7666666666666667,
"f1": 0.8679245283018869,
"total": 30
},
{
"category": "social_engineering",
"true_positives": 17,
"false_positives": 0,
"false_negatives": 3,
"true_negatives": 0,
"precision": 1.0,
"recall": 0.85,
"f1": 0.9189189189189189,
"total": 20
},
{
"category": "impersonation",
"true_positives": 13,
"false_positives": 0,
"false_negatives": 0,
"true_negatives": 2,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"total": 15
},
{
"category": "credential_theft",
"true_positives": 15,
"false_positives": 0,
"false_negatives": 0,
"true_negatives": 0,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"total": 15
},
{
"category": "privilege_escalation",
"true_positives": 11,
"false_positives": 0,
"false_negatives": 4,
"true_negatives": 0,
"precision": 1.0,
"recall": 0.7333333333333333,
"f1": 0.846153846153846,
"total": 15
},
{
"category": "data_exfiltration",
"true_positives": 14,
"false_positives": 0,
"false_negatives": 1,
"true_negatives": 0,
"precision": 1.0,
"recall": 0.9333333333333333,
"f1": 0.9655172413793104,
"total": 15
},
{
"category": "resource_abuse",
"true_positives": 8,
"false_positives": 0,
"false_negatives": 2,
"true_negatives": 0,
"precision": 1.0,
"recall": 0.8,
"f1": 0.888888888888889,
"total": 10
},
{
"category": "content_policy",
"true_positives": 8,
"false_positives": 0,
"false_negatives": 2,
"true_negatives": 0,
"precision": 1.0,
"recall": 0.8,
"f1": 0.888888888888889,
"total": 10
},
{
"category": "benign",
"true_positives": 0,
"false_positives": 5,
"false_negatives": 0,
"true_negatives": 30,
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"total": 35
}
]
}