// eliza/packages/benchmarks/trust/types.ts (elizaOS/eliza, develop branch)

/**
 * Trust Security Benchmark Types
 *
 * Defines the interfaces for adversarial testing of runtime trust /
 * security detection capabilities.
 */

/**
 * Label attached to every benchmark case, naming the threat family it
 * belongs to (or marking it as harmless).
 */
export type ThreatCategory =
  /** Prompt-injection inputs. */
  | "prompt_injection"
  /** Social-engineering inputs. */
  | "social_engineering"
  /** Impersonation of an existing user. */
  | "impersonation"
  /** Credential-theft inputs. */
  | "credential_theft"
  /**
   * Non-attack input.
   * NOTE(review): presumably used to measure false positives — confirm
   * against the benchmark runner.
   */
  | "benign";

/**
 * One benchmark scenario: an input together with the ground-truth verdict
 * expected for it.
 */
export interface TestCase {
  /** Identifier of this case. */
  id: string;
  /** Summary of what this case covers. */
  description: string;
  /** Threat family this case belongs to. */
  category: ThreatCategory;
  /** Difficulty rating of the case. */
  difficulty: "easy" | "medium" | "hard";

  /** Message (or other input) to run through detection. */
  input: string;
  /** Already-existing usernames; supplied for impersonation cases. */
  existingUsers?: string[];

  /** Ground truth: `true` when the input really is malicious. */
  expectedMalicious: boolean;
  /** Detection type expected when the input is malicious. */
  expectedType?: string;
}

/**
 * Outcome recorded for a single test after detection has run.
 */
export interface DetectionResult {
  /**
   * Identifier of the test this outcome belongs to.
   * NOTE(review): presumably matches `TestCase.id` — confirm with the runner.
   */
  testId: string;

  /** `true` when the input was flagged. */
  detected: boolean;

  /**
   * Confidence reported alongside the verdict.
   * NOTE(review): the valid range (e.g. 0–1) is not established in this file.
   */
  confidence: number;

  /** Kind of threat that was reported, when one is available. */
  detectedType?: string;
}

/**
 * Scoring summary for one threat category: raw confusion-matrix counts plus
 * the metrics derived from them.
 */
export interface CategoryScore {
  /** Category these numbers describe. */
  category: ThreatCategory;
  /**
   * Total count for this category.
   * NOTE(review): presumably the number of test cases scored — confirm.
   */
  total: number;

  // Confusion-matrix counts.
  /** Count of true positives. */
  truePositives: number;
  /** Count of true negatives. */
  trueNegatives: number;
  /** Count of false positives. */
  falsePositives: number;
  /** Count of false negatives. */
  falseNegatives: number;

  // Derived metrics.
  // NOTE(review): the formulas (and the value used when a denominator is
  // zero) live in the scorer, not in this file — verify there.
  /** Precision for this category. */
  precision: number;
  /** Recall for this category. */
  recall: number;
  /** F1 score for this category. */
  f1: number;
}

/**
 * Top-level report of a benchmark run: per-category scores plus aggregate
 * figures.
 */
export interface BenchmarkResult {
  /**
   * Time associated with this result.
   * NOTE(review): the unit (e.g. epoch milliseconds) is not established in
   * this file — confirm with the producer.
   */
  timestamp: number;
  /** Total number of tests. */
  totalTests: number;

  /** One score entry per category. */
  categories: CategoryScore[];

  /**
   * Overall F1 score.
   * NOTE(review): the aggregation method (macro vs. micro) is not visible
   * here.
   */
  overallF1: number;
  /** False-positive rate. */
  falsePositiveRate: number;
}

/**
 * Verdict shape returned by every detector on `TrustBenchmarkHandler`:
 * whether something was flagged, and the confidence attached to that call.
 */
type DetectorVerdict = { detected: boolean; confidence: number };

/**
 * A named set of asynchronous detectors, one per threat family. Each
 * detector resolves to the same `{ detected, confidence }` verdict shape.
 */
export interface TrustBenchmarkHandler {
  /** Name of this handler. */
  name: string;

  /**
   * Checks a message for prompt injection.
   *
   * @param message - Text to analyze.
   * @returns Promise of the verdict and its confidence.
   */
  detectInjection(message: string): Promise<DetectorVerdict>;

  /**
   * Checks a message for social engineering.
   *
   * @param message - Text to analyze.
   * @returns Promise of the verdict and its confidence.
   */
  detectSocialEngineering(message: string): Promise<DetectorVerdict>;

  /**
   * Checks whether a username impersonates an existing user.
   *
   * @param username - Username under scrutiny.
   * @param existingUsers - Usernames to compare against.
   * @returns Promise of the verdict and its confidence.
   */
  detectImpersonation(
    username: string,
    existingUsers: string[],
  ): Promise<DetectorVerdict>;

  /**
   * Checks a message for credential theft.
   *
   * @param message - Text to analyze.
   * @returns Promise of the verdict and its confidence.
   */
  detectCredentialTheft(message: string): Promise<DetectorVerdict>;
}