-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathresponse-harmful-content.tengo
More file actions
59 lines (50 loc) · 1.73 KB
/
response-harmful-content.tengo
File metadata and controls
59 lines (50 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Response Filter: Block Harmful Content
// Scans LLM response text for known harmful-instruction phrases and, when one
// matches, blocks the response and records a critical compliance event.
text := import("text")

// Get response text (works for both streaming and non-streaming):
// streaming chunks accumulate in current_buffer; otherwise use raw_input.
response_text := input.is_chunk ? input.current_buffer : input.raw_input

// For streaming: wait for sufficient context before evaluation.
// This threshold can be adjusted based on use case.
min_evaluation_length := 150

// Default output. compliance_events is always present so downstream
// consumers see the same shape whether or not evaluation ran.
output := {
    block: false,
    message: "",
    compliance_events: []
}

// Only evaluate if we have enough context (non-streaming, or streaming with
// a sufficiently full buffer).
if !input.is_chunk || len(response_text) >= min_evaluation_length {
    // Harmful instruction patterns, matched case-insensitively as substrings.
    harmful_patterns := [
        "instructions for making",
        "how to build a weapon",
        "steps to create explosives",
        "recipe for making",
        "how to hack",
        "bypass security"
    ]

    // Lowercase once, outside the loop, instead of once per pattern.
    lowered_text := text.to_lower(response_text)

    is_harmful := false
    detected_pattern := ""
    for pattern in harmful_patterns {
        if text.contains(lowered_text, pattern) {
            is_harmful = true
            detected_pattern = pattern
            break
        }
    }

    compliance_events_list := []
    if is_harmful {
        compliance_events_list = [
            {
                event_type: "harmful_content_detected",
                severity: "critical",
                description: "Harmful pattern detected: '" + detected_pattern + "'",
                metadata: { "matched_pattern": detected_pattern }
            }
        ]
    }

    output = {
        block: is_harmful,
        message: is_harmful ? "Response blocked: Potentially harmful content detected (" + detected_pattern + ")" : "",
        compliance_events: compliance_events_list
    }
}