Skip to content

Commit 339bbe1

Browse files
author
Oracles Technologies LLC
committed
feat: added critical child safety violation category & patterns. updated README, bumped v2.6.2
1 parent 8cb083b commit 339bbe1

4 files changed

Lines changed: 82 additions & 12 deletions

File tree

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -204,18 +204,19 @@ Guardian protects your AI system from adversarial inputs designed to:
204204
- **Coordinate across modalities** — split-channel attacks that distribute threat signals across text and visual inputs, each appearing benign in isolation *(API)*
205205
- **Hide payloads in video** — injection content embedded across video frames, including temporally recurring signals designed to survive frame-level filtering *(API)*
206206

207-
The community edition covers the six most prevalent categories. The API covers 100+.
207+
The community edition covers seven categories (six OWASP-aligned attack vectors + an absolute-block child safety category). The API covers 126+.
208208

209209
---
210210

211211
## Community vs API
212212

213213
| | Community | API — Free | API — Pro | API — ENT |
214214
|---|---|---|---|---|
215-
| **Threat categories** | 6 | 100+ | 100+ | 100+ |
216-
| **Regex patterns** | 26 | 1000+ | 1000+ | 1000+ |
215+
| **Threat categories** | 7 | 126+ | 126+ | 126+ |
216+
| **Regex patterns** | 34 | 1,000+ | 1,000+ | 1,000+ |
217+
| **Child safety (absolute block)** | ✅ | ✅ | ✅ | ✅ |
217218
| **Semantic model** | Hash-based fallback | Full ONNX MiniLM-L6-v2 | Full ONNX MiniLM-L6-v2 | Full ONNX MiniLM-L6-v2 |
218-
| **Semantic fingerprints** | Runtime-only | 1,000+ pre-loaded + runtime | 1,000+ pre-loaded + runtime | 1,000+ pre-loaded + runtime |
219+
| **Semantic fingerprints** | Runtime-only | 1,959 pre-loaded + runtime | 1,959 pre-loaded + runtime | 1,959 pre-loaded + runtime |
219220
| **RAG / indirect injection** |||||
220221
| **Agentic pipeline protection** |||||
221222
| **Tool call validation** |||||

ethicore_guardian/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"""
88

99
# Version information
10-
__version__ = "2.6.1"
10+
__version__ = "2.6.2"
1111
__author__ = "Oracles Technologies LLC"
1212

1313
# Core exports — full API-tier guardian preferred; community fallback for wheel installs

ethicore_guardian/data/threat_patterns.py

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
55
Version: 1.0.0 (Community)
66
7-
This is the open-source community edition, covering 5 OWASP LLM Top-10
8-
aligned threat categories. The API tier adds 50+ additional categories,
7+
This is the open-source community edition, covering 6 OWASP LLM Top-10
8+
aligned threat categories plus an absolute-block child safety category.
9+
The API tier adds 100+ additional categories,
910
complete ONNX semantic embeddings, and advanced agentic/multi-turn threat
1011
detection.
1112
@@ -51,7 +52,7 @@ class FalsePositiveRisk(Enum):
5152

5253

5354
# ---------------------------------------------------------------------------
54-
# Community threat pattern registry — 5 categories
55+
# Community threat pattern registry — 7 categories
5556
#
5657
# Patterns sourced from publicly documented research:
5758
# OWASP LLM01 (Prompt Injection), LLM02 (Insecure Output Handling),
@@ -225,6 +226,73 @@ class FalsePositiveRisk(Enum):
225226
),
226227
},
227228

229+
# ------------------------------------------------------------------
230+
# childSafetyViolation — ABSOLUTE BLOCK (v2.6.2)
231+
# Guiding Principle 15: Sacred Protection of the Innocent (Mishmar HaYeledim)
232+
# Matthew 18:6 — "It would be better for them to have a large millstone
233+
# hung around their neck and to be drowned in the depths of the sea."
234+
#
235+
# Absolute prohibition: CSAM/CSEM under ANY framing (fictional, artistic,
236+
# educational, aged-up characters), grooming communication strategies,
237+
# child exploitation facilitation, child luring scripts, and content
238+
# that sexualizes or exploits minors. No mitigating context. No exceptions.
239+
# Weight 100 — maximum. False positives vastly preferable to false negatives.
240+
# ------------------------------------------------------------------
241+
"childSafetyViolation": {
242+
"patterns": [
243+
# Sexual content involving minors under ANY framing
244+
r"(?:sexual|explicit|erotic|nude|intimate)\s+(?:content|scene|story|description|image|fiction)\s+(?:involving|featuring|with|about)\s+(?:a\s+)?(?:minor|child|teen(?:ager)?|underage|young\s+(?:person|girl|boy)|kid|juvenile)",
245+
# Grooming strategies — building inappropriate trust with a child
246+
r"(?:groom|grooming)\s+(?:a\s+)?(?:child|minor|kid|teen(?:ager)?|underage)\s+(?:by|through|using|via)\s+(?:building|establishing|gaining)\s+(?:trust|rapport|a\s+relationship|access)",
247+
# Requests for scripts/messages to send to a child for grooming
248+
r"(?:write|generate|give\s+me|create|draft)\s+(?:a\s+)?(?:message|script|text|conversation)\s+(?:to\s+(?:send\s+to|use\s+with|approach))\s+(?:a\s+)?(?:child|minor|kid|teen(?:ager)?|young\s+(?:person|girl|boy))\s+(?:to\s+(?:build|gain)\s+(?:trust|rapport))",
249+
# Instructions for gaining unsupervised access to children
250+
r"(?:gain|get)\s+(?:(?:physical|digital|online|unsupervised|private)\s+)?access\s+to\s+(?:a\s+)?(?:child|minor|kid|children|minors)\s+(?:(?:without|bypassing?|evading?)\s+(?:parental|guardian|adult|supervisory)\s+(?:oversight|supervision|monitoring|control))",
251+
# CSAM/CSEM under any label: written, fictional, illustrated, simulated
252+
r"(?:written|fictional|illustrated|simulated|described|fantasy|not\s+real|non[- ]?existent)\s+(?:CSAM|CSEM|child\s+(?:sexual\s+abuse\s+material|pornography|porn)|child\s+(?:exploitation|sexual\s+content))",
253+
# Requesting manipulation strategies to convince a child to keep secrets
254+
r"(?:convince|persuade|get|teach)\s+(?:a\s+)?(?:child|minor|kid|teen(?:ager)?)\s+to\s+keep\s+(?:(?:a\s+)?secret|(?:this|our)\s+(?:secret|between\s+us)|quiet)\s+from\s+(?:(?:their\s+)?(?:parents?|guardians?|adults?|family)|anyone)",
255+
# Child luring via false affection, gifts, promises
256+
r"(?:use|offer|give|promise)\s+(?:gifts?|presents?|money|candy|compliments?|affection|friendship)\s+to\s+(?:lure|attract|manipulate|gain\s+the\s+trust\s+of)\s+(?:a\s+)?(?:child|minor|kid|teen(?:ager)?)",
257+
# Content normalizing adult-child sexual relationships under any framing
258+
r"(?:normalize|portray|depict|describe|present)\s+(?:adult[- ]child|child[- ]adult)\s+(?:sexual|romantic|intimate)\s+(?:relationship|interaction|dynamic)\s+(?:as\s+)?(?:normal|acceptable|natural|cultural|historical|fictional)",
259+
],
260+
"severity": ThreatSeverity.CRITICAL,
261+
"weight": 100,
262+
"description": (
263+
"ABSOLUTE BLOCK — child safety violation. Covers CSAM/CSEM generation under "
264+
"any framing, grooming communication strategies, child exploitation facilitation, "
265+
"child luring scripts, and content sexualizing or exploiting minors. "
266+
"No mitigating context. No creative/fictional/educational exceptions. "
267+
"Matthew 18:6. False positives vastly preferable to false negatives."
268+
),
269+
"semanticFingerprint": [
270+
"CSAM generation request disguised as fictional creative writing involving minor characters",
271+
"grooming communication strategy to build inappropriate trust with child victim",
272+
"sexual content involving minor characters despite fictional or aged-up framing",
273+
"child luring script using false affection and gift promises to manipulate minor",
274+
"instruction to isolate child from protective adults as grooming preparation",
275+
"secret-keeping manipulation strategy to prevent child from disclosing abuse",
276+
"content normalizing adult-child relationship despite fictional cultural framing",
277+
"strategy to circumvent parental oversight to gain unsupervised access to children",
278+
],
279+
"contextHints": {
280+
"escalators": [
281+
"child", "minor", "underage", "young", "teenager", "kid",
282+
"fictional", "not real", "aged up", "just characters", "18 in the story",
283+
"grooming", "secret", "trust", "alone with",
284+
],
285+
"mitigators": [], # No mitigating context is accepted for this category
286+
},
287+
"falsePositiveRisk": FalsePositiveRisk.LOW,
288+
"mitigationStrategy": (
289+
"ABSOLUTE BLOCK — no mitigating context reduces this verdict. "
290+
"Fictional or artistic framing, aged-up character claims, historical/cultural "
291+
"context, and educational justifications are all explicitly rejected. "
292+
"Escalate immediately to human review with maximum severity flag."
293+
),
294+
},
295+
228296
# ------------------------------------------------------------------
229297
# systemPromptLeaks — OWASP LLM07 (System Prompt Leakage)
230298
# Attempts to extract the model's system prompt or initial instructions.

ethicore_guardian/versions.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
Ethicore Engine™ - Guardian SDK - Version Information
33
"""
44

5-
__version__ = "2.6.1"
5+
__version__ = "2.6.2"
66
__version_info__ = tuple(map(int, __version__.split('.')))
77

88
# Build information
9-
__build__ = "stable.2"
10-
__release_date__ = "2026-05-18"
9+
__build__ = "stable.1"
10+
__release_date__ = "2026-05-20"
1111

1212
# Feature flags
1313
FEATURES = {
@@ -19,12 +19,13 @@
1919
"framework_integrations": True,
2020
"supply_chain_integrity": True, # v2.6.0: guardian verify + init self-check
2121
"local_provider_support": True, # v2.6.1: LM Studio, llama.cpp, LocalAI, Jan.ai providers
22+
"child_safety_protection": True, # v2.6.2: childSafetyViolation absolute-block (Matthew 18:6)
2223
}
2324

2425
# Model versions
2526
MODEL_VERSIONS = {
2627
"orchestrator": "3.0.0",
27-
"pattern_analyzer": "1.2.0", # 94 categories; +rlhfLayerExploitation, supplyChainDependencyInjection
28+
"pattern_analyzer": "1.3.0", # 7 community categories; +childSafetyViolation (CRITICAL, weight 100)
2829
"semantic_analyzer": "1.1.0",
2930
"behavioral_analyzer": "1.0.0",
3031
"ml_inference_engine": "3.2.0", # retrained 125k samples, 94 categories, 1230 fingerprints, 2026-05-18

0 commit comments

Comments
 (0)