Skip to content

Commit 738da08

Browse files
authored
fix(security): datamarking substitutes zero-width formatting characters (closes #215) (#231)
PR #214 used `char::is_whitespace()` as the datamarking classifier. That predicate follows the Unicode `White_Space` property, which excludes zero-width formatting codepoints (ZWSP `U+200B`, ZWNJ `U+200C`, ZWJ `U+200D`, WJ `U+2060`, BOM `U+FEFF`). Those codepoints are documented prompt-injection vectors used to smuggle invisible instructions inside otherwise-benign Data zones, so they were passing through the transform unchanged.

Add `is_substitutable_whitespace(c)`, defined as `c.is_whitespace()` plus the five zero-width codepoints, and use it in the substitution loop in place of the bare `char::is_whitespace` call.

Tests:
- `zwsp_is_not_substituted_by_design` renamed/inverted to `zwsp_is_substituted` (now asserts ZWSP IS replaced).
- New per-codepoint coverage: zwnj/zwj/word_joiner/bom.
- `mixed_whitespace_classes_all_substituted` left unchanged (still validates the Unicode `White_Space` set).
- `idempotence_apply_twice_equals_apply_once` extended with a mixed ordinary + zero-width input to exercise both classifier branches; the second pass remains a no-op.
1 parent 17b90c6 commit 738da08

1 file changed

Lines changed: 81 additions & 20 deletions

File tree

crates/llmtrace-security/src/datamarking.rs

Lines changed: 81 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
//! zones pass through unchanged. The Microsoft Spotlighting paper's
1818
//! threat model requires the instruction surface (system prompt,
1919
//! user's own question) to remain a normal natural-language signal.
20-
//! * Whitespace is `char::is_whitespace()` (Unicode whitespace
21-
//! property) — matches the paper's intent and covers ASCII space,
22-
//! tab, newline, NBSP (`U+00A0`), ZWSP (`U+200B`), etc.
20+
//! * Whitespace is [`is_substitutable_whitespace`] — the Unicode
21+
//! `White_Space` property plus the zero-width / formatting codepoints
22+
//! used as invisible prompt-injection vectors (ZWSP, ZWNJ, ZWJ, WJ,
23+
//! BOM). See that function's docs for the rationale.
2324
//! * The transform is idempotent: applying it twice to the same input
2425
//! produces the same output. This is required because the proxy may
2526
//! retry requests and because the marker is also a PUA codepoint
26-
//! that `is_whitespace()` rejects.
27+
//! that the predicate rejects.
2728
//! * Marker selection: try the configured default first. If it appears
2829
//! inside the zone content (a vanishingly rare collision), resample
2930
//! from `PUA_RANGE` until a non-colliding codepoint is found. The
@@ -193,8 +194,26 @@ impl DatamarkingTransform {
193194
// Internal helpers
194195
// ---------------------------------------------------------------------------
195196

196-
/// Replace every Unicode whitespace codepoint in `content` with `marker`.
197-
/// Returns `(substituted_string, byte_delta)`.
197+
/// Reports whether `c` is a codepoint the datamarking transform swaps
/// for the marker.
///
/// Two classes of codepoint qualify:
///
/// * everything `char::is_whitespace` accepts, i.e. the Unicode
///   `White_Space` property; and
/// * five zero-width / formatting codepoints that `White_Space`
///   excludes: ZWSP `U+200B`, ZWNJ `U+200C`, ZWJ `U+200D`, WJ `U+2060`
///   and BOM `U+FEFF`.
///
/// The second class is included because those codepoints are documented
/// prompt-injection vectors: they can smuggle invisible instructions
/// into otherwise-benign Data zones, so the attack surface is wider
/// than the Unicode whitespace property alone (issue #215, follow-up to
/// PR #214).
pub fn is_substitutable_whitespace(c: char) -> bool {
    // Zero-width / formatting codepoints that `char::is_whitespace`
    // does not report as whitespace.
    const ZERO_WIDTH_FORMATTING: [char; 5] = [
        '\u{200B}', // ZWSP
        '\u{200C}', // ZWNJ
        '\u{200D}', // ZWJ
        '\u{2060}', // WJ
        '\u{FEFF}', // BOM
    ];

    ZERO_WIDTH_FORMATTING.contains(&c) || c.is_whitespace()
}
214+
215+
/// Replace every substitutable whitespace codepoint in `content` with
216+
/// `marker`. Returns `(substituted_string, byte_delta)`.
198217
///
199218
/// Byte delta is `substituted.len() as i64 - content.len() as i64`. A
200219
/// positive value means the marker's UTF-8 width exceeds the
@@ -204,7 +223,7 @@ fn substitute_whitespace(content: &str, marker: char) -> (String, i64) {
204223
let original_len = content.len() as i64;
205224
let mut out = String::with_capacity(content.len());
206225
for ch in content.chars() {
207-
if ch.is_whitespace() {
226+
if is_substitutable_whitespace(ch) {
208227
out.push(marker);
209228
} else {
210229
out.push(ch);
@@ -341,9 +360,9 @@ mod tests {
341360
let t = fixed_marker_transform();
342361
// ASCII space, tab, newline, NBSP, vertical tab, form feed —
343362
// all six are in the Unicode `White_Space` property used by
344-
// `char::is_whitespace()`. ZWSP (`U+200B`) is intentionally
345-
// NOT in that property (Unicode classifies it as format/Cf,
346-
// not whitespace) and is tested separately below.
363+
// `char::is_whitespace()`. Zero-width formatting codepoints
364+
// (ZWSP/ZWNJ/ZWJ/WJ/BOM) are NOT in that property; they are
365+
// covered by the dedicated zero-width tests below.
347366
let zone = data_zone("a b\tc\nd\u{00A0}e\u{000B}f\u{000C}g");
348367
let out = t.apply(&[zone]);
349368
let mz = &out[0];
@@ -355,19 +374,59 @@ mod tests {
355374
}
356375

357376
#[test]
358-
fn zwsp_is_not_substituted_by_design() {
359-
// The brief pinned `char::is_whitespace()` as the classifier.
360-
// ZWSP (`U+200B`) is not in the Unicode `White_Space` property,
361-
// so it MUST pass through unchanged. Documenting this so a
362-
// future "fix" to also substitute ZWSP comes with an
363-
// explicit decision to widen the contract.
377+
fn zwsp_is_substituted() {
378+
// Inverse of the original `zwsp_is_not_substituted_by_design`
379+
// (issue #215). ZWSP (`U+200B`) is a documented prompt-injection
380+
// vector — it MUST be replaced by the marker, not passed through.
364381
let t = fixed_marker_transform();
365382
let zone = data_zone("a\u{200B}b");
366383
let out = t.apply(&[zone]);
367384
let mz = &out[0];
368-
assert!(mz.content.contains('\u{200B}'));
369-
assert!(!mz.content.contains(DEFAULT_MARKER));
385+
assert!(!mz.content.contains('\u{200B}'));
386+
assert!(mz.content.contains(DEFAULT_MARKER));
387+
// ZWSP is 3 bytes in UTF-8, same as U+E000 -> zero net delta.
370388
assert_eq!(mz.byte_delta, 0);
389+
assert_eq!(mz.content, format!("a{}b", DEFAULT_MARKER));
390+
}
391+
392+
#[test]
fn zwnj_is_substituted() {
    // ZWNJ (`U+200C`) is outside the Unicode `White_Space` property but
    // is one of the zero-width codepoints the transform must replace
    // with the marker.
    let t = fixed_marker_transform();
    let zone = data_zone("a\u{200C}b");
    let out = t.apply(&[zone]);
    let mz = &out[0];
    assert!(!mz.content.contains('\u{200C}'));
    assert!(mz.content.contains(DEFAULT_MARKER));
    // Pin the exact output (same shape as `zwsp_is_substituted`) so the
    // test cannot pass on output that merely happens to hold a marker.
    assert_eq!(mz.content, format!("a{}b", DEFAULT_MARKER));
    // ZWNJ is 3 bytes in UTF-8, same as the marker -> zero net delta.
    assert_eq!(mz.byte_delta, 0);
}
401+
402+
#[test]
fn zwj_is_substituted() {
    // ZWJ (`U+200D`) is outside the Unicode `White_Space` property but
    // is one of the zero-width codepoints the transform must replace
    // with the marker.
    let t = fixed_marker_transform();
    let zone = data_zone("a\u{200D}b");
    let out = t.apply(&[zone]);
    let mz = &out[0];
    assert!(!mz.content.contains('\u{200D}'));
    assert!(mz.content.contains(DEFAULT_MARKER));
    // Pin the exact output (same shape as `zwsp_is_substituted`) so the
    // test cannot pass on output that merely happens to hold a marker.
    assert_eq!(mz.content, format!("a{}b", DEFAULT_MARKER));
    // ZWJ is 3 bytes in UTF-8, same as the marker -> zero net delta.
    assert_eq!(mz.byte_delta, 0);
}
411+
412+
#[test]
fn word_joiner_is_substituted() {
    // WJ (`U+2060`) is outside the Unicode `White_Space` property but
    // is one of the zero-width codepoints the transform must replace
    // with the marker.
    let t = fixed_marker_transform();
    let zone = data_zone("a\u{2060}b");
    let out = t.apply(&[zone]);
    let mz = &out[0];
    assert!(!mz.content.contains('\u{2060}'));
    assert!(mz.content.contains(DEFAULT_MARKER));
    // Pin the exact output (same shape as `zwsp_is_substituted`) so the
    // test cannot pass on output that merely happens to hold a marker.
    assert_eq!(mz.content, format!("a{}b", DEFAULT_MARKER));
    // WJ is 3 bytes in UTF-8, same as the marker -> zero net delta.
    assert_eq!(mz.byte_delta, 0);
}
421+
422+
#[test]
fn bom_is_substituted() {
    // BOM (`U+FEFF`) is outside the Unicode `White_Space` property but
    // is one of the zero-width codepoints the transform must replace
    // with the marker.
    let t = fixed_marker_transform();
    let zone = data_zone("a\u{FEFF}b");
    let out = t.apply(&[zone]);
    let mz = &out[0];
    assert!(!mz.content.contains('\u{FEFF}'));
    assert!(mz.content.contains(DEFAULT_MARKER));
    // Pin the exact output (same shape as `zwsp_is_substituted`) so the
    // test cannot pass on output that merely happens to hold a marker.
    assert_eq!(mz.content, format!("a{}b", DEFAULT_MARKER));
    // BOM is 3 bytes in UTF-8, same as the marker -> zero net delta.
    assert_eq!(mz.byte_delta, 0);
}
372431

373432
#[test]
@@ -395,9 +454,11 @@ mod tests {
395454
#[test]
396455
fn idempotence_apply_twice_equals_apply_once() {
397456
// The marker is not whitespace, so a second pass MUST be a
398-
// no-op (zero new replacements, zero new byte delta).
457+
// no-op (zero new replacements, zero new byte delta). Input
458+
// mixes ordinary whitespace and zero-width codepoints to
459+
// exercise both classifier branches.
399460
let t = fixed_marker_transform();
400-
let zone = data_zone("hello world\nfoo bar");
461+
let zone = data_zone("hello world\nfoo\u{200B}bar\u{FEFF}baz");
401462
let first = t.apply(&[zone]);
402463
let once_content = first[0].content.clone();
403464
let once_byte_range = first[0].byte_range.clone();

0 commit comments

Comments
 (0)