diff --git a/doc/api.rst b/doc/api.rst
index aa9892055..587148582 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -315,6 +315,7 @@ API Reference
     UnicodeSubstitutionConverter
     UrlConverter
     VariationConverter
+    ZalgoConverter
     ZeroWidthConverter

 .. automodule:: pyrit.prompt_converter.fuzzer_converter
diff --git a/pyrit/prompt_converter/__init__.py b/pyrit/prompt_converter/__init__.py
index 328a774da..d8c741e3c 100644
--- a/pyrit/prompt_converter/__init__.py
+++ b/pyrit/prompt_converter/__init__.py
@@ -61,6 +61,7 @@
 from pyrit.prompt_converter.unicode_sub_converter import UnicodeSubstitutionConverter
 from pyrit.prompt_converter.url_converter import UrlConverter
 from pyrit.prompt_converter.variation_converter import VariationConverter
+from pyrit.prompt_converter.zalgo_converter import ZalgoConverter
 from pyrit.prompt_converter.zero_width_converter import ZeroWidthConverter


@@ -121,5 +122,6 @@
     "UrlConverter",
     "VariationConverter",
     "VariationSelectorSmugglerConverter",
+    "ZalgoConverter",
     "ZeroWidthConverter",
 ]
diff --git a/pyrit/prompt_converter/zalgo_converter.py b/pyrit/prompt_converter/zalgo_converter.py
new file mode 100644
index 000000000..df4d699ad
--- /dev/null
+++ b/pyrit/prompt_converter/zalgo_converter.py
@@ -0,0 +1,109 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+import random
+from typing import Optional
+
+from pyrit.models import PromptDataType
+from pyrit.prompt_converter import ConverterResult, PromptConverter
+
+# Unicode combining characters for Zalgo effect (U+0300–U+036F)
+ZALGO_MARKS = [chr(code) for code in range(0x0300, 0x036F + 1)]
+# Setting a max intensity so people don't do anything unreasonable
+MAX_INTENSITY = 100
+logger = logging.getLogger(__name__)
+
+
+class ZalgoConverter(PromptConverter):
+    """
+    Converts text into Zalgo text by appending random combining marks to alphanumeric characters.
+
+    Each alphanumeric character is followed by between 1 and ``intensity`` marks drawn from
+    ``ZALGO_MARKS``; all other characters are passed through unchanged.
+    """
+
+    def __init__(self, *, intensity: int = 10, seed: Optional[int] = None) -> None:
+        """
+        Initializes the Zalgo converter.
+
+        Args:
+            intensity (int): Maximum number of combining marks per character (higher = more cursed).
+                Values outside 0..MAX_INTENSITY are clamped and a warning is logged. Default is 10.
+            seed (Optional[int]): Optional seed for reproducible output.
+
+        Raises:
+            ValueError: If ``intensity`` cannot be converted to an integer.
+        """
+        self._intensity = self._normalize_intensity(intensity)
+        self._seed = seed
+
+    def _normalize_intensity(self, intensity: int) -> int:
+        """
+        Coerces ``intensity`` to an int and clamps it to the range 0..MAX_INTENSITY.
+
+        Args:
+            intensity (int): Requested intensity; any value accepted by ``int()`` is allowed.
+
+        Returns:
+            int: The clamped intensity.
+
+        Raises:
+            ValueError: If ``intensity`` cannot be converted to an integer.
+        """
+        try:
+            intensity = int(intensity)
+        except (TypeError, ValueError, OverflowError) as exc:
+            # OverflowError covers float("inf"), which int() rejects with a different exception type.
+            raise ValueError(f"Invalid intensity value: {intensity!r} (must be an integer)") from exc
+        normalized_intensity = max(0, min(intensity, MAX_INTENSITY))
+        if intensity != normalized_intensity:
+            logger.warning(
+                f"ZalgoConverter supports intensity between 0 and {MAX_INTENSITY}, "
+                f"but received a value of {intensity}. Normalizing to {normalized_intensity}."
+            )
+        return normalized_intensity
+
+    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text") -> ConverterResult:
+        """
+        Converts text into cursed Zalgo text using combining Unicode marks.
+
+        Args:
+            prompt (str): The text to convert.
+            input_type (PromptDataType): Type of the input; only "text" is supported.
+
+        Returns:
+            ConverterResult: The converted text with output type "text".
+
+        Raises:
+            ValueError: If ``input_type`` is not supported.
+        """
+        if not self.input_supported(input_type):
+            raise ValueError("Input type not supported")
+
+        if self._intensity <= 0:
+            return ConverterResult(output_text=prompt, output_type="text")
+
+        # Seeded runs draw from a private generator so that reproducibility does not require
+        # reseeding the process-wide ``random`` module, which would alter the random stream of
+        # unrelated code. Unseeded runs keep using the shared module-level generator.
+        if self._seed is None:
+            randint, choice = random.randint, random.choice
+        else:
+            rng = random.Random(self._seed)
+            randint, choice = rng.randint, rng.choice
+
+        def glitch(char: str) -> str:
+            mark_count = randint(1, self._intensity)
+            return char + "".join(choice(ZALGO_MARKS) for _ in range(mark_count))
+
+        output_text = "".join(glitch(c) if c.isalnum() else c for c in prompt)
+        return ConverterResult(output_text=output_text, output_type="text")
+
+    def input_supported(self, input_type: PromptDataType) -> bool:
+        """Returns True only when ``input_type`` is "text"."""
+        return input_type == "text"
+
+    def output_supported(self, output_type: PromptDataType) -> bool:
+        """Returns True only when ``output_type`` is "text"."""
+        return output_type == "text"
diff --git a/tests/unit/converter/test_zalgo_converter.py b/tests/unit/converter/test_zalgo_converter.py
new file mode 100644
index 000000000..172476941
--- /dev/null
+++ b/tests/unit/converter/test_zalgo_converter.py
@@ -0,0 +1,86 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import pytest
+
+from pyrit.prompt_converter import ZalgoConverter
+
+
+@pytest.mark.asyncio
+async def test_zalgo_output_changes_text():
+    prompt = "hello, world!"
+    converter = ZalgoConverter(intensity=5, seed=42)
+    result = await converter.convert_async(prompt=prompt)
+    assert result.output_text != prompt
+    # Stripping the combining marks must return the prompt: nothing but marks may be added.
+    stripped = "".join(ch for ch in result.output_text if not "\u0300" <= ch <= "\u036f")
+    assert stripped == prompt
+
+
+@pytest.mark.asyncio
+async def test_zalgo_reproducible_seed():
+    prompt = "seed test"
+    converter1 = ZalgoConverter(intensity=5, seed=123)
+    converter2 = ZalgoConverter(intensity=5, seed=123)
+    result1 = await converter1.convert_async(prompt=prompt)
+    result2 = await converter2.convert_async(prompt=prompt)
+    assert result1.output_text == result2.output_text
+
+
+@pytest.mark.asyncio
+async def test_zalgo_zero_intensity_returns_original():
+    prompt = "no chaos please"
+    converter = ZalgoConverter(intensity=0)
+    result = await converter.convert_async(prompt=prompt)
+    assert result.output_text == prompt
+
+
+@pytest.mark.asyncio
+async def test_zalgo_intensity_caps_at_max(caplog):
+    prompt = "much zalgo!"
+    converter = ZalgoConverter(intensity=1000, seed=1)
+    result = await converter.convert_async(prompt=prompt)
+    # Should still complete successfully without crashing and adjust to max intensity
+    # check if it warns
+    assert any(
+        record.levelname == "WARNING" and "ZalgoConverter supports intensity" in record.message
+        for record in caplog.records
+    )
+    assert isinstance(result.output_text, str)
+    assert len(result.output_text) > len(prompt)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("coercible_intensity", [5.5, "7"])
+async def test_zalgo_coercible_intensity(coercible_intensity):
+    prompt = "test string"
+    converter = ZalgoConverter(intensity=coercible_intensity, seed=1)
+    result = await converter.convert_async(prompt=prompt)
+    assert isinstance(result.output_text, str)
+    assert len(result.output_text) > len(prompt)
+
+
+@pytest.mark.asyncio
+async def test_zalgo_negative_intensity(caplog):
+    prompt = "test string"
+    converter = ZalgoConverter(intensity=-300, seed=1)
+    result = await converter.convert_async(prompt=prompt)
+    assert isinstance(result.output_text, str)
+    assert len(result.output_text) == len(prompt)
+    assert any(
+        record.levelname == "WARNING" and "ZalgoConverter supports intensity" in record.message
+        for record in caplog.records
+    )
+
+
+@pytest.mark.asyncio
+async def test_zalgo_unsupported_input_type_raises():
+    with pytest.raises(ValueError):
+        await ZalgoConverter(seed=1).convert_async(prompt="x", input_type="image_path")
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("bad_intensity", ["this isn't an int", None])
+async def test_zalgo_invalid_intensity(bad_intensity):
+    with pytest.raises(ValueError):
+        ZalgoConverter(intensity=bad_intensity, seed=1)