diff --git a/doc/api.rst b/doc/api.rst
index cb3cd5df4..5a10c6498 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -321,6 +321,7 @@ API Reference
     SearchReplaceConverter
     StringJoinConverter
     SuffixAppendConverter
+    SuperscriptConverter
     TenseConverter
     TextToHexConverter
     ToneConverter
diff --git a/pyrit/prompt_converter/__init__.py b/pyrit/prompt_converter/__init__.py
index ebd884177..cbe1a389b 100644
--- a/pyrit/prompt_converter/__init__.py
+++ b/pyrit/prompt_converter/__init__.py
@@ -52,6 +52,7 @@
 from pyrit.prompt_converter.search_replace_converter import SearchReplaceConverter
 from pyrit.prompt_converter.string_join_converter import StringJoinConverter
 from pyrit.prompt_converter.suffix_append_converter import SuffixAppendConverter
+from pyrit.prompt_converter.superscript_converter import SuperscriptConverter
 from pyrit.prompt_converter.tense_converter import TenseConverter
 from pyrit.prompt_converter.text_to_hex_converter import TextToHexConverter
 from pyrit.prompt_converter.tone_converter import ToneConverter
@@ -115,6 +116,7 @@
     "SneakyBitsSmugglerConverter",
     "StringJoinConverter",
     "SuffixAppendConverter",
+    "SuperscriptConverter",
     "TextToHexConverter",
     "TenseConverter",
     "ToneConverter",
diff --git a/pyrit/prompt_converter/superscript_converter.py b/pyrit/prompt_converter/superscript_converter.py
new file mode 100644
index 000000000..7d7652026
--- /dev/null
+++ b/pyrit/prompt_converter/superscript_converter.py
@@ -0,0 +1,98 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from pyrit.prompt_converter.word_level_converter import WordLevelConverter
+
+
+class SuperscriptConverter(WordLevelConverter):
+    """
+    Converts text to its superscript form.
+
+    Digits, most ASCII letters and the symbols ``+ - = ( )`` are looked up in
+    ``_superscript_map`` and replaced one character at a time.
+
+    Note:
+        Characters with no entry in the map (``q``, the capitals ``C``, ``F``, ``Q``,
+        ``S``, ``X``, ``Y``, ``Z``, and any other symbol) are left unchanged.
+    """
+
+    # Lookup table: plain character -> superscript counterpart.
+    _superscript_map = {
+        # Digits
+        "0": "\u2070",
+        "1": "\u00b9",
+        "2": "\u00b2",
+        "3": "\u00b3",
+        "4": "\u2074",
+        "5": "\u2075",
+        "6": "\u2076",
+        "7": "\u2077",
+        "8": "\u2078",
+        "9": "\u2079",
+        # Lowercase letters (no entry for "q")
+        "a": "\u1d43",
+        "b": "\u1d47",
+        "c": "\u1d9c",
+        "d": "\u1d48",
+        "e": "\u1d49",
+        "f": "\u1da0",
+        "g": "\u1d4d",
+        "h": "\u02b0",
+        "i": "\u2071",
+        "j": "\u02b2",
+        "k": "\u1d4f",
+        "l": "\u02e1",
+        "m": "\u1d50",
+        "n": "\u207f",
+        "o": "\u1d52",
+        "p": "\u1d56",
+        "r": "\u02b3",
+        "s": "\u02e2",
+        "t": "\u1d57",
+        "u": "\u1d58",
+        "v": "\u1d5b",
+        "w": "\u02b7",
+        "x": "\u02e3",
+        "y": "\u02b8",
+        "z": "\u1dbb",
+        # Uppercase letters (no entries for C, F, Q, S, X, Y, Z)
+        "A": "\u1d2c",
+        "B": "\u1d2d",
+        "D": "\u1d30",
+        "E": "\u1d31",
+        "G": "\u1d33",
+        "H": "\u1d34",
+        "I": "\u1d35",
+        "J": "\u1d36",
+        "K": "\u1d37",
+        "L": "\u1d38",
+        "M": "\u1d39",
+        "N": "\u1d3a",
+        "O": "\u1d3c",
+        "P": "\u1d3e",
+        "R": "\u1d3f",
+        "T": "\u1d40",
+        "U": "\u1d41",
+        "V": "\u2c7d",
+        "W": "\u1d42",
+        # Operators and parentheses
+        "+": "\u207a",
+        "-": "\u207b",
+        "=": "\u207c",
+        "(": "\u207d",
+        ")": "\u207e",
+    }
+
+    async def convert_word_async(self, word: str) -> str:
+        """
+        Convert a single word to its superscript form.
+
+        Args:
+            word (str): The text to convert.
+
+        Returns:
+            str: ``word`` with each mapped character replaced; unmapped characters are kept.
+        """
+        table = self._superscript_map
+        # dict.get with the character as its own default leaves unmapped characters intact.
+        return "".join(table.get(symbol, symbol) for symbol in word)
diff --git a/tests/unit/converter/test_superscript_converter.py b/tests/unit/converter/test_superscript_converter.py
new file mode 100644
index 000000000..fae5db11f
--- /dev/null
+++ b/tests/unit/converter/test_superscript_converter.py
@@ -0,0 +1,29 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import pytest
+
+from pyrit.prompt_converter import ConverterResult, SuperscriptConverter
+
+
+async def _check_conversion(converter, prompts, expected_outputs):
+    """Convert each prompt as text and compare the output with its expected string."""
+    for text, wanted in zip(prompts, expected_outputs):
+        outcome = await converter.convert_async(prompt=text, input_type="text")
+        assert isinstance(outcome, ConverterResult)
+        assert outcome.output_text == wanted
+
+
+@pytest.mark.asyncio
+async def test_superscript_converter():
+    """Mapped characters become superscripts; unmapped ones (e.g. ``qCFQSXYZ``) are kept."""
+    prompts = ["Let's test this converter!", "Unsupported characters stay the same: qCFQSXYZ"]
+    expected = [
+        "\u1d38\u1d49\u1d57'\u02e2 \u1d57\u1d49\u02e2\u1d57 \u1d57\u02b0\u2071\u02e2 "
+        "\u1d9c\u1d52\u207f\u1d5b\u1d49\u02b3\u1d57\u1d49\u02b3!",
+        "\u1d41\u207f\u02e2\u1d58\u1d56\u1d56\u1d52\u02b3\u1d57\u1d49\u1d48 "
+        "\u1d9c\u02b0\u1d43\u02b3\u1d43\u1d9c\u1d57\u1d49\u02b3\u02e2 "
+        "\u02e2\u1d57\u1d43\u02b8 \u1d57\u02b0\u1d49 \u02e2\u1d43\u1d50\u1d49: qCFQSXYZ",
+    ]
+    # Drive the shared helper with a converter built from default arguments.
+    await _check_conversion(SuperscriptConverter(), prompts, expected)