Skip to content

FEAT: add superscript converter #818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jun 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
e6d3f15
feat: add `SuperscriptConverter` (simple implementation)
paulinek13 Mar 19, 2025
f2b99a4
feat: add a simple test for the converter
paulinek13 Mar 19, 2025
53a1185
implement `alternate` mode
paulinek13 Mar 19, 2025
2d5ec0b
tests: extract conversion logic to a helper function
paulinek13 Mar 19, 2025
a8d905b
test 'alternate' mode
paulinek13 Mar 19, 2025
aaa04d4
refactor: move `get_n_random` to `utils.py`
paulinek13 Mar 20, 2025
9ca5938
rename `get_n_random` -> `get_random_indices` and update its logic to…
paulinek13 Mar 20, 2025
e96ae01
rename parameter `percentage` to `sample_ratio`
paulinek13 Mar 21, 2025
35499a3
feat: add 'random' mode
paulinek13 Mar 21, 2025
0878a9a
add tests for 'random' mode
paulinek13 Mar 21, 2025
126e420
fix for `get_random_indices`: when the ratio is 0, return an empty list
paulinek13 Mar 21, 2025
7d1e02b
new test case with more words and 20%
paulinek13 Mar 21, 2025
a8eafef
formatting
paulinek13 Mar 21, 2025
7e00d54
mypy: fix errors
paulinek13 Mar 23, 2025
35eea3e
fix `UnicodeDecodeError` flake8 complaining about some superscript ch…
paulinek13 Mar 23, 2025
a39daf4
Merge remote-tracking branch 'origin/main' into feat/528/superscript_…
paulinek13 Mar 23, 2025
f26bca2
Merge remote-tracking branch 'origin/main' into feat/528/superscript_…
paulinek13 Jun 10, 2025
0a2336d
Reset pyrit\common\utils.py to match origin/main
paulinek13 Jun 10, 2025
ca6dfc4
refactor: simplify SuperscriptConverter by extending WordLevelConverter
paulinek13 Jun 10, 2025
8bd294a
add SuperscriptConverter to api.rst
paulinek13 Jun 10, 2025
e64486b
update docstring
paulinek13 Jun 10, 2025
4619b11
improve docstring formatting
paulinek13 Jun 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ API Reference
SearchReplaceConverter
StringJoinConverter
SuffixAppendConverter
SuperscriptConverter
TenseConverter
TextToHexConverter
ToneConverter
Expand Down
2 changes: 2 additions & 0 deletions pyrit/prompt_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from pyrit.prompt_converter.search_replace_converter import SearchReplaceConverter
from pyrit.prompt_converter.string_join_converter import StringJoinConverter
from pyrit.prompt_converter.suffix_append_converter import SuffixAppendConverter
from pyrit.prompt_converter.superscript_converter import SuperscriptConverter
from pyrit.prompt_converter.tense_converter import TenseConverter
from pyrit.prompt_converter.text_to_hex_converter import TextToHexConverter
from pyrit.prompt_converter.tone_converter import ToneConverter
Expand Down Expand Up @@ -115,6 +116,7 @@
"SneakyBitsSmugglerConverter",
"StringJoinConverter",
"SuffixAppendConverter",
"SuperscriptConverter",
"TextToHexConverter",
"TenseConverter",
"ToneConverter",
Expand Down
84 changes: 84 additions & 0 deletions pyrit/prompt_converter/superscript_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from pyrit.prompt_converter.word_level_converter import WordLevelConverter


class SuperscriptConverter(WordLevelConverter):
    """
    Rewrites words using Unicode superscript look-alike characters.

    The lookup table covers the ASCII digits, every lowercase letter except ``q``,
    a subset of the uppercase letters, and the symbols ``+ - = ( )``.

    Note:
        Any character without an entry in the table is emitted unchanged.
    """

    # Values are written as escape sequences, so this file contains ASCII only.
    _superscript_map = {
        # Digits
        "0": "\u2070",
        "1": "\u00b9",
        "2": "\u00b2",
        "3": "\u00b3",
        "4": "\u2074",
        "5": "\u2075",
        "6": "\u2076",
        "7": "\u2077",
        "8": "\u2078",
        "9": "\u2079",
        # Lowercase letters (no entry for "q")
        "a": "\u1d43",
        "b": "\u1d47",
        "c": "\u1d9c",
        "d": "\u1d48",
        "e": "\u1d49",
        "f": "\u1da0",
        "g": "\u1d4d",
        "h": "\u02b0",
        "i": "\u2071",
        "j": "\u02b2",
        "k": "\u1d4f",
        "l": "\u02e1",
        "m": "\u1d50",
        "n": "\u207f",
        "o": "\u1d52",
        "p": "\u1d56",
        "r": "\u02b3",
        "s": "\u02e2",
        "t": "\u1d57",
        "u": "\u1d58",
        "v": "\u1d5b",
        "w": "\u02b7",
        "x": "\u02e3",
        "y": "\u02b8",
        "z": "\u1dbb",
        # Uppercase letters (no entries for C, F, Q, S, X, Y, Z)
        "A": "\u1d2c",
        "B": "\u1d2d",
        "D": "\u1d30",
        "E": "\u1d31",
        "G": "\u1d33",
        "H": "\u1d34",
        "I": "\u1d35",
        "J": "\u1d36",
        "K": "\u1d37",
        "L": "\u1d38",
        "M": "\u1d39",
        "N": "\u1d3a",
        "O": "\u1d3c",
        "P": "\u1d3e",
        "R": "\u1d3f",
        "T": "\u1d40",
        "U": "\u1d41",
        "V": "\u2c7d",
        "W": "\u1d42",
        # Operators and parentheses
        "+": "\u207a",
        "-": "\u207b",
        "=": "\u207c",
        "(": "\u207d",
        ")": "\u207e",
    }

    async def convert_word_async(self, word: str) -> str:
        """
        Return ``word`` with every mapped character swapped for its superscript form.

        Args:
            word: The text fragment to convert.

        Returns:
            A string of the same length in which characters found in
            ``_superscript_map`` are replaced and all others are kept as-is.
        """
        lookup = self._superscript_map
        # Falling back to the character itself keeps unmapped symbols untouched.
        return "".join(lookup.get(symbol, symbol) for symbol in word)
29 changes: 29 additions & 0 deletions tests/unit/converter/test_superscript_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pytest

from pyrit.prompt_converter import ConverterResult, SuperscriptConverter


async def _check_conversion(converter, prompts, expected_outputs):
    """
    Convert each prompt with ``converter`` and check it against its expected output.

    Args:
        converter: Object exposing an awaitable
            ``convert_async(prompt=..., input_type=...)`` method.
        prompts: Sequence of input texts to convert.
        expected_outputs: Sequence of expected texts, position-aligned with ``prompts``.

    Raises:
        AssertionError: If the two sequences differ in length, if a result is not a
            ``ConverterResult``, or if its ``output_text`` differs from the expectation.
    """
    # zip() stops at the shorter input, so without this guard a missing expectation
    # would leave the trailing prompts silently unchecked.
    assert len(prompts) == len(expected_outputs), (
        f"got {len(prompts)} prompts but {len(expected_outputs)} expected outputs"
    )

    for prompt, expected_output in zip(prompts, expected_outputs):
        result = await converter.convert_async(prompt=prompt, input_type="text")
        assert isinstance(result, ConverterResult)
        assert result.output_text == expected_output


@pytest.mark.asyncio
async def test_superscript_converter():
    """Default converter: mapped characters become superscript, the rest stay as-is."""
    default_converter = SuperscriptConverter()

    prompts = [
        "Let's test this converter!",
        "Unsupported characters stay the same: qCFQSXYZ",
    ]
    # Apostrophe, "!", ":", spaces and the trailing "qCFQSXYZ" have no mapping,
    # so they appear verbatim in the expected strings.
    expected_outputs = [
        "\u1d38\u1d49\u1d57'\u02e2 \u1d57\u1d49\u02e2\u1d57 \u1d57\u02b0\u2071\u02e2 "
        "\u1d9c\u1d52\u207f\u1d5b\u1d49\u02b3\u1d57\u1d49\u02b3!",
        "\u1d41\u207f\u02e2\u1d58\u1d56\u1d56\u1d52\u02b3\u1d57\u1d49\u1d48 "
        "\u1d9c\u02b0\u1d43\u02b3\u1d43\u1d9c\u1d57\u1d49\u02b3\u02e2 "
        "\u02e2\u1d57\u1d43\u02b8 \u1d57\u02b0\u1d49 \u02e2\u1d43\u1d50\u1d49: qCFQSXYZ",
    ]

    await _check_conversion(default_converter, prompts, expected_outputs)