Skip to content

Commit 1cd9e8f

Browse files
sharanvamsi authored and facebook-github-bot committed
Added leetspeak function encoding to encode_text (#264)
Summary: Pull Request resolved: #264. Added a leetspeak text-encoding function to the encode_text interface and tested it comprehensively. The word-method-level test also covers the character-level case, since leetspeak works at the character level. Differential Revision: D76058699
1 parent 91ad7fe commit 1cd9e8f

File tree

7 files changed

+138
-22
lines changed

7 files changed

+138
-22
lines changed

augly/tests/text_tests/functional_unit_test.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,39 +38,39 @@ def test_apply_lambda(self) -> None:
3838
augmented_apply_lambda = txtaugs.apply_lambda(self.texts)
3939
self.assertTrue(augmented_apply_lambda[0] == self.texts[0])
4040

41-
def test_change_case(self) -> None:
42-
augmented_words = txtaugs.change_case(self.texts[0], cadence=3.0, case="upper")
43-
self.assertTrue(
44-
augmented_words[0]
45-
== "THE quick brown 'FOX' couldn't jump OVER the green, GRASSY hill.",
46-
)
47-
48-
def test_contractions(self) -> None:
49-
augmented_words = txtaugs.contractions(
50-
"I would call him but I do not know where he has gone", aug_p=0.7
51-
)
52-
self.assertTrue(
53-
augmented_words[0] == "I would call him but I don't know where he's gone"
54-
)
55-
56-
def test_encode_text_base64_sentence(self) -> None:
41+
def test_base64_sentence(self) -> None:
5742
augmented_words = txtaugs.encode_text(
5843
"Hello, world!", 1, 1, 1.0, Method.SENTENCE, Encoding.BASE64
5944
)
6045
self.assertEqual(augmented_words[0], "SGVsbG8sIHdvcmxkIQ==")
6146

62-
def test_encode_text_base64_word(self) -> None:
47+
def test_base64_word(self) -> None:
6348
augmented_words_word = txtaugs.encode_text(
6449
"Hello, world!", 1, 1, 1.0, Method.WORD, Encoding.BASE64
6550
)
6651
self.assertEqual(augmented_words_word[0], "SGVsbG8=, world!")
6752

68-
def test_encode_text_base64_char(self) -> None:
53+
def test_base64_char(self) -> None:
6954
augmented_words_char = txtaugs.encode_text(
7055
"Hello, world!", 1, 1, 1.0, Method.CHAR, Encoding.BASE64
7156
)
7257
self.assertEqual(augmented_words_char[0], "SA==ello LA== dw==orld IQ==")
7358

59+
def test_change_case(self) -> None:
60+
augmented_words = txtaugs.change_case(self.texts[0], cadence=3.0, case="upper")
61+
self.assertTrue(
62+
augmented_words[0]
63+
== "THE quick brown 'FOX' couldn't jump OVER the green, GRASSY hill.",
64+
)
65+
66+
def test_contractions(self) -> None:
67+
augmented_words = txtaugs.contractions(
68+
"I would call him but I do not know where he has gone", aug_p=0.7
69+
)
70+
self.assertTrue(
71+
augmented_words[0] == "I would call him but I don't know where he's gone"
72+
)
73+
7474
def test_get_baseline(self) -> None:
7575
augmented_baseline = txtaugs.get_baseline(self.texts)
7676
self.assertTrue(
@@ -272,6 +272,18 @@ def test_insert_zero_width_chars(self) -> None:
272272
],
273273
)
274274

275+
def test_leetspeak_sentence(self) -> None:
    """Sentence-level leetspeak encoding of "Hello, world!" yields "h3110, w0r1d!"."""
    # Same positional arguments as the other encode_text tests in this file;
    # only the method and encoder differ.
    call_args = ("Hello, world!", 1, 1, 1.0, Method.SENTENCE, Encoding.LEETSPEAK)
    encoded_texts = txtaugs.encode_text(*call_args)
    first_result = encoded_texts[0]
    self.assertEqual(first_result, "h3110, w0r1d!")
280+
281+
def test_leetspeak_word(self) -> None:
    """Word-level leetspeak encoding of "Hello, world!" yields "h3110, world!"."""
    # With aug_min = aug_max = 1 the expected output has only the first word
    # rewritten; "world!" is left untouched.
    call_args = ("Hello, world!", 1, 1, 1.0, Method.WORD, Encoding.LEETSPEAK)
    encoded_texts = txtaugs.encode_text(*call_args)
    first_result = encoded_texts[0]
    self.assertEqual(first_result, "h3110, world!")
286+
275287
def test_merge_words(self) -> None:
276288
augmented_split_words = txtaugs.merge_words(self.texts, aug_word_p=0.3, n=1)
277289
self.assertTrue(

augly/tests/text_tests/transforms_unit_test.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def test_Compose(self) -> None:
138138
are_equal_metadata(self.metadata, self.expected_metadata["compose"]),
139139
)
140140

141-
def test_EncodeText_Base64_Sentence(self) -> None:
141+
def test_Base64_Sentence(self) -> None:
142142
augmented_text = txtaugs.EncodeTextTransform(
143143
aug_min=1,
144144
aug_max=1,
@@ -158,7 +158,7 @@ def test_EncodeText_Base64_Sentence(self) -> None:
158158
are_equal_metadata(self.metadata, self.expected_metadata["encode_text"])
159159
)
160160

161-
def test_EncodeText_Base64_Word(self) -> None:
161+
def test_Base64_Word(self) -> None:
162162
self.metadata = []
163163

164164
augmented_text = txtaugs.EncodeTextTransform(
@@ -180,7 +180,7 @@ def test_EncodeText_Base64_Word(self) -> None:
180180
metadata_expected[0]["encoder"] = Encoding.BASE64
181181
self.assertTrue(are_equal_metadata(self.metadata, metadata_expected))
182182

183-
def test_EncodeText_Base64_Char(self) -> None:
183+
def test_Base64_Char(self) -> None:
184184
self.metadata = []
185185

186186
augmented_text = txtaugs.EncodeTextTransform(
@@ -291,6 +291,48 @@ def test_InsertZeroWidthChars(self) -> None:
291291
),
292292
)
293293

294+
def test_LeetSpeak_Sentence(self) -> None:
    """EncodeTextTransform with the leetspeak encoder at sentence granularity.

    Checks both the augmented text and the metadata recorded by the
    transform.
    """
    augmented_text = txtaugs.EncodeTextTransform(
        aug_min=1,
        aug_max=1,
        aug_p=1.0,
        method=Method.SENTENCE,
        encoder=Encoding.LEETSPEAK,
        n=1,
        p=1.0,
    )(
        ["Hello, world!"],
        metadata=self.metadata,
    )

    # assertEqual (rather than assertTrue(a == b)) reports both values on
    # failure and matches the word-level sibling test.
    self.assertEqual(augmented_text[0], "h3110, w0r1d!")

    # Work on a copy, as the word-level test does: writing the encoder into
    # self.expected_metadata directly would leak into any other test that
    # reads the same fixture object (NOTE(review): whether the fixture is
    # shared across tests is not visible here -- the copy is safe either way).
    metadata_expected = deepcopy(self.expected_metadata["encode_text"])
    metadata_expected[0]["encoder"] = Encoding.LEETSPEAK
    self.assertTrue(are_equal_metadata(self.metadata, metadata_expected))
313+
314+
def test_Leetspeak_Word(self) -> None:
    """EncodeTextTransform with the leetspeak encoder at word granularity.

    Checks the augmented text and the metadata recorded by the transform.
    """
    # Start from an empty metadata list so only this call's entry is compared.
    self.metadata = []

    transform_kwargs = {
        "aug_min": 1,
        "aug_max": 1,
        "aug_p": 1.0,
        "method": Method.WORD,
        "encoder": Encoding.LEETSPEAK,
        "n": 1,
        "p": 1.0,
    }
    transform = txtaugs.EncodeTextTransform(**transform_kwargs)
    augmented_text = transform(["Hello, world!"], metadata=self.metadata)
    self.assertEqual(augmented_text[0], "h3110, world!")

    # Copy the fixture before adapting it so the stored expectation is not
    # modified by this test.
    metadata_expected = deepcopy(self.expected_metadata["encode_text"])
    expected_entry = metadata_expected[0]
    expected_entry["method"] = "word"
    expected_entry["encoder"] = Encoding.LEETSPEAK
    self.assertTrue(are_equal_metadata(self.metadata, metadata_expected))
335+
294336
def test_MergeWords(self) -> None:
295337
aug_merge_words = txtaugs.MergeWords(aug_word_p=0.3)(
296338
self.texts, metadata=self.metadata

augly/text/augmenters/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from augly.text.augmenters.fun_fonts import FunFontsAugmenter
1818
from augly.text.augmenters.insert_text import InsertTextAugmenter
1919
from augly.text.augmenters.insertion import InsertionAugmenter
20+
from augly.text.augmenters.leetspeak import LeetSpeak
2021
from augly.text.augmenters.letter_replacement import LetterReplacementAugmenter
2122
from augly.text.augmenters.text_replacement import TextReplacementAugmenter
2223
from augly.text.augmenters.typo import TypoAugmenter
@@ -35,6 +36,7 @@
3536
"FunFontsAugmenter",
3637
"InsertTextAugmenter",
3738
"InsertionAugmenter",
39+
"LeetSpeak",
3840
"LetterReplacementAugmenter",
3941
"WordsAugmenter",
4042
"TextReplacementAugmenter",

augly/text/augmenters/leetspeak.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
import random
9+
10+
from augly.text.augmenters.encode_text_strategy import EncodeTextAugmentation
11+
from augly.text.augmenters.utils import Encoding
12+
from nlpaug.util import Method
13+
14+
15+
class LeetSpeak(EncodeTextAugmentation):
    """Encoding strategy that rewrites text as leetspeak.

    The input is lowercased and every character that has an entry in the
    substitution table is replaced by one of its candidates. Letters with
    more than one candidate (a, i, s, t) get a randomly chosen one, so the
    output is only reproducible when the ``random`` module is seeded or the
    text contains no such letters.
    """

    # Substitution table, built once at class-definition time instead of on
    # every ``encode`` call. Keys are lowercase letters; each value holds the
    # candidate replacements for that letter.
    _LEET_MAP = {
        "a": ("4", "@"),
        "b": ("8",),
        "e": ("3",),
        "g": ("6",),
        "i": ("1", "!"),
        "l": ("1",),
        "o": ("0",),
        "s": ("5", "$"),
        "t": ("7", "+"),
        "z": ("2",),
    }

    def __init__(
        self,
        aug_min: int,
        aug_max: int,
        aug_p: float,
        method: Method,
    ) -> None:
        """Configure the strategy and forward the settings to the base class.

        Args:
            aug_min: forwarded unchanged to ``EncodeTextAugmentation``; must
                satisfy ``0 <= aug_min <= aug_max``.
            aug_max: forwarded unchanged to ``EncodeTextAugmentation``.
            aug_p: forwarded unchanged to ``EncodeTextAugmentation``; must be
                within ``[0, 1]``.
            method: granularity selector; passed to the base class as
                ``str(method)``.

        Raises:
            AssertionError: if the range checks on ``aug_min``/``aug_max`` or
                ``aug_p`` fail.
        """
        super().__init__(
            name="LeetSpeak",
            aug_min=aug_min,
            aug_max=aug_max,
            aug_p=aug_p,
            encoder=Encoding.LEETSPEAK,
            method=str(method),
        )
        # NOTE(review): these checks are ``assert`` statements, so they are
        # skipped when Python runs with -O. Kept as-is so the exception type
        # seen by callers does not change.
        assert 0 <= aug_min <= aug_max
        assert 0 <= aug_p <= 1

    def encode(self, input_string: str) -> str:
        """Return ``input_string`` lowercased and converted to leetspeak.

        Characters without an entry in the substitution table (including
        punctuation and whitespace) are kept as they are after lowercasing.
        """
        lowered = input_string.lower()
        leet_map = self._LEET_MAP
        # random.choice is deliberately called for every character, mapped or
        # not, so the sequence of draws from the random module stays the same
        # as before the table was hoisted.
        return "".join(
            random.choice(leet_map.get(char, (char,))) for char in lowered
        )

augly/text/augmenters/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,4 @@ def get_aug_idxes(
274274

275275
class Encoding(Enum):
276276
BASE64 = "base64"
277+
LEETSPEAK = "leetspeak"

augly/text/functional.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,8 @@ def encode_text(
212212
texts = [texts]
213213
if encoder == Encoding.BASE64:
214214
encoder_strategy = a.Base64(aug_min, aug_max, aug_p, method)
215-
# pyre-ignore
215+
else:
216+
encoder_strategy = a.LeetSpeak(aug_min, aug_max, aug_p, method)
216217
encoder_context = a.EncodeText(encoder_strategy)
217218
aug_texts = encoder_context.augmenter(texts)
218219

augly/text/intensity.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def encode_text_intensity(
3838
) -> float:
3939
if encoder == Encoding.BASE64:
4040
return base64_intensity(method, aug_p, aug_max)
41+
elif encoder == Encoding.LEETSPEAK:
42+
return leetspeak_intensity(method, aug_p, aug_max)
4143
else:
4244
raise NotImplementedError(
4345
f"Intensity function for encoder {encoder} is not implemented"
@@ -76,6 +78,12 @@ def insert_zero_width_chars_intensity(
7678
return char_insertion_intensity_helper(granularity, cadence)
7779

7880

81+
def leetspeak_intensity(method: Method, aug_p: float, aug_max: int, **kwargs) -> float:
    """Return the intensity score for the leetspeak encoder.

    Sentence-level encoding always scores 100.0. Any other method delegates
    to ``replace_intensity_helper(aug_p, aug_max)``. Extra keyword arguments
    are accepted and ignored.
    """
    if method == Method.SENTENCE:
        return 100.0
    return replace_intensity_helper(aug_p, aug_max)
85+
86+
7987
def merge_words_intensity(aug_word_p: float, aug_word_max: int, **kwargs) -> float:
8088
return replace_intensity_helper(aug_word_p, aug_word_max)
8189

0 commit comments

Comments
 (0)