Skip to content

Commit 83fbf0c

Browse files
authored
Add word-level scrambler function (#72)
## Description
Adds a function to scramble text at the word level, ensuring that only unique indices are swapped. Also removed Eric as a code owner so that he won't be tagged in PRs and updated the handler event to be an SQS Event based on our conversation with Geo earlier this week.

I moved `augmentation.py` into the `dibbs_text_to_code` directory instead of nesting it under `data_curation_and_augmentation` because it made the imports nicer, but let me know if you feel strongly about the nesting.

## Related Issues
Closes #66
Fixes #69

## Additional Notes
Also removed Eric as a code owner so that he won't be tagged in PRs and updated the handler event to be an SQS Event based on our conversation with Geo earlier this week.
1 parent c5480e3 commit 83fbf0c

File tree

5 files changed

+68
-2
lines changed

5 files changed

+68
-2
lines changed

.github/CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
# Define owners for all files in the repository
2-
* @m-goggins @bamader @ericbuckley @BradySkylight
2+
3+
* @m-goggins @bamader @BradySkylight
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import random
2+
3+
4+
def scramble_word_order(
    text: str,
    max_perms: int,
    min_perms: int = 1,
) -> str:
    """
    Scramble the order of words in the input text by moving a randomly
    chosen number of words to new positions.

    The text is split on whitespace and a move count is drawn with
    ``random.randint(min_perms, max_perms)``. The count is then clamped to
    the range ``[0, len(words) - 1]``. That many distinct indices are
    sampled and, for each one, the word currently sitting at that index is
    popped and re-inserted at a different index.

    Note that the sampled indices are positions, not words: an earlier move
    can shift words that have not been processed yet, so the same word may
    be moved more than once (or even end up back where it started).

    :param text: The input text to scramble.
    :param max_perms: The maximum number of words to move.
    :param min_perms: The minimum number of words to move.
    :return: The text with words scrambled, re-joined with single spaces.
        Text containing fewer than two words is returned unchanged.
    :raises ValueError: If ``min_perms`` is greater than ``max_perms`` and
        the text contains at least two words.
    """
    words = text.split()
    if len(words) < 2:
        return text

    # Fail with a message that names the offending arguments instead of the
    # opaque "empty range" error random.randint would raise.
    if min_perms > max_perms:
        raise ValueError(
            f"min_perms ({min_perms}) must not be greater than "
            f"max_perms ({max_perms})"
        )

    # Cap the number of moves at len(words) - 1 and never let it go below
    # zero, since random.sample rejects a negative sample size.
    num_perms = max(0, min(random.randint(min_perms, max_perms), len(words) - 1))

    # Select unique indices to scramble, processed from the end of the list
    # towards the front.
    indices_to_move = sorted(random.sample(range(len(words)), num_perms), reverse=True)

    for idx in indices_to_move:
        # Any final position except the current one, so the word always moves.
        new_pos = random.choice([i for i in range(len(words)) if i != idx])
        word = words.pop(idx)
        words.insert(new_pos, word)

    return " ".join(words)

src/dibbs_text_to_code/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from .s3_handler import get_file_content_from_s3_event
77

88

9-
def handler(event: lambda_events.EventBridgeEvent, context: lambda_context.Context):
9+
def handler(event: lambda_events.SQSEvent, context: lambda_context.Context):
1010
"""
1111
Text to Code lambda entry point
1212
"""

tests/unit/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import random
23

34
import boto3
45
import moto
@@ -26,3 +27,8 @@ def moto_setup(monkeypatch):
2627
s3.bucket_name = bucket_name
2728

2829
yield s3
30+
31+
32+
@pytest.fixture(autouse=True)
def fixed_random_seed():
    """
    Reseed the global ``random`` generator with 42.

    Declared with ``autouse=True`` so that tests do not have to request it
    explicitly.
    """
    seed = 42
    random.seed(seed)

tests/unit/test_augmentation.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import pytest
2+
3+
from dibbs_text_to_code import augmentation
4+
5+
6+
@pytest.mark.parametrize(
    ("text", "max_perms", "expected"),
    [
        # Empty input is returned as-is
        ("", 3, ""),
        # A single word cannot be reordered
        ("Blood", 3, "Blood"),
        # Several words, some containing punctuation
        (
            "SARS-CoV-2 E gene Resp Ql NAA+probe",
            5,
            "E gene Resp SARS-CoV-2 Ql NAA+probe",
        ),
        # More permutations requested than there are words
        ("B pert Spt Ql Cult", 10, "Spt pert B Ql Cult"),
    ],
)
class TestScrambleWordOrder:
    """
    Checks ``augmentation.scramble_word_order`` against fixed expected outputs.

    The multi-word expectations are exact strings, so they presumably rely on
    the global ``random`` generator being seeded before each test.
    """

    def test_scramble_word_order(self, text, max_perms, expected):
        assert augmentation.scramble_word_order(text, max_perms=max_perms) == expected

0 commit comments

Comments
 (0)