|
| 1 | +"""Tests for torch transforms.""" |
| 2 | + |
| 3 | +import pytest |
| 4 | +import torch |
| 5 | + |
| 6 | +from pyaptamer.trafos.torch import ( |
| 7 | + DNAtoRNA, |
| 8 | + GreedyEncode, |
| 9 | + RandomMask, |
| 10 | + Reverse, |
| 11 | +) |
| 12 | + |
| 13 | + |
class TestReverse:
    """Checks for the ``Reverse`` string transform."""

    def test_reverse(self):
        """Known sequences, including the empty string, come back reversed."""
        reverse = Reverse()
        expectations = (
            ("ACGT", "TGCA"),
            ("AAA", "AAA"),
            ("", ""),
        )
        for sequence, expected in expectations:
            assert reverse(sequence) == expected

    def test_repr(self):
        """The repr of an instance contains the class name."""
        text = repr(Reverse())
        assert "Reverse" in text
| 23 | + |
| 24 | + |
class TestDNAtoRNA:
    """Checks for the ``DNAtoRNA`` string transform."""

    def test_convert(self):
        """Every ``T`` becomes ``U``; sequences without ``T`` are unchanged."""
        to_rna = DNAtoRNA()
        expectations = (
            ("ACGT", "ACGU"),
            ("TTT", "UUU"),
            ("ACG", "ACG"),
        )
        for dna, rna in expectations:
            assert to_rna(dna) == rna

    def test_repr(self):
        """The repr of an instance contains the class name."""
        text = repr(DNAtoRNA())
        assert "DNAtoRNA" in text
| 34 | + |
| 35 | + |
class TestGreedyEncode:
    """Checks for the ``GreedyEncode`` transform (string -> index tensor)."""

    @pytest.fixture
    def vocab(self):
        """Token-to-index mapping with one-letter and two-letter tokens."""
        return {
            "A": 1,
            "C": 2,
            "G": 3,
            "T": 4,
            "AC": 5,
            "GT": 6,
        }

    def test_encode_simple(self, vocab):
        """``ACGT`` starts with the two-letter tokens ``AC`` (5) and ``GT`` (6)."""
        encoded = GreedyEncode(vocab, max_len=5)("ACGT")
        assert encoded.shape == (5,)
        first, second = encoded[0].item(), encoded[1].item()
        assert first == 5
        assert second == 6

    def test_padding(self, vocab):
        """A short input is extended to ``max_len``; the slot after it is 0."""
        encoded = GreedyEncode(vocab, max_len=10)("A")
        assert encoded.shape == (10,)
        head, after_head = encoded[0].item(), encoded[1].item()
        assert head == 1
        assert after_head == 0

    def test_truncation(self, vocab):
        """An input longer than ``max_len`` still yields ``max_len`` entries."""
        encoder = GreedyEncode(vocab, max_len=2)
        encoded = encoder("ACGTACGT")
        assert encoded.shape == (2,)

    def test_unknown_char(self, vocab):
        """An input made of characters absent from the vocab starts with 0."""
        encoder = GreedyEncode(vocab, max_len=5)
        leading = encoder("XYZ")[0]
        assert leading.item() == 0

    def test_repr(self, vocab):
        """The repr of an instance contains the class name."""
        text = repr(GreedyEncode(vocab, max_len=10))
        assert "GreedyEncode" in text
| 68 | + |
| 69 | + |
class TestRandomMask:
    """Checks for the ``RandomMask`` tensor transform."""

    def test_shape(self):
        """The output has the same shape as the input."""
        t = RandomMask(mask_idx=99, mask_rate=0.5)
        x = torch.tensor([1, 2, 3, 4, 0, 0])
        assert t(x).shape == x.shape

    def test_preserves_padding(self):
        """Zero entries stay zero even with ``mask_rate=1.0``."""
        t = RandomMask(mask_idx=99, mask_rate=1.0)
        x = torch.tensor([1, 2, 0, 0])
        result = t(x)
        assert result[2].item() == 0
        assert result[3].item() == 0

    def test_applies_mask(self):
        """With a fixed seed, at least one entry is replaced by ``mask_idx``."""
        # Seed inside a forked RNG scope: the draw stays reproducible, but the
        # CPU generator state is restored on exit, so the fixed seed does not
        # leak into whichever tests run afterwards. ``devices=[]`` skips CUDA
        # generators; the input here is a CPU tensor.
        with torch.random.fork_rng(devices=[]):
            torch.manual_seed(42)
            t = RandomMask(mask_idx=99, mask_rate=0.5)
            x = torch.tensor([1, 2, 3, 4])
            result = t(x)
        assert (result == 99).any()

    def test_repr(self):
        """The repr of an instance contains the class name."""
        t = RandomMask(mask_idx=99)
        assert "RandomMask" in repr(t)
| 92 | + |
| 93 | + |
class TestChaining:
    """Checks that transforms can be applied one after another."""

    def test_str_transforms(self):
        """``DNAtoRNA`` followed by ``Reverse`` turns ``ACGT`` into ``UGCA``."""
        to_rna = DNAtoRNA()
        reverse = Reverse()
        rna = to_rna("ACGT")
        assert reverse(rna) == "UGCA"

    def test_tensor_transforms(self):
        """``GreedyEncode`` output can be fed to ``RandomMask``; shape is kept."""
        rna_vocab = {"A": 1, "C": 2, "G": 3, "U": 4}
        encoder = GreedyEncode(rna_vocab, max_len=5)
        masker = RandomMask(mask_idx=99, mask_rate=0.5)
        encoded = encoder("ACGU")
        masked = masker(encoded)
        assert masked.shape == (5,)
0 commit comments