-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_data_utils.py
More file actions
93 lines (84 loc) · 3.33 KB
/
test_data_utils.py
File metadata and controls
93 lines (84 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import sys
sys.path.insert(0, '/data/schoiaj/repos/nli_explain')
import pandas as pd
from utils.data_utils import get_token_rationales, get_contiguous_phrases
import unittest
class TestDataUtils(unittest.TestCase):
    """Unit tests for the rationale helpers imported from ``utils.data_utils``."""

    def test_get_token_rationales(self):
        """Check the ``vote`` and ``union`` rationale columns for marked sentences.

        The fixture holds three annotator columns per sentence in which
        highlighted tokens are wrapped in ``*...*``.  The test calls
        ``get_token_rationales(df, mode)`` for ``'union'`` and ``'vote'`` and
        then reads ``Sentence{1,2}_{union,vote}`` from the same frame, i.e. it
        relies on the helper adding those columns to ``df`` in place.

        The expected tokens carry neither the ``*`` markers nor the adjacent
        punctuation (``'*punctuation.*'`` -> ``'punctuation'``).
        NOTE(review): judging from the expected values, 'union' keeps any
        token some annotator marked and 'vote' keeps tokens marked by a
        majority -- confirm against the implementation.
        """
        df = pd.DataFrame({
            'Sentence1_marked_1': [
                'test sent without highlight!',
                '*highlight* with *punctuation.*',
                '',
                '*multiple* *highlights!*',
            ],
            'Sentence1_marked_2': [
                'test sent without highlight!',
                'highlight with *punctuation.* *highlight*',
                '',
                '*multiple,* *highlights!*',
            ],
            'Sentence1_marked_3': [
                'test sent *without* highlight!',
                'highlight with *punctuation.*',
                'empty',
                'multiple *highlights!*',
            ],
            'Sentence2_marked_1': [
                'test sent without highlight!',
                '*highlight* with *punctuation.*',
                '',
                '*multiple* *highlights!*',
            ],
            'Sentence2_marked_2': [
                'test sent without highlight!',
                'highlight with *punctuation.* *highlight*',
                '',
                '*multiple* *highlights!*',
            ],
            'Sentence2_marked_3': [
                'test sent *without* highlight!',
                'highlight with *punctuation.*',
                'empty',
                'multiple *highlights!*',
            ],
        })
        answers_df = pd.DataFrame({
            'Sentence1_vote': [
                [], ['punctuation'], [], ['multiple', 'highlights'],
            ],
            'Sentence2_vote': [
                [], ['punctuation'], [], ['multiple', 'highlights'],
            ],
            'Sentence1_union': [
                ['without'],
                ['highlight', 'punctuation', 'highlight'],
                [],
                ['multiple', 'highlights'],
            ],
            'Sentence2_union': [
                ['without'],
                ['highlight', 'punctuation', 'highlight'],
                [],
                ['multiple', 'highlights'],
            ],
        })

        # Return values are ignored on purpose: the assertions below read the
        # result columns straight from ``df``.
        get_token_rationales(df, 'union')
        get_token_rationales(df, 'vote')

        # Compare cell by cell inside subTest so a failure names the column
        # and row and shows both lists, instead of a bare "False is not true"
        # from a collapsed boolean, and so one mismatch does not hide others.
        for column in answers_df.columns:
            actual_cells = df[column].tolist()
            expected_cells = answers_df[column].tolist()
            self.assertEqual(len(actual_cells), len(expected_cells))
            for row, (actual, expected) in enumerate(
                    zip(actual_cells, expected_cells)):
                with self.subTest(column=column, row=row):
                    self.assertEqual(actual, expected)

    def test_get_contiguous_phrases(self):
        """Check that selected tokens are grouped into runs adjacent in the text.

        Each case is ``(text, tokens, expected)``: ``expected`` is the list of
        token groups ``get_contiguous_phrases(text, tokens)`` must return,
        where tokens that are neighbours in ``text`` share a group
        (e.g. ``["I", "love", "food"]`` -> ``[['I', 'love'], ['food']]``).
        """
        # Keeping each input next to its expected output avoids the silent
        # truncation that zipping three parallel lists allows if they drift.
        cases = [
            (
                "I love eating food.",
                ["I", "eating"],
                [['I'], ['eating']],
            ),
            (
                "I love eating food.",
                ["I", "food"],
                [['I'], ['food']],
            ),
            (
                "I love eating food.",
                ["I", "love", "food"],
                [['I', 'love'], ['food']],
            ),
            (
                "I love eating food.",
                ["I", "love", "eating"],
                [['I', 'love', 'eating']],
            ),
            (
                "I love eating food in the evening.",
                ["I", "eating", "food", "the"],
                [['I'], ['eating', 'food'], ['the']],
            ),
            (
                "I love eating food in the evening.",
                ["I", "the", "evening"],
                [['I'], ['the', 'evening']],
            ),
        ]
        for text, tokens, expected in cases:
            with self.subTest(text=text, tokens=tokens):
                self.assertEqual(get_contiguous_phrases(text, tokens), expected)
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()