Skip to content

Commit 46f254c

Browse files
committed
Functional tests for the hybrid tagger
1 parent d8b7b01 commit 46f254c

3 files changed

Lines changed: 84 additions & 0 deletions

File tree

tests/functional_tests/test_tagger.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,3 +204,71 @@ def test_hybrid_tagger(device: str) -> None:
204204
assert [(0, 2)] == indicies
205205
else:
206206
assert [(token_index, token_index + 1)] == indicies
207+
208+
209+
@pytest.mark.parametrize("with_spacy_gpu", [True, False])
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("with_gpu", [True, False])
def test_hybrid_spacy_tagger(with_gpu: bool, device: str, with_spacy_gpu: bool) -> None:
    """
    Functional test of the `pymusas_hybrid_tagger` spaCy component.

    The test is skipped when CUDA is unavailable and either `with_gpu` is
    set or `device` is `"cuda"`. When the neural extra packages are not
    installed, adding the component is expected to raise `ImportError`.
    Otherwise the component is initialised with a single word rule, a MWE
    rule, a contextual rule based ranker, and a pretrained model, and the
    tags and MWE indexes it assigns to a test document are compared against
    fixed expected values.
    """
    cuda_missing = not cuda_available()
    if cuda_missing and (with_gpu or device == "cuda"):
        pytest.skip("CUDA not available")

    # GPU should work with or without spacy gpu enabled
    if cuda_available() and with_spacy_gpu:
        spacy.prefer_gpu()

    nlp = spacy.blank("en")
    config = {
        "top_n": 5,
        "tokenizer_kwargs": {"add_prefix_space": True},
        "device": device,
    }

    # Without the neural extras the factory itself cannot be created, so
    # there is nothing further to test.
    if not are_packages_installed(NEURAL_EXTRA_PACKAGES):
        with pytest.raises(ImportError):
            nlp.add_pipe("pymusas_hybrid_tagger")
        return

    # Both lexicons are pinned to the same commit of the Multilingual-USAS
    # repository.
    lexicon_base_url = ("https://raw.githubusercontent.com/UCREL/Multilingual-USAS/"
                        "7ccc8baaea36f3fd249e77671db5638c1cba6136/English/")
    en_single_lexicon_url = lexicon_base_url + "semantic_lexicon_en.tsv"
    en_mwe_lexicon_url = lexicon_base_url + "mwe-en.tsv"

    single_lexicon = lexicon_collection.LexiconCollection.from_tsv(
        en_single_lexicon_url, include_pos=True
    )
    single_lemma_lexicon = lexicon_collection.LexiconCollection.from_tsv(
        en_single_lexicon_url, include_pos=False
    )
    mwe_lexicon = lexicon_collection.MWELexiconCollection.from_tsv(en_mwe_lexicon_url)

    single_rule = single_word.SingleWordRule(
        single_lexicon, single_lemma_lexicon, pos_mapper=None
    )
    mwe_rule = mwe.MWERule(mwe_lexicon, pos_mapper=None)
    rules = [single_rule, mwe_rule]

    ranker_arguments = ContextualRuleBasedRanker.get_construction_arguments(rules)
    ranker = ContextualRuleBasedRanker(*ranker_arguments)

    tagger = nlp.add_pipe("pymusas_hybrid_tagger", config=config)
    tagger.initialize(  # type: ignore[attr-defined]
        rules=rules,
        ranker=ranker,
        pretrained_model_name_or_path="ucrelnlp/PyMUSAS-Neural-English-Small-BEM",
    )

    test_doc = spacy.tokens.Doc(
        nlp.vocab,
        words=TEST_TOKENS,
        spaces=[True] * len(TEST_TOKENS),
        lemmas=TEST_TOKENS,
        pos=TEST_POS,
    )
    output_doc = nlp(test_doc)

    expected_output = [
        ['Df/S5+c'],
        ['Df/S5+c'],
        ['Q4.2/S2mf', 'Y2', 'K5.1'],
        ['A9+', 'Z5', 'A2.2', 'S4'],
        ['S2', 'N3.2', 'Z5', 'T1.2', 'O3'],
        ['N1', 'N3.2', 'T1.2', 'T1.3', 'T3'],
    ]
    assert len(output_doc) == len(expected_output)

    for token_index, (token, expected_tags) in enumerate(zip(output_doc, expected_output)):
        assert expected_tags == token._.pymusas_tags
        # The first two tokens are expected to share one MWE span; every
        # other token is expected to have a span covering only itself.
        if token_index in (0, 1):
            expected_mwe_indexes = [(0, 2)]
        else:
            expected_mwe_indexes = [(token_index, token_index + 1)]
        assert expected_mwe_indexes == token._.pymusas_mwe_indexes

tests/unit_tests/spacy_api/taggers/test_spacy_api_hybrid.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,17 @@ def test_hybrid_tagger_token_extension_warning() -> None:
7777
remove_extension('mwe_indexes')
7878

7979

80+
def test_to_from_bytes() -> None:
    """
    Check that byte serialisation of the hybrid tagger is not implemented.

    Both `to_bytes` and `from_bytes` on the `pymusas_hybrid_tagger` pipe
    are expected to raise `NotImplementedError`.
    """
    pipeline = create_tagger()
    hybrid_tagger = cast(HybridTagger, pipeline.add_pipe('pymusas_hybrid_tagger'))

    with pytest.raises(NotImplementedError):
        hybrid_tagger.to_bytes()

    with pytest.raises(NotImplementedError):
        hybrid_tagger.from_bytes(b"")
89+
90+
8091
def test_to_from_disk(tmp_path: Path) -> None:
8192
nlp = create_tagger()
8293

tests/unit_tests/test_entry_points.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,8 @@ def test_spacy_factories_rule_based_tagger() -> None:
99
def test_spacy_factories_neural_tagger() -> None:
    """
    Check that the `pymusas_neural_tagger` factory can be added to a blank
    English spaCy pipeline without raising.
    """
    pipeline = spacy.blank("en")
    pipeline.add_pipe("pymusas_neural_tagger")
12+
13+
14+
def test_spacy_factories_hybrid_tagger() -> None:
    """
    Check that the `pymusas_hybrid_tagger` factory can be added to a blank
    English spaCy pipeline without raising.
    """
    pipeline = spacy.blank("en")
    pipeline.add_pipe("pymusas_hybrid_tagger")

0 commit comments

Comments
 (0)