-
Notifications
You must be signed in to change notification settings - Fork 30
Expand file tree
/
Copy pathnamespace_db_test.py
More file actions
156 lines (136 loc) · 5.7 KB
/
namespace_db_test.py
File metadata and controls
156 lines (136 loc) · 5.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# coding=utf-8
# Copyright 2024 The Perch Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for namespace_db."""
import io
import tempfile
from absl import logging
from etils import epath
from perch_hoplite.taxonomy import namespace
from perch_hoplite.taxonomy import namespace_db
from absl.testing import absltest
from absl.testing import parameterized
class NamespaceDbTest(parameterized.TestCase):
  """Tests for the database from `namespace_db.load_db` and ClassList CSV I/O."""

  def test_load_namespace_db(self):
    # Spot-check one class list of known size, then its size after applying
    # each of the genus / family / order mappings.
    database = namespace_db.load_db()

    self.assertIn('caples', database.class_lists)
    caples = database.class_lists['caples']
    self.assertEqual(caples.namespace, 'ebird2021')
    self.assertLen(caples.classes, 79)

    # (mapping name, expected target namespace, expected number of classes).
    expectations = (
        ('ebird2021_to_genus', 'ebird2021_genera', 62),
        ('ebird2021_to_family', 'ebird2021_families', 30),
        ('ebird2021_to_order', 'ebird2021_orders', 11),
    )
    for mapping_name, want_namespace, want_size in expectations:
      mapped = caples.apply_namespace_mapping(database.mappings[mapping_name])
      self.assertEqual(mapped.namespace, want_namespace)
      self.assertLen(mapped.classes, want_size)

  def test_class_map_csv(self):
    # A ClassList must come back unchanged from a to_csv / from_csv round trip.
    want_namespace = 'ebird2021'
    want_classes = ('amecro', 'amegfi', 'amered', 'amerob')
    csv_text = namespace.ClassList(want_namespace, want_classes).to_csv()

    # Round trip through an in-memory text buffer.
    with io.StringIO(csv_text) as buffer:
      from_buffer = namespace.ClassList.from_csv(buffer)
    self.assertEqual(from_buffer.namespace, want_namespace)
    self.assertEqual(from_buffer.classes, want_classes)

    # Round trip through a real file written via epath, as newline behavior
    # may be different than working with StringIO.
    with tempfile.NamedTemporaryFile(suffix='.csv') as tmp:
      with epath.Path(tmp.name).open(mode='w') as writer:
        writer.write(csv_text)
      # Read back while the temporary file still exists.
      with open(tmp.name, 'r') as reader:
        from_file = namespace.ClassList.from_csv(reader.readlines())
    self.assertEqual(from_file.namespace, want_namespace)
    self.assertEqual(from_file.classes, want_classes)

  def test_namespace_class_list_closure(self):
    # Every class named by a class list must appear in that list's namespace;
    # the only tolerated exception is the 'unknown' label.
    database = namespace_db.load_db()

    unresolved = set()
    for list_name, class_list in database.class_lists.items():
      known = database.namespaces[class_list.namespace].classes
      absent = {label for label in class_list.classes if label not in known}
      unresolved |= absent
      if absent:
        logging.warning(
            'The classes %s in class list %s did not appear in namespace %s.',
            absent,
            list_name,
            class_list.namespace,
        )
      # No effect on the assertion below: `absent` is rebuilt for each list.
      absent.discard('unknown')

    unresolved.discard('unknown')
    self.assertEmpty(unresolved)

  def test_namespace_mapping_closure(self):
    # Both sides of every mapped pair must appear in the mapping's declared
    # source and target namespaces.
    database = namespace_db.load_db()

    unresolved = set()
    for mapping_name, mapping in database.mappings.items():
      source_namespace = database.namespaces[mapping.source_namespace]
      target_namespace = database.namespaces[mapping.target_namespace]
      pairs = tuple(mapping.mapped_pairs.items())
      absent_sources = {
          src for src, _ in pairs if src not in source_namespace.classes
      }
      absent_targets = {
          dst for _, dst in pairs if dst not in target_namespace.classes
      }
      unresolved.update(absent_sources, absent_targets)

      if absent_sources:
        logging.warning(
            'The classes %s in mapping %s did not appear in namespace %s.',
            absent_sources,
            mapping_name,
            source_namespace.name,
        )
      if absent_targets:
        logging.warning(
            'The classes %s in mapping %s did not appear in namespace %s.',
            absent_targets,
            mapping_name,
            target_namespace.name,
        )
      # Only the per-mapping set is pruned; 'unknown' still counts in
      # `unresolved`.
      absent_targets.discard('unknown')

    self.assertEmpty(unresolved)

  def test_taxonomic_mappings(self):
    # Every class in the 'ebird2021_species' namespace must have an entry in
    # the genus, family and order mappings.
    database = namespace_db.load_db()
    species = database.namespaces['ebird2021_species']
    rank_lookups = (
        database.mappings['ebird2021_to_genus'].mapped_pairs,
        database.mappings['ebird2021_to_family'].mapped_pairs,
        database.mappings['ebird2021_to_order'].mapped_pairs,
    )

    # One "missing" set per rank, filled in a single pass over the species.
    missing_by_rank = tuple(set() for _ in rank_lookups)
    for label in species.classes:
      for lookup, missing in zip(rank_lookups, missing_by_rank):
        if label not in lookup:
          missing.add(label)

    for missing in missing_by_rank:
      self.assertEmpty(missing)
# Hand control to absltest's runner when this file is executed as a script.
if __name__ == '__main__':
  absltest.main()