@@ -97,11 +97,11 @@ def test_composed_metrics(self):
                 size, batch_size, pred=1, label=1):
             evaluator.process(predictions, data_samples)

-        metrics = evaluator.evaluate(size=size)
+        metrics_results, averaged_results = evaluator.evaluate(size=size)

-        self.assertAlmostEqual(metrics['Fake/Toy/accuracy'], 1.0)
-        self.assertAlmostEqual(metrics['Fake/Toy/mAP'], 0.0)
-        self.assertEqual(metrics['Fake/Toy/size'], size)
+        self.assertAlmostEqual(metrics_results['Fake/Toy/accuracy'], 1.0)
+        self.assertAlmostEqual(metrics_results['Fake/Toy/mAP'], 0.0)
+        self.assertEqual(metrics_results['Fake/Toy/size'], size)

         with self.assertWarns(Warning):
             evaluator.evaluate(size=0)
@@ -124,9 +124,9 @@ def test_composed_metrics(self):
         for data_samples, predictions in generate_test_results(
                 size, batch_size, pred=1, label=1):
             evaluator.process(predictions, data_samples)
-        metrics = evaluator.evaluate(size=size)
-        self.assertIn('Fake/Toy/accuracy', metrics)
-        self.assertIn('Fake/accuracy', metrics)
+        metrics_results, averaged_results = evaluator.evaluate(size=size)
+        self.assertIn('Fake/Toy/accuracy', metrics_results)
+        self.assertIn('Fake/accuracy', metrics_results)

         metrics_results = OrderedDict({
             'dataset1/metric1/accuracy': 0.9,
@@ -135,7 +135,7 @@ def test_composed_metrics(self):
             'dataset2/metric2/f1_score': 0.75
         })

-        evaluator = MultiDatasetsEvaluator([], [])
+        evaluator = MultiDatasetsEvaluator(cfg, dataset_prefixes=['Fake'])
         averaged_results = evaluator.average_results(metrics_results)

         expected_averaged_results = {
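The change exercised by this diff is that MultiDatasetsEvaluator.evaluate() now returns a (metrics_results, averaged_results) pair instead of a single dict, and the evaluator is constructed from a config plus dataset_prefixes. The snippet below is a minimal sketch of the new calling convention using a stub class, since the real class and its import path are not shown here; the stub, its hard-coded values, and the contents of averaged_results are illustrative assumptions, only the two-value unpacking and the 'Fake/Toy/*' keys come from the diff above.

from collections import OrderedDict


class StubMultiDatasetsEvaluator:
    """Hypothetical stand-in mirroring only the return-signature change
    under test, not the real MultiDatasetsEvaluator implementation."""

    def __init__(self, metrics_cfg, dataset_prefixes):
        self.metrics_cfg = metrics_cfg
        self.dataset_prefixes = dataset_prefixes

    def evaluate(self, size):
        # evaluate() now yields two values: per-dataset metric results and
        # results averaged across the configured dataset prefixes.
        metrics_results = OrderedDict({
            'Fake/Toy/accuracy': 1.0,
            'Fake/Toy/mAP': 0.0,
            'Fake/Toy/size': size,
        })
        averaged_results = OrderedDict()  # layout depends on the averaging config
        return metrics_results, averaged_results


# Callers now unpack two values rather than indexing a single dict.
evaluator = StubMultiDatasetsEvaluator([], dataset_prefixes=['Fake'])
metrics_results, averaged_results = evaluator.evaluate(size=10)
assert metrics_results['Fake/Toy/accuracy'] == 1.0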