Skip to content

Commit 72b06e7

Browse files
author
David Warshaw
committed
Merge pull request #4 from davidwarshaw/dev
Metrics complete.
2 parents 30ef91d + c447da1 commit 72b06e7

File tree

8 files changed

+264
-56
lines changed

8 files changed

+264
-56
lines changed

README.md

+52-8
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ Each parent node, of at least one child, will generate a decision tree classific
5757
'labels': ['white'],
5858
'stage': 'light'}]
5959
```
60-
The hmc.DecisionTreeHierarchicalClassifier is idiomatic to the sklearn tree.DecisionTreeClassifier. Fit, predict and score the same way. Traditional multi-classification accuracy is comparable.
60+
The hmc.DecisionTreeHierarchicalClassifier follows the same conventions as the sklearn tree.DecisionTreeClassifier: fit, predict and score work the same way. Traditional multi-class average accuracy is comparable.
6161
```python
6262
from sklearn import tree
6363
dt = tree.DecisionTreeClassifier()
@@ -72,18 +72,62 @@ dth_accuracy = dth.score(X_test, y_test)
7272
```
7373
```python
7474
>>> dt_accuracy
75-
0.46561886051080548
75+
0.4400785854616896
7676
>>> dth_accuracy
77-
0.46758349705304519
77+
0.46561886051080548
7878
```
79-
Hierarchically adjusted classification accuracy scoring is available in addition to traditional accuracy. This metric averages accuracy at each classification stage, penalizing the least harshly cases of the mis-classification of sibling nodes, and most harshly cases where true and predicted classes share no ancestors in the hierarchy.
79+
Additional hierarchical multi-classification specific metrics [2] are provided.
8080
```python
81-
dth_accuracy_adjusted = dth.score_adjusted(X_test, y_test)
81+
import hmc.metrics as metrics
82+
83+
>>> metrics.accuracy_score(ch, dth_predicted, y_test)
84+
0.46561886051080548
85+
>>> metrics.precision_score_ancestors(ch, dth_predicted, y_test)
86+
0.8108614232209738
87+
>>> metrics.recall_score_ancestors(ch, dth_predicted, y_test)
88+
0.7988929889298892
89+
>>> metrics.f1_score_ancestors(ch, dth_predicted, y_test)
90+
0.8048327137546468
91+
>>> metrics.precision_score_descendants(ch, dth_predicted, y_test)
92+
0.6160337552742616
93+
>>> metrics.recall_score_descendants(ch, dth_predicted, y_test)
94+
0.6576576576576577
95+
>>> metrics.f1_score_descendants(ch, dth_predicted, y_test)
96+
0.636165577342048
8297
```
98+
Ancestor and descendant precision and recall scores are calculated from the set of ancestor (or descendant) classes of the predicted class and of the true class: the number of classes shared by both sets, divided by the size of the predicted set for precision, or by the size of the true set for recall [3].
8399
```python
84-
>>> dth_accuracy_adjusted
85-
0.66115923150295042
86-
```
100+
true = ['dark', 'white', 'gray']
101+
102+
pred_sibling = ['dark', 'white', 'black']
87103

104+
>>> metrics.accuracy_score(ch, pred_sibling, true)
105+
0.66666666666666663
106+
>>> metrics.precision_score_ancestors(ch, pred_sibling, true)
107+
0.8
108+
>>> metrics.precision_score_descendants(ch, pred_sibling, true)
109+
0.6666666666666666
110+
111+
pred_narrower = ['dark', 'white', 'ash']
112+
113+
>>> metrics.accuracy_score(ch, pred_narrower, true)
114+
0.66666666666666663
115+
>>> metrics.precision_score_ancestors(ch, pred_narrower, true)
116+
1.0
117+
>>> metrics.precision_score_descendants(ch, pred_narrower, true)
118+
0.7777777777777778
119+
120+
pred_broader = ['dark', 'white', 'dark']
121+
122+
>>> metrics.accuracy_score(ch, pred_broader, true)
123+
0.66666666666666663
124+
>>> metrics.precision_score_ancestors(ch, pred_broader, true)
125+
0.8
126+
>>> metrics.precision_score_descendants(ch, pred_broader, true)
127+
1.0
128+
```
88129

89130
1. Vens, C., Struyf, J., Schietgat, L., Džeroski, S., & Blockeel, H. (2008). Decision trees for hierarchical multi-label classification. Machine Learning, 73(2), 185-214.
131+
2. Sokolova, M., & Lapalme, G. (2009). A systematic analysis of performance measures for classification tasks. Information Processing & Management, 45(4), 427-437. doi:10.1016/j.ipm.2009.03.002
132+
3. Costa, E., Lorena, A., Carvalho, A., & Freitas, A. (2007). A review of performance evaluation measures for hierarchical classifiers. In Proceedings of the AAAI
133+
2007 workshop "Evaluation methods for machine learning" (pp. 1–6).

hmc/__init__.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,13 @@
33
from .hmc import DecisionTreeHierarchicalClassifier
44
from .datasets import load_shades_class_hierachy
55
from .datasets import load_shades_data
6+
# Every metric listed in __all__ must actually be bound in this namespace;
# a name that is listed but never imported makes ``from hmc import *``
# raise AttributeError.
from .metrics import accuracy_score
from .metrics import precision_score_ancestors, recall_score_ancestors
from .metrics import precision_score_descendants, recall_score_descendants
from .metrics import f1_score_ancestors, f1_score_descendants

__all__ = ["ClassHierarchy",
           "DecisionTreeHierarchicalClassifier",
           "load_shades_class_hierachy",
           "load_shades_data",
           "accuracy_score",
           "precision_score_ancestors", "recall_score_ancestors",
           "precision_score_descendants", "recall_score_descendants",
           "f1_score_ancestors", "f1_score_descendants"]

hmc/hmc.py

+23-34
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import numpy as np
1111
import pandas as pd
1212

13+
import metrics
14+
1315
__all__ = ["ClassHierarchy", "DecisionTreeHierarchicalClassifier"]
1416

1517
# =============================================================================
@@ -40,6 +42,25 @@ def _get_children(self, parent):
4042
# Return a list of children nodes in alpha order
4143
return sorted([child for child, childs_parent in self.nodes.iteritems() if childs_parent == parent])
4244

45+
def _get_ancestors(self, child):
46+
# Return a list of the ancestors of this node
47+
# Not including root, not including the child
48+
ancestors = []
49+
while True:
50+
child = self._get_parent(child)
51+
if child == self.root:
52+
break
53+
ancestors.append(child)
54+
return ancestors
55+
56+
def _get_descendants(self, parent):
57+
# Return a list of the descendants of this node
58+
# Not including the parent
59+
descendants = []
60+
self._depth_first(parent, descendants)
61+
descendants.remove(parent)
62+
return descendants
63+
4364
def _is_descendant(self, parent, child):
4465
while child != self.class_hierarchy.root and child != parent:
4566
child = self.class_hierarchy._get_parent(child)
@@ -219,37 +240,5 @@ def score(self, X, y):
219240
"""
220241
# Check that the trees have been fit
221242
self._check_fit()
222-
classes = pd.DataFrame(self.predict(X), columns=['y_hat'], index=y.index)
223-
classes['y'] = pd.DataFrame(y)
224-
classes['correct'] = classes.apply(lambda row: 1 if row['y_hat'] == row['y'] else 0, axis=1)
225-
return classes[['correct']].mean()[0]
226-
227-
def _score_stages(self, X, y):
228-
y_hat = self._predict_stages(X)
229-
y = pd.DataFrame(y)
230-
y_classes = pd.DataFrame(index=y.index)
231-
232-
def assign_ancestor(classes, descendent):
233-
while descendent not in classes and descendent != self.class_hierarchy.root:
234-
descendent = self.class_hierarchy._get_parent(descendent)
235-
if descendent == self.class_hierarchy.root and self.class_hierarchy.root not in classes:
236-
descendent = ""
237-
return descendent
238-
239-
accuracies = []
240-
for stage in self.stages:
241-
y_hat[stage['stage'] + "_true"] = y.apply(lambda row: assign_ancestor(stage['classes'], row[0]), axis=1)
242-
y_hat[stage['stage'] + "_correct"] = y_hat.apply(lambda row: 1 if row[stage['stage'] + "_true"] == row[stage['stage']] else 0, axis=1)
243-
y_hat[stage['stage'] + "_included"] = y_hat.apply(lambda row: 1 if len(row[stage['stage'] + "_true"]) > 0 else 0, axis=1)
244-
accuracy = y_hat[[stage['stage'] + "_correct"]].sum()[0] / y_hat[[stage['stage'] + "_included"]].sum()[0]
245-
accuracies.append(accuracy)
246-
return accuracies
247-
248-
def score_adjusted(self, X, y):
249-
"""
250-
Returns the hierachy adjusted mean accuracy on the given test data (X, y).
251-
"""
252-
# Check that the trees have been fit
253-
self._check_fit()
254-
accuracies = self._score_stages(X, y)
255-
return (1 / len(self.stages)) * sum(accuracies)
243+
y_pred = pd.DataFrame(self.predict(X), columns=['y_hat'], index=y.index)
244+
return metrics.accuracy_score(self.class_hierarchy, y, y_pred)

hmc/metrics.py

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""
2+
Metrics for evaluating hierarchical multi-classification performance.
3+
"""
4+
5+
from __future__ import print_function
6+
from __future__ import division
7+
8+
from sklearn import tree
9+
from sklearn import metrics as skmetrics
10+
from sklearn.utils import check_consistent_length
11+
from sklearn.utils import column_or_1d
12+
from sklearn.utils.multiclass import type_of_target
13+
14+
from itertools import chain
15+
16+
import numpy as np
17+
import pandas as pd
18+
19+
def _check_targets_hmc(y_true, y_pred):
    """Validate a pair of label vectors and return them as 1-d arrays.

    Both inputs must have the same length and must be typed by
    ``type_of_target`` as "multiclass" or "binary".  A vector that happens
    to contain only two distinct labels is reported as "binary"; it is
    accepted because it is still a valid (degenerate) multiclass target.

    Raises
    ------
    ValueError
        If the lengths differ or either target has an unsupported type.
    """
    check_consistent_length(y_true, y_pred)
    target_types = set([type_of_target(y_true), type_of_target(y_pred)])
    if not target_types <= set(["binary", "multiclass"]):
        raise ValueError("{0} is not supported".format(target_types))
    return column_or_1d(y_true), column_or_1d(y_pred)
29+
30+
## General Scores
31+
# Average accuracy
32+
def accuracy_score(class_hierarchy, y_true, y_pred):
    """Return sklearn's accuracy score for the validated targets.

    ``class_hierarchy`` is not consulted by this function; the targets are
    validated with ``_check_targets_hmc`` and then handed to
    ``sklearn.metrics.accuracy_score``.
    """
    checked_true, checked_pred = _check_targets_hmc(y_true, y_pred)
    score = skmetrics.accuracy_score(checked_true, checked_pred)
    return score
35+
36+
## Hierarchy Precision / Recall
37+
def _aggregate_class_sets(set_function, y_true, y_pred):
38+
intersection_sum = 0
39+
true_sum = 0
40+
predicted_sum = 0
41+
for true, pred in zip(y_true.tolist(), y_pred.tolist()):
42+
true_set = set([true] + set_function(true))
43+
pred_set = set([pred] + set_function(pred))
44+
intersection_sum += len(true_set.intersection(pred_set))
45+
true_sum += len(true_set)
46+
predicted_sum += len(pred_set)
47+
return (true_sum, predicted_sum, intersection_sum)
48+
49+
# Ancestors Scores (Super Class)
50+
# Precision
51+
def precision_score_ancestors(class_hierarchy, y_true, y_pred):
    """Return ancestor-based hierarchical precision.

    Class sets are built by ``_aggregate_class_sets`` using
    ``class_hierarchy._get_ancestors``; the result is the summed size of
    the true/predicted intersections divided by the summed size of the
    predicted sets.
    """
    checked_true, checked_pred = _check_targets_hmc(y_true, y_pred)
    _, n_predicted, n_shared = _aggregate_class_sets(
        class_hierarchy._get_ancestors, checked_true, checked_pred)
    return n_shared / n_predicted
55+
56+
# Recall
57+
def recall_score_ancestors(class_hierarchy, y_true, y_pred):
    """Return ancestor-based hierarchical recall.

    Class sets are built by ``_aggregate_class_sets`` using
    ``class_hierarchy._get_ancestors``; the result is the summed size of
    the true/predicted intersections divided by the summed size of the
    true sets.
    """
    checked_true, checked_pred = _check_targets_hmc(y_true, y_pred)
    n_true, _, n_shared = _aggregate_class_sets(
        class_hierarchy._get_ancestors, checked_true, checked_pred)
    return n_shared / n_true
61+
62+
# Descendants Scores (Sub Class)
63+
# Precision
64+
def precision_score_descendants(class_hierarchy, y_true, y_pred):
    """Return descendant-based hierarchical precision.

    Class sets are built by ``_aggregate_class_sets`` using
    ``class_hierarchy._get_descendants``; the result is the summed size of
    the true/predicted intersections divided by the summed size of the
    predicted sets.
    """
    checked_true, checked_pred = _check_targets_hmc(y_true, y_pred)
    _, n_predicted, n_shared = _aggregate_class_sets(
        class_hierarchy._get_descendants, checked_true, checked_pred)
    return n_shared / n_predicted
68+
69+
# Recall
70+
def recall_score_descendants(class_hierarchy, y_true, y_pred):
    """Return descendant-based hierarchical recall.

    Class sets are built by ``_aggregate_class_sets`` using
    ``class_hierarchy._get_descendants``; the result is the summed size of
    the true/predicted intersections divided by the summed size of the
    true sets.
    """
    checked_true, checked_pred = _check_targets_hmc(y_true, y_pred)
    n_true, _, n_shared = _aggregate_class_sets(
        class_hierarchy._get_descendants, checked_true, checked_pred)
    return n_shared / n_true
74+
75+
# Hierarchy Fscore
76+
def _fbeta_score_class_sets(set_function, y_true, y_pred, beta=1):
    """Return the F-beta score computed from hierarchical class sets.

    Precision and recall are derived from the sums returned by
    ``_aggregate_class_sets`` and combined as
    ``(beta^2 + 1) * P * R / (beta^2 * P + R)``.

    Parameters
    ----------
    set_function : callable
        Maps a class label to its related classes (ancestors/descendants).
    y_true, y_pred : array-like
        True and predicted labels.
    beta : number, default 1
        Weight of recall relative to precision.

    Returns
    -------
    float
        The F-beta score; 0.0 when the true and predicted class sets never
        overlap, since precision and recall are then both zero and the
        formula would otherwise divide by zero.
    """
    y_true, y_pred = _check_targets_hmc(y_true, y_pred)
    true_sum, predicted_sum, intersection_sum = _aggregate_class_sets(
        set_function, y_true, y_pred)
    precision = intersection_sum / predicted_sum
    recall = intersection_sum / true_sum
    beta_squared = beta ** 2
    denominator = (beta_squared * precision) + recall
    if denominator == 0:
        return 0.0
    return ((beta_squared + 1) * precision * recall) / denominator
82+
83+
def f1_score_ancestors(class_hierarchy, y_true, y_pred):
    """Return the ancestor-based F1 score.

    Validates the targets, then calls ``_fbeta_score_class_sets`` with
    ``class_hierarchy._get_ancestors`` and its default ``beta``.
    """
    checked_true, checked_pred = _check_targets_hmc(y_true, y_pred)
    ancestors_of = class_hierarchy._get_ancestors
    return _fbeta_score_class_sets(ancestors_of, checked_true, checked_pred)
86+
87+
def f1_score_descendants(class_hierarchy, y_true, y_pred):
    """Return the descendant-based F1 score.

    Validates the targets, then calls ``_fbeta_score_class_sets`` with
    ``class_hierarchy._get_descendants`` and its default ``beta``.
    """
    checked_true, checked_pred = _check_targets_hmc(y_true, y_pred)
    descendants_of = class_hierarchy._get_descendants
    return _fbeta_score_class_sets(descendants_of, checked_true, checked_pred)
90+
91+
# # Classification Report
92+
# def classification_report(class_hierarchy, y_true, y_pred):

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@
77
description='Decision tree based hierachical multi-classifier',
88
author='David Warshaw',
99
author_email='[email protected]',
10-
py_modules=['hmc', 'datasets'],
10+
py_modules=['hmc', 'datasets', 'metrics'],
1111
requires=['sklearn', 'numpy', 'pandas'])

tests/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .test_hmc import TestClassHierarchy
22
from .test_hmc import TestDecisionTreeHierarchicalClassifier
3+
from .test_metrics import TestMetrics

tests/test_hmc.py

+10-12
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@ def test_get_children(self):
2929
ch = hmc.load_shades_class_hierachy()
3030
self.assertEqual(ch._get_children('dark'), ['black', 'gray'])
3131

32+
def test_get_ancestors(self):
    """Ancestors come back nearest-first, and the root has none."""
    hierarchy = hmc.load_shades_class_hierachy()
    ash_ancestors = hierarchy._get_ancestors('ash')
    self.assertEqual(ash_ancestors, ['gray', 'dark'])
    root_ancestors = hierarchy._get_ancestors('colors')
    self.assertEqual(len(root_ancestors), 0)
36+
37+
def test_get_descendants(self):
    """An inner node lists all nodes below it; a leaf has none."""
    hierarchy = hmc.load_shades_class_hierachy()
    dark_descendants = hierarchy._get_descendants('dark')
    self.assertEqual(dark_descendants, ['black', 'gray', 'ash', 'slate'])
    leaf_descendants = hierarchy._get_descendants('slate')
    self.assertEqual(len(leaf_descendants), 0)
41+
3242
def test_add_node(self):
3343
ch = hmc.load_shades_class_hierachy()
3444
old_number = len(ch.nodes_())
@@ -111,18 +121,6 @@ def test_score(self):
111121
# Hierarchical classification should be at least as accurate as traditional classification
112122
self.assertTrue(accuracy >= accuracy_nonh)
113123

114-
def test_score_adjusted(self):
115-
ch = hmc.load_shades_class_hierachy()
116-
X, y = hmc.load_shades_data()
117-
X_train, X_test, y_train, y_test = train_test_split(X, y,
118-
test_size = 0.50, random_state = 0)
119-
dt = hmc.DecisionTreeHierarchicalClassifier(ch)
120-
dt = dt.fit(X_train, y_train)
121-
accuracy = dt.score(X_test, y_test)
122-
accuracy_adjusted = dt.score_adjusted(X_test, y_test)
123-
# Adjusted accuracy should be at least as high as final class accuracy
124-
self.assertTrue(accuracy_adjusted >= accuracy)
125-
126124
def test_score_before_fit(self):
127125
ch = hmc.load_shades_class_hierachy()
128126
X, y = hmc.load_shades_data()

tests/test_metrics.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""
2+
Tests for the hmc metrics module.
3+
"""
4+
5+
import unittest
6+
7+
import pandas as pd
8+
9+
from sklearn import tree
10+
from sklearn.cross_validation import train_test_split
11+
from sklearn import metrics as skmetrics
12+
13+
import hmc
14+
import hmc.metrics as metrics
15+
16+
class TestMetrics(unittest.TestCase):
    """Tests for the hierarchical metrics in ``hmc.metrics``.

    ``setUp`` fits a hierarchical classifier and a plain sklearn decision
    tree on the same split of the shades data and stores the predictions
    of both on the held-out half.
    """

    def setUp(self):
        self.ch = hmc.load_shades_class_hierachy()
        self.X, self.y = hmc.load_shades_data()
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.50, random_state=0)
        self.dt = hmc.DecisionTreeHierarchicalClassifier(self.ch)
        self.dt_nonh = tree.DecisionTreeClassifier()
        self.dt = self.dt.fit(self.X_train, self.y_train)
        self.dt_nonh = self.dt_nonh.fit(self.X_train, self.y_train)
        self.y_pred = self.dt.predict(self.X_test)
        self.y_pred_nonh = self.dt_nonh.predict(self.X_test)

    ## General Scores
    # Average accuracy
    def test_accuracy_score(self):
        accuracy = metrics.accuracy_score(self.ch, self.y_test, self.y_pred)
        # Compare against the non-hierarchical tree's predictions; scoring
        # the same predictions twice would make this assertion a tautology.
        accuracy_nonh = skmetrics.accuracy_score(self.y_test, self.y_pred_nonh)
        # Hierarchical classification should be at least as accurate as
        # traditional classification
        self.assertTrue(accuracy >= accuracy_nonh)

    ## Hierarchy Precision / Recall
    # Ancestors Scores (Super Class)
    # Precision
    def test_precision_score_ancestors(self):
        precision_ancestors = metrics.precision_score_ancestors(self.ch, self.y_test, self.y_pred)
        precision_sk = skmetrics.precision_score(self.y_test, self.y_pred, average="macro")
        self.assertTrue(precision_ancestors >= precision_sk)

    # Recall
    def test_recall_score_ancestors(self):
        recall_ancestors = metrics.recall_score_ancestors(self.ch, self.y_test, self.y_pred)
        recall_sk = skmetrics.recall_score(self.y_test, self.y_pred, average="macro")
        self.assertTrue(recall_ancestors >= recall_sk)

    # Descendants Scores (Sub Class)
    # Precision
    def test_precision_score_descendants(self):
        precision_descendants = metrics.precision_score_descendants(self.ch, self.y_test, self.y_pred)
        precision_sk = skmetrics.precision_score(self.y_test, self.y_pred, average="macro")
        self.assertTrue(precision_descendants >= precision_sk)

    # Recall
    def test_recall_score_descendants(self):
        recall_descendants = metrics.recall_score_descendants(self.ch, self.y_test, self.y_pred)
        recall_sk = skmetrics.recall_score(self.y_test, self.y_pred, average="macro")
        self.assertTrue(recall_descendants >= recall_sk)

    # F1
    # Ancestors
    def test_f1_score_ancestors(self):
        f1_ancestors = metrics.f1_score_ancestors(self.ch, self.y_test, self.y_pred)
        f1_sk = skmetrics.f1_score(self.y_test, self.y_pred, average="macro")
        self.assertTrue(f1_ancestors >= f1_sk)

    # Descendants
    def test_f1_score_descendants(self):
        f1_descendants = metrics.f1_score_descendants(self.ch, self.y_test, self.y_pred)
        f1_sk = skmetrics.f1_score(self.y_test, self.y_pred, average="macro")
        self.assertTrue(f1_descendants >= f1_sk)
77+
78+
if __name__ == '__main__':
79+
unittest.main()

0 commit comments

Comments
 (0)