Skip to content

Commit 24b8b6f

Browse files
committed
bug fixing
1 parent e8a33c7 commit 24b8b6f

File tree

4 files changed

+116
-6
lines changed

4 files changed

+116
-6
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ Install dependencies if needed: pip install -r requirements.txt
1414

1515
Read data and generate features:<br/>
1616
```
17-
python parse_data.py --num_samples 100 --train_corpus pathToTrainCorpus --train_labels pathToTrainLabels --feature_folder pathToOutputFeatureFolder
17+
python parse_data.py --num_samples 100 --train_corpus pathToTrainCorpus --train_labels pathToTrainLabels --feature_folder pathToOutputFeatureFolder --all_data
1818
```
1919

20+
Remove the '--all_data' flag if you want to reproduce the results on the evaluation set. If the flag is removed, 3837 examples are removed from the train set and used as a validation set.<br/><br/>
2021
Evaluate on development set:<br/>
2122
```
2223
python evaluate.py --feature_folder pathToOutputFeatureFolder

celebrity/src/evaluator.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/usr/bin/python3
2+
import argparse
3+
import json
4+
from typing import Tuple
5+
from collections import Counter
6+
from statistics import mean
7+
8+
9+
def parse_input() -> Tuple[list, list]:
    """
    read the files given as command line parameters and load the newline-delimited json strings
    :return: a tuple with [0] a list of dicts with predicted classes,
             [1] a list of dicts with the true classes.
    """

    def read_ndjson(path: str) -> list:
        # 'with' closes the handle deterministically instead of leaking it;
        # blank lines (e.g. a trailing empty line) are skipped because
        # json.loads would raise on them. JSON text is UTF-8 by specification,
        # so do not depend on the locale's default encoding.
        with open(path, encoding="utf-8") as handle:
            return [json.loads(line) for line in handle if line.strip()]

    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--predictions", default="../results/results.json")
    parser.add_argument("-t", "--truth", default="../../data/pan19-celebrity-profiling-test2-dataset-2019-05-02/labels.ndjson")
    args = parser.parse_args()
    return read_ndjson(args.predictions), read_ndjson(args.truth)
21+
22+
23+
def harmonic_mean(l: list):
    """
    calculate the harmonic mean of a list of numbers
    :param l: a list holding the (non-negative) numbers to average
    :return: len(l) divided by the sum of the reciprocals of the elements;
             0.0 if the list is empty or contains a zero
    """
    # A zero element (e.g. a mean precision of 0) drives the harmonic mean to 0.
    # Returning it directly avoids the ZeroDivisionError of 1 / 0; an empty
    # list is scored 0.0 as well instead of dividing by an empty sum.
    if len(l) == 0 or any(x == 0 for x in l):
        return 0.0
    return len(l) / sum(1 / x for x in l)
30+
31+
32+
def mc_prec_rec(mc_p: list, mc_t: list, hit_function=lambda x, y: x == y) -> Tuple[list, list]:
    """
    computes multi value recall and precision. Indices of inputs must match.
    :param hit_function: function (predicted, truth) -> bool deciding whether a pair is a true positive
    :param mc_p: list of predicted values
    :param mc_t: list of true values.
    :return: tuple: list of precision for classes, list of recall for classes,
             both ordered by first occurrence of each class in mc_t
    :raises ValueError: if mc_p and mc_t do not have the same length
    """
    # zip() would silently drop the surplus of the longer list while recall is
    # still computed over all of mc_t, so misaligned inputs must fail loudly.
    if len(mc_p) != len(mc_t):
        raise ValueError("predictions and truth must have the same length, got %d and %d"
                         % (len(mc_p), len(mc_t)))

    def safe_divide(x, y):
        return x / y if y != 0 else 0

    positive_in_truth = Counter(mc_t)
    tp_c = Counter()  # true positives per class
    pp_c = Counter()  # predicted positives per class (true + false positives)

    # single pass: hit_function is evaluated exactly once per pair
    for p, t in zip(mc_p, mc_t):
        if hit_function(p, t):
            # a hit is credited to the true class (relevant for lenient hit
            # functions where p and t may differ)
            tp_c[t] += 1
            pp_c[t] += 1
        else:
            # a miss counts as a false positive of the predicted class
            pp_c[p] += 1

    # Counter lookups of absent classes yield 0 without inserting the key
    precisions = [safe_divide(tp_c[cls], pp_c[cls]) for cls in positive_in_truth]
    recalls = [tp_c[cls] / positive_in_truth[cls] for cls in positive_in_truth]

    return precisions, recalls
57+
58+
59+
def age_window_hit(by_predicted, by_truth):
    """
    calculates the window for a given truth and checks if the prediction lies within that window
    :param by_predicted: the predicted birth year
    :param by_truth: the true birth year
    :return: true if by_predicted within m-window of by_truth
    """
    # The half-width m shrinks linearly with later birth years and would turn
    # negative after 2028, inverting the bounds so that even an exact
    # prediction is a miss. Clamp at 0 so an exact match always counts.
    m = max(-0.1 * by_truth + 202.8, 0)
    return int(by_truth - m) <= by_predicted <= int(by_truth + m)
68+
69+
70+
if __name__ == "__main__":
    # This is the evaluator for the PAN@CLEF19 Task "Celebrity Profiling"
    # It outputs 5 Metrics:
    # - cRank, the harmonic mean of the sub metrics below. This is the primary metric.
    # - F1_gender, the harmonic mean of the average multi class precision and recall for gender prediction
    # - F1_occupation, same as above for occupation
    # - F1_fame, same as above for fame
    # - F1_age, same as above, but positives are lenient: a prediction counts as a hit
    #   when age_window_hit accepts it for the true birth year
    #
    # For more information visit:
    # - https://pan.webis.de/clef19/pan19-web/celebrity-profiling.html
    predictions, truth = parse_input()

    # (label key, extra keyword arguments for mc_prec_rec); only the birth
    # year is scored with the lenient window instead of exact equality
    tasks = [
        ("gender", {}),
        ("occupation", {}),
        ("fame", {}),
        ("birthyear", {"hit_function": age_window_hit}),
    ]

    # per-class precision and recall lists for every task
    prec_rec = {}
    for key, options in tasks:
        predicted_values = [u[key] for u in predictions]
        true_values = [u[key] for u in truth]
        prec_rec[key] = mc_prec_rec(predicted_values, true_values, **options)

    # macro-averaged F1 per task: harmonic mean of mean precision and mean recall
    f1_scores = [harmonic_mean([mean(prec), mean(rec)])
                 for prec, rec in (prec_rec[key] for key, _ in tasks)]
    F1_gender, F1_occupation, F1_fame, F1_age = f1_scores

    print("cRank: ", harmonic_mean([F1_gender, F1_occupation, F1_fame, F1_age]))
    print("F1_gender: ", F1_gender)
    print("F1_occupation: ", F1_occupation)
    print("F1_fame: ", F1_fame)
    print("F1_age: ", F1_age)

celebrity/src/parse_data.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import tqdm
33
from sklearn.feature_extraction.text import TfidfVectorizer
44
import pickle
5-
from .tfidf_kingdom import *
5+
from tfidf_kingdom import *
66
import json
77
import pandas as pd
88

@@ -80,12 +80,14 @@ def parse_feeds(fname, labels_file, train_threshold, all=False):
8080
parser.add_argument('--train_corpus', type=str, default='../../data/pan19-celebrity-profiling-training-dataset-2019-01-31/feeds.ndjson', help='Path to PAN train corpus')
8181
parser.add_argument('--train_labels', type=str,default="../../data/pan19-celebrity-profiling-training-dataset-2019-01-31/labels.ndjson", help='Path to PAN train labels')
8282
parser.add_argument('--feature_folder', type=str, default="../train_data", help='Path to output feature folder')
83+
parser.add_argument('--all_data', action='store_true', help='Use all data for trainining. Set this to False if you want to conduct evaluation on the train data. If False, '
84+
'3837 instances will be removed from the train set and used as a validation set')
8385
args = parser.parse_args()
8486
data_inpt = args.train_corpus
8587
num_train = 30000
86-
labels_inpt = args.train_label
88+
labels_inpt = args.train_labels
8789
datafolder = args.feature_folder
88-
a = parse_feeds(data_inpt, labels_inpt, train_threshold=num_train, all=True)
90+
a = parse_feeds(data_inpt, labels_inpt, train_threshold=num_train, all=args.all_data)
8991

9092
train_instances, test_instances, train_labels, test_labels, vectorizer = a
9193
out_obj = {"train_features": train_instances, "test_features": test_instances}

celebrity/src/test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import tqdm
33
from sklearn.feature_extraction.text import TfidfVectorizer
44
import pickle
5-
from .tfidf_kingdom import *
5+
from tfidf_kingdom import *
66
import argparse
77
from collections import defaultdict
88
import json
@@ -29,7 +29,7 @@ def write_output(d, output_file):
2929
help='Choose output result directory')
3030

3131
argparser.add_argument('--input', dest='input', type=str,
32-
default='../../data/pan19-celebrity-profiling-test-dataset-2019-01-31/feeds.ndjson',
32+
default='../../data/pan19-celebrity-profiling-test2-dataset-2019-05-02/feeds.ndjson',
3333
help='Choose input test dataset')
3434
argparser.add_argument('--feature_folder', type=str, default="../train_data", help='Path to output feature folder')
3535
args = argparser.parse_args()

0 commit comments

Comments
 (0)