|
| 1 | +#!/usr/bin/python3 |
| 2 | +import argparse |
| 3 | +import json |
| 4 | +from typing import Tuple |
| 5 | +from collections import Counter |
| 6 | +from statistics import mean |
| 7 | + |
| 8 | + |
def parse_input() -> Tuple[list, list]:
    """
    Parse the command line and load the two newline-delimited JSON files.

    The files are named by ``-p/--predictions`` and ``-t/--truth``; each option
    falls back to its default path when omitted. Every non-blank line of a file
    is decoded with ``json.loads``.

    :return: a tuple with [0] the list of decoded prediction records,
             [1] the list of decoded truth records, both in file order.
    :raises OSError: if one of the files cannot be opened.
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON.
    """

    def read_ndjson(path):
        # The context manager closes the handle even if a line fails to decode.
        with open(path, encoding="utf-8") as handle:
            # Blank lines (e.g. a stray empty line at the end) carry no record.
            return [json.loads(line) for line in handle if line.strip()]

    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--predictions", default="../results/results.json")
    parser.add_argument("-t", "--truth", default="../../data/pan19-celebrity-profiling-test2-dataset-2019-05-02/labels.ndjson")
    args = parser.parse_args()
    return read_ndjson(args.predictions), read_ndjson(args.truth)
| 21 | + |
| 22 | + |
def harmonic_mean(l: list):
    """
    Calculate the harmonic mean of a list of numbers.

    The harmonic mean is ``len(l) / sum(1 / x for x in l)``. If any element is
    zero the result is 0.0 (the limit of the harmonic mean as a value
    approaches zero) instead of a ZeroDivisionError, so a sub-metric of 0
    yields an overall score of 0 rather than a crash. An empty list also
    yields 0.0.

    :param l: a list of numbers (e.g. precision/recall values or F1 scores)
    :return: the harmonic mean of the elements, or 0.0 for an empty list or a
             list containing a zero
    """
    if not l or any(x == 0 for x in l):
        return 0.0
    return len(l) / sum(1 / x for x in l)
| 30 | + |
| 31 | + |
def mc_prec_rec(mc_p: list, mc_t: list, hit_function=lambda x, y: x == y) -> Tuple[list, list]:
    """
    Compute per-class precision and recall. Indices of inputs must match.

    Each (prediction, truth) pair is scored once with ``hit_function``:

    - on a hit, the *true* class gains one true positive and one predicted
      positive (so a lenient hit is credited to the true class);
    - on a miss, the *predicted* value gains one predicted positive.

    Precision of a class is ``true positives / predicted positives`` (0 when
    the class was never counted as predicted); recall is
    ``true positives / occurrences in mc_t``. Only classes occurring in
    ``mc_t`` are reported, in order of first occurrence.

    :param mc_p: list of predicted values
    :param mc_t: list of true values.
    :param hit_function: function ``(predicted, truth) -> bool`` deciding
                         whether a pair counts as a true positive
    :return: tuple: list of precision for classes, list of recall for class
    :raises ValueError: if ``mc_p`` and ``mc_t`` differ in length
    """
    # zip() would silently drop the surplus entries and score misaligned data.
    if len(mc_p) != len(mc_t):
        raise ValueError(
            "predictions and truth must have the same length, got %d and %d"
            % (len(mc_p), len(mc_t)))

    true_positives = Counter()
    predicted_positives = Counter()
    # Single pass: hit_function is evaluated exactly once per pair.
    for predicted, actual in zip(mc_p, mc_t):
        if hit_function(predicted, actual):
            true_positives[actual] += 1
            predicted_positives[actual] += 1
        else:
            predicted_positives[predicted] += 1

    positive_in_truth = Counter(mc_t)

    # Reading a missing key from a Counter yields 0 without inserting it.
    precisions = [true_positives[cls] / predicted_positives[cls]
                  if predicted_positives[cls] else 0
                  for cls in positive_in_truth]
    # Every key of positive_in_truth has a count >= 1, so this never divides by 0.
    recalls = [true_positives[cls] / count for cls, count in positive_in_truth.items()]

    return precisions, recalls
| 57 | + |
| 58 | + |
def age_window_hit(by_predicted, by_truth, *, slope=-0.1, intercept=202.8):
    """
    Check whether a predicted birth year lies within the window around the truth.

    The window half-width is ``m = slope * by_truth + intercept``. With the
    default coefficients the window narrows for later birth years (e.g. a true
    year of 2000 accepts 1997..2002, a true year of 1940 accepts 1931..1948).
    Both bounds are truncated with ``int()`` before the inclusive comparison.

    :param by_predicted: the predicted birth year
    :param by_truth: the true birth year
    :param slope: change of the half-width per year of ``by_truth``
                  (keyword-only; the default keeps the original window)
    :param intercept: constant term of the half-width
                      (keyword-only; the default keeps the original window)
    :return: true if by_predicted within m-window of by_truth
    """
    m = slope * by_truth + intercept
    return int(by_truth - m) <= by_predicted <= int(by_truth + m)
| 68 | + |
| 69 | + |
if __name__ == "__main__":
    """
    Evaluator for the PAN@CLEF19 task "Celebrity Profiling".

    Five metrics are printed:
    - cRank, the harmonic mean of the four F1 scores below (primary metric).
    - F1_gender, the harmonic mean of the class-averaged precision and the
      class-averaged recall of the gender predictions
    - F1_occupation, the same for occupation
    - F1_fame, the same for fame
    - F1_age, the same for the birth year, where a prediction counts as a hit
      when it falls inside the window computed by age_window_hit

    For more information visit:
    - https://pan.webis.de/clef19/pan19-web/celebrity-profiling.html
    """
    predictions, truth = parse_input()

    # (metric label, record key, extra keyword arguments for mc_prec_rec)
    targets = (
        ("gender", "gender", {}),
        ("occupation", "occupation", {}),
        ("fame", "fame", {}),
        ("age", "birthyear", {"hit_function": age_window_hit}),
    )

    # First pass: per-class precision/recall for every attribute.
    per_class = {}
    for label, key, options in targets:
        predicted_values = [u[key] for u in predictions]
        true_values = [u[key] for u in truth]
        per_class[label] = mc_prec_rec(predicted_values, true_values, **options)

    # Second pass: macro-averaged precision and recall combined into one F1.
    f1_scores = {}
    for label, (precisions, recalls) in per_class.items():
        f1_scores[label] = harmonic_mean([mean(precisions), mean(recalls)])

    print("cRank: ", harmonic_mean([f1_scores[label] for label, _, _ in targets]))
    for label, _, _ in targets:
        print(f"F1_{label}: ", f1_scores[label])
0 commit comments