From 470edc6e1dc1f671dbded1cf91e0f2bd83c33e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0teva=C5=88=C3=A1k?= Date: Sun, 18 Jun 2017 13:24:13 +0200 Subject: [PATCH 1/7] added model testing script --- scripts/test_model.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 scripts/test_model.py diff --git a/scripts/test_model.py b/scripts/test_model.py new file mode 100644 index 0000000..47eae64 --- /dev/null +++ b/scripts/test_model.py @@ -0,0 +1,33 @@ +import os +import pickle +import click +from sklearn.metrics import classification_report +from pkspace.utils.loaders import PKSpaceLoader, PKLotLoader + + +@click.command() +@click.option('--PKSpace', 'dataset_mode', flag_value='PKSpace', + default=True) +@click.option('--PKLot', 'dataset_mode', flag_value='PKLot') +@click.option('--dataset_dir', required=True, + help='Directory of dataset for model to be tested on') +@click.option('--model_file', required=True, + help='Pickle file of exported model') +def test_model(dataset_mode, dataset_dir, model_file): + if not os.path.isdir(dataset_dir): + print('{} is not a directory') + return + + if dataset_mode == 'PKSpace': + loader = PKSpaceLoader() + elif dataset_mode == 'PKLot': + loader = PKLotLoader() + + with open(model_file, 'rb') as mp: + model = pickle.load(mp) + spaces, ground_answers = loader.load(dataset_dir) + model_answers = model.predict(spaces) + print(classification_report(ground_answers, model_answers)) + +if __name__ == '__main__': + test_model() From 0abcfe4ea20bd86d95777fa7a310c7ddcc114a2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0teva=C5=88=C3=A1k?= Date: Sun, 18 Jun 2017 13:26:25 +0200 Subject: [PATCH 2/7] fixed pep-8 error --- scripts/test_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/test_model.py b/scripts/test_model.py index 47eae64..d1124f2 100644 --- a/scripts/test_model.py +++ b/scripts/test_model.py @@ -29,5 +29,6 @@ def test_model(dataset_mode, dataset_dir, model_file): model_answers = model.predict(spaces) print(classification_report(ground_answers, model_answers)) + if __name__ == '__main__': test_model() From 00875940ff5e2b82188577fad0b27dda947da708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0teva=C5=88=C3=A1k?= Date: Tue, 20 Jun 2017 11:48:08 +0200 Subject: [PATCH 3/7] changed required options to arguments --- scripts/test_model.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/scripts/test_model.py b/scripts/test_model.py index d1124f2..a798b29 100644 --- a/scripts/test_model.py +++ b/scripts/test_model.py @@ -6,21 +6,18 @@ @click.command() -@click.option('--PKSpace', 'dataset_mode', flag_value='PKSpace', - default=True) -@click.option('--PKLot', 'dataset_mode', flag_value='PKLot') -@click.option('--dataset_dir', required=True, - help='Directory of dataset for model to be tested on') -@click.option('--model_file', required=True, - help='Pickle file of exported model') -def test_model(dataset_mode, dataset_dir, model_file): +@click.option('--loader', '-l', type=click.Choice(['PKLot, PKSpace']), + default='PKSpace', help='Loader to be used to load dataset') +@click.argument('dataset_dir') +@click.argument('model_file') +def test_model(loader, dataset_dir, model_file): if not os.path.isdir(dataset_dir): print('{} is not a directory') return - if dataset_mode == 'PKSpace': + if loader == 'PKSpace': loader = PKSpaceLoader() - elif dataset_mode == 'PKLot': + elif loader == 'PKLot': loader = PKLotLoader() with open(model_file, 'rb') as mp: From 1579e400a05aeb3c7a50ad0fea79ac8be2788519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0teva=C5=88=C3=A1k?= Date: Tue, 20 Jun 2017 11:52:06 +0200 Subject: [PATCH 4/7] loader argument fix --- scripts/test_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/test_model.py b/scripts/test_model.py index a798b29..22c4920 100644 --- a/scripts/test_model.py +++ b/scripts/test_model.py @@ -6,8 +6,8 @@ @click.command() -@click.option('--loader', '-l', type=click.Choice(['PKLot, PKSpace']), - default='PKSpace', help='Loader to be used to load dataset') +@click.option('--loader', '-l', type=click.Choice(['PKLot', 'PKSpace']), + default='PKSpace', help='Loader used to load dataset') @click.argument('dataset_dir') @click.argument('model_file') def test_model(loader, dataset_dir, model_file): From 9c32e26139a485c9ec0befc40ba4fe22df9fc2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0teva=C5=88=C3=A1k?= Date: Wed, 21 Jun 2017 20:06:56 +0200 Subject: [PATCH 5/7] click now checks input paths, changed pickle to joblib --- scripts/test_model.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/test_model.py b/scripts/test_model.py index 22c4920..8116c17 100644 --- a/scripts/test_model.py +++ b/scripts/test_model.py @@ -1,27 +1,25 @@ -import os -import pickle import click from sklearn.metrics import classification_report +from sklearn.externals import joblib from pkspace.utils.loaders import PKSpaceLoader, PKLotLoader @click.command() @click.option('--loader', '-l', type=click.Choice(['PKLot', 'PKSpace']), default='PKSpace', help='Loader used to load dataset') -@click.argument('dataset_dir') -@click.argument('model_file') +@click.argument('dataset_dir', + type=click.Path(exists=True, file_okay=False, dir_okay=True, + resolve_path=True)) +@click.argument('model_file', + type=click.Path(exists=True, file_okay=True, dir_okay=False, + resolve_path=True)) def test_model(loader, dataset_dir, model_file): - if not os.path.isdir(dataset_dir): - print('{} is not a directory') - return - if loader == 'PKSpace': loader = PKSpaceLoader() elif loader == 'PKLot': loader = PKLotLoader() - with open(model_file, 'rb') as mp: - model = pickle.load(mp) + model = joblib.load(model_file) spaces, ground_answers = loader.load(dataset_dir) model_answers = model.predict(spaces) print(classification_report(ground_answers, model_answers)) From 62f5df65ed7c00eaf84f1dfa7ce50e50778b5234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0teva=C5=88=C3=A1k?= Date: Thu, 22 Jun 2017 10:50:50 +0200 Subject: [PATCH 6/7] Added machine friendly printing --- scripts/test_model.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/scripts/test_model.py b/scripts/test_model.py index 8116c17..c6f7f19 100644 --- a/scripts/test_model.py +++ b/scripts/test_model.py @@ -1,6 +1,9 @@ +import json import click +from collections import Counter from sklearn.metrics import classification_report from sklearn.externals import joblib +from sklearn.metrics import f1_score, recall_score, precision_score from pkspace.utils.loaders import PKSpaceLoader, PKLotLoader @@ -13,7 +16,9 @@ @click.argument('model_file', type=click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True)) -def test_model(loader, dataset_dir, model_file): +@click.option("--machine_friendly", '-f', is_flag=True, + help='prints serialized dictionary of results') +def test_model(loader, dataset_dir, model_file, machine_friendly): if loader == 'PKSpace': loader = PKSpaceLoader() elif loader == 'PKLot': @@ -22,7 +27,29 @@ def test_model(loader, dataset_dir, model_file): model = joblib.load(model_file) spaces, ground_answers = loader.load(dataset_dir) model_answers = model.predict(spaces) - print(classification_report(ground_answers, model_answers)) + if machine_friendly: + answer = {'avg': {}, 0: {}, 1: {}} + metrics = [precision_score, recall_score, f1_score] + classes_counter = Counter(ground_answers) + for i in [0, 1]: + for func in metrics: + score = func(ground_answers, model_answers, pos_label=i) + answer[i][func.__name__] = score + class_support = classes_counter[i] + answer[i]['support'] = class_support + old_sum_support = answer['avg'].get('support', 0) + answer['avg']['support'] = old_sum_support + class_support + + for column in [x.__name__ for x in metrics]: + col_sum = 0 + for ans_class in [0, 1]: + row = answer[ans_class] + col_sum += row[column] * row['support'] + answer['avg'][column] = col_sum / answer['avg']['support'] + print(json.dumps(answer)) + + else: + print(classification_report(ground_answers, model_answers)) if __name__ == '__main__': From 8d14223d165d6c707d792aaeae811557ca8b0a8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0teva=C5=88=C3=A1k?= Date: Thu, 22 Jun 2017 10:54:14 +0200 Subject: [PATCH 7/7] ad-hoc import solution, added some commentary --- scripts/test_model.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/test_model.py b/scripts/test_model.py index c6f7f19..7aaeaba 100644 --- a/scripts/test_model.py +++ b/scripts/test_model.py @@ -4,7 +4,10 @@ from sklearn.metrics import classification_report from sklearn.externals import joblib from sklearn.metrics import f1_score, recall_score, precision_score -from pkspace.utils.loaders import PKSpaceLoader, PKLotLoader +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from pkspace.utils.loaders import PKSpaceLoader, PKLotLoader # noqa @click.command() @@ -31,15 +34,19 @@ def test_model(loader, dataset_dir, model_file, machine_friendly): answer = {'avg': {}, 0: {}, 1: {}} metrics = [precision_score, recall_score, f1_score] classes_counter = Counter(ground_answers) + for i in [0, 1]: for func in metrics: score = func(ground_answers, model_answers, pos_label=i) answer[i][func.__name__] = score class_support = classes_counter[i] + + # summing total support answer[i]['support'] = class_support old_sum_support = answer['avg'].get('support', 0) answer['avg']['support'] = old_sum_support + class_support + # calculating weighted average for all functions for column in [x.__name__ for x in metrics]: col_sum = 0 for ans_class in [0, 1]: