-
Notifications
You must be signed in to change notification settings - Fork 308
/
Copy pathcomment_classifier.py
86 lines (66 loc) · 2.62 KB
/
comment_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
import argparse
import os
from logging import INFO, basicConfig, getLogger
import numpy as np
import requests
from bugbug import bugzilla, db
from bugbug.models import get_model_class
from bugbug.utils import download_model
# Configure the root logger so INFO-level (and more severe) records are emitted.
basicConfig(level=INFO)
# Module-level logger, named after this module.
logger = getLogger(__name__)
def classify_comments(model_name: str, comment_id: "int | None") -> None:
    """Classify Bugzilla comments with a model and print each prediction.

    The model is loaded from a file named ``f"{model_name}model"``, looked up
    relative to the current working directory. When that file does not exist,
    ``download_model`` is called first to obtain it.

    For every comment, the comment's Bugzilla URL is printed, followed by the
    predicted class and the class probabilities. When the model reports
    ``calculate_importance``, the feature importances are printed as well.
    After each comment the function waits on ``input()`` before continuing.

    Args:
        model_name: Name of the model to load.
        comment_id: Id of a single comment to classify. When ``None``, the
            bugs database is downloaded and every comment of every bug in it
            is classified.

    Raises:
        SystemExit: With exit code 1 when the model file is missing and
            downloading it fails with ``requests.HTTPError``.
        ValueError: When ``comment_id`` is given but no such comment is found.
        RuntimeError: When the bugs database could not be downloaded.
    """
    # The annotation above is quoted so the ``X | None`` syntax is not
    # evaluated at definition time on interpreters older than Python 3.10.
    model_file_name = f"{model_name}model"

    if not os.path.exists(model_file_name):
        logger.info("%s does not exist. Downloading the model....", model_file_name)
        try:
            download_model(model_name)
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    # Compare against None explicitly: a bare truthiness test would treat an
    # explicit id of 0 as "no id given" and classify the whole database.
    if comment_id is not None:
        # Get a comment by its id
        comments = list(bugzilla.get_comment(comment_id).values())
        # Explicit raise instead of ``assert``: asserts are stripped under
        # ``python -O``, which would let an empty result pass silently.
        if not comments:
            raise ValueError(
                f"A comment with a comment id of {comment_id} was not found"
            )
    else:
        # The download must run unconditionally, so it cannot live inside an
        # ``assert`` (which ``python -O`` removes together with its side effect).
        if not db.download(bugzilla.BUGS_DB):
            raise RuntimeError("Failed to download the bugs database")

        bugs = bugzilla.get_bugs()
        # Attach the owning bug's id to every comment so the URL can be built.
        comments = [
            {**comment, "bug_id": bug["id"]}
            for bug in bugs
            for comment in bug["comments"]
        ]

    for comment in comments:
        print(
            f'https://bugzilla.mozilla.org/show_bug.cgi?id={comment["bug_id"]}#c{comment["count"]}'
        )

        if model.calculate_importance:
            probas, importance = model.classify(
                comment, probabilities=True, importances=True
            )

            model.print_feature_importances(
                importance["importances"], class_probabilities=probas
            )
        else:
            probas = model.classify(comment, probabilities=True, importances=False)

        # A single comment was classified, so only the first row is relevant.
        probability = probas[0]
        pred_index = np.argmax(probability)
        if len(probability) > 2:
            # More than two classes: map the index back through the model's
            # ``le`` attribute (presumably a label encoder -- confirm).
            pred_class = model.le.inverse_transform([pred_index])[0]
        else:
            pred_class = "Positive" if pred_index == 1 else "Negative"
        print(f"{pred_class} {probability}")

        # Pause until the user presses Enter before showing the next comment.
        input()
def main() -> None:
    """Command-line entry point: parse the arguments and classify comments.

    Takes a positional ``model`` argument and an optional integer
    ``--comment-id``; both are forwarded to ``classify_comments``.
    """
    cli = argparse.ArgumentParser(
        description="Perform evaluation on comments using the specified model"
    )
    cli.add_argument("model", help="Which model to use for evaluation")
    cli.add_argument(
        "--comment-id",
        type=int,
        help="Classify the given comment id",
    )

    options = cli.parse_args()
    classify_comments(options.model, options.comment_id)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()