-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrainer.py
More file actions
107 lines (93 loc) · 3.57 KB
/
trainer.py
File metadata and controls
107 lines (93 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
A trainer class.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from model.gcn import GCNClassifier
from utils import constant, torch_utils
class Trainer(object):
    """Base class for model trainers.

    ``__init__``, ``update`` and ``predict`` are placeholders that raise
    ``NotImplementedError``.  The helpers below read ``self.model``,
    ``self.opt`` and ``self.optimizer``, which this base class never sets,
    so a subclass has to provide them.
    """

    def __init__(self, opt, emb_matrix=None):
        raise NotImplementedError

    def update(self, batch):
        raise NotImplementedError

    def predict(self, batch):
        raise NotImplementedError

    def update_lr(self, new_lr):
        """Pass ``new_lr`` to ``torch_utils.change_lr`` for ``self.optimizer``."""
        torch_utils.change_lr(self.optimizer, new_lr)

    def load(self, filename):
        """Restore model weights and config from a checkpoint file.

        The checkpoint is expected to be a mapping with a ``'model'`` entry
        (a state dict) and a ``'config'`` entry, as written by ``save``.

        Raises:
            SystemExit: with status 1 if the checkpoint cannot be read.
        """
        try:
            # Deserialize onto the CPU so checkpoints written on a GPU
            # machine can also be read where CUDA is unavailable;
            # load_state_dict() below copies the values into the model's
            # existing parameters.
            checkpoint = torch.load(filename, map_location="cpu")
        except Exception:
            # Only real errors are handled here; KeyboardInterrupt and
            # SystemExit propagate untouched.
            print("Cannot load model from {}".format(filename))
            # Non-zero status so a calling shell/script sees the failure.
            raise SystemExit(1)
        self.model.load_state_dict(checkpoint['model'])
        self.opt = checkpoint['config']

    def save(self, filename, epoch):
        """Write model weights and config to ``filename`` (best effort).

        A failure to save is reported with a warning and does not raise.
        ``epoch`` is accepted for interface compatibility and is not used.
        """
        params = {
            'model': self.model.state_dict(),
            'config': self.opt,
        }
        try:
            torch.save(params, filename)
        except Exception:
            # Deliberately best-effort, but do not swallow Ctrl-C.
            print("[Warning: Saving failed... continuing anyway.]")
        else:
            print("model saved to {}".format(filename))
def unpack_batch(batch, cuda):
    """Split a batch into model inputs, labels and a few individual fields.

    Args:
        batch: indexable sequence of tensors with at least 11 entries.
            Entries 0-9 become the model inputs and entry 10 the labels.
        cuda: if true, inputs and labels are moved to the GPU.

    Returns:
        A tuple ``(inputs, labels, tokens, head, subj_pos, obj_pos, lens)``.
        ``tokens``, ``head``, ``subj_pos`` and ``obj_pos`` are the original
        (un-moved) entries 0, 5, 6 and 7 of ``batch``.  ``lens`` counts the
        zeros along dim 1 of ``batch[1]`` and always keeps a batch dimension.
    """
    # Plain tensors are used directly: the deprecated autograd ``Variable``
    # wrapper is unnecessary since tensors and variables were merged.
    if cuda:
        inputs = [b.cuda() for b in batch[:10]]
        labels = batch[10].cuda()
    else:
        inputs = list(batch[:10])
        labels = batch[10]

    tokens = batch[0]
    head = batch[5]
    subj_pos = batch[6]
    obj_pos = batch[7]

    # NOTE(review): batch[1] is presumably a padding mask in which 0 marks a
    # real token, making this the per-example length -- confirm with the loader.
    lens = batch[1].eq(0).long().sum(1).squeeze()
    if lens.dim() == 0:
        # squeeze() drops the batch dimension for a single-example batch;
        # restore it so callers always receive one length per example.
        lens = lens.unsqueeze(0)
    return inputs, labels, tokens, head, subj_pos, obj_pos, lens
class GCNTrainer(Trainer):
    """Trainer that wraps a ``GCNClassifier`` with a cross-entropy objective."""

    def __init__(self, opt, emb_matrix=None):
        """Build the model, loss and optimizer from the ``opt`` mapping.

        Keys read here: ``'cuda'``, ``'optim'`` and ``'lr'``.  ``emb_matrix``
        is forwarded unchanged to ``GCNClassifier``.
        """
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        # Only parameters that require gradients are handed to the optimizer.
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        """Run one optimization step on ``batch`` and return the loss value.

        The returned float includes the optional ``conv_l2`` and
        ``pooling_l2`` penalty terms when those options are positive.
        """
        inputs, labels = unpack_batch(batch, self.opt['cuda'])[:2]

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output ** 2).sum(1).mean()
        loss_val = loss.item()

        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        """Evaluate the model on ``batch`` without updating it.

        Args:
            batch: same layout as for ``update``, plus ``batch[11]`` holding
                the original index of each example.
            unsort: if true, reorder the outputs by ascending ``batch[11]``.

        Returns:
            ``(predictions, probs, loss)``: a list of argmax class indices,
            a list of per-class softmax probability lists, and the
            cross-entropy loss as a float.
        """
        inputs, labels = unpack_batch(batch, self.opt['cuda'])[:2]
        orig_idx = batch[11]

        # forward
        self.model.eval()
        # Inference needs no gradients; skipping graph construction saves
        # memory and time without changing any returned value.
        with torch.no_grad():
            logits, _ = self.model(inputs)
            loss = self.criterion(logits, labels)
            probs = F.softmax(logits, 1).cpu().numpy().tolist()
            predictions = np.argmax(logits.cpu().numpy(), axis=1).tolist()

        if unsort:
            # Sort the (index, prediction, probs) triples by original index,
            # then split them back into parallel lists.
            _, predictions, probs = [
                list(t) for t in zip(*sorted(zip(orig_idx, predictions, probs)))
            ]
        return predictions, probs, loss.item()