forked from Harisudhan5/Train-Custom-NER-Model-With-SpaCy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.py
More file actions
58 lines (46 loc) · 1.41 KB
/
train.py
File metadata and controls
58 lines (46 loc) · 1.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import random
from pathlib import Path

import spacy
from spacy.lookups import Lookups
from spacy.training.example import Example
from spacy.util import minibatch

from data import training_data
# Entity labels this custom NER model is meant to recognise.
# NOTE(review): this list is not referenced anywhere else in the visible
# script; the labels that actually get registered on the NER component are
# read from the training annotations. Confirm whether it is still needed.
new_labels = [
    "PROGRAMMING_LANGUAGE",
    "FRAMEWORK_LIBRARY",
    "HARDWARE",
    "ALGORITHM_MODEL",
    "PROTOCOL",
    "FILE_FORMAT",
    "CYBERSECURITY_TERM",
]

# Number of passes made over the training data.
epoch = 30

# Work on a copy: random.shuffle() reorders its argument in place, and the
# original list belongs to the imported `data` module.
train_data = list(training_data)
random.shuffle(train_data)
# Start from the pretrained "en_core_web_lg" pipeline and show which
# components it contains.
nlp = spacy.load("en_core_web_lg")
print(nlp.pipe_names)

# Reuse the pipeline's NER component when it has one; otherwise add a new one.
ner = nlp.get_pipe('ner') if 'ner' in nlp.pipe_names else nlp.add_pipe('ner')

# Register every label found in the training annotations that the NER
# component does not know yet.
# Presumably each entity is a (start, end, label) triple - only index 2 is
# read here; verify against the `data` module.
for _sample_text, sample_annotations in train_data:
    for entity in sample_annotations['entities']:
        label = entity[2]
        if label in ner.labels:
            continue
        ner.add_label(label)
# Fine-tune only the NER component, then save the whole pipeline to disk.

# Every component except 'ner' is disabled while training.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

BATCH_SIZE = 128  # number of training samples per update step
DROPOUT = 0.15    # dropout rate passed to nlp.update

# select_pipes(disable=...) is the spaCy v3 replacement for the deprecated
# disable_pipes(*names); the disabled components are restored when the
# `with` block exits.
with nlp.select_pipes(disable=other_pipes):
    optimizer = nlp.resume_training()
    epochs = epoch
    # A separate loop variable keeps the `epoch` setting from being
    # overwritten by the loop counter.
    for epoch_index in range(epochs):
        random.shuffle(train_data)
        losses = {}
        for batch in minibatch(train_data, size=BATCH_SIZE):
            examples = [
                Example.from_dict(nlp.make_doc(text), annotations)
                for text, annotations in batch
            ]
            # Pass the optimizer explicitly so it is clear which one is
            # driving the update.
            nlp.update(examples, sgd=optimizer, drop=DROPOUT, losses=losses)
        print(f'Epoch : {epoch_index + 1}, Loss : {losses}')

# Create the output directory (including the parent 'Model' folder) up front
# so that a missing parent cannot make the save fail after training is done.
output_dir = Path('Model/ner')
output_dir.mkdir(parents=True, exist_ok=True)
nlp.to_disk(output_dir)
print("Model is trained and saved to Model directory!!!")