-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathLangSummary.py
More file actions
112 lines (98 loc) · 3.52 KB
/
LangSummary.py
File metadata and controls
112 lines (98 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import LoadTransformer
import DrawSequence
import argparse
from Helper_Functions import prepare_directory, things_in_path
from Helper_Functions import clean_val as _clean_val
from copy import copy
def padto(s, width):
    """Centre ``s`` in a field of ``width`` characters, padding with spaces.

    A string already longer than ``width`` is returned unchanged (it is never
    truncated). When the padding cannot be split evenly, the extra space goes
    on the right.
    """
    missing = width - len(s)
    if missing < 0:
        return s
    left, extra = divmod(missing, 2)
    right = left + extra
    return " " * left + s + " " * right
def clean_val(v, width, digits=5):
    """Render ``v`` as text centred in a column of ``width`` characters.

    Strings are used as they are; any other value is first passed through
    ``Helper_Functions.clean_val`` with the given ``digits`` and the result is
    converted with ``str``.
    """
    text = v if isinstance(v, str) else _clean_val(v, digits=digits)
    return padto(str(text), width)
def get_all_langs():
    """Return what ``things_in_path`` reports for "lms", folders only.

    Presumably each folder under "lms" corresponds to one language - confirm
    against ``Helper_Functions.things_in_path``.
    """
    lang_folders = things_in_path("lms", only_folders=True)
    return lang_folders
def make_txt(lang="all", seqs=None, valwidth_factor=-1):
    """Write a text summary for one language, or for every known language.

    Args:
        lang: name of the language to summarise; the special value "all"
            (the default) summarises every language returned by
            ``get_all_langs()``.
        seqs: example sequences forwarded to ``_make_txt``. ``None`` (the
            default) means no example sequences.
        valwidth_factor: forwarded unchanged to ``_make_txt``.
    """
    # A None default replaces the former shared mutable ``[]`` default; an
    # actual list is always forwarded so the callee sees the same value as
    # before.
    seqs = [] if seqs is None else seqs
    langs = get_all_langs() if lang == "all" else [lang]
    for one_lang in langs:
        _make_txt(one_lang, seqs=seqs, valwidth_factor=valwidth_factor)
def _make_txt(lang, seqs=None, valwidth_factor=-1):
    """Write a plain-text summary of all loaded models of ``lang``.

    Models are obtained from
    ``LoadTransformer.load_any_transformer_for_lang(lang, load_all=True,
    report_folder=True)``, which is iterated as ``(model, folder)`` pairs;
    pairs whose model is ``None`` are skipped. The summary is written to
    ``"summaries/" + lang`` and contains:

    * a table of statistics, one row per model (a blank line every three
      rows),
    * the output of every model on each sequence in ``seqs``,
    * a legend mapping each model id to its folder name.

    Nothing is written when no model could be loaded.

    Args:
        lang: language name; also used as the output file name.
        seqs: example sequences to run through every model. ``None`` (the
            default) means no example sequences.
        valwidth_factor: accepted for interface compatibility; not used by
            this function.
    """
    # Work on a private list: avoids a shared mutable default and lets callers
    # pass any iterable of sequences.
    seqs = [] if seqs is None else list(seqs)

    loaded = LoadTransformer.load_any_transformer_for_lang(
        lang, load_all=True, report_folder=True)

    infos = {}       # folder name -> statistics dict from model_info (+ "id")
    id2folder = {}   # model id -> folder name
    is_classifier = None
    sample_folder = None
    for model, folder_path in loaded:
        if model is None:
            continue
        model_id = len(infos)
        folder = folder_path.split("/")[-1]
        infos[folder] = model_info(model, seqs)
        infos[folder]["id"] = model_id
        id2folder[model_id] = folder
        # Taken from the last loaded model; the models of one language are
        # expected to agree on this.
        is_classifier = model.is_classifier
        sample_folder = folder

    nfolders = len(infos)
    if not nfolders:
        return

    base_stats = ["id", "test acc", "heads", "layers", "dim", "ff", "epochs",
                  "gamma", "lr", "position", "train e20", "val e20"]
    # Any remaining statistics follow in sorted order. Which model supplies
    # the key set is arbitrary: model_info fills the same keys for each one.
    extra_stats = sorted(
        key for key in infos[sample_folder].keys()
        if key not in base_stats and key != "seq_outs")
    stats = base_stats + extra_stats
    min_stat_width = 6

    # The sequence column must fit the inputs and any string-valued outputs.
    all_seqs = list(seqs)
    for info in infos.values():
        if all_seqs and isinstance(info["seq_outs"][0], str):
            all_seqs += info["seq_outs"]
    seq_widths = max([len(s) for s in all_seqs] + [1]) + 3
    val_widths = 6 if is_classifier else seq_widths

    filename = "summaries/" + lang
    # NOTE(review): presumably makes sure the target directory exists -
    # confirm against Helper_Functions.prepare_directory.
    prepare_directory(filename)
    with open(filename, "w") as out:
        print("lang:", lang, "\n\n", file=out)

        stat_widths = [max(len(s) + 1, min_stat_width) for s in stats]
        header = [clean_val(s, width) for s, width in zip(stats, stat_widths)]
        print(*header, "\n", file=out)
        for i in range(nfolders):
            folder = id2folder[i]
            if i > 0 and i % 3 == 0:
                print("", file=out)  # break up lines for readability
            row = [clean_val(infos[folder][s], width)
                   for s, width in zip(stats, stat_widths)]
            print(*row, file=out)

        print("\n\nsome example seqs:\n", file=out)
        out_headers = [clean_val("out-" + str(i), val_widths)
                       for i in range(nfolders)]
        print(clean_val("seq", seq_widths), *out_headers, file=out)
        for si, s in enumerate(seqs):
            outs = [clean_val(infos[id2folder[i]]["seq_outs"][si], val_widths)
                    for i in range(nfolders)]
            print(clean_val(s, seq_widths), *outs, file=out)

        print("\n\nfolders:", file=out)
        for i in range(nfolders):
            print(i, ":\t\t", id2folder[i], file=out)
def get_list_val(l, i=-1, d=-1):
    """Return ``l[i]``, or ``d`` when that element cannot be looked up.

    Args:
        l: container to index (typically a list of per-epoch values).
        i: index or key to look up; defaults to -1, i.e. the last element.
        d: value returned when the lookup fails; defaults to -1.

    Returns:
        ``l[i]`` on success, otherwise ``d``.
    """
    try:
        return l[i]
    except (LookupError, TypeError):
        # LookupError covers IndexError/KeyError (missing element); TypeError
        # covers non-subscriptable containers (e.g. None) and unsuitable
        # indices. Unlike a bare ``except``, KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        return d
def model_info(model, seqs):
    """Collect the statistics reported for one model.

    Args:
        model: object exposing ``metrics`` (a mapping with the keys
            "test_accs", "train_accs" and "val_accs"), the attributes
            ``nheads``, ``nlayers``, ``d_model``, ``ff`` and
            ``positional_encoding_type``, and a ``classify`` method.
        seqs: sequences to pass to ``model.classify``.

    Returns:
        A dict with the final test/train/val accuracies, the train/val
        accuracies at index 19 ("train e20", "val e20"), the number of
        recorded epochs, the architecture attributes, "gamma" and "lr"
        (falling back to -1 and 0.0003 when the model lacks ``last_gamma`` /
        ``last_initial_lr``) and "seq_outs", the result of ``model.classify``
        for every sequence in ``seqs``.
    """
    metrics = model.metrics
    test_accs = metrics["test_accs"]
    train_accs = metrics["train_accs"]
    val_accs = metrics["val_accs"]

    res = {}
    res["test acc"] = get_list_val(test_accs)
    res["train acc"] = get_list_val(train_accs)
    res["val acc"] = get_list_val(val_accs)
    # A list may be empty or shorter than 20 entries, in which case
    # get_list_val returns its default instead: this may happen if this
    # script is run while some models are still training.
    res["train e20"] = get_list_val(train_accs, 19)
    res["val e20"] = get_list_val(val_accs, 19)
    res["epochs"] = len(train_accs)
    res["heads"] = model.nheads
    res["layers"] = model.nlayers
    res["dim"] = model.d_model
    res["ff"] = model.ff
    res["position"] = model.positional_encoding_type
    res["gamma"] = getattr(model, "last_gamma", -1)
    res["lr"] = getattr(model, "last_initial_lr", 0.0003)
    res["seq_outs"] = [model.classify(s) for s in seqs]
    return res
if __name__ == "__main__":
    # Executed only when this file is run as a script (not when imported):
    # make_txt() with its defaults writes a summary for every language
    # returned by get_all_langs().
    make_txt()