1+ import sys , os
2+
3+ suffix = ""
4+
5+ import jittor as jt
6+ import time
7+ from pathlib import Path
8+ home_path = str (Path .home ())
9+ perf_path = os .path .join (home_path , ".cache" , "jittor_perf" )
10+
11+ def main ():
12+ os .makedirs (perf_path + "/src/jittor" , exist_ok = True )
13+ os .makedirs (perf_path + "/src/jittor_utils" , exist_ok = True )
14+ os .system (f"cp -rL { jt .flags .jittor_path } { perf_path + '/src/' } " )
15+ os .system (f"cp -rL { jt .flags .jittor_path } /../jittor_utils { perf_path + '/src/' } " )
16+ use_torch_1_4 = os .environ .get ("use_torch_1_4" , "0" ) == "1"
17+ dockerfile_src = r"""
18+ FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
19+
20+ RUN echo \
21+ "deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
22+ deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
23+ deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
24+ deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
25+
26+ # RUN rm -rf /var/lib/apt/lists/*
27+ RUN apt update || true
28+
29+ RUN apt install wget \
30+ python3.7 python3.7-dev \
31+ g++ build-essential -y
32+
33+ WORKDIR /usr/src
34+
35+ RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
36+ && wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
37+
38+ # change tsinghua mirror
39+ RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
40+
41+ RUN pip3 install \
42+ pybind11 \
43+ numpy \
44+ tqdm \
45+ pillow \
46+ astunparse
47+
48+ RUN pip3 install torch torchvision
49+ """
50+ global suffix
51+ if use_torch_1_4 :
52+ suffix = "_1_4"
53+ dockerfile_src = dockerfile_src .replace ("torch " , "torch==1.4.0 " )
54+ dockerfile_src = dockerfile_src .replace ("torchvision" , "torchvision==0.5.0" )
55+ with open ("/tmp/perf_dockerfile" , 'w' ) as f :
56+ f .write (dockerfile_src )
57+ assert os .system ("sudo nvidia-smi -lgc 1500" ) == 0
58+ assert os .system (f"sudo docker build --tag jittor/jittor-perf{ suffix } -f /tmp/perf_dockerfile ." ) == 0
59+ # run once for compile source
60+ jt_fps = test_main ("jittor" , "resnet50" , 1 )
61+
62+ logs = ""
63+ # resnext50_32x4d with bs=8 cannot pass this test
64+ #### inference test
65+ for model_name in ["resnet50" , "wide_resnet50_2" , # "resnext50_32x4d",
66+ "resnet152" , "wide_resnet101_2" , "resnext101_32x8d" ,
67+ "alexnet" , "vgg11" , "squeezenet1_1" , "mobilenet_v2" ,
68+ "densenet121" , "densenet169" , "densenet201" ,
69+ "res2net50" , "res2net101" ]:
70+ for bs in [1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ]:
71+ jt_fps = test_main ("jittor" , model_name , bs )
72+ logs += f"jittor-{ model_name } -{ bs } { jt_fps } \n "
73+ tc_fps = test_main ("torch" , model_name , bs )
74+ logs += f"torch-{ model_name } -{ bs } { tc_fps } \n "
75+ logs += f"compare-{ model_name } -{ bs } { jt_fps / tc_fps } \n "
76+ print (logs )
77+ #### train test
78+ for model_name in ["train_resnet50" , "train_resnet101"
79+ ]:
80+ for bs in [1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ]:
81+ jt_fps = test_main ("jittor" , model_name , bs )
82+ logs += f"jittor-{ model_name } -{ bs } { jt_fps } \n "
83+ tc_fps = test_main ("torch" , model_name , bs )
84+ logs += f"torch-{ model_name } -{ bs } { tc_fps } \n "
85+ logs += f"compare-{ model_name } -{ bs } { jt_fps / tc_fps } \n "
86+ print (logs )
87+ with open (f"{ perf_path } /jittor-perf{ suffix } -latest.txt" , "w" ) as f :
88+ f .write (logs )
89+ from datetime import datetime
90+ with open (f"{ perf_path } /jittor-perf{ suffix } -{ datetime .now ()} .txt" , "w" ) as f :
91+ f .write (logs )
92+
93+ def test_main (name , model_name , bs ):
94+ cmd = f"sudo docker run --gpus all --rm -v { perf_path } :/root/.cache/jittor --network host jittor/jittor-perf{ suffix } bash -c 'PYTHONPATH=/root/.cache/jittor/src python3.7 /root/.cache/jittor/src/jittor/test/perf/perf.py { name } { model_name } { bs } '"
95+ fps = - 1
96+ try :
97+ print ("run cmd:" , cmd )
98+ if os .system (cmd ) == 0 :
99+ with open (f"{ perf_path } /{ name } -{ model_name } -{ bs } .txt" , 'r' ) as f :
100+ fps = float (f .read ().split ()[3 ])
101+ except :
102+ pass
103+ return fps
104+
105+ def time_iter (duration = 2 , min_iter = 5 ):
106+ start = time .time ()
107+ for i in range (10000000 ):
108+ yield i
109+ end = time .time ()
110+ if end - start > duration and i >= min_iter :
111+ return
112+
113+ def test (name , model_name , bs ):
114+ print ("hello" , name , model_name , bs )
115+ import numpy as np
116+ import time
117+ is_train = False
118+ _model_name = model_name
119+ if model_name .startswith ("train_" ):
120+ is_train = True
121+ model_name = model_name [6 :]
122+ if name == "torch" :
123+ import torch
124+ import torchvision .models as tcmodels
125+ from torch import optim
126+ from torch import nn
127+ torch .backends .cudnn .deterministic = False
128+ torch .backends .cudnn .benchmark = True
129+ model = tcmodels .__dict__ [model_name ]()
130+ model = model .cuda ()
131+ else :
132+ import jittor as jt
133+ from jittor import optim
134+ from jittor import nn
135+ jt .flags .use_cuda = 1
136+ jt .cudnn .set_algorithm_cache_size (10000 )
137+ import jittor .models as jtmodels
138+ model = jtmodels .__dict__ [model_name ]()
139+ if (model == "resnet152" or model == "resnet101" ) and bs == 128 and is_train :
140+ jt .cudnn .set_max_workspace_ratio (0.05 )
141+ if is_train :
142+ model .train ()
143+ else :
144+ model .eval ()
145+ img_size = 224
146+ if model_name == "inception_v3" :
147+ img_size = 300
148+ test_img = np .random .random ((bs , 3 , img_size , img_size )).astype ("float32" )
149+ if is_train :
150+ label = (np .random .random ((bs ,)) * 1000 ).astype ("int32" )
151+ if name == "torch" :
152+ test_img = torch .Tensor (test_img ).cuda ()
153+ if is_train :
154+ label = torch .LongTensor (label ).cuda ()
155+ opt = optim .SGD (model .parameters (), 0.001 )
156+ sync = lambda : torch .cuda .synchronize ()
157+ jt = torch
158+ else :
159+ test_img = jt .array (test_img ).stop_grad ()
160+ if is_train :
161+ label = jt .array (label ).stop_grad ()
162+ opt = optim .SGD (model .parameters (), 0.001 )
163+ sync = lambda : jt .sync_all (True )
164+
165+ sync ()
166+ use_profiler = os .environ .get ("use_profiler" , "0" ) == "1"
167+ if hasattr (jt , "nograd" ):
168+ ng = jt .no_grad ()
169+ ng .__enter__ ()
170+ def iter ():
171+ x = model (test_img )
172+ if isinstance (x , tuple ):
173+ x = x [0 ]
174+ if is_train :
175+ loss = nn .CrossEntropyLoss ()(x , label )
176+ if name == "jittor" :
177+ opt .step (loss )
178+ else :
179+ opt .zero_grad ()
180+ loss .backward ()
181+ opt .step ()
182+ else :
183+ x .sync ()
184+ sync ()
185+ for i in time_iter ():
186+ iter ()
187+ sync ()
188+ for i in time_iter ():
189+ iter ()
190+ sync ()
191+ if use_profiler :
192+ if name == "torch" :
193+ prof = torch .autograd .profiler .profile (use_cuda = True )
194+ else :
195+ prof = jt .profile_scope ()
196+ prof .__enter__ ()
197+ if name == "jittor" :
198+ if hasattr (jt .flags , "use_parallel_op_compiler" ):
199+ jt .flags .use_parallel_op_compiler = 0
200+ start = time .time ()
201+ for i in time_iter (10 ):
202+ iter ()
203+ sync ()
204+ end = time .time ()
205+ if use_profiler :
206+ prof .__exit__ (None ,None ,None )
207+ if name == "torch" :
208+ print (prof .key_averages ().table (sort_by = "cuda_time_total" , row_limit = 30 ))
209+ total_iter = i + 1
210+ print ("duration:" , end - start , "FPS:" , total_iter * bs / (end - start ))
211+ fpath = f"{ home_path } /.cache/jittor/{ name } -{ _model_name } -{ bs } .txt"
212+ with open (fpath , 'w' ) as f :
213+ f .write (f"duration: { end - start } FPS: { total_iter * bs / (end - start )} " )
214+ os .chmod (fpath , 0x666 )
215+
216+ if len (sys .argv ) <= 1 :
217+ main ()
218+ else :
219+ name , model , bs = sys .argv [1 :]
220+ bs = int (bs )
221+ test (name , model , bs )
0 commit comments