
Commit 9c06bd9

Author: minghui.qmh
Commit message: update examples
1 parent 1b6eeaa commit 9c06bd9

File tree: 99 files changed, +6273 −0 lines


Diff for: examples/application_tutorials/test_ddp/cifar10.py (+161 lines)

@@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-

# pai -name pytorch -Dscript="file:///tmp/dist_cifar10.py" -Dvolumes="odps://algo_platform_dev/volumes/pytorch/cifar10" -DworkerCount=2;

from __future__ import print_function
import argparse
import torch
import torchvision.transforms as transforms

from torchvision import datasets
import torch.nn as nn
import torch.optim as optim


classes = (
    'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def parse_args():
    '''Parse input arguments.'''
    parser = argparse.ArgumentParser(description="Arguments for training.")
    parser.add_argument(
        "--per_device_train_batch_size",
        type=int,
        default=32,
        help="Batch size (per device) for the training dataloader.",
    )
    parser.add_argument(
        "--per_device_eval_batch_size",
        type=int,
        default=4,
        help="Batch size (per device) for the evaluation dataloader.",
    )
    parser.add_argument(
        "--learning_rate",
        type=float,
        default=0.01,
        help="Initial learning rate (after the potential warmup period) to use.",
    )
    parser.add_argument(
        "--data_dir",
        type=str,
        default='./data/',
        help="Data dir for training and evaluation.",
    )
    args = parser.parse_args()
    return args


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.bn1 = nn.BatchNorm2d(6)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def train(args):
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    train_dataset = datasets.CIFAR10(
        root=args.data_dir,
        train=True,
        download=False,
        transform=transform)
    trainloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.per_device_train_batch_size,
        shuffle=True,
        num_workers=2,
        drop_last=True)

    test_dataset = datasets.CIFAR10(
        root=args.data_dir,
        train=False,
        download=False,
        transform=transform)
    testloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.per_device_eval_batch_size,
        shuffle=False,
        num_workers=2,
        drop_last=True)

    net = Net().cuda()  # move the model to GPU

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9)

    for epoch in range(10):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            # get the inputs and move them to GPU
            # (plain tensors suffice; the deprecated Variable wrapper is not needed)
            inputs, labels = data
            inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 100 == 99:  # print every 100 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

    # Evaluate on the whole test set.
    with torch.no_grad():
        correct = 0
        total = 0
        for data in testloader:
            images, labels = data
            images, labels = images.cuda(), labels.cuda()  # GPU
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the 10000 test images: %d %%' % (
            100 * correct / total))


def main():
    args = parse_args()
    train(args)


if __name__ == '__main__':
    main()
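Note that both training scripts pass download=False to datasets.CIFAR10, so the dataset must already exist under --data_dir. A minimal one-time preparation sketch, assuming torchvision can reach the download mirror (this helper is not part of the commit):

# prepare_data.py -- hypothetical one-time download step, since the
# training scripts expect CIFAR-10 to already be present under --data_dir.
from torchvision import datasets

datasets.CIFAR10(root='./data/', train=True, download=True)
datasets.CIFAR10(root='./data/', train=False, download=True)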
Diff for: examples/application_tutorials/test_ddp/cifar10_ddp.py (+190 lines)

@@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-

# pai -name pytorch -Dscript="file:///tmp/dist_cifar10.py" -Dvolumes="odps://algo_platform_dev/volumes/pytorch/cifar10" -DworkerCount=2;

from __future__ import print_function
import argparse
import torch
import torchvision.transforms as transforms

from torchvision import datasets
import torch.nn as nn
import torch.optim as optim
import torch.distributed as dist


classes = (
    'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def parse_args():
    '''Parse input arguments.'''
    parser = argparse.ArgumentParser(description="Arguments for training.")
    parser.add_argument(
        "--per_device_train_batch_size",
        type=int,
        default=32,
        help="Batch size (per device) for the training dataloader.",
    )
    parser.add_argument(
        "--per_device_eval_batch_size",
        type=int,
        default=4,
        help="Batch size (per device) for the evaluation dataloader.",
    )
    parser.add_argument(
        "--learning_rate",
        type=float,
        default=0.01,
        help="Initial learning rate (after the potential warmup period) to use.",
    )
    parser.add_argument(
        "--data_dir",
        type=str,
        default='./data/',
        help="Data dir for training and evaluation.",
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="Local rank passed from distributed launcher.",
    )
    args = parser.parse_args()
    return args


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.bn1 = nn.BatchNorm2d(6)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def train(args, device):
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    train_dataset = datasets.CIFAR10(
        root=args.data_dir,
        train=True,
        download=False,
        transform=transform)
    # Each process sees a disjoint shard of the training data.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset,
        shuffle=True)
    trainloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.per_device_train_batch_size,
        sampler=train_sampler,
        num_workers=2,
        drop_last=True)

    test_dataset = datasets.CIFAR10(
        root=args.data_dir,
        train=False,
        download=False,
        transform=transform)
    testloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.per_device_eval_batch_size,
        shuffle=False,
        num_workers=2,
        drop_last=True)

    net = Net().to(device)  # move the model to this process's GPU
    # Wrap the model for distributed data parallel training.
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank],
                                                    output_device=args.local_rank,
                                                    find_unused_parameters=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9)

    for epoch in range(20):  # loop over the dataset multiple times
        # Reseed the sampler so each epoch uses a different shard order.
        trainloader.sampler.set_epoch(epoch)
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            # get the inputs and move them to this process's device
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize (gradients are all-reduced by DDP)
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics (loss on rank 0 only)
            running_loss += loss.item()
            if args.local_rank == 0 and i % 100 == 99:  # print every 100 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

            if i % 100 == 0:
                print(" rank:", args.rank, " local rank:", args.local_rank,
                      'batch', i, 'epoch', epoch)

    print('Finished Training')

    # Evaluate on the whole test set (every rank evaluates; rank 0 reports).
    with torch.no_grad():
        correct = 0
        total = 0
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        if args.local_rank == 0:
            print('Accuracy of the network on the 10000 test images: %d %%' % (
                100 * correct / total))


def main():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)
    torch.distributed.init_process_group(backend='nccl')
    dist.barrier()
    args.world_size = dist.get_world_size()
    args.rank = dist.get_rank()
    print("world size:", args.world_size, " rank:", args.rank, " local rank:", args.local_rank)
    train(args, device)


if __name__ == '__main__':
    main()
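This script relies on torch.distributed.launch injecting a --local_rank argument into each worker process. A minimal sketch of the same initialization under the newer torchrun entry point, which supplies LOCAL_RANK through the environment instead (an adaptation for reference, not part of this commit):

# torchrun-style initialization sketch: torchrun exports LOCAL_RANK
# (along with RANK and WORLD_SIZE) as environment variables instead of
# passing --local_rank on the command line.
import os
import torch
import torch.distributed as dist

local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)
dist.init_process_group(backend="nccl")  # MASTER_ADDR/MASTER_PORT come from torchrun
print("world size:", dist.get_world_size(),
      " rank:", dist.get_rank(), " local rank:", local_rank)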
@@ -0,0 +1 @@

CUDA_VISIBLE_DEVICES=0,1,2,3 nohup python -m torch.distributed.launch --nproc_per_node=4 cifar10_ddp.py >> ./nohup.log 2>&1 &
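This command spawns four worker processes, one per visible GPU; torch.distributed.launch passes each process its --local_rank argument, which cifar10_ddp.py uses to select its device. On newer PyTorch releases, torchrun is the recommended replacement and supplies LOCAL_RANK through the environment instead, as sketched above.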
@@ -0,0 +1,35 @@

#!/bin/bash

# If a GPU list is passed as the first argument, restrict visible devices to it.
if [ $# -gt 0 ]; then
  export CUDA_VISIBLE_DEVICES=$1
fi

WORKER_COUNT=1
WORKER_GPU=1

# Fetch the evaluation split once if it is not already present.
if [ ! -f ./dev.tsv ]; then
  wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/tutorials/m6/sentence_classification/dev.tsv
fi

echo '=========[ Augment news data with Chinese RoBERTa ]========='
easynlp \
  --app_name=data_augmentation \
  --worker_count=${WORKER_COUNT} \
  --worker_gpu=${WORKER_GPU} \
  --mode=predict \
  --tables=dev.tsv \
  --input_schema=index:str:1,sent:str:1,label:str:1 \
  --first_sequence=sent \
  --label_name=label \
  --outputs=aug.tsv \
  --output_schema=augmented_data \
  --checkpoint_dir=_ \
  --sequence_length=128 \
  --micro_batch_size=8 \
  --user_defined_parameters="
    pretrain_model_name_or_path=hfl/chinese-roberta-wwm-ext
    type=mlm_da
    expansion_rate=2
    mask_proportion=0.1
    remove_blanks=True
  "
