Commit cc40433

add batch directory
1 parent 6f44e32 commit cc40433

File tree

batch/accelerate-example.sh
batch/gdal.sh
batch/hello.py
batch/hello.sh
batch/nlp_example.py

5 files changed: +265 −0 lines changed


batch/accelerate-example.sh

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

#COILED n-tasks 20
#COILED vm-type g6.xlarge
#COILED task-on-scheduler True

accelerate launch \
    --multi_gpu \
    --machine_rank $COILED_BATCH_TASK_ID \
    --main_process_ip $COILED_BATCH_SCHEDULER_ADDRESS \
    --main_process_port 12345 \
    --num_machines $COILED_BATCH_TASK_COUNT \
    --num_processes $COILED_BATCH_TASK_COUNT \
    nlp_example.py
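
The `#COILED` comment directives are read from the script itself: this job asks for 20 tasks on g6.xlarge GPU instances, with one task placed on the scheduler so its address can serve as the rendezvous host that `--main_process_ip` expects. The `COILED_BATCH_*` environment variables then give each VM its rank and the total machine count. A minimal sketch of submitting the job, assuming the Coiled CLI's `coiled batch run` entry point:

# Submit the script as a Coiled Batch job; the #COILED directives
# above are parsed from the file (assumed CLI invocation).
coiled batch run batch/accelerate-example.sh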

batch/gdal.sh

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
#!/usr/bin/env bash

#COILED n-tasks 3111
#COILED max-workers 200
#COILED region us-west-2
#COILED memory 8 GiB
#COILED container ghcr.io/osgeo/gdal
#COILED forward-aws-credentials

# Install aws CLI
if [ ! "$(which aws)" ]; then
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
    unzip -qq awscliv2.zip
    ./aws/install
fi

# Download file to be processed
filename=$(aws s3 ls --no-sign-request --recursive s3://sentinel-cogs/sentinel-s2-l2a-cogs/54/E/XR/ | \
    grep ".tif" | \
    awk '{print $4}' | \
    awk "NR==$(($COILED_ARRAY_TASK_ID + 1))")
aws s3 cp --no-sign-request s3://sentinel-cogs/$filename in.tif

# Reproject GeoTIFF
gdalwarp -t_srs EPSG:4326 in.tif out.tif

# Move result to processed bucket
aws s3 mv out.tif s3://oss-scratch-space/sentinel-reprojected/$filename
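
Each of the 3111 tasks selects exactly one GeoTIFF from the bucket listing: `$COILED_ARRAY_TASK_ID` is zero-based while awk's `NR` is one-based, hence the `+ 1`. The same selection logic in isolation (a sketch; `files.txt` is a hypothetical listing file):

# Pick the Nth line of a listing for a zero-based task id.
COILED_ARRAY_TASK_ID=2   # set by the batch runner in a real job
awk "NR==$((COILED_ARRAY_TASK_ID + 1))" files.txt   # prints line 3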

batch/hello.py

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
#COILED memory 8 GiB
#COILED ntasks 10

#COILED region us-east-2

import os
print("Hello from", os.environ["COILED_ARRAY_TASK_ID"])

batch/hello.sh

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
#COILED memory 8 GiB
#COILED ntasks 100
#COILED container ubuntu:latest
#COILED region us-east-2


echo Hello from $COILED_ARRAY_TASK_ID !

batch/nlp_example.py

Lines changed: 209 additions & 0 deletions
@@ -0,0 +1,209 @@
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import evaluate
import torch
from datasets import load_dataset
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed

from accelerate import Accelerator, DistributedType


########################################################################
# This is a fully working simple example to use Accelerate
#
# This example trains a Bert base model on GLUE MRPC
# in any of the following settings (with the same script):
#   - single CPU or single GPU
#   - multi GPUs (using PyTorch distributed mode)
#   - (multi) TPUs
#   - fp16 (mixed-precision) or fp32 (normal precision)
#
# To run it in each of these various modes, follow the instructions
# in the readme for examples:
# https://github.com/huggingface/accelerate/tree/main/examples
#
########################################################################


MAX_GPU_BATCH_SIZE = 16
EVAL_BATCH_SIZE = 32


def get_dataloaders(accelerator: Accelerator, batch_size: int = 16):
    """
    Creates a set of `DataLoader`s for the `glue` dataset,
    using "bert-base-cased" as the tokenizer.

    Args:
        accelerator (`Accelerator`):
            An `Accelerator` object
        batch_size (`int`, *optional*):
            The batch size for the train and validation DataLoaders.
    """
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    datasets = load_dataset("glue", "mrpc")

    def tokenize_function(examples):
        # max_length=None => use the model max length (it's actually the default)
        outputs = tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, max_length=None)
        return outputs

    # Apply the method we just defined to all the examples in all the splits of the dataset
    # starting with the main process first:
    with accelerator.main_process_first():
        tokenized_datasets = datasets.map(
            tokenize_function,
            batched=True,
            remove_columns=["idx", "sentence1", "sentence2"],
        )

    # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
    # transformers library
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

    def collate_fn(examples):
        # For TorchXLA, it's best to pad everything to the same length or training will be very slow.
        max_length = 128 if accelerator.distributed_type == DistributedType.XLA else None
        # When using mixed precision we want round multiples of 8/16
        if accelerator.mixed_precision == "fp8":
            pad_to_multiple_of = 16
        elif accelerator.mixed_precision != "no":
            pad_to_multiple_of = 8
        else:
            pad_to_multiple_of = None

        return tokenizer.pad(
            examples,
            padding="longest",
            max_length=max_length,
            pad_to_multiple_of=pad_to_multiple_of,
            return_tensors="pt",
        )

    # Instantiate dataloaders.
    train_dataloader = DataLoader(
        tokenized_datasets["train"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size, drop_last=True
    )
    eval_dataloader = DataLoader(
        tokenized_datasets["validation"],
        shuffle=False,
        collate_fn=collate_fn,
        batch_size=EVAL_BATCH_SIZE,
        drop_last=(accelerator.mixed_precision == "fp8"),
    )

    return train_dataloader, eval_dataloader


def training_function(config, args):
    # Initialize accelerator
    accelerator = Accelerator(cpu=args.cpu, mixed_precision=args.mixed_precision)
    # Sample hyper-parameters for learning rate, batch size, seed and a few other HPs
    lr = config["lr"]
    num_epochs = int(config["num_epochs"])
    seed = int(config["seed"])
    batch_size = int(config["batch_size"])

    metric = evaluate.load("glue", "mrpc")

    # If the batch size is too big we use gradient accumulation
    gradient_accumulation_steps = 1
    if batch_size > MAX_GPU_BATCH_SIZE and accelerator.distributed_type != DistributedType.XLA:
        gradient_accumulation_steps = batch_size // MAX_GPU_BATCH_SIZE
        batch_size = MAX_GPU_BATCH_SIZE

    set_seed(seed)
    train_dataloader, eval_dataloader = get_dataloaders(accelerator, batch_size)
    # Instantiate the model (we build the model here so that the seed also controls new weights initialization)
    model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", return_dict=True)

    # We could avoid this line since the accelerator is set with `device_placement=True` (default value).
    # Note that if you are placing tensors on devices manually, this line absolutely needs to be before the optimizer
    # creation otherwise training will not work on TPU (`accelerate` will kindly throw an error to make us aware of that).
    model = model.to(accelerator.device)
    # Instantiate optimizer
    optimizer = AdamW(params=model.parameters(), lr=lr)

    # Instantiate scheduler
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=100,
        num_training_steps=(len(train_dataloader) * num_epochs) // gradient_accumulation_steps,
    )

    # Prepare everything
    # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the
    # prepare method.

    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # Now we train the model
    for epoch in range(num_epochs):
        model.train()
        for step, batch in enumerate(train_dataloader):
            # We could avoid this line since we set the accelerator with `device_placement=True`.
            batch.to(accelerator.device)
            outputs = model(**batch)
            loss = outputs.loss
            loss = loss / gradient_accumulation_steps
            accelerator.backward(loss)
            if step % gradient_accumulation_steps == 0:
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

        model.eval()
        for step, batch in enumerate(eval_dataloader):
            # We could avoid this line since we set the accelerator with `device_placement=True`.
            batch.to(accelerator.device)
            with torch.no_grad():
                outputs = model(**batch)
            predictions = outputs.logits.argmax(dim=-1)
            predictions, references = accelerator.gather_for_metrics((predictions, batch["labels"]))
            metric.add_batch(
                predictions=predictions,
                references=references,
            )

        eval_metric = metric.compute()
        # Use accelerator.print to print only on the main process.
        accelerator.print(f"epoch {epoch}:", eval_metric)
    accelerator.end_training()


def main():
    parser = argparse.ArgumentParser(description="Simple example of training script.")
    parser.add_argument(
        "--mixed_precision",
        type=str,
        default=None,
        choices=["no", "fp16", "bf16", "fp8"],
        help="Whether to use mixed precision. Choose"
        "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10."
        "and an Nvidia Ampere GPU.",
    )
    parser.add_argument("--cpu", action="store_true", help="If passed, will train on the CPU.")
    args = parser.parse_args()
    config = {"lr": 2e-5, "num_epochs": 3, "seed": 42, "batch_size": 16}
    training_function(config, args)


if __name__ == "__main__":
    main()
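
Because the argparse interface exposes a `--cpu` flag, the training script can be smoke-tested locally as a single process before scaling it out across machines with accelerate-example.sh:

# Quick single-process check on CPU (no GPUs or cluster required).
python nlp_example.py --cpu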
