
Commit c430571

Copilot and vfdev-5 committed
Address review comments: remove cuda parameter from GradScaler, update PyTorch version requirements, revert notebook changes
Co-authored-by: vfdev-5 <[email protected]>
1 parent 2083015 commit c430571
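
A note on the GradScaler part of this change (a sketch for context, not part of the diff): in recent PyTorch the torch.amp.GradScaler constructor takes an optional device argument that defaults to "cuda", so dropping the explicit 'cuda' keeps the GPU behaviour while also matching the older torch.cuda.amp.GradScaler, which has no device parameter. Roughly:

    from torch.amp import GradScaler

    # before: device passed explicitly (torch.amp-only signature)
    scaler = GradScaler('cuda', enabled=True)

    # after: device defaults to "cuda", and the call also matches the
    # signature of the legacy torch.cuda.amp.GradScaler
    scaler = GradScaler(enabled=True)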

File tree: 8 files changed (+23, -23 lines changed)


examples/cifar10/main.py

Lines changed: 1 addition & 1 deletion
@@ -288,7 +288,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con
     # - Two progress bars on epochs and optionally on iterations

     with_amp = config["with_amp"]
-    scaler = GradScaler('cuda', enabled=with_amp)
+    scaler = GradScaler(enabled=with_amp)

     def train_step(engine, batch):
         x, y = batch[0], batch[1]
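
For context, a minimal sketch of how a scaler created this way is typically driven inside the train step (the usual autocast / scale / step / update pattern; the model, optimizer, criterion and device below are stand-ins, not the example's actual objects):

    import torch
    from torch import nn, optim
    from torch.amp import autocast, GradScaler

    device = "cuda"
    with_amp = True
    model = nn.Linear(32, 10).to(device)                # stand-in model
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler(enabled=with_amp)               # device defaults to "cuda"

    def train_step(engine, batch):
        model.train()
        x, y = batch[0].to(device), batch[1].to(device)
        optimizer.zero_grad()
        with autocast("cuda", enabled=with_amp):
            y_pred = model(x)
            loss = criterion(y_pred, y)
        scaler.scale(loss).backward()   # scale the loss to avoid fp16 gradient underflow
        scaler.step(optimizer)          # unscale gradients, skip the step on inf/NaN
        scaler.update()                 # adjust the scale factor for the next iteration
        return loss.item()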

examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def main(dataset_path, batch_size=256, max_epochs=10):
     optimizer = SGD(model.parameters(), lr=0.01)
     criterion = CrossEntropyLoss().to(device)

-    scaler = GradScaler('cuda')
+    scaler = GradScaler()

     def train_step(engine, batch):
         x = convert_tensor(batch[0], device, non_blocking=True)

examples/cifar10_qat/main.py

Lines changed: 1 addition & 1 deletion
@@ -272,7 +272,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con
     # - Two progress bars on epochs and optionally on iterations

     with_amp = config["with_amp"]
-    scaler = GradScaler('cuda', enabled=with_amp)
+    scaler = GradScaler(enabled=with_amp)

     def train_step(engine, batch):
         x, y = batch[0], batch[1]

examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb

Lines changed: 12 additions & 12 deletions
@@ -875,10 +875,10 @@
 "As suggested, we divide the objective by 2 while optimizing D, which slows down the rate at which D learns, relative to the rate of G. \n",
 "\n",
 "According to the paper:\n",
-"- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n",
-"- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n",
-"- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) \u2212 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n",
-"- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) \u2212 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$."
+"- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) − 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n",
+"- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) − 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n",
+"- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) − 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n",
+"- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) − 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$."
 ]
 },
 {
@@ -887,7 +887,7 @@
 "id": "JE8dLeEfIl_Z"
 },
 "source": [
-"We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
+"We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
 ]
 },
 {
@@ -896,7 +896,7 @@
 "id": "vrJls4p-FRcA"
 },
 "source": [
-"from torch.amp import GradScaler\n",
+"from torch.cuda.amp import GradScaler\n",
 "from torch.amp import autocast\n",
 "\n",
 "from ignite.utils import convert_tensor\n",
@@ -924,7 +924,7 @@
 "\n",
 "\n",
 "def compute_loss_discriminator(decision_real, decision_fake):\n",
-" # loss = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2 \n",
+" # loss = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2 \n",
 " loss = F.mse_loss(decision_fake, torch.zeros_like(decision_fake))\n",
 " loss += F.mse_loss(decision_real, torch.ones_like(decision_real))\n",
 " return loss\n",
@@ -954,10 +954,10 @@
 " decision_fake_b = discriminator_B(fake_b)\n",
 "\n",
 " # Compute loss for generators and update generators\n",
-" # loss_a2b = GAN loss: mean (D_b(G(x)) \u2212 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n",
+" # loss_a2b = GAN loss: mean (D_b(G(x)) − 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n",
 " loss_a2b = compute_loss_generator(decision_fake_b, real_a, rec_a, lambda_value) \n",
 "\n",
-" # loss_b2a = GAN loss: mean (D_a(F(x)) \u2212 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n",
+" # loss_b2a = GAN loss: mean (D_a(F(x)) − 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n",
 " loss_b2a = compute_loss_generator(decision_fake_a, real_b, rec_b, lambda_value)\n",
 "\n",
 " # total generators loss:\n",
@@ -977,10 +977,10 @@
 " decision_real_a, decision_fake_a = discriminator_forward_pass(discriminator_A, real_a, fake_a.detach(), fake_a_buffer) \n",
 " decision_real_b, decision_fake_b = discriminator_forward_pass(discriminator_B, real_b, fake_b.detach(), fake_b_buffer) \n",
 " # Compute loss for discriminators and update discriminators\n",
-" # loss_a = mean (D_a(y) \u2212 1)^2 + mean D_a(F(x))^2\n",
+" # loss_a = mean (D_a(y) − 1)^2 + mean D_a(F(x))^2\n",
 " loss_a = compute_loss_discriminator(decision_real_a, decision_fake_a)\n",
 "\n",
-" # loss_b = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2\n",
+" # loss_b = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2\n",
 " loss_b = compute_loss_discriminator(decision_real_b, decision_fake_b)\n",
 " \n",
 " # total discriminators loss:\n",
@@ -1578,4 +1578,4 @@
 "outputs": []
 }
 ]
-}
+}
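
The notebook revert above goes back to importing GradScaler from torch.cuda.amp. As a side note (an illustrative sketch, not part of the commit, assuming torch.amp.GradScaler is only available from roughly torch 2.3 while torch.cuda.amp.GradScaler exists on older releases), code meant to run on both old and new PyTorch can fall back between the two locations:

    try:
        # newer, device-generic location (roughly torch>=2.3)
        from torch.amp import GradScaler
    except ImportError:
        # older CUDA-only location
        from torch.cuda.amp import GradScaler

    scaler = GradScaler(enabled=True)  # valid with either import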

examples/references/classification/imagenet/main.py

Lines changed: 2 additions & 2 deletions
@@ -8,7 +8,7 @@
 try:
     from torch.amp import autocast, GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1")

 import dataflow as data
 import utils
@@ -139,7 +139,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     model_output_transform = config.get("model_output_transform", lambda x: x)

     with_amp = config.get("with_amp", True)
-    scaler = GradScaler('cuda', enabled=with_amp)
+    scaler = GradScaler(enabled=with_amp)

     def training_step(engine, batch):
         model.train()

examples/references/segmentation/pascal_voc2012/main.py

Lines changed: 2 additions & 2 deletions
@@ -8,7 +8,7 @@
 try:
     from torch.amp import autocast, GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1")

 import dataflow as data
 import utils
@@ -186,7 +186,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     model_output_transform = config.get("model_output_transform", lambda x: x)

     with_amp = config.get("with_amp", True)
-    scaler = GradScaler('cuda', enabled=with_amp)
+    scaler = GradScaler(enabled=with_amp)

     def forward_pass(batch):
         model.train()

examples/transformers/main.py

Lines changed: 1 addition & 1 deletion
@@ -297,7 +297,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con
     # - Two progress bars on epochs and optionally on iterations

     with_amp = config["with_amp"]
-    scaler = GradScaler('cuda', enabled=with_amp)
+    scaler = GradScaler(enabled=with_amp)

     def train_step(engine, batch):
         input_batch = batch[0]

ignite/engine/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -187,7 +187,7 @@ def supervised_training_step_amp(
     try:
         from torch.amp import autocast, GradScaler
     except ImportError:
-        raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")
+        raise ImportError("Please install torch>=2.3.1 to use amp_mode='amp'.")

     if gradient_accumulation_steps <= 0:
         raise ValueError(
@@ -412,8 +412,8 @@ def _check_arg(
                 try:
                     from torch.amp import GradScaler
                 except ImportError:
-                    raise ImportError("Please install torch>=1.6.0 to use scaler argument.")
-                scaler = GradScaler('cuda', enabled=True)
+                    raise ImportError("Please install torch>=2.3.1 to use scaler argument.")
+                scaler = GradScaler(enabled=True)

     if on_tpu:
         return "tpu", None
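
Both hunks above raise the version quoted in the ImportError messages to torch>=2.3.1, matching the torch.amp imports they guard. Purely as an illustration (not part of the commit, and hypothetical rather than how the library actually checks), the same requirement could be verified explicitly against the installed build:

    # hypothetical explicit check; the library itself only relies on the ImportError guard
    from packaging.version import Version
    import torch

    if Version(torch.__version__.split("+")[0]) < Version("2.3.1"):
        raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1")

    from torch.amp import autocast, GradScaler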
