2 changes: 1 addition & 1 deletion README.md
@@ -397,7 +397,7 @@ Few pointers to get you started:
- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb) [Basic example of LR finder on
MNIST](https://github.com/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb)
- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) [Benchmark mixed precision training on Cifar100:
-torch.cuda.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb)
+torch.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb)
- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb) [MNIST training on a single
TPU](https://github.com/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb)
- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1E9zJrptnLJ_PKhmaP5Vhb6DTVRvyrKHx) [CIFAR10 Training on multiple TPUs](https://github.com/pytorch/ignite/tree/master/examples/cifar10)
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -354,7 +354,7 @@ def run(self):
("py:class", "torch.optim.optimizer.Optimizer"),
("py:class", "torch.utils.data.dataset.Dataset"),
("py:class", "torch.utils.data.sampler.BatchSampler"),
("py:class", "torch.cuda.amp.grad_scaler.GradScaler"),
("py:class", "torch.amp.grad_scaler.GradScaler"),
("py:class", "torch.optim.lr_scheduler._LRScheduler"),
("py:class", "torch.optim.lr_scheduler.LRScheduler"),
("py:class", "torch.utils.data.dataloader.DataLoader"),
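For context, these `("py:class", ...)` tuples are Sphinx nitpick suppressions; a minimal sketch of how the updated entry would sit in `docs/source/conf.py`, assuming the standard `nitpick_ignore` list (the surrounding entries are taken from the diff above):

```python
# docs/source/conf.py (sketch): cross-reference targets Sphinx should not warn about.
# The GradScaler entry now points at torch.amp instead of torch.cuda.amp.
nitpick_ignore = [
    ("py:class", "torch.optim.optimizer.Optimizer"),
    ("py:class", "torch.utils.data.dataset.Dataset"),
    ("py:class", "torch.utils.data.sampler.BatchSampler"),
    ("py:class", "torch.amp.grad_scaler.GradScaler"),  # was torch.cuda.amp.grad_scaler.GradScaler
    ("py:class", "torch.optim.lr_scheduler.LRScheduler"),
    ("py:class", "torch.utils.data.dataloader.DataLoader"),
]
```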
3 changes: 1 addition & 2 deletions examples/cifar10/main.py
@@ -7,8 +7,7 @@
import torch.nn as nn
import torch.optim as optim
import utils
-from torch.amp import autocast
-from torch.cuda.amp import GradScaler
+from torch.amp import autocast, GradScaler

import ignite
import ignite.distributed as idist
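The same two-line import collapses into one in each of the example scripts that follow. A minimal sketch of the new-style construction, assuming PyTorch >= 2.3 (where `GradScaler` is exposed under `torch.amp`); the variable names are illustrative:

```python
import torch
from torch.amp import GradScaler, autocast

# The device type is now an explicit argument instead of being implied
# by the torch.cuda.amp namespace.
use_cuda = torch.cuda.is_available()
scaler = GradScaler("cuda", enabled=use_cuda)
amp_ctx = autocast("cuda", dtype=torch.float16, enabled=use_cuda)
```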
3 changes: 1 addition & 2 deletions examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
@@ -1,7 +1,6 @@
import fire
import torch
-from torch.amp import autocast
-from torch.cuda.amp import GradScaler
+from torch.amp import autocast, GradScaler
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torchvision.models import wide_resnet50_2
3 changes: 1 addition & 2 deletions examples/cifar10_qat/main.py
@@ -6,8 +6,7 @@
import torch.nn as nn
import torch.optim as optim
import utils
-from torch.amp import autocast
-from torch.cuda.amp import GradScaler
+from torch.amp import autocast, GradScaler

import ignite
import ignite.distributed as idist
23 changes: 11 additions & 12 deletions examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
@@ -875,10 +875,10 @@
"As suggested, we divide the objective by 2 while optimizing D, which slows down the rate at which D learns, relative to the rate of G. \n",
"\n",
"According to the paper:\n",
"- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n",
"- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n",
"- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n",
"- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$."
"- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n",
"- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n",
"- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) \u2212 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n",
"- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) \u2212 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$."
]
},
{
@@ -887,7 +887,7 @@
"id": "JE8dLeEfIl_Z"
},
"source": [
"We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
"We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
]
},
{
@@ -896,8 +896,7 @@
"id": "vrJls4p-FRcA"
},
"source": [
"from torch.cuda.amp import GradScaler\n",
"from torch.amp import autocast\n",
"from torch.amp import autocast, GradScaler\n",
"\n",
"from ignite.utils import convert_tensor\n",
"import torch.nn.functional as F\n",
@@ -924,7 +923,7 @@
"\n",
"\n",
"def compute_loss_discriminator(decision_real, decision_fake):\n",
" # loss = mean (D_b(y) 1)^2 + mean D_b(G(x))^2 \n",
" # loss = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2 \n",
" loss = F.mse_loss(decision_fake, torch.zeros_like(decision_fake))\n",
" loss += F.mse_loss(decision_real, torch.ones_like(decision_real))\n",
" return loss\n",
@@ -954,10 +953,10 @@
" decision_fake_b = discriminator_B(fake_b)\n",
"\n",
" # Compute loss for generators and update generators\n",
" # loss_a2b = GAN loss: mean (D_b(G(x)) 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n",
" # loss_a2b = GAN loss: mean (D_b(G(x)) \u2212 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n",
" loss_a2b = compute_loss_generator(decision_fake_b, real_a, rec_a, lambda_value) \n",
"\n",
" # loss_b2a = GAN loss: mean (D_a(F(x)) 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n",
" # loss_b2a = GAN loss: mean (D_a(F(x)) \u2212 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n",
" loss_b2a = compute_loss_generator(decision_fake_a, real_b, rec_b, lambda_value)\n",
"\n",
" # total generators loss:\n",
@@ -977,10 +976,10 @@
" decision_real_a, decision_fake_a = discriminator_forward_pass(discriminator_A, real_a, fake_a.detach(), fake_a_buffer) \n",
" decision_real_b, decision_fake_b = discriminator_forward_pass(discriminator_B, real_b, fake_b.detach(), fake_b_buffer) \n",
" # Compute loss for discriminators and update discriminators\n",
" # loss_a = mean (D_a(y) 1)^2 + mean D_a(F(x))^2\n",
" # loss_a = mean (D_a(y) \u2212 1)^2 + mean D_a(F(x))^2\n",
" loss_a = compute_loss_discriminator(decision_real_a, decision_fake_a)\n",
"\n",
" # loss_b = mean (D_b(y) 1)^2 + mean D_b(G(x))^2\n",
" # loss_b = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2\n",
" loss_b = compute_loss_discriminator(decision_real_b, decision_fake_b)\n",
" \n",
" # total discriminators loss:\n",
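For reference, the "typical mixed precision training" pattern the notebook cell links to, sketched against the new `torch.amp` entry points; the model, optimizer and loss below are placeholders rather than the notebook's CycleGAN objects:

```python
import torch
from torch.amp import GradScaler, autocast

model = torch.nn.Linear(10, 2).cuda()                     # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # placeholder optimizer
loss_fn = torch.nn.MSELoss()
scaler = GradScaler("cuda")

def training_step(x, y):
    optimizer.zero_grad()
    # Run the forward pass in mixed precision.
    with autocast("cuda"):
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
    # Scale the loss, backpropagate, then step the optimizer and update the scaler.
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    return loss.item()
```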
3 changes: 1 addition & 2 deletions examples/references/classification/imagenet/main.py
@@ -6,8 +6,7 @@
import torch

try:
-from torch.amp import autocast
-from torch.cuda.amp import GradScaler
+from torch.amp import autocast, GradScaler
except ImportError:
raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")

3 changes: 1 addition & 2 deletions examples/references/segmentation/pascal_voc2012/main.py
@@ -6,8 +6,7 @@
import torch

try:
-from torch.amp import autocast
-from torch.cuda.amp import GradScaler
+from torch.amp import autocast, GradScaler
except ImportError:
raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")

3 changes: 1 addition & 2 deletions examples/transformers/main.py
@@ -7,8 +7,7 @@
import torch.nn as nn
import torch.optim as optim
import utils
-from torch.amp import autocast
-from torch.cuda.amp import GradScaler
+from torch.amp import autocast, GradScaler

import ignite
import ignite.distributed as idist
18 changes: 9 additions & 9 deletions ignite/engine/__init__.py
@@ -133,11 +133,11 @@ def supervised_training_step_amp(
prepare_batch: Callable = _prepare_batch,
model_transform: Callable[[Any], Any] = lambda output: output,
output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(),
scaler: Optional["torch.cuda.amp.GradScaler"] = None,
scaler: Optional["torch.amp.GradScaler"] = None,
gradient_accumulation_steps: int = 1,
model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x),
) -> Callable:
"""Factory function for supervised training using ``torch.cuda.amp``.
"""Factory function for supervised training using ``torch.amp``.

Args:
model: the model to train.
@@ -170,7 +170,7 @@ def supervised_training_step_amp(
model = ...
optimizer = ...
loss_fn = ...
-scaler = torch.cuda.amp.GradScaler(2**10)
+scaler = torch.amp.GradScaler('cuda', 2**10)

update_fn = supervised_training_step_amp(model, optimizer, loss_fn, 'cuda', scaler=scaler)
trainer = Engine(update_fn)
@@ -185,7 +185,7 @@ def supervised_training_step_amp(
"""

try:
-from torch.amp import autocast
+from torch.amp import autocast, GradScaler
except ImportError:
raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")

@@ -393,8 +393,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to


def _check_arg(
-on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.cuda.amp.GradScaler"]]
-) -> Tuple[Optional[str], Optional["torch.cuda.amp.GradScaler"]]:
+on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.amp.GradScaler"]]
+) -> Tuple[Optional[str], Optional["torch.amp.GradScaler"]]:
"""Checking tpu, mps, amp and GradScaler instance combinations."""
if on_mps and amp_mode:
raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.")
@@ -410,7 +410,7 @@ def _check_arg(
raise ValueError(f"scaler argument is {scaler}, but amp_mode is {amp_mode}. Consider using amp_mode='amp'.")
elif amp_mode == "amp" and isinstance(scaler, bool):
try:
-from torch.cuda.amp import GradScaler
+from torch.amp import GradScaler
except ImportError:
raise ImportError("Please install torch>=1.6.0 to use scaler argument.")
scaler = GradScaler(enabled=True)
@@ -434,7 +434,7 @@ def create_supervised_trainer(
output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(),
deterministic: bool = False,
amp_mode: Optional[str] = None,
scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
scaler: Union[bool, "torch.amp.GradScaler"] = False,
gradient_accumulation_steps: int = 1,
model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x),
) -> Engine:
@@ -459,7 +459,7 @@ def create_supervised_trainer(
:class:`~ignite.engine.deterministic.DeterministicEngine`, otherwise :class:`~ignite.engine.engine.Engine`
(default: False).
amp_mode: can be ``amp`` or ``apex``, model and optimizer will be casted to float16 using
-`torch.cuda.amp <https://pytorch.org/docs/stable/amp.html>`_ for ``amp`` and
+`torch.amp <https://pytorch.org/docs/stable/amp.html>`_ for ``amp`` and
using `apex <https://nvidia.github.io/apex>`_ for ``apex``. (default: None)
scaler: GradScaler instance for gradient scaling if `torch>=1.6.0`
and ``amp_mode`` is ``amp``. If ``amp_mode`` is ``apex``, this argument will be ignored.
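Putting the updated arguments together, a hedged usage sketch of `create_supervised_trainer` with the `torch.amp`-backed scaler; the model, optimizer, loss and device handling are placeholders, while `amp_mode` and `scaler` are the arguments documented in the diff above:

```python
import torch
from ignite.engine import create_supervised_trainer

device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(10, 2).to(device)                 # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # placeholder optimizer
loss_fn = torch.nn.MSELoss()

# With amp_mode="amp" and scaler=True, _check_arg builds a torch.amp.GradScaler
# internally and the trainer stores it on trainer.state.scaler.
trainer = create_supervised_trainer(
    model,
    optimizer,
    loss_fn,
    device=device,
    amp_mode="amp" if device == "cuda" else None,
    scaler=(device == "cuda"),
)
```

Calling `trainer.run(data_loader, max_epochs=...)` would then drive the AMP-enabled update step.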
14 changes: 7 additions & 7 deletions tests/ignite/engine/test_create_supervised.py
@@ -48,7 +48,7 @@ def _default_create_supervised_trainer(
trainer_device: Optional[str] = None,
trace: bool = False,
amp_mode: str = None,
scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
scaler: Union[bool, "torch.amp.GradScaler"] = False,
with_model_transform: bool = False,
with_model_fn: bool = False,
):
@@ -104,7 +104,7 @@ def _test_create_supervised_trainer(
trainer_device: Optional[str] = None,
trace: bool = False,
amp_mode: str = None,
scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
scaler: Union[bool, "torch.amp.GradScaler"] = False,
with_model_transform: bool = False,
with_model_fn: bool = False,
):
@@ -170,18 +170,18 @@ def _():
@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
def test_create_supervised_training_scalar_assignment():
with mock.patch("ignite.engine._check_arg") as check_arg_mock:
-check_arg_mock.return_value = None, torch.cuda.amp.GradScaler(enabled=False)
+check_arg_mock.return_value = None, torch.amp.GradScaler(enabled=False)
trainer, _ = _default_create_supervised_trainer(model_device="cpu", trainer_device="cpu", scaler=True)
assert hasattr(trainer.state, "scaler")
-assert isinstance(trainer.state.scaler, torch.cuda.amp.GradScaler)
+assert isinstance(trainer.state.scaler, torch.amp.GradScaler)


def _test_create_mocked_supervised_trainer(
model_device: Optional[str] = None,
trainer_device: Optional[str] = None,
trace: bool = False,
amp_mode: str = None,
scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
scaler: Union[bool, "torch.amp.GradScaler"] = False,
):
with mock.patch("ignite.engine.supervised_training_step_amp") as training_step_amp_mock:
with mock.patch("ignite.engine.supervised_training_step_apex") as training_step_apex_mock:
@@ -462,7 +462,7 @@ def test_create_supervised_trainer_amp_error(mock_torch_cuda_amp_module):

@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
def test_create_supervised_trainer_scaler_not_amp():
-scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())
+scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available())

with pytest.raises(ValueError, match=f"scaler argument is {scaler}, but amp_mode is None."):
_test_create_supervised_trainer(amp_mode=None, scaler=scaler)
@@ -540,7 +540,7 @@ def test_create_supervised_trainer_on_cuda_amp_scaler():
_test_create_mocked_supervised_trainer(
model_device=model_device, trainer_device=trainer_device, amp_mode="amp", scaler=True
)
-scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())
+scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available())
_test_create_supervised_trainer(
gradient_accumulation_steps=1,
model_device=model_device,