@@ -128,8 +128,26 @@ Click here to see `the code for this example
128128 """ HuggingFace Example from https://github.com/huggingface/accelerate/blob/main/examples/by_feature/checkpointing.py
129129
130130 Differences with the reference example:
131- - Uses the slurm job ID
132-
131+ - Supports checkpointing, using the Slurm job ID as part of the checkpoint directory
132+
133+ This is a simple, fully working example of using Accelerate,
134+ specifically showcasing the checkpointing capability.
135+ It builds on the `nlp_example.py` script.
136+ This example trains a Bert base model on GLUE MRPC
137+ in any of the following settings (with the same script):
138+ - single CPU or single GPU
139+ - multi GPUs (using PyTorch distributed mode)
140+ - (multi) TPUs
141+ - fp16 (mixed-precision) or fp32 (normal precision)
142+
143+ To help focus on the differences in the code, building `DataLoaders`
144+ was refactored into its own function.
145+ Additions relative to the base script can be found quickly by
146+ looking for the `# New Code #` tags.
147+
148+ To run it in each of these various modes, follow the instructions
149+ in the readme for examples:
150+ https://github.com/huggingface/accelerate/tree/main/examples
133151 """
134152
135153 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
@@ -176,28 +194,7 @@ Click here to see `the code for this example
176194 )
177195 from transformers.optimization import get_linear_schedule_with_warmup
178196 from accelerate.logging import get_logger
179- # #######################################################################
180- # This is a fully working simple example to use Accelerate,
181- # specifically showcasing the checkpointing capability,
182- # and builds off the `nlp_example.py` script.
183- #
184- # This example trains a Bert base model on GLUE MRPC
185- # in any of the following settings (with the same script):
186- # - single CPU or single GPU
187- # - multi GPUS (using PyTorch distributed mode)
188- # - (multi) TPUs
189- # - fp16 (mixed-precision) or fp32 (normal precision)
190- #
191- # To help focus on the differences in the code, building `DataLoaders`
192- # was refactored into its own function.
193- # New additions from the base script can be found quickly by
194- # looking for the # New Code # tags
195- #
196- # To run it in each of these various modes, follow the instructions
197- # in the readme for examples:
198- # https://github.com/huggingface/accelerate/tree/main/examples
199- #
200- # #######################################################################
197+
201198
202199 MAX_GPU_BATCH_SIZE = 16
203200 EVAL_BATCH_SIZE = 32
0 commit comments