|
18 | 18 | import yaml |
19 | 19 | import json |
20 | 20 | import logging |
21 | | -import shortuuid |
22 | 21 | import copy |
23 | 22 | import torch |
24 | 23 | import argparse |
@@ -278,13 +277,13 @@ def consume_deepy_args(cls): |
278 | 277 | "--wandb_group", |
279 | 278 | type=str, |
280 | 279 | default=None, |
281 | | - help='Weights and Biases group name - used to group together "runs".', |
| 280 | + help='Weights & Biases group name - used to group together "runs".', |
282 | 281 | ) |
283 | 282 | group.add_argument( |
284 | 283 | "--wandb_team", |
285 | 284 | type=str, |
286 | 285 | default=None, |
287 | | - help="Team name for Weights and Biases.", |
| 286 | + help="Weights & Biases team name.", |
288 | 287 | ) |
289 | 288 |
|
290 | 289 | group = parser.add_argument_group(title="Eval args") |
@@ -372,11 +371,22 @@ def consume_deepy_args(cls): |
372 | 371 | paths_to_yml_files=conf_files, overwrite_values=overwrite_values |
373 | 372 | ) |
374 | 373 |
|
375 | | - if neox_args.wandb_group is not None: |
376 | | - # concat the wandb group name with a uid to make sure it's unique |
377 | | - import wandb |
| 374 | + if neox_args.use_wandb: |
| 375 | + try: |
| 376 | + import wandb |
| 377 | + |
| 378 | + # Check if the W&B group name is configured |
| 379 | + if neox_args.wandb_group is None: |
| 380 | + # Set a randomized string as group name if no group name is provided |
| 381 | + neox_args.wandb_group = wandb.sdk.lib.runid.generate_id() |
| 382 | + else: |
| 383 | + # Concatenate the W&B group name with a randomized string to ensure uniqueness. |
| 384 | + neox_args.wandb_group += "_" + wandb.sdk.lib.runid.generate_id() |
| 385 | + except ModuleNotFoundError as e: |
| 386 | + if e.name == "wandb": |
| 387 | + e.msg += "\nWeights & Biases monitoring was requested but `wandb` was not found. Install `wandb` to use Weights & Biases, or set the `use_wandb` configuration option to a boolean false to disable Weights & Biases logging." |
| 388 | + raise e |
378 | 389 |
|
379 | | - neox_args.wandb_group += "_" + wandb.util.generate_id() |
380 | 390 | neox_args.print() |
381 | 391 |
|
382 | 392 | return neox_args |
@@ -736,12 +746,6 @@ def calculate_derived(self): |
736 | 746 | Derives additional configuration values necessary for training from the current config |
737 | 747 | """ |
738 | 748 |
|
739 | | - # wandb |
740 | | - # sets a unique wandb group |
741 | | - if self.wandb_group is None: |
742 | | - # if none is defined a uuid is set for the run |
743 | | - self.wandb_group = shortuuid.uuid() |
744 | | - |
745 | 749 | # number of gpus |
746 | 750 | # Get number of GPUs param or hostfile to determine train_batch_size |
747 | 751 | global_num_gpus = getattr(self, "global_num_gpus", None) |
|
0 commit comments