|
18 | 18 | import yaml
|
19 | 19 | import json
|
20 | 20 | import logging
|
21 |
| -import shortuuid |
22 | 21 | import copy
|
23 | 22 | import torch
|
24 | 23 | import argparse
|
@@ -278,13 +277,13 @@ def consume_deepy_args(cls):
|
278 | 277 | "--wandb_group",
|
279 | 278 | type=str,
|
280 | 279 | default=None,
|
281 |
| - help='Weights and Biases group name - used to group together "runs".', |
| 280 | + help='Weights & Biases group name - used to group together "runs".', |
282 | 281 | )
|
283 | 282 | group.add_argument(
|
284 | 283 | "--wandb_team",
|
285 | 284 | type=str,
|
286 | 285 | default=None,
|
287 |
| - help="Team name for Weights and Biases.", |
| 286 | + help="Weights & Biases team name.", |
288 | 287 | )
|
289 | 288 |
|
290 | 289 | group = parser.add_argument_group(title="Eval args")
|
@@ -372,11 +371,22 @@ def consume_deepy_args(cls):
|
372 | 371 | paths_to_yml_files=conf_files, overwrite_values=overwrite_values
|
373 | 372 | )
|
374 | 373 |
|
375 |
| - if neox_args.wandb_group is not None: |
376 |
| - # concat the wandb group name with a uid to make sure it's unique |
377 |
| - import wandb |
| 374 | + if neox_args.use_wandb: |
| 375 | + try: |
| 376 | + import wandb |
| 377 | + |
| 378 | + # Check if the W&B group name is configured |
| 379 | + if neox_args.wandb_group is None: |
| 380 | + # Set a randomized string as group name if no group name is provided |
| 381 | + neox_args.wandb_group = wandb.sdk.lib.runid.generate_id() |
| 382 | + else: |
| 383 | + # Concatenate the W&B group name with a randomized string to ensure uniqueness. |
| 384 | + neox_args.wandb_group += "_" + wandb.sdk.lib.runid.generate_id() |
| 385 | + except ModuleNotFoundError as e: |
| 386 | + if e.name == "wandb": |
| 387 | + e.msg += "\nWeights & Biases monitoring was requested but `wandb` was not found. Install `wandb` to use Weights & Biases, or set the `use_wandb` configuration option to a boolean false to disable Weights & Biases logging." |
| 388 | + raise e |
378 | 389 |
|
379 |
| - neox_args.wandb_group += "_" + wandb.util.generate_id() |
380 | 390 | neox_args.print()
|
381 | 391 |
|
382 | 392 | return neox_args
|
@@ -736,12 +746,6 @@ def calculate_derived(self):
|
736 | 746 | Derives additional configuration values necessary for training from the current config
|
737 | 747 | """
|
738 | 748 |
|
739 |
| - # wandb |
740 |
| - # sets a unique wandb group |
741 |
| - if self.wandb_group is None: |
742 |
| - # if none is defined a uuid is set for the run |
743 |
| - self.wandb_group = shortuuid.uuid() |
744 |
| - |
745 | 749 | # number of gpus
|
746 | 750 | # Get number of GPUs param or hostfile to determine train_batch_size
|
747 | 751 | global_num_gpus = getattr(self, "global_num_gpus", None)
|
|
0 commit comments