Description
My code:
import os

os.system(f'python dataset_tool.py --source={source_data} '
          f'--dest={train_data_2} '
          f'--resolution=1024x1024 '
          f'--max-images={img_num}')
!python train.py --outdir=/kaggle/working/training_run_512 --cfg=stylegan3-r --data=/kaggle/working/train_data_512.zip \
    --gpus=2 --batch=16 --gamma=6.6 --mirror=1 --kimg=5000 --snap=5 \
    --resume=https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-ffhqu-1024x1024.pkl
Error message:
Training options:
{
  "G_kwargs": {
    "class_name": "training.networks_stylegan3.Generator",
    "z_dim": 512,
    "w_dim": 512,
    "mapping_kwargs": {
      "num_layers": 2
    },
    "channel_base": 65536,
    "channel_max": 1024,
    "magnitude_ema_beta": 0.9994456359721023,
    "conv_kernel": 1,
    "use_radial_filters": true
  },
  "D_kwargs": {
    "class_name": "training.networks_stylegan2.Discriminator",
    "block_kwargs": {
      "freeze_layers": 0
    },
    "mapping_kwargs": {},
    "epilogue_kwargs": {
      "mbstd_group_size": 4
    },
    "channel_base": 32768,
    "channel_max": 512
  },
  "G_opt_kwargs": {
    "class_name": "torch.optim.Adam",
    "betas": [
      0,
      0.99
    ],
    "eps": 1e-08,
    "lr": 0.0025
  },
  "D_opt_kwargs": {
    "class_name": "torch.optim.Adam",
    "betas": [
      0,
      0.99
    ],
    "eps": 1e-08,
    "lr": 0.002
  },
  "loss_kwargs": {
    "class_name": "training.loss.StyleGAN2Loss",
    "r1_gamma": 6.6,
    "blur_init_sigma": 0,
    "blur_fade_kimg": 100.0
  },
  "data_loader_kwargs": {
    "pin_memory": true,
    "prefetch_factor": 2,
    "num_workers": 3
  },
  "training_set_kwargs": {
    "class_name": "training.dataset.ImageFolderDataset",
    "path": "/kaggle/working/train_data_512.zip",
    "use_labels": false,
    "max_size": 2000,
    "xflip": true,
    "resolution": 1024,
    "random_seed": 0
  },
  "num_gpus": 2,
  "batch_size": 16,
  "batch_gpu": 8,
  "metrics": [
    "fid50k_full"
  ],
  "total_kimg": 5000,
  "kimg_per_tick": 4,
  "image_snapshot_ticks": 5,
  "network_snapshot_ticks": 5,
  "random_seed": 0,
  "ema_kimg": 5.0,
  "augment_kwargs": {
    "class_name": "training.augment.AugmentPipe",
    "xflip": 1,
    "rotate90": 1,
    "xint": 1,
    "scale": 1,
    "rotate": 1,
    "aniso": 1,
    "xfrac": 1,
    "brightness": 1,
    "contrast": 1,
    "lumaflip": 1,
    "hue": 1,
    "saturation": 1
  },
  "ada_target": 0.6,
  "resume_pkl": "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-ffhqu-1024x1024.pkl",
  "ada_kimg": 100,
  "ema_rampup": null,
  "run_dir": "/kaggle/working/training_run_512/00000-stylegan3-r-train_data_512-gpus2-batch16-gamma6.6"
}
Output directory: /kaggle/working/training_run_512/00000-stylegan3-r-train_data_512-gpus2-batch16-gamma6.6
Number of GPUs: 2
Batch size: 16 images
Training duration: 5000 kimg
Dataset path: /kaggle/working/train_data_512.zip
Dataset size: 2000 images
Dataset resolution: 1024
Dataset labels: False
Dataset x-flips: True
Creating output directory...
Launching processes...
Loading training set...
/usr/local/lib/python3.11/dist-packages/torch/utils/data/sampler.py:77: UserWarning: data_source argument is not used and will be removed in 2.2.0.You may still have custom implementation that utilizes it.
warnings.warn(
/usr/local/lib/python3.11/dist-packages/torch/utils/data/sampler.py:77: UserWarning: data_source argument is not used and will be removed in 2.2.0.You may still have custom implementation that utilizes it.
warnings.warn(
Num images: 4000
Image shape: [3, 1024, 1024]
Label shape: [0]
Constructing networks...
Resuming from "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-ffhqu-1024x1024.pkl"
Downloading https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-ffhqu-1024x1024.pkl ... done
Setting up PyTorch plugin "bias_act_plugin"... /usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
Done.
Setting up PyTorch plugin "filtered_lrelu_plugin"... /usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
Done.
Generator Parameters Buffers Output shape Datatype
mapping.fc0 262656 - [8, 512] float32
mapping.fc1 262656 - [8, 512] float32
mapping - 512 [8, 16, 512] float32
synthesis.input.affine 2052 - [8, 4] float32
synthesis.input 1048576 3081 [8, 1024, 36, 36] float32
synthesis.L0_36_1024.affine 525312 - [8, 1024] float32
synthesis.L0_36_1024 1049600 157 [8, 1024, 36, 36] float32
synthesis.L1_36_1024.affine 525312 - [8, 1024] float32
synthesis.L1_36_1024 1049600 157 [8, 1024, 36, 36] float32
synthesis.L2_52_1024.affine 525312 - [8, 1024] float32
synthesis.L2_52_1024 1049600 169 [8, 1024, 52, 52] float32
synthesis.L3_52_1024.affine 525312 - [8, 1024] float32
synthesis.L3_52_1024 1049600 157 [8, 1024, 52, 52] float32
synthesis.L4_84_1024.affine 525312 - [8, 1024] float32
synthesis.L4_84_1024 1049600 169 [8, 1024, 84, 84] float32
synthesis.L5_148_1024.affine 525312 - [8, 1024] float32
synthesis.L5_148_1024 1049600 169 [8, 1024, 148, 148] float16
synthesis.L6_148_1024.affine 525312 - [8, 1024] float32
synthesis.L6_148_1024 1049600 157 [8, 1024, 148, 148] float16
synthesis.L7_276_645.affine 525312 - [8, 1024] float32
synthesis.L7_276_645 661125 169 [8, 645, 276, 276] float16
synthesis.L8_276_406.affine 330885 - [8, 645] float32
synthesis.L8_276_406 262276 157 [8, 406, 276, 276] float16
synthesis.L9_532_256.affine 208278 - [8, 406] float32
synthesis.L9_532_256 104192 169 [8, 256, 532, 532] float16
synthesis.L10_1044_161.affine 131328 - [8, 256] float32
synthesis.L10_1044_161 41377 169 [8, 161, 1044, 1044] float16
synthesis.L11_1044_102.affine 82593 - [8, 161] float32
synthesis.L11_1044_102 16524 157 [8, 102, 1044, 1044] float16
synthesis.L12_1044_64.affine 52326 - [8, 102] float32
synthesis.L12_1044_64 6592 25 [8, 64, 1044, 1044] float16
synthesis.L13_1024_64.affine 32832 - [8, 64] float32
synthesis.L13_1024_64 4160 25 [8, 64, 1024, 1024] float16
synthesis.L14_1024_3.affine 32832 - [8, 64] float32
synthesis.L14_1024_3 195 1 [8, 3, 1024, 1024] float16
synthesis - - [8, 3, 1024, 1024] float32
Total 15093151 5600 - -
Setting up PyTorch plugin "upfirdn2d_plugin"... /usr/local/lib/python3.11/dist-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
Done.
Discriminator Parameters Buffers Output shape Datatype
b1024.fromrgb 128 16 [8, 32, 1024, 1024] float16
b1024.skip 2048 16 [8, 64, 512, 512] float16
b1024.conv0 9248 16 [8, 32, 1024, 1024] float16
b1024.conv1 18496 16 [8, 64, 512, 512] float16
b1024 - 16 [8, 64, 512, 512] float16
b512.skip 8192 16 [8, 128, 256, 256] float16
b512.conv0 36928 16 [8, 64, 512, 512] float16
b512.conv1 73856 16 [8, 128, 256, 256] float16
b512 - 16 [8, 128, 256, 256] float16
b256.skip 32768 16 [8, 256, 128, 128] float16
b256.conv0 147584 16 [8, 128, 256, 256] float16
b256.conv1 295168 16 [8, 256, 128, 128] float16
b256 - 16 [8, 256, 128, 128] float16
b128.skip 131072 16 [8, 512, 64, 64] float16
b128.conv0 590080 16 [8, 256, 128, 128] float16
b128.conv1 1180160 16 [8, 512, 64, 64] float16
b128 - 16 [8, 512, 64, 64] float16
b64.skip 262144 16 [8, 512, 32, 32] float32
b64.conv0 2359808 16 [8, 512, 64, 64] float32
b64.conv1 2359808 16 [8, 512, 32, 32] float32
b64 - 16 [8, 512, 32, 32] float32
b32.skip 262144 16 [8, 512, 16, 16] float32
b32.conv0 2359808 16 [8, 512, 32, 32] float32
b32.conv1 2359808 16 [8, 512, 16, 16] float32
b32 - 16 [8, 512, 16, 16] float32
b16.skip 262144 16 [8, 512, 8, 8] float32
b16.conv0 2359808 16 [8, 512, 16, 16] float32
b16.conv1 2359808 16 [8, 512, 8, 8] float32
b16 - 16 [8, 512, 8, 8] float32
b8.skip 262144 16 [8, 512, 4, 4] float32
b8.conv0 2359808 16 [8, 512, 8, 8] float32
b8.conv1 2359808 16 [8, 512, 4, 4] float32
b8 - 16 [8, 512, 4, 4] float32
b4.mbstd - - [8, 513, 4, 4] float32
b4.conv 2364416 16 [8, 512, 4, 4] float32
b4.fc 4194816 - [8, 512] float32
b4.out 513 - [8, 1] float32
Total 29012513 544 - -
Setting up augmentation...
Distributing across 2 GPUs...
Setting up training phases...
[rank0]:[W1111 15:17:42.853614781 ProcessGroupNCCL.cpp:1496] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
W1111 15:17:43.083000 1457 torch/multiprocessing/spawn.py:169] Terminating process 1464 via signal SIGTERM
Traceback (most recent call last):
File "/kaggle/working/stylegan3/train.py", line 286, in
main() # pylint: disable=no-value-for-parameter
^^^^^^
File "/usr/local/lib/python3.11/dist-packages/click/core.py", line 1462, in call
return self.main(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/click/core.py", line 1383, in main
rv = self.invoke(ctx)
^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/click/core.py", line 1246, in invoke
return ctx.invoke(self.callback, **ctx.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/click/core.py", line 814, in invoke
return callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/kaggle/working/stylegan3/train.py", line 281, in main
launch_training(c=c, desc=desc, outdir=opts.outdir, dry_run=opts.dry_run)
File "/kaggle/working/stylegan3/train.py", line 98, in launch_training
torch.multiprocessing.spawn(fn=subprocess_fn, args=(c, temp_dir), nprocs=c.num_gpus)
File "/usr/local/lib/python3.11/dist-packages/torch/multiprocessing/spawn.py", line 340, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method="spawn")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/torch/multiprocessing/spawn.py", line 296, in start_processes
while not context.join():
^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/torch/multiprocessing/spawn.py", line 215, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 1 terminated with the following error:
Traceback (most recent call last):
File "/usr/local/lib/python3.11/dist-packages/torch/multiprocessing/spawn.py", line 90, in _wrap
fn(i, *args)
File "/kaggle/working/stylegan3/train.py", line 47, in subprocess_fn
training_loop.training_loop(rank=rank, **c)
File "/kaggle/working/stylegan3/training/training_loop.py", line 197, in training_loop
opt = dnnlib.util.construct_class_by_name(params=module.parameters(), **opt_kwargs) # subclass of torch.optim.Optimizer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/kaggle/working/stylegan3/dnnlib/util.py", line 303, in construct_class_by_name
return call_func_by_name(*args, func_name=class_name, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/kaggle/working/stylegan3/dnnlib/util.py", line 298, in call_func_by_name
return func_obj(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/torch/optim/adam.py", line 71, in init
raise ValueError("betas must be either both floats or both Tensors")
ValueError: betas must be either both floats or both Tensors
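
For anyone hitting the same error: recent PyTorch releases type-check Adam's betas at construction time (the adam.py line 71 check in the traceback), and the training options above show the mixed pair "betas": [0, 0.99], an int next to a float. A minimal workaround sketch follows; it assumes stylegan3's train.py still builds the optimizer kwargs as in the comment below (that exact line is an assumption about the upstream source). Making both betas floats satisfies the check:

import torch

# Assumption: stylegan3/train.py defines the optimizer kwargs roughly as
#   c.G_opt_kwargs = dnnlib.EasyDict(class_name='torch.optim.Adam', betas=[0, 0.99], eps=1e-8)
# Changing the integer beta to a float there (and in the matching D_opt_kwargs
# line) should avoid the ValueError:
#   betas=[0.0, 0.99]
#
# Standalone check that the type validation is what fails:
param = torch.nn.Parameter(torch.zeros(1))
try:
    torch.optim.Adam([param], betas=[0, 0.99])    # int + float: rejected by newer torch
except ValueError as e:
    print(e)  # betas must be either both floats or both Tensors
torch.optim.Adam([param], betas=[0.0, 0.99])      # both floats: constructs fine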