# generalized_mean_defaults.yaml
trainer:
  accelerator: cpu
  max_epochs: 1000
  min_epochs: 0
  max_time: 00:00:03:00 # DD:HH:MM:SS
  precision: 32
  num_sanity_val_steps: 0
  logger:
    class_path: pytorch_lightning.loggers.WandbLogger
    init_args:
      offline: true # set to true to avoid uploading during testing
      log_model: false # set to true to save the model at the end
      name: null # set a name or have it generated automatically
      project: symmetry_examples
      group: null # can group related runs
      tags:
        - basic_example
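  # To upload runs to W&B for a real training run, the logger flags can presumably be flipped from
  # the CLI in the same dotted style used for the callback override near the end of this file, e.g.
  # python generalized_mean.py --trainer.logger.offline=false --trainer.logger.log_model=true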
  callbacks:
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
      init_args:
        logging_interval: step
        log_momentum: false
    # - class_path: pytorch_lightning.callbacks.ModelCheckpoint
    #   init_args:
    #     filename: best
    #     monitor: _loss
    #     verbose: false
    #     save_last: true
    #     save_top_k: 1
    #     save_weights_only: true
    #     mode: min
    #     auto_insert_metric_name: true
    - class_path: pytorch_lightning.callbacks.EarlyStopping
      init_args:
        monitor: train_loss # val_loss or val_rel_error is normally a better choice, but we are looking at fitting/overfitting here.
        min_delta: 0.0
        patience: 1000000
        mode: min
        check_finite: true
        divergence_threshold: 1e5 # stops if the monitored value grows larger than this
        stopping_threshold: 1.0e-6 # if using val_rel_error, a value on the order of 0.05 or so is appropriate
        # verbose: true
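  # For example, to stop on a validation metric instead (assuming the model logs val_loss, as the
  # comment on monitor above suggests), the dotted CLI style used at the end of this file should work:
  # python generalized_mean.py --trainer.callbacks.monitor=val_loss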
optimizer:
  class_path: torch.optim.Adam
  init_args:
    lr: 0.005 # a low learning rate seems to converge relatively quickly here
# Schedulers are either non-binding or not helpful for this problem.
# lr_scheduler:
#   class_path: torch.optim.lr_scheduler.StepLR
#   init_args:
#     step_size: 1000 # number of epochs
#     gamma: 0.8
# lr_scheduler:
#   class_path: pytorch_lightning.cli.ReduceLROnPlateau # torch.optim.lr_scheduler.ReduceLROnPlateau
#   init_args:
#     monitor: val_rel_error
#     factor: 0.99
#     patience: 10
model:
  # Model parameters
  a_min: 1.0 # if the [a_min, a_max] interval is too tight and std is too large, some draws may be undefined due to negative values
  a_max: 3.0
  std: 0.3 # should not be too small, or else the problem becomes too easy
  X_distribution: normal # uniform or normal
  N: 256 # 32 # dimensionality of the state
  p: 1.5
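  # For reference (assumed target, not stated in this file): the generalized (power) mean of a draw
  # X = (X_1, ..., X_N) with exponent p is M_p(X) = ((1/N) * sum_i X_i^p)^(1/p), which p and N above
  # parameterize.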
  # Settings for output
  verbose: false
  hpo_objective_name: test_loss
  always_log_hpo_objective: false
  print_metrics: false
  save_metrics: false
  save_test_results: false
  test_seed: 0 # set to 0 to use default RNG seed
  train_data_seed: 0 # set to 0 to use default RNG seed
  # Algorithm settings
  num_train_points: 10
  num_val_points: 50
  num_test_points: 200
  batch_size: 32
  shuffle_training: true
  test_loss_success_threshold: 0.0 # 1e-4
  ## Invariance with NN
  ml_model:
    class_path: econ_layers.layers.DeepSet
    init_args:
      n_in: 1
      n_out: 1
      L: 2
      phi_layers: 2
      phi_hidden_dim: 128
      phi_hidden_bias: true
      phi_last_bias: true
      phi_activator:
        class_path: torch.nn.ReLU
      rho_layers: 2
      rho_hidden_dim: 128
      rho_hidden_bias: false
      rho_last_bias: true
      rho_activator:
        class_path: torch.nn.ReLU
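  # Note (an assumption about econ_layers.layers.DeepSet, not verified here): a DeepSet-style network
  # typically computes rho(sum_i phi(X_i)), which is invariant to permutations of the X_i, the same
  # symmetry the generalized mean has; the phi_* and rho_* settings above configure those two
  # networks, and n_in is 1 because phi acts on each element rather than on the full N-vector.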
  ## No invariants
  # python generalized_mean.py --model.ml_model.class_path=econ_layers.layers.FlexibleSequential --model.N=32 --model.ml_model.n_in=32 --model.ml_model.layers=3 --model.ml_model.hidden_dim=256 --trainer.max_epochs=1000
  # ml_model:
  #   class_path: econ_layers.layers.FlexibleSequential
  #   init_args:
  #     n_in: 32 # must match N
  #     n_out: 1
  #     layers: 3
  #     hidden_dim: 256
  #     hidden_bias: true
  #     last_bias: true
# CLI overrides for list-valued options such as the callbacks only modify the last one:
# python generalized_mean.py --trainer.max_epochs=5 --trainer.callbacks.stopping_threshold=0.01
# LBFGS is very fast, but sometimes cannot reach the low stopping_threshold for some N; set it to
# 0.0075, for example, and it can usually be reached. Set trainer.max_epochs=1 and consider raising
# the maximum number of iterations:
# python generalized_mean.py --optimizer.class_path=torch.optim.LBFGS --optimizer.tolerance_grad=1.0e-7 --optimizer.lr=1.0 --optimizer.max_iter=5000
# LBFGS has trouble with larger variances.
# optimizer:
#   class_path: torch.optim.LBFGS
#   init_args:
#     tolerance_grad: 1.0e-7
#     max_iter: 5000
#     lr: 1.0
#     line_search_fn: null # 'strong_wolfe' doesn't seem to help much here
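# Usage sketch (an assumption about how the script is wired up, not stated in this file): if
# generalized_mean.py builds a LightningCLI, this defaults file can be supplied as its config and any
# key above overridden from the command line, as in the examples above, e.g.
# python generalized_mean.py --config generalized_mean_defaults.yaml --trainer.max_epochs=500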