# generalized_mean_defaults.yaml
trainer:
  accelerator: cpu
  max_epochs: 1000
  min_epochs: 0
  max_time: 00:00:03:00 # DD:HH:MM:SS
  precision: 32
  num_sanity_val_steps: 0
  logger:
    class_path: pytorch_lightning.loggers.WandbLogger
    init_args:
      offline: true # set to true to avoid uploading during testing
      log_model: false # set to true to save the model at the end
      name: null # set a name or have it generated automatically
      project: symmetry_examples
      group: null # can group related runs
      tags:
        - basic_example
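  # To upload runs to W&B for a real training run, the logger flags can presumably be flipped from
  # the CLI in the same dotted style used for the callback override near the end of this file, e.g.
  # python generalized_mean.py --trainer.logger.offline=false --trainer.logger.log_model=true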
  callbacks:
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
      init_args:
        logging_interval: step
        log_momentum: false
    # - class_path: pytorch_lightning.callbacks.ModelCheckpoint
    #   init_args:
    #     filename: best
    #     monitor: _loss
    #     verbose: false
    #     save_last: true
    #     save_top_k: 1
    #     save_weights_only: true
    #     mode: min
    #     auto_insert_metric_name: true
    - class_path: pytorch_lightning.callbacks.EarlyStopping
      init_args:
        monitor: train_loss # val_loss or val_rel_error is normally a better choice, but we are looking at fitting/overfitting here.
        min_delta: 0.0
        patience: 1000000
        mode: min
        check_finite: true
        divergence_threshold: 1e5 # stops if the monitored value grows larger than this
        stopping_threshold: 1.0e-6 # if using val_rel_error, a value on the order of 0.05 or so is appropriate
        # verbose: true
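  # For example, to stop on a validation metric instead (assuming the model logs val_loss, as the
  # comment on monitor above suggests), the dotted CLI style used at the end of this file should work:
  # python generalized_mean.py --trainer.callbacks.monitor=val_loss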
optimizer:
  class_path: torch.optim.Adam
  init_args:
    lr: 0.005 # a low learning rate seems to converge relatively quickly here
# Schedulers are either non-binding or not helpful for this problem.
# lr_scheduler:
#   class_path: torch.optim.lr_scheduler.StepLR
#   init_args:
#     step_size: 1000 # number of epochs
#     gamma: 0.8
# lr_scheduler:
#   class_path: pytorch_lightning.cli.ReduceLROnPlateau # torch.optim.lr_scheduler.ReduceLROnPlateau
#   init_args:
#     monitor: val_rel_error
#     factor: 0.99
#     patience: 10
model:
  # Model parameters
  a_min: 1.0 # if the [a_min, a_max] interval is too tight and std is too large, some draws may be undefined due to negative values
  a_max: 3.0
  std: 0.3 # should not be too small, or else the problem becomes too easy
  X_distribution: normal # uniform or normal
  N: 256 # 32 # dimensionality of the state
  p: 1.5
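  # For reference (assumed target, not stated in this file): the generalized (power) mean of a draw
  # X = (X_1, ..., X_N) with exponent p is M_p(X) = ((1/N) * sum_i X_i^p)^(1/p), which p and N above
  # parameterize.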
  # Settings for output
  verbose: false
  hpo_objective_name: test_loss
  always_log_hpo_objective: false
  print_metrics: false
  save_metrics: false
  save_test_results: false
  test_seed: 0 # set to 0 to use default RNG seed
  train_data_seed: 0 # set to 0 to use default RNG seed
  # Algorithm settings
  num_train_points: 10
  num_val_points: 50
  num_test_points: 200
  batch_size: 32
  shuffle_training: true
  test_loss_success_threshold: 0.0 # 1e-4
  ## Invariance with NN
  ml_model:
    class_path: econ_layers.layers.DeepSet
    init_args:
      n_in: 1
      n_out: 1
      L: 2
      phi_layers: 2
      phi_hidden_dim: 128
      phi_hidden_bias: true
      phi_last_bias: true
      phi_activator:
        class_path: torch.nn.ReLU
      rho_layers: 2
      rho_hidden_dim: 128
      rho_hidden_bias: false
      rho_last_bias: true
      rho_activator:
        class_path: torch.nn.ReLU
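  # Note (an assumption about econ_layers.layers.DeepSet, not verified here): a DeepSet-style network
  # typically computes rho(sum_i phi(X_i)), which is invariant to permutations of the X_i, the same
  # symmetry the generalized mean has; the phi_* and rho_* settings above configure those two
  # networks, and n_in is 1 because phi acts on each element rather than on the full N-vector.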
  ## No invariants
  # python generalized_mean.py --model.ml_model.class_path=econ_layers.layers.FlexibleSequential --model.N=32 --model.ml_model.n_in=32 --model.ml_model.layers=3 --model.ml_model.hidden_dim=256 --trainer.max_epochs=1000
  # ml_model:
  #   class_path: econ_layers.layers.FlexibleSequential
  #   init_args:
  #     n_in: 32 # must match N
  #     n_out: 1
  #     layers: 3
  #     hidden_dim: 256
  #     hidden_bias: true
  #     last_bias: true
# CLI overrides for list-valued options such as the callbacks only modify the last one:
# python generalized_mean.py --trainer.max_epochs=5 --trainer.callbacks.stopping_threshold=0.01
# LBFGS is very fast, but sometimes cannot reach the low stopping_threshold for some N; set it to
# 0.0075, for example, and it can usually be reached. Set trainer.max_epochs=1 and consider raising
# the maximum number of iterations:
# python generalized_mean.py --optimizer.class_path=torch.optim.LBFGS --optimizer.tolerance_grad=1.0e-7 --optimizer.lr=1.0 --optimizer.max_iter=5000
# LBFGS has trouble with larger variances.
# optimizer:
#   class_path: torch.optim.LBFGS
#   init_args:
#     tolerance_grad: 1.0e-7
#     max_iter: 5000
#     lr: 1.0
#     line_search_fn: null # 'strong_wolfe' doesn't seem to help much here
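# Usage sketch (an assumption about how the script is wired up, not stated in this file): if
# generalized_mean.py builds a LightningCLI, this defaults file can be supplied as its config and any
# key above overridden from the command line, as in the examples above, e.g.
# python generalized_mean.py --config generalized_mean_defaults.yaml --trainer.max_epochs=500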