|
26 | 26 | "Benchmark": "llama31_8b", |
27 | 27 | "Creator": "NVIDIA", |
28 | 28 | "When": "Reference RCPs before 5.1 submission", |
29 | | - "Platform": "2xDGX-B200", |
| 29 | + "Platform": "4xDGX-B200", |
30 | 30 | "Precision": "BF16", |
31 | 31 | "BS": 64, |
32 | 32 | "Hyperparams": { |
33 | | - "opt_base_learning_rate": 1e-03, |
34 | | - "opt_learning_rate_warmup_samples": 16348, |
35 | | - "gradient_accumulation_steps": 4 |
| 33 | + "opt_base_learning_rate": 8e-04, |
| 34 | + "opt_learning_rate_warmup_samples": 6144, |
| 35 | + "gradient_accumulation_steps": 2 |
36 | 36 | }, |
37 | 37 | "Epochs to converge": [ |
38 | | - 233472, 221184, 233472, 221184, 221184, |
39 | | - 245760, 233472, 233472, 208896, 245760, |
40 | | - 233472, 221184, 233472, 233472, 221184, |
41 | | - 245760, 221184, 233472, 233472, 233472 |
| 38 | + 233472, 208896, 208896, 233472, 233472, |
| 39 | + 233472, 233472, 233472, 208896, 233472, |
| 40 | + 233472, 233472, 245760, 221184, 208896, |
| 41 | + 233472, 233472, 221184, 221184, 221184 |
42 | 42 | ] |
43 | 43 | }, |
44 | 44 |
|
|
88 | 88 |
|
89 | 89 |
|
90 | 90 |
|
| 91 | + |
0 commit comments