forked from Samsung/TICO
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllama_eval_suite.yaml
More file actions
81 lines (72 loc) · 1.58 KB
/
Copy pathllama_eval_suite.yaml
File metadata and controls
81 lines (72 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Model selection.
# NOTE(review): nesting reconstructed from a copy whose indentation was
# stripped; confirm key placement against the consuming loader.
model:
  family: llama
  name_or_path: Maykeye/TinyLLama-v0
  trust_remote_code: false
  hf_token: null  # no token configured here
  cache_dir: null  # no cache directory configured here
# Runtime settings (device, dtype, seed, progress flag).
# NOTE(review): nesting reconstructed from a copy whose indentation was
# stripped; confirm key placement against the consuming loader.
runtime:
  device: cuda
  dtype: float32
  seed: 42
  show_progress: true
# Extra model arguments.
# NOTE(review): kept as a top-level section with `profile` as its only child.
# The original indentation was lost, so verify that `model_args` is not meant
# to be nested under another section.
model_args:
  profile: reference_eval
# Calibration data settings.
# NOTE(review): nesting reconstructed from a copy whose indentation was
# stripped; confirm key placement against the consuming loader.
calibration:
  dataset: wikitext
  dataset_config: wikitext-2-raw-v1
  split: train
  n_samples: 128
  seq_len: 2048
  decode_steps: 0
# List of pipeline stages. Every entry carries a `name` and an `enabled`
# flag, followed by that stage's own options.
# NOTE(review): nesting reconstructed from a copy whose indentation was
# stripped; each option is attached to the `- name:` entry it follows.
# Confirm against the consuming loader.
pipeline:
  - name: spinquant
    enabled: true

  # This stage is switched off (`enabled: false`); its options are retained.
  - name: cle
    enabled: false
    pairs:
      # Quoted because the value contains `*` and `:`; the string itself is
      # unchanged.
      - 'model.layers.*.mlp.up_proj:model.layers.*.mlp.down_proj'
    method: absmax
    max_iter: 1

  - name: gptq
    enabled: true
    weight_bits: 4
    weight_bits_overrides: {}  # explicit empty mapping
    perchannel: true
    symmetric: false
    mse: mse
    sensitivity_path: null
    percdamp: 0.01
    groupsize: -1
    actorder: true
    static_groups: false
    quantize_lm_head: false
    use_orig_model_inference: false
    verbose: false
    show_progress: true

  - name: ptq
    enabled: true
    profile: reference_eval
    activation_dtype: int16
    default_qscheme: per_tensor_symm
    linear_weight_bits: 4
    embedding_weight_bits: 8
    lm_head_weight_bits: 8
    spin_rotation_weight_bits: 8
    norm_weight_dtype: int16
    strict_wrap: true
    decode_calibration_steps: 0
# Evaluation settings.
# NOTE(review): nesting reconstructed from a copy whose indentation was
# stripped. `dataset`, `dataset_config` and `split` are placed under
# `perplexity`; `lm_eval_tasks` and `max_seq_len` are placed directly under
# `evaluation`. Confirm both placements against the consuming loader.
evaluation:
  enabled: true
  perplexity:
    dataset: wikitext
    dataset_config: wikitext-2-raw-v1
    split: test
  # A single comma-separated string, not a YAML sequence.
  lm_eval_tasks: hellaswag,mmlu,piqa,truthfulqa_mc1,truthfulqa_mc2,race,triviaqa
  max_seq_len: 2048
# Export settings; currently switched off (`enabled: false`).
# NOTE(review): nesting reconstructed from a copy whose indentation was
# stripped; confirm key placement against the consuming loader.
export:
  enabled: false
  output_dir: ./out/llama_spinquant_gptq_ptq_eval
  max_seq_len: 2048
  prefill_decode: false
  artifacts:
    - ptq_checkpoint