@@ -118,11 +118,12 @@ function run_speculative_beam_search() {
118
118
local NBEST=${3:- 5} # Number of best sequences
119
119
local DRAFT_LEN=${4:- 10} # Draft sequence length
120
120
local N_DRAFTS=${5:- 23} # Maximum number of parallel drafts
121
- local GPU=${6:- 1}
122
- local SAVE_PREDICTIONS=${7:- false} # Whether to save predictions to disk. Slows down the run.
121
+ local SMART_DRAFTS_MODE=${6:- false}
122
+ local GPU=${7:- 0}
123
+ local SAVE_PREDICTIONS=${8:- false} # Whether to save predictions to disk. Slows down the run.
123
124
124
125
local DEVICE=" --trainer.accelerator cpu --trainer.devices 1"
125
- if [ -n " ${GPU} " ]; then
126
+ if [ " ${GPU} " != " false " ]; then
126
127
DEVICE=" --trainer.accelerator gpu --trainer.devices [${GPU} ]"
127
128
fi
128
129
@@ -143,6 +144,7 @@ function run_speculative_beam_search() {
143
144
--model.report_prediction_file ${OUTPUT_DIR} /report.txt \
144
145
--data.batch_size ${BS} \
145
146
--model.generation beam_search_speculative \
147
+ --model.smart_drafts_mode ${SMART_DRAFTS_MODE} \
146
148
--model.draft_len ${DRAFT_LEN} \
147
149
--model.beam_size ${NBEST} \
148
150
--model.max_len ${MAX_LEN} \
193
195
194
196
SAVE_PREDICTIONS=false
195
197
N_BEST=5
198
+ SMART_DRAFTS=false
196
199
197
200
# Beam search decoding with five hypotheses
198
201
# Five runs for time spread estimation; the loop makes a sixth pass after SAVE_PREDICTIONS is switched on
@@ -203,28 +206,28 @@ for i in {1..6}; do
203
206
draft_len=10
204
207
n_drafts=23
205
208
run_beam_search results_product_final_beam_search ${batch_size} ${N_BEST} ${GPU} ${SAVE_PREDICTIONS}
206
- run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${GPU} ${SAVE_PREDICTIONS}
209
+ run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${SMART_DRAFTS} ${ GPU} ${SAVE_PREDICTIONS}
207
210
208
211
# Batch size 2, 14 draft tokens, 10 drafts
209
212
batch_size=2
210
213
draft_len=14
211
214
n_drafts=10
212
215
run_beam_search results_product_final_beam_search ${batch_size} ${N_BEST} ${GPU} ${SAVE_PREDICTIONS}
213
- run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${GPU} ${SAVE_PREDICTIONS}
216
+ run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${SMART_DRAFTS} ${ GPU} ${SAVE_PREDICTIONS}
214
217
215
218
# Batch size 3, 9 draft tokens, 10 drafts
216
219
batch_size=3
217
220
draft_len=9
218
221
n_drafts=10
219
222
run_beam_search results_product_final_beam_search ${batch_size} ${N_BEST} ${GPU} ${SAVE_PREDICTIONS}
220
- run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${GPU} ${SAVE_PREDICTIONS}
223
+ run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${SMART_DRAFTS} ${ GPU} ${SAVE_PREDICTIONS}
221
224
222
225
# Batch size 4, 10 draft tokens, 7 drafts
223
226
batch_size=4
224
227
draft_len=10
225
228
n_drafts=7
226
229
run_beam_search results_product_final_beam_search ${batch_size} ${N_BEST} ${GPU} ${SAVE_PREDICTIONS}
227
- run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${GPU} ${SAVE_PREDICTIONS}
230
+ run_speculative_beam_search results_product_final_beam_search_speculative ${batch_size} ${N_BEST} ${draft_len} ${n_drafts} ${SMART_DRAFTS} ${ GPU} ${SAVE_PREDICTIONS}
228
231
229
232
if [ " $i " -eq 5 ]; then
230
233
SAVE_PREDICTIONS=true
0 commit comments