|
| 1 | +# ------------------------------------------------Environment Settings------------------------------------------------# |
| 2 | +# Directory paths for data and outputs |
| 3 | +data_dir: "/mnt/nas/thinhlpg/code/DeepSearch/data/flashrag_datasets/" |
| 4 | +save_dir: "/mnt/nas/thinhlpg/code/DeepSearch/logs" |
| 5 | + |
| 6 | +# Seed for reproducibility |
| 7 | +seed: 2024 |
| 8 | + |
| 9 | +# Whether to save intermediate data |
| 10 | +save_intermediate_data: True |
| 11 | +save_note: 'experiment' |
| 12 | + |
| 13 | +# -------------------------------------------------Retrieval Settings------------------------------------------------# |
| 14 | +# If the remote URL is set, the retriever will be a remote retriever and will ignore the following settings |
| 15 | +use_remote_retriever: True |
| 16 | +remote_retriever_url: "localhost:8001" |
| 17 | + |
| 18 | +instruction: ~ # instruction for retrieval model |
| 19 | +retrieval_topk: 5 # number of retrieved documents |
| 20 | +retrieval_batch_size: 256 # batch size for retrieval |
| 21 | +retrieval_use_fp16: True # whether to use fp16 for retrieval model |
| 22 | +retrieval_query_max_length: 128 # max length of the query |
| 23 | +save_retrieval_cache: False # whether to save the retrieval cache |
| 24 | +use_retrieval_cache: False # whether to use the retrieval cache |
| 25 | +retrieval_cache_path: ~ # path to the retrieval cache |
| 26 | +retrieval_pooling_method: ~ # set automatically if not provided |
| 27 | + |
| 28 | +# -------------------------------------------------Generator Settings------------------------------------------------# |
| 29 | +framework: sgl_remote # inference framework of the LLM, e.g. 'hf', 'vllm', 'fschat', 'sgl_remote' |
| 30 | +sgl_remote_url: "localhost:8002" |
| 31 | +generator_model: "janhq/250404-llama-3.2-3b-instruct-grpo-03-s250" # name or path of the generator model, for loading the tokenizer |
| 32 | +generator_max_input_len: 2048 # max length of the input |
| 33 | +generation_params: |
| 34 | + do_sample: False |
| 35 | + max_tokens: 8192 |
| 36 | + |
| 37 | +# -------------------------------------------------Evaluation Settings------------------------------------------------# |
| 38 | +# Metrics to evaluate the result |
| 39 | +metrics: [ 'em','f1','acc','precision','recall'] |
| 40 | +# Specify settings for the metrics; these are used within certain metrics |
| 41 | +metric_setting: |
| 42 | + retrieval_recall_topk: 5 |
| 43 | +save_metric_score: True # whether to save the metric score into a txt file |
| 44 | + |
| 45 | + |
| 46 | + |
0 commit comments