|
# Minimal custom-Unitxt task payload: one system prompt plus one serialized
# InputOutputTemplate, wired to the WNLI card via name references.
CUSTOM_UNITXT_TASK_DATA = {
    "task_list": {
        "custom": {
            "systemPrompts": [
                {
                    "name": "sp_0",
                    "value": "Be concise. At every point give the shortest acceptable answer.",
                }
            ],
            "templates": [
                {
                    "name": "tp_0",
                    # Unitxt template kept as a JSON string on purpose; the
                    # consumer deserializes it ("__type__" selects the class).
                    "value": (
                        '{ "__type__": "input_output_template", '
                        '"input_format": "{text_a_type}: {text_a}\\n{text_b_type}: {text_b}", '
                        '"output_format": "{label}", '
                        '"target_prefix": "The {type_of_relation} class is ", '
                        '"instruction": "Given a {text_a_type} and {text_b_type} classify '
                        'the {type_of_relation} of the {text_b_type} to one of {classes}.", '
                        '"postprocessors": [ "processors.take_first_non_empty_line", '
                        '"processors.lower_case_till_punc" ] }'
                    ),
                }
            ],
        },
        # Recipe ties the card to the prompt/template declared above by name.
        "taskRecipes": [
            {
                "card": {"name": "cards.wnli"},
                "systemPrompt": {"ref": "sp_0"},
                "template": {"ref": "tp_0"},
            }
        ],
    }
}
| 26 | + |
# End-to-end LLM-as-a-judge (LLMaaJ) task payload.  Every "value"/"custom"
# entry below is a Unitxt artifact serialized as a JSON string; mind the
# escaping: a bare \n in the Python literal is a real newline in the JSON
# text, while \\n and \\" are two-character JSON escapes that survive into
# the decoded string values.
LLMAAJ_TASK_DATA = {
    "task_list": {
        "custom": {
            # Judge prompt: asks the model to rate an answer 1-10 and emit
            # the score in the strict "[[rating]]" format the postprocessor
            # extracts.
            "templates": [
                {
                    "name": "response_assessment.rating.mt_bench_single_turn",
                    "value": '{\n "__type__": "input_output_template",\n "instruction":'
                    ' "Please act as an impartial judge and evaluate the quality of the '
                    "response provided by an AI assistant to the user question displayed below."
                    " Your evaluation should consider factors such as the helpfulness, relevance,"
                    " accuracy, depth, creativity, and level of detail of the response. Begin your"
                    " evaluation by providing a short explanation. Be as objective as possible. "
                    "After providing your explanation, you must rate the response on a scale of 1 to 10"
                    ' by strictly following this format: \\"[[rating]]\\", for example: \\"Rating: '
                    '[[5]]\\".\\n\\n",\n "input_format": "[Question]\\n{question}\\n\\n[The Start '
                    "of Assistant's Answer]\\n{answer}\\n[The End of Assistant's Answer]\",\n "
                    '"output_format": "[[{rating}]]",\n "postprocessors": [\n '
                    '"processors.extract_mt_bench_rating_judgment"\n ]\n}\n',
                }
            ],
            # Task schema: (question, answer) in, float rating out, scored
            # against references with Spearman correlation.
            "tasks": [
                {
                    "name": "response_assessment.rating.single_turn",
                    "value": '{\n "__type__": "task",\n "input_fields": {\n '
                    '"question": "str",\n "answer": "str"\n },\n '
                    '"outputs": {\n "rating": "float"\n },\n '
                    '"metrics": [\n "metrics.spearman"\n ]\n}\n',
                }
            ],
            # Judge metric: an llm_as_judge driven by a local HF pipeline so
            # the test needs no remote inference service.
            "metrics": [
                {
                    "name": "llmaaj_metric",
                    # NOTE(review): "main_score" names a mistral-7b judge
                    # while "model_name" is rgeada/tiny-untrained-granite —
                    # confirm the score name is intentional (it may just be a
                    # required label, not a model binding).
                    "value": '{\n "__type__": "llm_as_judge",\n "inference_model": {\n '
                    '"__type__": "hf_pipeline_based_inference_engine",\n '
                    '"model_name": "rgeada/tiny-untrained-granite",\n '
                    '"max_new_tokens": 256,\n "use_fp16": true},\n '
                    '"template": "templates.response_assessment.rating.mt_bench_single_turn",\n '
                    '"task": "response_assessment.rating.single_turn",\n '
                    '"main_score": "mistral_7b_instruct_v0_2_huggingface_template_mt_bench_single_turn"\n}',
                }
            ],
        },
        "taskRecipes": [
            {
                # NOTE(review): "template" and "metrics" sit INSIDE the
                # "card" object here, unlike CUSTOM_UNITXT_TASK_DATA above
                # where they are recipe-level siblings of "card" — confirm
                # the consumer expects this nesting.
                "card": {
                    # Inline task_card: loads the MT-Bench single-score GPT-4
                    # judgements from HF, filters to turn 1 with no
                    # references, renames columns to the task schema, and
                    # literal_eval/copy steps unwrap the single-element
                    # question/answer lists.
                    "custom": '{\n "__type__": "task_card",\n "loader": '
                    '{\n "__type__": "load_hf",\n '
                    '"path": "OfirArviv/mt_bench_single_score_gpt4_judgement",\n '
                    '"split": "train"\n },\n "preprocess_steps": [\n '
                    '{\n "__type__": "rename_splits",\n '
                    '"mapper": {\n "train": "test"\n }\n },\n '
                    '{\n "__type__": "filter_by_condition",\n '
                    '"values": {\n "turn": 1\n },\n '
                    '"condition": "eq"\n },\n {\n '
                    '"__type__": "filter_by_condition",\n '
                    '"values": {\n "reference": "[]"\n },\n '
                    '"condition": "eq"\n },\n {\n '
                    '"__type__": "rename",\n "field_to_field": {\n '
                    '"model_input": "question",\n '
                    '"score": "rating",\n '
                    '"category": "group",\n '
                    '"model_output": "answer"\n }\n },\n '
                    '{\n "__type__": "literal_eval",\n '
                    '"field": "question"\n },\n '
                    '{\n "__type__": "copy",\n '
                    '"field": "question/0",\n '
                    '"to_field": "question"\n },\n '
                    '{\n "__type__": "literal_eval",\n '
                    '"field": "answer"\n },\n {\n '
                    '"__type__": "copy",\n '
                    '"field": "answer/0",\n '
                    '"to_field": "answer"\n }\n ],\n '
                    '"task": "tasks.response_assessment.rating.single_turn",\n '
                    '"templates": [\n '
                    '"templates.response_assessment.rating.mt_bench_single_turn"\n ]\n}\n',
                    "template": {"ref": "response_assessment.rating.mt_bench_single_turn"},
                    "metrics": [{"ref": "llmaaj_metric"}],
                }
            }
        ],
    }
}
0 commit comments