opendatahub-tests/tests/model_explainability/lm_eval/constants.py at 6205fa6d566c00f4cd4899db9e3f0ba3d14540a0 · ssaleem-rh/opendatahub-tests · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
CUSTOM_UNITXT_TASK_DATA = {
    "task_list": {
        "custom": {
            "systemPrompts": [
                {"name": "sp_0", "value": "Be concise. At every point give the shortest acceptable answer."}
            ],
            "templates": [
                {
                    "name": "tp_0",
                    "value": '{ "__type__": "input_output_template", '
                    '"input_format": "{text_a_type}: {text_a}\\n'
                    '{text_b_type}: {text_b}", '
                    '"output_format": "{label}", '
                    '"target_prefix": '
                    '"The {type_of_relation} class is ", '
                    '"instruction": "Given a {text_a_type} and {text_b_type} '
                    'classify the {type_of_relation} of the {text_b_type} to one of {classes}.",'
                    ' "postprocessors": [ "processors.take_first_non_empty_line",'
                    ' "processors.lower_case_till_punc" ] }',
                }
            ],
        },
        "taskRecipes": [{"card": {"name": "cards.wnli"}, "systemPrompt": {"ref": "sp_0"}, "template": {"ref": "tp_0"}}],
    }
}

LLMAAJ_TASK_DATA = {
    "task_list": {
        "custom": {
            "templates": [
                {
                    "name": "response_assessment.rating.mt_bench_single_turn",
                    "value": '{\n    "__type__": "input_output_template",\n    "instruction":'
                    ' "Please act as an impartial judge and evaluate the quality of the '
                    "response provided by an AI assistant to the user question displayed below."
                    " Your evaluation should consider factors such as the helpfulness, relevance,"
                    " accuracy, depth, creativity, and level of detail of the response. Begin your"
                    " evaluation by providing a short explanation. Be as objective as possible. "
                    "After providing your explanation, you must rate the response on a scale of 1 to 10"
                    ' by strictly following this format: \\"[[rating]]\\", for example: \\"Rating: '
                    '[[5]]\\".\\n\\n",\n    "input_format": "[Question]\\n{question}\\n\\n[The Start '
                    "of Assistant's Answer]\\n{answer}\\n[The End of Assistant's Answer]\",\n    "
                    '"output_format": "[[{rating}]]",\n    "postprocessors": [\n        '
                    '"processors.extract_mt_bench_rating_judgment"\n    ]\n}\n',
                }
            ],
            "tasks": [
                {
                    "name": "response_assessment.rating.single_turn",
                    "value": '{\n    "__type__": "task",\n    "input_fields": {\n        '
                    '"question": "str",\n        "answer": "str"\n    },\n    '
                    '"outputs": {\n        "rating": "float"\n    },\n    '
                    '"metrics": [\n        "metrics.spearman"\n    ]\n}\n',
                }
            ],
            "metrics": [
                {
                    "name": "llmaaj_metric",
                    "value": '{\n    "__type__": "llm_as_judge",\n    "inference_model": {\n        '
                    '"__type__": "hf_pipeline_based_inference_engine",\n        '
                    '"model_name": "rgeada/tiny-untrained-granite",\n        '
                    '"max_new_tokens": 256,\n "use_fp16": true},\n    '
                    '"template": "templates.response_assessment.rating.mt_bench_single_turn",\n    '
                    '"task": "response_assessment.rating.single_turn",\n    '
                    '"main_score": "mistral_7b_instruct_v0_2_huggingface_template_mt_bench_single_turn"\n}',
                }
            ],
        },
        "taskRecipes": [
            {
                "card": {
                    "custom": '{\n    "__type__": "task_card",\n    "loader": '
                    '{\n        "__type__": "load_hf",\n        '
                    '"path": "OfirArviv/mt_bench_single_score_gpt4_judgement",\n        '
                    '"split": "train"\n    },\n    "preprocess_steps": [\n        '
                    '{\n            "__type__": "rename_splits",\n            '
                    '"mapper": {\n                "train": "test"\n            }\n        },\n        '
                    '{\n            "__type__": "filter_by_condition",\n            '
                    '"values": {\n                "turn": 1\n            },\n            '
                    '"condition": "eq"\n        },\n        {\n            '
                    '"__type__": "filter_by_condition",\n            '
                    '"values": {\n                "reference": "[]"\n            },\n            '
                    '"condition": "eq"\n        },\n        {\n            '
                    '"__type__": "rename",\n            "field_to_field": {\n                '
                    '"model_input": "question",\n                '
                    '"score": "rating",\n                '
                    '"category": "group",\n                '
                    '"model_output": "answer"\n            }\n        },\n        '
                    '{\n            "__type__": "literal_eval",\n            '
                    '"field": "question"\n        },\n        '
                    '{\n            "__type__": "copy",\n            '
                    '"field": "question/0",\n            '
                    '"to_field": "question"\n        },\n        '
                    '{\n            "__type__": "literal_eval",\n            '
                    '"field": "answer"\n        },\n        {\n            '
                    '"__type__": "copy",\n            '
                    '"field": "answer/0",\n            '
                    '"to_field": "answer"\n        }\n    ],\n    '
                    '"task": "tasks.response_assessment.rating.single_turn",\n    '
                    '"templates": [\n        '
                    '"templates.response_assessment.rating.mt_bench_single_turn"\n    ]\n}\n',
                    "template": {"ref": "response_assessment.rating.mt_bench_single_turn"},
                    "metrics": [{"ref": "llmaaj_metric"}],
                }
            }
        ],
    }
}

FLAN_T5_IMAGE: str = (
    "quay.io/trustyai_testing/lmeval-assets-flan-t5-base"
    "@sha256:f7326d5b4069e9aa0b12ab77b1e8aa8dd25dd0bffd77b08fcc84988ea8869f7f"
)

ARC_EASY_DATASET_IMAGE: str = (
    "quay.io/trustyai_testing/lmeval-assets-arc-easy"
    "@sha256:1558997a838f2ac8ecd887b4f77485d810e5120b9f2700ecb71627e37c6d3a1b"
)

LMEVAL_OCI_REPO = "lmeval/offline-oci"
LMEVAL_OCI_TAG = "v1"

# Accelerator identifier mapping for GPU types
ACCELERATOR_IDENTIFIER: dict[str, str] = {
    "nvidia": "nvidia.com/gpu",
    "amd": "amd.com/gpu",
    "gaudi": "habana.ai/gaudi",
}