RobustFT/self-select.py at main · luo-junyu/RobustFT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import argparse
import os
import pandas as pd
import copy
from eval import Eval

from common import *
from config import *

# Prompt asking the LLM to judge whether a sample's current label looks noisy.
# Placeholders: {question}, {options_str} and {potential_answer}; they are
# filled in by format_selfselect_question. The model is instructed to reply
# "Answer: Y" (label appears noisy/incorrect) or "Answer: N" (label seems
# reliable).
# NOTE(review): the introduction mentions "its various predictions", but this
# template has no placeholder for predictions -- confirm the wording is intended.
# The trailing .strip() drops the newlines adjacent to the triple quotes.
SELECT_TEMPLATE = """
You are a critical check expert. Your task is to carefully examine a question and its various predictions to determine if the provided potential answer(ground truth) might be noisy or incorrect.

Question Information:
Question: {question}
{options_str}

Potential Answer (Maybe noisy): {potential_answer}

Please analyze above information and determine if the potential answer is noisy/incorrect.

Your Options:
- Y: the potential answer appears to be noisy/incorrect.
- N: the potential answer seems reliable.

Your response must follow this format:

Answer: [Y/N] (Output the answer directly without any spaces or other punctuation. )
""".strip()


def format_selfselect_question(data):
    """Build the self-selection prompt for one sample.

    Fills ``SELECT_TEMPLATE`` with the sample's ``question`` text, the option
    string returned by ``format_option_str(data)``, and the sample's current
    ``answer`` (the label whose reliability is being judged).

    Args:
        data: Mapping for one sample; must provide ``'question'`` and
            ``'answer'`` plus whatever ``format_option_str`` reads.

    Returns:
        The fully formatted prompt string.
    """
    prompt_fields = {
        'question': data['question'],
        'options_str': format_option_str(data),
        'potential_answer': data['answer'],
    }
    return SELECT_TEMPLATE.format(**prompt_fields)

def extract_selfselect_result(response):
    """Return whatever ``extract_result`` extracts from the model *response*."""
    return extract_result(response)

# Command-line interface: which task/model to use, which noisy split to read,
# and the base URL of the LLM endpoint.
parser = argparse.ArgumentParser(description='Noisy Free Fine-tuning')
parser.add_argument(
    '--task',
    default='mmlu',
    type=str,
    help='Task name',
)
parser.add_argument(
    '--model',
    default='llama3.1',
    type=str,
    help='Model name',
)
parser.add_argument(
    '--noise_ratio',
    default=50,
    type=int,
    help='Noise ratio',
)
parser.add_argument(
    '--base_url',
    default='http://localhost:8002/v1',
    type=str,
    help='Base URL',
)

args = parser.parse_args()

# Expose the parsed options as the module-level names used by the rest of
# the script.
task, model = args.task, args.model
noise_ratio, base_url = args.noise_ratio, args.base_url

# Settings registered for the selected model.
model_config = MODELS_CONFIG[model]

# Publish the endpoint, and the API key when the model config carries one,
# through environment variables.
os.environ['LLM_BASE_URL'] = base_url
if 'OPENAI_API_KEY' in model_config:
    os.environ['OPENAI_API_KEY'] = model_config['OPENAI_API_KEY']

# Keyword arguments for the evaluator: backend type, task name and the
# generation parameters.
infer_config = {'type': model_config["method"], 'task': task}
infer_config['config'] = {
    "model": model_config['name'],
    "temperature": 1,
    "max_tokens": 512,
    "logprobs": True,
}

# Task-specific evaluation settings.
eval_config = TASK_CONFIG[task]
question_type, check_fn = eval_config['question_type'], eval_config['check_fn']

# Input: the noisy-labelled CSV for this task and noise ratio.
noisy_dir = f'./data/{task}/noisy'
noisy_labeled_path = f'{noisy_dir}/noisy{noise_ratio}.csv'

# Output: the CSV of samples kept by self-selection. The directory is created
# up front, before any data is read.
selfselect_dir = f'./data/{task}/selfselect'
selfselect_path = f'{selfselect_dir}/selfselect{noise_ratio}.csv'
os.makedirs(selfselect_dir, exist_ok=True)

df = pd.read_csv(noisy_labeled_path, sep=',')

# One dict per CSV row, extended with the task's lower-cased question type and
# its additional prompt.
all_data = [
    {
        **row.to_dict(),
        'question_type': question_type.lower(),
        'additional_prompt': eval_config['additional_prompt'],
    }
    for _, row in df.iterrows()
]


# Run the self-selection prompt on a deep copy of the samples so that
# `all_data` keeps the rows exactly as loaded (presumably Eval annotates its
# samples in place -- confirm against eval.Eval).
inference_data = copy.deepcopy(all_data)
selfselect_eval = Eval(samples=inference_data, **infer_config)
_ = selfselect_eval.eval(
    format_fn=format_selfselect_question,
    check_fn=check_fn,
    extract_fn=extract_selfselect_result
)
selfselect_predictions = selfselect_eval.get_results()

# Verdicts are matched to samples purely by position. If the evaluator returned
# a different number of results, pairing them would silently attach verdicts to
# the wrong rows (or drop rows), so fail loudly instead.
if len(selfselect_predictions) != len(all_data):
    raise ValueError(
        f'Expected {len(all_data)} self-select predictions, '
        f'got {len(selfselect_predictions)}; cannot align them with the samples.'
    )

# Keep only the samples whose label the model judged reliable ("N").
clean_data = []
for sample, prediction in zip(all_data, selfselect_predictions):
    verdict = prediction['PredAnswer']
    # A missing or non-string verdict is not evidence that the label is
    # reliable, so such samples are dropped rather than crashing the run.
    # Surrounding whitespace is ignored when reading the verdict.
    if isinstance(verdict, str) and verdict.strip().startswith('N'):
        clean_data.append(sample)

# Pass the column names explicitly so the CSV still gets a header row when no
# sample is kept; an empty DataFrame built from [] has no columns to write.
output_columns = list(all_data[0]) if all_data else list(df.columns)
clean_data_df = pd.DataFrame(clean_data, columns=output_columns)
clean_data_df.to_csv(selfselect_path, index=False)