import ast

import openai
from transformers import AutoModelForCausalLM, AutoTokenizer

from utilities import FIRST_STEP_INSTANTIATION_PROMPT


class Navigator:
"""
A class for navigating and controlling the reasoning process of a language model.
This class is responsible for initializing the reasoning trajectory, guiding the model
through different reasoning steps, and dynamically adjusting the reasoning flow.
"""
def __init__(self, model_path):
"""
Initializes the Navigator with a pre-trained language model.
Args:
model_path (str): The path to the pre-trained language model.
"""
self.model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
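        # Rolling state for a single problem: raw <think> traces, the ordered
        # reasoning steps, the student's per-step responses, the number of
        # rounds, and the tutor instructions issued so far.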
self.reasoning_thoughts = []
self.reasoning_flow = []
self.instantiation = []
self.reasoning_rounds = 0
self.reasoning_instructions = []
def initializing_reasoning_trajectory(self, prompt, problem):
"""
Initializes the reasoning trajectory by generating a template from the language model.
Args:
prompt (str): The system prompt to guide the template generation.
problem (str): The problem description.
Returns:
str: The thoughts generated for building the template.
"""
messages = [
{"role": "system", "content": prompt},
{"role": "user", "content": problem}
]
response = self.generate(messages)
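        # The model is expected to emit a Python dict literal after its final
        # </think> tag, e.g. {'reason_flow': ['Step 1: ...', 'Step 2: ...']}
        # (shape inferred from how self.template is used elsewhere in this file).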
template_str = response.split('</think>')[-1]
self.template = ast.literal_eval(template_str)
thoughts_for_template_building = response.split('</think>')[0] + '</think>'
self.reasoning_thoughts.append(thoughts_for_template_building)
self.reasoning_flow = self.template['reason_flow']
self.reasoning_rounds = len(self.reasoning_flow)
return thoughts_for_template_building
def initialize_reason_problem(self, problem, reason_step):
"""
Initializes or continues the reasoning process for a given problem and reasoning step.
Args:
problem (str): The problem description.
reason_step (str): The current reasoning step.
Returns:
str: The response generated by the language model.
"""
system_prompt = "You are a math tutor guiding a student to solve a math problem based on the given step. " \
"Your task is to help your student to learn how to apply the steps to solve the problem. " \
"Based on the problem description and the current step, give a clear and high-level instruction " \
"for your student to help them apply the method in the current step to solve the problem." + '\nProblem:' + problem
continue_prompt = "Now based on the student's response and the previous steps, please continue to instruct students to implement this step. "
        if not self.reasoning_instructions:  # first step: no prior instructions yet
messages = [
{"role": "system", "content": system_prompt},
{"role": "system", "content": 'Current step: Step 1:\n' + reason_step}
]
response = self.generate(messages)
return response
        else:
            messages = [{"role": "system", "content": system_prompt}]
            for i in range(len(self.reasoning_instructions)):
                messages.append({"role": "system", "content": f'Current step: Step {i + 1}:\n' + self.reasoning_flow[i]})
                messages.append({"role": "assistant", "content": f'Step {i + 1}:\n' + self.reasoning_instructions[i]})
                messages.append({"role": "user", "content": f'Student Response for Step {i + 1}:\n' + self.instantiation[i]})
            messages.append({"role": "system", "content": continue_prompt + '\n'})
            messages.append({"role": "system", "content": f'Current step: Step {len(self.reasoning_instructions) + 1}:\n' + reason_step})
            response = self.generate(messages)
            return response
def dynamic_adjustment(self, prompt):
"""
Dynamically adjusts the reasoning flow based on the model's response to a given prompt.
Args:
prompt (str): The prompt for dynamic adjustment.
"""
        # `generate` expects a list of chat messages, so wrap the raw prompt
        # (passed here as a user turn) instead of handing it a bare string.
        response = self.generate([{"role": "user", "content": prompt}])
        # Keep only the text after the model's reasoning block.
        new_reasoning_flow = response.split('</think>')[-1]
        self.update_reasoning_flow(new_reasoning_flow)
def update_reasoning_flow(self, new_reasoning_flow):
"""
Updates the reasoning flow by extracting it from a given text and parsing it as a Python list.
Args:
new_reasoning_flow (str): Text containing the new reasoning flow in Python list format.
"""
prompt = 'Please extract the reasoning flows from the following text and output in python list format, ' \
'each element in the python list should be a step in the reasoning process:\n'
        messages = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": 'Input Reasoning Flow:\n' + new_reasoning_flow +
                                        '\nNote: output only the Python list of reasoning steps in your response.'}
        ]
response = self.generate(messages)
        extracted_reasoning_flow = None
        # Retry until the output parses as a non-empty Python list; regenerate on
        # every failed attempt so one malformed response cannot loop forever.
        while not isinstance(extracted_reasoning_flow, list) or not extracted_reasoning_flow:
            try:
                extracted_reasoning_flow = ast.literal_eval(response)
            except (SyntaxError, ValueError):
                extracted_reasoning_flow = None
            if not isinstance(extracted_reasoning_flow, list) or not extracted_reasoning_flow:
                response = self.generate(messages)
self.reasoning_flow = extracted_reasoning_flow
self.reasoning_rounds = len(self.reasoning_flow)
self.template['reason_flow'] = self.reasoning_flow
def generate(self, messages):
"""
Generates a response from the language model based on the provided messages.
Args:
messages (list): A list of message dictionaries for the language model.
Returns:
str: The generated response from the language model.
"""
text = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=3072,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.6,
            top_p=0.95,
        )
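        # Drop the echoed prompt tokens so only the new continuation is decoded.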
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
return response
class InferenceLLM:
"""
A class for inference using a language model, designed for student-tutor interplay.
This class handles the interaction with the language model to simulate a student
responding to tutor instructions in a step-by-step problem-solving scenario.
"""
def __init__(self, model_path, inherit: bool = False, inherit_model=None, inherit_tokenizer=None):
"""
Initializes the InferenceLLM, optionally inheriting a model and tokenizer.
Args:
model_path (str): Path to the pre-trained model if not inheriting.
inherit (bool): Whether to inherit an existing model and tokenizer. Defaults to False.
inherit_model (optional): Model to inherit if inherit is True.
inherit_tokenizer (optional): Tokenizer to inherit if inherit is True.
"""
if inherit:
self.model = inherit_model
self.tokenizer = inherit_tokenizer
else:
self.model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
def interplay(self, instruction, problem, previous_instruction, previous_reasoning):
"""
Simulates the interplay between a student and a tutor for problem-solving.
Args:
instruction (str): The current instruction from the tutor.
problem (str): The problem description.
previous_instruction (list): List of previous tutor instructions.
previous_reasoning (list): List of student's reasoning for previous steps.
Returns:
tuple: A tuple containing the student's thought process and the solution.
"""
system_prompt = "Now you are a student who are interacting with your tutor, Your teacher will gradually guide you to solve a problem. " \
"Please follow the instructions and guidance given by the teacher to solve the problem step by step. " \
"At the same time, please think carefully and put your thoughts and reasoning process within <think></think>, " \
"and output your solution for this step after </think>. Please be sure to think carefully before replying." + '\nProblem:' + problem
        messages = [{"role": "system", "content": system_prompt}]
        assert len(previous_instruction) == len(previous_reasoning)
        for i in range(len(previous_instruction)):
            # Number each prior step by its own index rather than by the list length.
            messages.append({"role": "user", "content": f'Teacher Instruction for Step {i + 1}:' + previous_instruction[i]})
            messages.append({"role": "assistant", "content": previous_reasoning[i]})
        messages.append({"role": "user", "content": f'Teacher Instruction for Step {len(previous_instruction) + 1}:' + instruction})
thought, solution = self.generate(messages)
return thought, solution
def generate(self, messages):
"""
Generates a response from the language model and extracts thought and solution.
Args:
messages (list): A list of message dictionaries for the language model.
Returns:
tuple: A tuple containing the extracted thought and solution from the response.
"""
text = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=3072,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.6,
            top_p=0.95,
        )
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
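        # Separate the <think>...</think> reasoning trace from the final answer.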
thought = response.split('<think>')[-1].split('</think>')[0]
solution = response.split('</think>')[-1]
return thought, solution
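

if __name__ == "__main__":
    # Minimal usage sketch, not an official driver script: the model path and
    # problem below are placeholders, and using FIRST_STEP_INSTANTIATION_PROMPT
    # as the template-building prompt is an assumption based on its name.
    navigator = Navigator("path/to/model")
    # Reuse the navigator's weights for the student to avoid loading them twice.
    student = InferenceLLM(None, inherit=True,
                           inherit_model=navigator.model,
                           inherit_tokenizer=navigator.tokenizer)
    problem = "If 3x + 5 = 20, what is x?"
    navigator.initializing_reasoning_trajectory(FIRST_STEP_INSTANTIATION_PROMPT, problem)
    for step in navigator.reasoning_flow:
        instruction = navigator.initialize_reason_problem(problem, step)
        navigator.reasoning_instructions.append(instruction)
        thought, solution = student.interplay(instruction, problem,
                                              navigator.reasoning_instructions[:-1],
                                              navigator.instantiation)
        navigator.instantiation.append(solution)
        print(f"Step: {step}\nInstruction: {instruction}\nSolution: {solution}\n")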