import ast

import openai
from transformers import AutoModelForCausalLM, AutoTokenizer

from utilities import FIRST_STEP_INSTANTIATION_PROMPT


class Navigator:
"""
A class for navigating and controlling the reasoning process of a language model.
This class is responsible for initializing the reasoning trajectory, guiding the model
through different reasoning steps, and dynamically adjusting the reasoning flow.
"""
def __init__(self, model_path):
"""
Initializes the Navigator with a pre-trained language model.
Args:
model_path (str): The path to the pre-trained language model.
"""
self.model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
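        # Rolling state for a single problem: raw <think> traces, the ordered
        # reasoning steps, the student's per-step responses, the number of
        # rounds, and the tutor instructions issued so far.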
self.reasoning_thoughts = []
self.reasoning_flow = []
self.instantiation = []
self.reasoning_rounds = 0
self.reasoning_instructions = []
def initializing_reasoning_trajectory(self, prompt, problem):
"""
Initializes the reasoning trajectory by generating a template from the language model.
Args:
prompt (str): The system prompt to guide the template generation.
problem (str): The problem description.
Returns:
str: The thoughts generated for building the template.
"""
messages = [
{"role": "system", "content": prompt},
{"role": "user", "content": problem}
]
response = self.generate(messages)
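        # The model is expected to emit a Python dict literal after its final
        # </think> tag, e.g. {'reason_flow': ['Step 1: ...', 'Step 2: ...']}
        # (shape inferred from how self.template is used elsewhere in this file).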
template_str = response.split('</think>')[-1]
self.template = ast.literal_eval(template_str)
thoughts_for_template_building = response.split('</think>')[0] + '</think>'
self.reasoning_thoughts.append(thoughts_for_template_building)
self.reasoning_flow = self.template['reason_flow']
self.reasoning_rounds = len(self.reasoning_flow)
return thoughts_for_template_building
def initialize_reason_problem(self, problem, reason_step):
"""
Initializes or continues the reasoning process for a given problem and reasoning step.
Args:
problem (str): The problem description.
reason_step (str): The current reasoning step.
Returns:
str: The response generated by the language model.
"""
system_prompt = "You are a math tutor guiding a student to solve a math problem based on the given step. " \
"Your task is to help your student to learn how to apply the steps to solve the problem. " \
"Based on the problem description and the current step, give a clear and high-level instruction " \
"for your student to help them apply the method in the current step to solve the problem." + '\nProblem:' + problem
continue_prompt = "Now based on the student's response and the previous steps, please continue to instruct students to implement this step. "
        if not self.reasoning_instructions:  # first step: no prior instructions yet
messages = [
{"role": "system", "content": system_prompt},
{"role": "system", "content": 'Current step: Step 1:\n' + reason_step}
]
response = self.generate(messages)
return response
        else:
            messages = [{"role": "system", "content": system_prompt}]
            for i in range(len(self.reasoning_instructions)):
                messages.append({"role": "system", "content": f'Current step: Step {i + 1}:\n' + self.reasoning_flow[i]})
                messages.append({"role": "assistant", "content": f'Step {i + 1}:\n' + self.reasoning_instructions[i]})
                messages.append({"role": "user", "content": f'Student Response for Step {i + 1}:\n' + self.instantiation[i]})
            messages.append({"role": "system", "content": continue_prompt + '\n'})
            messages.append({"role": "system", "content": f'Current step: Step {len(self.reasoning_instructions) + 1}:\n' + reason_step})
            response = self.generate(messages)
            return response
def dynamic_adjustment(self, prompt):
"""
Dynamically adjusts the reasoning flow based on the model's response to a given prompt.
Args:
prompt (str): The prompt for dynamic adjustment.
"""
        # `generate` expects a list of chat messages, so wrap the raw prompt
        # (passed here as a user turn) instead of handing it a bare string.
        response = self.generate([{"role": "user", "content": prompt}])
        # Keep only the text after the model's reasoning block.
        new_reasoning_flow = response.split('</think>')[-1]
        self.update_reasoning_flow(new_reasoning_flow)
def update_reasoning_flow(self, new_reasoning_flow):
"""
Updates the reasoning flow by extracting it from a given text and parsing it as a Python list.
Args:
new_reasoning_flow (str): Text containing the new reasoning flow in Python list format.
"""
prompt = 'Please extract the reasoning flows from the following text and output in python list format, ' \
'each element in the python list should be a step in the reasoning process:\n'
        messages = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": 'Input Reasoning Flow:\n' + new_reasoning_flow +
                                        '\nNote: output only the Python list of reasoning steps in your response.'}
        ]
response = self.generate(messages)
        extracted_reasoning_flow = None
        # Retry until the output parses as a non-empty Python list; regenerate on
        # every failed attempt so one malformed response cannot loop forever.
        while not isinstance(extracted_reasoning_flow, list) or not extracted_reasoning_flow:
            try:
                extracted_reasoning_flow = ast.literal_eval(response)
            except (SyntaxError, ValueError):
                extracted_reasoning_flow = None
            if not isinstance(extracted_reasoning_flow, list) or not extracted_reasoning_flow:
                response = self.generate(messages)
self.reasoning_flow = extracted_reasoning_flow
self.reasoning_rounds = len(self.reasoning_flow)
self.template['reason_flow'] = self.reasoning_flow
def generate(self, messages):
"""
Generates a response from the language model based on the provided messages.
Args:
messages (list): A list of message dictionaries for the language model.
Returns:
str: The generated response from the language model.
"""
text = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=3072,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.6,
            top_p=0.95,
        )
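        # Drop the echoed prompt tokens so only the new continuation is decoded.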
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
return response
class InferenceLLM:
"""
A class for inference using a language model, designed for student-tutor interplay.
This class handles the interaction with the language model to simulate a student
responding to tutor instructions in a step-by-step problem-solving scenario.
"""
def __init__(self, model_path, inherit: bool = False, inherit_model=None, inherit_tokenizer=None):
"""
Initializes the InferenceLLM, optionally inheriting a model and tokenizer.
Args:
model_path (str): Path to the pre-trained model if not inheriting.
inherit (bool): Whether to inherit an existing model and tokenizer. Defaults to False.
inherit_model (optional): Model to inherit if inherit is True.
inherit_tokenizer (optional): Tokenizer to inherit if inherit is True.
"""
if inherit:
self.model = inherit_model
self.tokenizer = inherit_tokenizer
else:
self.model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
def interplay(self, instruction, problem, previous_instruction, previous_reasoning):
"""
Simulates the interplay between a student and a tutor for problem-solving.
Args:
instruction (str): The current instruction from the tutor.
problem (str): The problem description.
previous_instruction (list): List of previous tutor instructions.
previous_reasoning (list): List of student's reasoning for previous steps.
Returns:
tuple: A tuple containing the student's thought process and the solution.
"""
system_prompt = "Now you are a student who are interacting with your tutor, Your teacher will gradually guide you to solve a problem. " \
"Please follow the instructions and guidance given by the teacher to solve the problem step by step. " \
"At the same time, please think carefully and put your thoughts and reasoning process within <think></think>, " \
"and output your solution for this step after </think>. Please be sure to think carefully before replying." + '\nProblem:' + problem
        messages = [{"role": "system", "content": system_prompt}]
        assert len(previous_instruction) == len(previous_reasoning)
        for i in range(len(previous_instruction)):
            # Number each prior step by its own index rather than by the list length.
            messages.append({"role": "user", "content": f'Teacher Instruction for Step {i + 1}:' + previous_instruction[i]})
            messages.append({"role": "assistant", "content": previous_reasoning[i]})
        messages.append({"role": "user", "content": f'Teacher Instruction for Step {len(previous_instruction) + 1}:' + instruction})
thought, solution = self.generate(messages)
return thought, solution
def generate(self, messages):
"""
Generates a response from the language model and extracts thought and solution.
Args:
messages (list): A list of message dictionaries for the language model.
Returns:
tuple: A tuple containing the extracted thought and solution from the response.
"""
text = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=3072,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.6,
            top_p=0.95,
        )
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
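        # Separate the <think>...</think> reasoning trace from the final answer.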
thought = response.split('<think>')[-1].split('</think>')[0]
solution = response.split('</think>')[-1]
return thought, solution
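

if __name__ == "__main__":
    # Minimal usage sketch, not an official driver script: the model path and
    # problem below are placeholders, and using FIRST_STEP_INSTANTIATION_PROMPT
    # as the template-building prompt is an assumption based on its name.
    navigator = Navigator("path/to/model")
    # Reuse the navigator's weights for the student to avoid loading them twice.
    student = InferenceLLM(None, inherit=True,
                           inherit_model=navigator.model,
                           inherit_tokenizer=navigator.tokenizer)
    problem = "If 3x + 5 = 20, what is x?"
    navigator.initializing_reasoning_trajectory(FIRST_STEP_INSTANTIATION_PROMPT, problem)
    for step in navigator.reasoning_flow:
        instruction = navigator.initialize_reason_problem(problem, step)
        navigator.reasoning_instructions.append(instruction)
        thought, solution = student.interplay(instruction, problem,
                                              navigator.reasoning_instructions[:-1],
                                              navigator.instantiation)
        navigator.instantiation.append(solution)
        print(f"Step: {step}\nInstruction: {instruction}\nSolution: {solution}\n")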