From 9cbf38be9ef25dc0d5eedcdddd06cfc186e83e91 Mon Sep 17 00:00:00 2001 From: Naragod Date: Wed, 26 Nov 2025 17:43:27 -0500 Subject: [PATCH 01/10] USTORY-549: Rename remote_model param to model_name --- ai_feedback/__main__.py | 4 ++-- ai_feedback/code_processing.py | 4 ++-- ai_feedback/helpers/constants.py | 2 +- ai_feedback/image_processing.py | 4 ++-- ai_feedback/text_processing.py | 4 ++-- presentation_materials/Example_commands.md | 6 ++++-- stdout | 13 +++++++++++++ 7 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 stdout diff --git a/ai_feedback/__main__.py b/ai_feedback/__main__.py index a4f8d2f..cc01f0f 100644 --- a/ai_feedback/__main__.py +++ b/ai_feedback/__main__.py @@ -199,10 +199,10 @@ def main() -> int: help=HELP_MESSAGES["model"], ) parser.add_argument( - "--remote_model", + "--model_name", type=str, required=False, - help=HELP_MESSAGES["remote_model"], + help=HELP_MESSAGES["model_name"], ) parser.add_argument( "--output", diff --git a/ai_feedback/code_processing.py b/ai_feedback/code_processing.py index 5725500..2e9c579 100644 --- a/ai_feedback/code_processing.py +++ b/ai_feedback/code_processing.py @@ -70,8 +70,8 @@ def process_code( if args.model in model_mapping: model_class = model_mapping[args.model] - if model_class.__name__ == 'RemoteModel' and args.remote_model: - model = model_class(model_name=args.remote_model) + if model_class.__name__ == 'RemoteModel' and args.model_name: + model = model_class(model_name=args.model_name) else: model = model_class() else: diff --git a/ai_feedback/helpers/constants.py b/ai_feedback/helpers/constants.py index f1d57ba..ff7a68d 100644 --- a/ai_feedback/helpers/constants.py +++ b/ai_feedback/helpers/constants.py @@ -8,7 +8,7 @@ "solution": "The file path for the solution file.", "question": "The specific question number to analyze within the assignment (if applicable).", "model": "The name of the LLM model to use for evaluation.", - "remote_model": "When using --remote=model, this option specifies the remote model to use.", + "model_name": "When using --remote=model, this option specifies the remote model to use.", "output": "Format to display the output response.", "llama_mode": "Specifies how to invoke llama.cpp: either directly via its command‐line interface (CLI) or by sending requests to a running llama-server instance.", "test_output": "The output of tests from evaluating the assignment.", diff --git a/ai_feedback/image_processing.py b/ai_feedback/image_processing.py index 3e71160..cf6b06e 100644 --- a/ai_feedback/image_processing.py +++ b/ai_feedback/image_processing.py @@ -164,8 +164,8 @@ def process_image( elif args.model == Models.CLAUDE.value: responses.append(anthropic_call(message, model="claude-3-7-sonnet-20250219")) elif args.model == Models.REMOTE.value: - if args.remote_model: - model = RemoteModel(model_name=args.remote_model) + if args.model_name: + model = RemoteModel(model_name=args.model_name) else: model = RemoteModel() diff --git a/ai_feedback/text_processing.py b/ai_feedback/text_processing.py index fcb8dfe..3038008 100644 --- a/ai_feedback/text_processing.py +++ b/ai_feedback/text_processing.py @@ -50,8 +50,8 @@ def process_text( if args.model in model_mapping: model_class = model_mapping[args.model] - if model_class.__name__ == 'RemoteModel' and args.remote_model: - model = model_class(model_name=args.remote_model) + if model_class.__name__ == 'RemoteModel' and args.model_name: + model = model_class(model_name=args.model_name) else: model = model_class() else: diff --git 
a/presentation_materials/Example_commands.md b/presentation_materials/Example_commands.md index 30e766a..06ee7a3 100644 --- a/presentation_materials/Example_commands.md +++ b/presentation_materials/Example_commands.md @@ -9,10 +9,12 @@ python -m ai_feedback \ --submission_type jupyter \ --prompt code_explanation \ --scope code \ ---assignment presentation_materials/iris_image_examples/image_test_incorrect \ +--submission presentation_materials/iris_image_examples/image_test_incorrect/student_submission.ipynb \ --question "4" \ ---model claude-3.7-sonnet \ +--model deepSeek-R1:70B \ +--model_options max_tokens=20000 \ --output stdout + ``` # Example Response diff --git a/stdout b/stdout new file mode 100644 index 0000000..717c36c --- /dev/null +++ b/stdout @@ -0,0 +1,13 @@ +Let me review your submission and provide feedback on any errors I found: + +1. **Error in Boxplot Implementation**: +- **Line 50**: `by='species'` +- **Why it's an error**: The boxplots are using numerical species codes (0,1,2) instead of the actual species names for labeling. This makes the plot less informative since viewers won't know which number corresponds to which species. +- **Guidance**: Instead of using the numerical `species` column, use the `species name` column you created earlier to make the boxplot labels more descriptive and meaningful. + +2. **Error in Species Mapping**: +- **Line 38**: `df['species name'] = iris.target_names[df['species']]` +- **Why it's an error**: While this line creates a mapping from species numbers to names, it's not being utilized properly in the boxplot implementation (as noted above). This leaves the final visualization unclear. +- **Guidance**: Ensure that any visualizations using `species` information use the `species name` column instead of the numerical codes for better readability. + +These changes will make your code more robust and your visualizations clearer. Keep up the good work! 
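Patch 01 is a mechanical rename, but the resulting control flow is easier to see in one place. A minimal, self-contained sketch, assuming only the defaults visible in this series (the `RemoteModel` stub below mirrors just the real constructor's default; the actual class lives in `ai_feedback/models/RemoteModel.py`):

```python
from argparse import Namespace


class RemoteModel:
    """Stand-in for ai_feedback.models.RemoteModel; only the default model name is mirrored here."""

    def __init__(self, model_name: str = "gpt-oss:120b") -> None:
        self.model_name = model_name


def build_model(args: Namespace) -> RemoteModel:
    # After this patch, callers read args.model_name instead of args.remote_model.
    if getattr(args, "model_name", None):
        return RemoteModel(model_name=args.model_name)
    return RemoteModel()


print(build_model(Namespace(model_name="deepSeek-R1:70B")).model_name)  # deepSeek-R1:70B
print(build_model(Namespace(model_name=None)).model_name)               # gpt-oss:120b
```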
From e48bf3678ce06a0786bc438e9cb7d8517a73c568 Mon Sep 17 00:00:00 2001
From: Naragod
Date: Wed, 26 Nov 2025 20:52:55 -0500
Subject: [PATCH 02/10] USTORY-549: Update model cli parameter to provider

---
 ai_feedback/__main__.py          | 6 +++---
 ai_feedback/helpers/constants.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/ai_feedback/__main__.py b/ai_feedback/__main__.py
index cc01f0f..c095d10 100644
--- a/ai_feedback/__main__.py
+++ b/ai_feedback/__main__.py
@@ -192,11 +192,11 @@ def main() -> int:
     parser.add_argument("--solution", type=str, required=False, default="", help=HELP_MESSAGES["solution"])
     parser.add_argument("--question", type=str, required=False, help=HELP_MESSAGES["question"])
     parser.add_argument(
-        "--model",
+        "--provider",
         type=str,
         choices=arg_options.get_enum_values(arg_options.Models),
         required=True,
-        help=HELP_MESSAGES["model"],
+        help=HELP_MESSAGES["provider"],
     )
     parser.add_argument(
         "--model_name",
         type=str,
         required=False,
         help=HELP_MESSAGES["model_name"],
     )
     parser.add_argument(
         "--output",
@@ -310,7 +310,7 @@ def main() -> int:
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     output_text = markdown_template.format(
         question=args.question or "N/A",
-        model=args.model,
+        model=args.provider,
         request=request,
         response=response,
         timestamp=timestamp,
diff --git a/ai_feedback/helpers/constants.py b/ai_feedback/helpers/constants.py
index ff7a68d..0534968 100644
--- a/ai_feedback/helpers/constants.py
+++ b/ai_feedback/helpers/constants.py
@@ -7,8 +7,8 @@
     "submission": "The file path for the submission file.",
     "solution": "The file path for the solution file.",
     "question": "The specific question number to analyze within the assignment (if applicable).",
-    "model": "The name of the LLM model to use for evaluation.",
-    "model_name": "When using --remote=model, this option specifies the remote model to use.",
+    "provider": "The name of the LLM provider to use for evaluation.",
+    "model_name": "The name of the LLM model to use for evaluation; this overrides the provider's default model.",
     "output": "Format to display the output response.",
     "llama_mode": "Specifies how to invoke llama.cpp: either directly via its command‐line interface (CLI) or by sending requests to a running llama-server instance.",
     "test_output": "The output of tests from evaluating the assignment.",

From 9da3469a0a493556831d11232cce34bec3db9ec3 Mon Sep 17 00:00:00 2001
From: Naragod
Date: Wed, 26 Nov 2025 20:56:09 -0500
Subject: [PATCH 03/10] USTORY-549: Extract image processing implementation to model implementation

---
 ai_feedback/helpers/image_extractor.py |  6 ++++
 ai_feedback/models/ClaudeModel.py      | 41 ++++++++++++++++++++++++--
 ai_feedback/models/Model.py            | 18 +++++++++--
 ai_feedback/models/OpenAIModel.py      | 38 +++++++++++++++++++++---
 ai_feedback/models/RemoteModel.py      | 18 +++++++++--
 5 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/ai_feedback/helpers/image_extractor.py b/ai_feedback/helpers/image_extractor.py
index 8ec76e5..e5b0164 100644
--- a/ai_feedback/helpers/image_extractor.py
+++ b/ai_feedback/helpers/image_extractor.py
@@ -8,6 +8,12 @@
 from typing import Any, Dict, List, Optional
 
 
+def encode_image(image_path: os.PathLike) -> str:
+    """Encodes the image found at {image_path} to a base64 string"""
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
+
 def extract_images(input_notebook_path: os.PathLike, output_directory: os.PathLike, output_name: str) -> List[Path]:
     image_paths = []
     with open(input_notebook_path, "r") as file:
diff --git a/ai_feedback/models/ClaudeModel.py
b/ai_feedback/models/ClaudeModel.py index 8971639..c4c5fb8 100644 --- a/ai_feedback/models/ClaudeModel.py +++ b/ai_feedback/models/ClaudeModel.py @@ -1,10 +1,12 @@ import os from pathlib import Path +from ollama import Message from typing import Optional, Tuple import anthropic from dotenv import load_dotenv +from ..helpers.image_extractor import encode_image from ..helpers.model_options_helpers import cast_to_type, claude_option_schema from .Model import Model @@ -13,11 +15,12 @@ class ClaudeModel(Model): - def __init__(self) -> None: + def __init__(self, model_name: str = None) -> None: """ Initializes the ClaudeModel with the Anthropic client using an API key. """ - super().__init__() + super().__init__(model_name) + self.model_name = model_name if model_name else "claude-3-7-sonnet-20250219" self.client = anthropic.Anthropic(api_key=os.getenv("CLAUDE_API_KEY")) def generate_response( @@ -62,7 +65,7 @@ def generate_response( # Construct request parameters request_kwargs = { - "model": "claude-3-7-sonnet-20250219", + "model": self.model_name, "system": system_instructions, "messages": [{"role": "user", "content": request}], **model_options, @@ -75,3 +78,35 @@ def generate_response( return None return prompt, response.content[0].text + + def process_image(self, message: Message, args) -> str: + """Sends a request to Claude""" + images = [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": f"{encode_image(image.value)}", + }, + } + for image in message.images + ] + response = self.client.messages.create( + max_tokens=2048, + model=self.model_name, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": message.content, + } + ] + + images, + } + ], + temperature=0.33, + ) + return response.content[0].text diff --git a/ai_feedback/models/Model.py b/ai_feedback/models/Model.py index a6be673..dfb2d60 100644 --- a/ai_feedback/models/Model.py +++ b/ai_feedback/models/Model.py @@ -1,4 +1,5 @@ from typing import Any, Tuple +from ollama import chat, Message """ Parent Class for LLMs. @@ -10,11 +11,11 @@ class Model: - def __init__(self): + def __init__(self, model_name: str = None): """ Initialize the model. """ - pass + self.model_name = model_name def generate_response(self, prompt: str, **kwargs: Any) -> Tuple[str, str]: """ @@ -37,3 +38,16 @@ def generate_response(self, prompt: str, **kwargs: Any) -> Tuple[str, str]: NotImplementedError: If the method is not implemented by the subclass. """ raise NotImplementedError("Subclasses must implement the `generate_response` method.") + + def process_image(self, message: Message, args) -> str | None: + """ + Process an image-based request and generate a response. 
+ + Args: + message: The message containing the prompt and images + args: Command-line arguments containing model configuration + + Returns: + str | None: The model's response or None if processing fails + """ + return chat(model=self.model_name, messages=[message], options={"temperature": 0.33}).message.content diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index b7e5a68..c466c7c 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -1,8 +1,9 @@ import json import os -import re from pathlib import Path +from ollama import Message from typing import Optional, Tuple +from ..helpers.image_extractor import encode_image import openai from dotenv import load_dotenv @@ -18,13 +19,14 @@ class OpenAIModel(Model): - def __init__(self) -> None: + def __init__(self, model_name: str = None) -> None: """ Initialize an OpenAIModel instance. Loads the OpenAI API key from environment variables and prepares the client. """ - super().__init__() + super().__init__(model_name) + self.model_name = model_name if model_name else "gpt-4o" self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY")) def generate_response( @@ -70,6 +72,34 @@ def generate_response( response = self._call_openai(prompt, system_instructions, model_options, schema) return prompt, response + def process_image(self, message: Message, args) -> str: + """Sends a request to OpenAI""" + + images = [ + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{encode_image(image.value)}"}, + } + for image in message.images + ] + completion = self.client.chat.completions.create( + model=self.model_name, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": message.content, + } + ] + + images, + } + ], + temperature=0.33, + ) + return completion.choices[0].message.content + def _call_openai( self, prompt: str, system_instructions: str, model_options: Optional[dict] = None, schema: Optional[dict] = None ) -> str: @@ -91,7 +121,7 @@ def _call_openai( model_options = cast_to_type(openai_chat_option_schema, model_options) response = self.client.chat.completions.create( - model="gpt-4o-mini", + model=self.model_name, messages=[ {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, diff --git a/ai_feedback/models/RemoteModel.py b/ai_feedback/models/RemoteModel.py index e5442db..4bb148e 100644 --- a/ai_feedback/models/RemoteModel.py +++ b/ai_feedback/models/RemoteModel.py @@ -1,11 +1,10 @@ import json import os -import re -import sys from pathlib import Path -from typing import Optional, Tuple +from typing import Any, Optional, Tuple import requests +from ollama import Message from dotenv import load_dotenv from .Model import Model @@ -26,6 +25,7 @@ def __init__( model_name: str = "gpt-oss:120b", ) -> None: """Initializes the remote model with a remote URL and model name.""" + super().__init__(model_name) self.remote_url = remote_url self.model_name = model_name @@ -92,3 +92,15 @@ def generate_response( response = requests.post(self.remote_url, data=data, headers=headers, files=files) return prompt, response.json() + + def process_image(self, message: Message, args: Any) -> str: + _request, response = self.generate_response( + args.rendered_prompt, + args.submission, + system_instructions=args.system_instructions, + question=args.question, + submission_image=args.submission_image, + json_schema=args.json_schema, + model_options=args.model_options, + ) + return str(response) From 
f6cf527c4ea4eafa2a4294401eb613a9be94eb78 Mon Sep 17 00:00:00 2001 From: Naragod Date: Wed, 26 Nov 2025 20:59:57 -0500 Subject: [PATCH 04/10] USTORY-549: Implement model factory to use dependency injection --- ai_feedback/code_processing.py | 15 ++-- ai_feedback/helpers/arg_options.py | 13 --- ai_feedback/image_processing.py | 115 ++++----------------------- ai_feedback/models/CodeLlamaModel.py | 9 +-- ai_feedback/models/__init__.py | 83 ++++++++++++++++++- ai_feedback/text_processing.py | 15 ++-- 6 files changed, 109 insertions(+), 141 deletions(-) diff --git a/ai_feedback/code_processing.py b/ai_feedback/code_processing.py index 2e9c579..718fce9 100644 --- a/ai_feedback/code_processing.py +++ b/ai_feedback/code_processing.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Callable, Optional, Tuple -from .helpers.arg_options import model_mapping +from .models import ModelFactory from .helpers.file_converter import rename_files from .helpers.template_utils import render_prompt_template @@ -68,14 +68,11 @@ def process_code( marking_instructions=marking_instructions, ) - if args.model in model_mapping: - model_class = model_mapping[args.model] - if model_class.__name__ == 'RemoteModel' and args.model_name: - model = model_class(model_name=args.model_name) - else: - model = model_class() - else: - print("Invalid model selected for code scope.") + try: + model_args = {'model_name': args.model_name} if args.model_name else {} + model = ModelFactory.create(args.provider, **model_args) + except ValueError as e: + print(f"Error: {e}") sys.exit(1) if args.scope == "code": diff --git a/ai_feedback/helpers/arg_options.py b/ai_feedback/helpers/arg_options.py index 870dbc5..992d8f3 100644 --- a/ai_feedback/helpers/arg_options.py +++ b/ai_feedback/helpers/arg_options.py @@ -1,7 +1,5 @@ from enum import Enum -from .. import models - def get_enum_values(enum_class: type[Enum]) -> list[str]: """ @@ -68,17 +66,6 @@ def __str__(self): return self.value -model_mapping = { - "deepSeek-R1:70B": models.DeepSeekModel, - "openai": models.OpenAIModel, - "openai-vector": models.OpenAIModelVector, - "codellama:latest": models.CodeLlamaModel, - "claude-3.7-sonnet": models.ClaudeModel, - "remote": models.RemoteModel, - "deepSeek-v3": models.DeepSeekV3Model, -} - - class Models(Enum): """ Enum representing the available AI model types. 
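The `model_mapping` dict removed above is replaced by the registry-backed `ModelFactory` that the processing modules now call. A condensed, standalone sketch of the lookup that `ModelFactory.create` performs (the full implementation, including the `register` and `is_registered` helpers, appears in `ai_feedback/models/__init__.py` later in this patch):

```python
from typing import Dict, Type


class Model:
    def __init__(self, model_name: str | None = None) -> None:
        self.model_name = model_name


class RemoteModel(Model):
    pass


class ModelFactory:
    _registry: Dict[str, Type[Model]] = {"remote": RemoteModel}

    @classmethod
    def create(cls, provider: str, **kwargs) -> Model:
        # Unknown providers fail fast with the list of registered names.
        if provider not in cls._registry:
            available = ", ".join(sorted(cls._registry))
            raise ValueError(f"Unknown model provider '{provider}'. Available providers: {available}")
        return cls._registry[provider](**kwargs)


model = ModelFactory.create("remote", model_name="gpt-oss:120b")
print(type(model).__name__, model.model_name)  # RemoteModel gpt-oss:120b
```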
diff --git a/ai_feedback/image_processing.py b/ai_feedback/image_processing.py index cf6b06e..a8ddd54 100644 --- a/ai_feedback/image_processing.py +++ b/ai_feedback/image_processing.py @@ -1,91 +1,15 @@ -import base64 +import sys from pathlib import Path from typing import Optional -from anthropic import Anthropic -from dotenv import load_dotenv -from ollama import Image, Message, chat -from openai import OpenAI +from ollama import Image, Message from PIL import Image as PILImage +from .models import ModelFactory from .helpers.arg_options import Models from .helpers.image_extractor import extract_images, extract_qmd_python_images from .helpers.image_reader import * from .helpers.template_utils import render_prompt_template -from .models.RemoteModel import RemoteModel - - -def encode_image(image_path: os.PathLike) -> bytes: - """Encodes the image found at {image_path} to a base64 string""" - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8") - - -def openai_call(message: Message, model: str) -> str | None: - """Sends a request to OpenAI""" - # Load environment variables from .env file - load_dotenv() - client = OpenAI() - images = [ - { - "type": "image_url", - "image_url": {"url": f"data:image/png;base64,{encode_image(image.value)}"}, - } - for image in message.images - ] - completion = client.chat.completions.create( - model=model, - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": message.content, - } - ] - + images, - } - ], - temperature=0.33, - ) - return completion.choices[0].message.content - - -def anthropic_call(message: Message, model: str) -> str | None: - """Sends a request to OpenAI""" - # Load environment variables from .env file - load_dotenv() - client = Anthropic(api_key=os.getenv("CLAUDE_API_KEY")) - images = [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/png", - "data": f"{encode_image(image.value)}", - }, - } - for image in message.images - ] - message = client.messages.create( - max_tokens=2048, - model=model, - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": message.content, - } - ] - + images, - } - ], - temperature=0.33, - ) - return message.content[0].text def process_image( @@ -159,27 +83,16 @@ def process_image( # Prompt the LLM requests.append(f"{message.content}\n\n{[str(image.value) for image in message.images]}") - if args.model == Models.OPENAI.value: - responses.append(openai_call(message, model="gpt-4o")) - elif args.model == Models.CLAUDE.value: - responses.append(anthropic_call(message, model="claude-3-7-sonnet-20250219")) - elif args.model == Models.REMOTE.value: - if args.model_name: - model = RemoteModel(model_name=args.model_name) - else: - model = RemoteModel() - - _request, response = model.generate_response( - rendered_prompt, - args.submission, - system_instructions=system_instructions, - question=question, - submission_image=args.submission_image, - json_schema=args.json_schema, - model_options=args.model_options, - ) - responses.append(str(response)) - else: - responses.append(chat(model=args.model, messages=[message], options={"temperature": 0.33}).message.content) + + try: + model_args = {'model_name': args.model_name} if args.model_name else {} + model = ModelFactory.create(args.provider, **model_args) + except ValueError as e: + print(f"Error: {e}") + sys.exit(1) + + args.rendered_prompt = rendered_prompt + args.system_instructions = system_instructions + 
responses.append(model.process_image(message, args)) return "\n\n---\n\n".join(requests), "\n\n---\n\n".join(responses) diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py index e738716..f082f91 100644 --- a/ai_feedback/models/CodeLlamaModel.py +++ b/ai_feedback/models/CodeLlamaModel.py @@ -11,13 +11,12 @@ class CodeLlamaModel(Model): - def __init__(self) -> None: + def __init__(self, model_name: str = None) -> None: """ Initializes the CodeLlamaModel with configuration for the model and system instructions. """ - self.model = { - "model": "codellama:latest", - } + super().__init__(model_name) + self.model_name = model_name if model_name else "codellama:latest" def generate_response( self, @@ -64,7 +63,7 @@ def generate_response( model_options = cast_to_type(ollama_option_schema, model_options) response = ollama.chat( - model=self.model["model"], + model=self.model_name, messages=[ {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, diff --git a/ai_feedback/models/__init__.py b/ai_feedback/models/__init__.py index 8880171..bf9c35c 100644 --- a/ai_feedback/models/__init__.py +++ b/ai_feedback/models/__init__.py @@ -1,7 +1,82 @@ +from typing import Type, Dict + +from .Model import Model from .ClaudeModel import ClaudeModel -from .CodeLlamaModel import CodeLlamaModel -from .DeepSeekModel import DeepSeekModel -from .DeepSeekV3Model import DeepSeekV3Model +from .RemoteModel import RemoteModel from .OpenAIModel import OpenAIModel +from .CodeLlamaModel import CodeLlamaModel from .OpenAIModelVector import OpenAIModelVector -from .RemoteModel import RemoteModel +from .DeepSeekModelModified import DeepSeekModelModified + +class ModelFactory: + """Factory for creating AI model instances with proper dependency injection.""" + + _registry: Dict[str, Type[Model]] = { + "remote": RemoteModel, + "claude": ClaudeModel, + "openai": OpenAIModel, + "codellama": CodeLlamaModel, + "deepseek": DeepSeekModelModified, + "openai-vector": OpenAIModelVector, + } + + @classmethod + def create(cls, provider: str, **kwargs) -> Model: + """ + Create a model instance with dependency injection. + + Args: + provider: The model provider identifier + **kwargs: Additional arguments to pass to the model constructor + + Returns: + Model: An instance of the requested model + + Raises: + ValueError: If the provider is not registered + """ + model_class = cls.get_model_class(provider) + return model_class(**kwargs) + + @classmethod + def get_model_class(cls, provider: str) -> Type[Model]: + """ + Get the model class for a provider without instantiating it. + + Args: + provider: The model provider identifier + + Returns: + Type[Model]: The model class + + Raises: + ValueError: If the provider is not registered + """ + if provider not in cls._registry: + available = ", ".join(sorted(cls._registry.keys())) + raise ValueError( + f"Unknown model provider '{provider}'. " + f"Available providers: {available}" + ) + return cls._registry[provider] + + @classmethod + def register(cls, provider: str, model_class: Type[Model]) -> None: + """ + Register a new model provider (for extensibility). 
+ + Args: + provider: The model provider identifier + model_class: The model class to register + """ + cls._registry[provider] = model_class + + @classmethod + def is_registered(cls, provider: str) -> bool: + """Check if a provider is registered.""" + return provider in cls._registry + + @classmethod + def get_available_providers(cls) -> list[str]: + """Get list of all available provider names.""" + return sorted(cls._registry.keys()) diff --git a/ai_feedback/text_processing.py b/ai_feedback/text_processing.py index 3038008..ce0e817 100644 --- a/ai_feedback/text_processing.py +++ b/ai_feedback/text_processing.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import Optional, Tuple -from .helpers.arg_options import model_mapping +from .models import ModelFactory from .helpers.template_utils import render_prompt_template @@ -48,14 +48,11 @@ def process_text( marking_instructions=marking_instructions, ) - if args.model in model_mapping: - model_class = model_mapping[args.model] - if model_class.__name__ == 'RemoteModel' and args.model_name: - model = model_class(model_name=args.model_name) - else: - model = model_class() - else: - print("Invalid model selected for text scope.") + try: + model_args = {'model_name': args.model_name} if args.model_name else {} + model = ModelFactory.create(args.provider, **model_args) + except ValueError as e: + print(f"Error: {e}") sys.exit(1) if args.question: From 1fba897be64e87c22e7f864ab1d156820f00393c Mon Sep 17 00:00:00 2001 From: Naragod Date: Wed, 26 Nov 2025 21:00:37 -0500 Subject: [PATCH 05/10] USTORY-549: Combine both DeepSeek models into a single class. --- .gitignore | 3 + ai_feedback/models/DeepSeekModelModified.py | 309 ++++++++++++++++++++ stdout | 13 - 3 files changed, 312 insertions(+), 13 deletions(-) create mode 100644 ai_feedback/models/DeepSeekModelModified.py delete mode 100644 stdout diff --git a/.gitignore b/.gitignore index 9db19c1..94ba97c 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,6 @@ cython_debug/ # macOS system files .DS_Store + +# output +stdout \ No newline at end of file diff --git a/ai_feedback/models/DeepSeekModelModified.py b/ai_feedback/models/DeepSeekModelModified.py new file mode 100644 index 0000000..aa10bdd --- /dev/null +++ b/ai_feedback/models/DeepSeekModelModified.py @@ -0,0 +1,309 @@ +import os +import sys +import json +import subprocess +from pathlib import Path +from typing import Optional, Tuple + +import ollama +import requests +from dotenv import load_dotenv + +from .Model import Model +from ..helpers.model_options_helpers import cast_to_type, ollama_option_schema + +load_dotenv() + +class DeepSeekModelModified(Model): + """ + Backends: + - ollama: Use Ollama API (for models like deepseek-r1:70b) + - llama: Use llama.cpp (CLI or server mode, for models like deepseek-v3) + + Examples: + # DeepSeek R1 via Ollama + model = DeepSeekModelModified(model_name="deepseek-r1:70b", backend="ollama") + + # DeepSeek V3 via llama.cpp server + model = DeepSeekModelModified(model_name="deepseek-v3", backend="llama", llama_mode="server") + + # DeepSeek V3 via llama.cpp CLI + model = DeepSeekModelModified(model_name="deepseek-v3", backend="llama", llama_mode="cli") + """ + + def __init__( + self, + model_name: str = "deepseek-r1:70b", + backend: str = "ollama", + llama_mode: str = "cli", + ) -> None: + """ + Initializes the unified DeepSeekModel with configurable backend. + + Args: + model_name (str): The model name/identifier to use. + backend (str): Backend to use - "ollama" or "llama". 
+ llama_mode (str): Mode for llama.cpp backend - "cli" or "server". + """ + super().__init__(model_name) + self.backend = backend + self.model_name = model_name + self.llama_mode = llama_mode + + # Load llama.cpp configuration + if backend == "llama": + self.gpu_layers = os.getenv('GPU_LAYERS', '40') + self.llama_cli_path = os.getenv('LLAMA_CLI_PATH', '') + self.llama_model_path = os.getenv('LLAMA_MODEL_PATH', '') + self.llama_server_url = os.getenv('LLAMA_SERVER_URL', '').strip() + + def generate_response( + self, + prompt: str, + submission_file: Path, + system_instructions: str, + model_options: Optional[dict] = None, + question: Optional[str] = None, + solution_file: Optional[Path] = None, + test_output: Optional[Path] = None, + scope: Optional[str] = None, + llama_mode: Optional[str] = None, + json_schema: Optional[str] = None, + ) -> Optional[Tuple[str, str]]: + """ + Generate a model response using the prompt and assignment files. + + Args: + prompt (str): The input prompt provided by the user. + submission_file (Optional[Path]): The path to the submission file. + solution_file (Optional[Path]): The path to the solution file. + test_output (Optional[Path]): The path to the test output file. + scope (Optional[str]): The scope to use for generating the response. + question (Optional[str]): An optional question to target specific content. + system_instructions (str): Instructions for the model. + llama_mode (Optional[str]): Optional mode to invoke llama.cpp in (overrides constructor). + json_schema (Optional[str]): Optional json schema to use. + model_options (Optional[dict]): The optional model options to use for generating the response. + + Returns: + Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response, + or None if the response was invalid. + """ + # Load JSON schema if provided + schema = self._load_json_schema(json_schema) + + # Use llama_mode from parameter if provided, otherwise use constructor value + effective_llama_mode = llama_mode if llama_mode else self.llama_mode + + # Route to appropriate backend + if self.backend == "ollama": + response = self._generate_with_ollama(prompt, system_instructions, model_options, schema) + elif self.backend == "llama": + response = self._generate_with_llama(prompt, system_instructions, model_options, schema, effective_llama_mode) + else: + raise ValueError(f"Unsupported backend: {self.backend}. Use 'ollama' or 'llama'.") + + return response + + def _load_json_schema(self, json_schema: Optional[str]) -> Optional[dict]: + """ + Load JSON schema from file if provided. + + Args: + json_schema (Optional[str]): Path to JSON schema file. + + Returns: + Optional[dict]: Loaded schema or None. + """ + if json_schema: + schema_path = Path(json_schema) + if not schema_path.exists(): + raise FileNotFoundError(f"JSON schema file not found: {schema_path}") + with open(schema_path, "r", encoding="utf-8") as f: + return json.load(f) + return None + + def _generate_with_ollama( + self, + prompt: str, + system_instructions: str, + model_options: Optional[dict], + schema: Optional[dict], + ) -> Optional[Tuple[str, str]]: + """ + Generate response using Ollama backend. + + Args: + prompt (str): User prompt. + system_instructions (str): System instructions. + model_options (Optional[dict]): Model options. + schema (Optional[dict]): JSON schema for structured output. + + Returns: + Optional[Tuple[str, str]]: Tuple of (prompt, response) or None. 
+ """ + model_options = cast_to_type(ollama_option_schema, model_options) + + response = ollama.chat( + model=self.model_name, + messages=[ + {"role": "system", "content": system_instructions}, + {"role": "user", "content": prompt}, + ], + format=schema['schema'] if schema else None, + options=model_options if model_options else None, + ) + + if not response or "message" not in response or "content" not in response["message"]: + print("Error: Invalid or empty response from Ollama.") + return None + + return prompt, response["message"]["content"] + + def _generate_with_llama( + self, + prompt: str, + system_instructions: str, + model_options: Optional[dict], + schema: Optional[dict], + llama_mode: str, + ) -> Optional[Tuple[str, str]]: + """ + Generate response using llama.cpp backend (CLI or server). + + Args: + prompt (str): User prompt. + system_instructions (str): System instructions. + model_options (Optional[dict]): Model options. + schema (Optional[dict]): JSON schema for structured output. + llama_mode (str): "cli" or "server". + + Returns: + Optional[Tuple[str, str]]: Tuple of (full_prompt, response) or None. + """ + # Combine system instructions and prompt + full_prompt = f"{system_instructions}\n{prompt}" + + if llama_mode == 'server': + self._ensure_env_vars('llama_server_url') + response = self._get_response_server(full_prompt, model_options, schema) + else: # cli mode + self._ensure_env_vars('llama_model_path', 'llama_cli_path') + response = self._get_response_cli(full_prompt, model_options, schema) + + response = response.strip() + + # Remove end of response marker + end_marker = "[end of text]" + if response.endswith(end_marker): + response = response[: -len(end_marker)] + response = response.strip() + + return full_prompt, response + + def _ensure_env_vars(self, *names): + """ + Ensure that each of the given attribute names exists on self and is truthy. + + Args: + *names (str): One or more attribute names to validate. + + Raises: + RuntimeError: If any of the specified attributes is missing or has a falsy value. + """ + missing = [n for n in names if not getattr(self, n, None)] + if missing: + raise RuntimeError( + f"Error: Required configuration variable(s) {', '.join(missing)} not set. " + f"Please check your environment variables." + ) + + def _get_response_server( + self, prompt: str, model_options: Optional[dict] = None, schema: Optional[dict] = None + ) -> str: + """ + Generate a model response using llama.cpp server. + + Args: + prompt (str): The input prompt provided by the user. + schema (Optional[dict]): Optional schema provided by the user. + model_options (Optional[dict]): The optional model options to use for generating the response. + + Returns: + str: The model response. 
+ """ + url = f"{self.llama_server_url}/v1/completions" + + payload = {"prompt": prompt, **(model_options or {})} + + if schema: + raw_schema = schema.get("schema", schema) + payload["json_schema"] = raw_schema + + try: + response = requests.post(url, json=payload, timeout=3000) + response.raise_for_status() + except requests.RequestException as e: + raise RuntimeError(f"ERROR: Request to llama-server failed: {str(e)}") + + data = response.json() + + try: + model_output = data["choices"][0]["text"] + except (KeyError, IndexError): + print("ERROR: Unexpected JSON format from llama-server:", data, file=sys.stderr, flush=True) + model_output = '' + + return model_output + + def _get_response_cli( + self, prompt: str, model_options: Optional[dict] = None, schema: Optional[dict] = None + ) -> str: + """ + Generate a model response using llama.cpp CLI. + + Args: + prompt (str): The input prompt provided by the user. + schema (Optional[dict]): Optional schema provided by the user. + model_options (Optional[dict]): The optional model options to use for generating the response. + + Returns: + str: The model response. + """ + cmd = [ + self.llama_cli_path, + "-m", + self.llama_model_path, + "--n-gpu-layers", + self.gpu_layers, + "--single-turn", + "--no-display-prompt", + ] + + if schema: + raw_schema = schema["schema"] if "schema" in schema else schema + cmd += ["--json-schema", json.dumps(raw_schema)] + + if model_options: + for key, value in model_options.items(): + cmd += ["--" + key, str(value)] + + try: + completed = subprocess.run( + cmd, input=prompt.encode(), check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300 + ) + except subprocess.TimeoutExpired as e: + # If the process hangs for more than 5 minutes, print whatever has been captured so far + print("ERROR: llama-cli timed out after 5 minutes.", file=sys.stdout, flush=True) + print("Partial stdout:", e.stdout, file=sys.stdout, flush=True) + print("Partial stderr:", e.stderr, file=sys.stdout, flush=True) + raise + except subprocess.CalledProcessError as e: + # If llama-cli returns a non-zero exit code, print its stdout/stderr and re-raise + print("ERROR: llama-cli returned non-zero exit code.", file=sys.stdout, flush=True) + print("llama-cli stdout:", e.stdout, file=sys.stdout, flush=True) + print("llama-cli stderr:", e.stderr, file=sys.stdout, flush=True) + raise RuntimeError(f"llama.cpp failed (code {e.returncode}): {e.stderr.strip()}") + + # Decode with 'replace' so invalid UTF-8 bytes become U+FFFD + return completed.stdout.decode('utf-8', errors='replace') diff --git a/stdout b/stdout deleted file mode 100644 index 717c36c..0000000 --- a/stdout +++ /dev/null @@ -1,13 +0,0 @@ -Let me review your submission and provide feedback on any errors I found: - -1. **Error in Boxplot Implementation**: -- **Line 50**: `by='species'` -- **Why it's an error**: The boxplots are using numerical species codes (0,1,2) instead of the actual species names for labeling. This makes the plot less informative since viewers won't know which number corresponds to which species. -- **Guidance**: Instead of using the numerical `species` column, use the `species name` column you created earlier to make the boxplot labels more descriptive and meaningful. - -2. **Error in Species Mapping**: -- **Line 38**: `df['species name'] = iris.target_names[df['species']]` -- **Why it's an error**: While this line creates a mapping from species numbers to names, it's not being utilized properly in the boxplot implementation (as noted above). 
This leaves the final visualization unclear. -- **Guidance**: Ensure that any visualizations using `species` information use the `species name` column instead of the numerical codes for better readability. - -These changes will make your code more robust and your visualizations clearer. Keep up the good work! From 6dc8c93caf6c1b9e94cb79fe1e81d2677dcaf429 Mon Sep 17 00:00:00 2001 From: Naragod Date: Thu, 27 Nov 2025 13:26:46 -0500 Subject: [PATCH 06/10] USTORY-549: Move model acquisition to top level __main__.py file --- ai_feedback/__main__.py | 19 ++++++++++++++----- ai_feedback/code_processing.py | 11 +---------- ai_feedback/image_processing.py | 11 +---------- ai_feedback/text_processing.py | 11 +---------- 4 files changed, 17 insertions(+), 35 deletions(-) diff --git a/ai_feedback/__main__.py b/ai_feedback/__main__.py index c095d10..1f2f3df 100644 --- a/ai_feedback/__main__.py +++ b/ai_feedback/__main__.py @@ -1,5 +1,4 @@ import argparse -import json import os import os.path import sys @@ -7,6 +6,7 @@ from pathlib import Path from . import code_processing, image_processing, text_processing +from .models import ModelFactory from .helpers import arg_options from .helpers.constants import HELP_MESSAGES @@ -194,7 +194,7 @@ def main() -> int: parser.add_argument( "--provider", type=str, - choices=arg_options.get_enum_values(arg_options.Models), + choices=ModelFactory.get_available_providers(), required=True, help=HELP_MESSAGES["provider"], ) @@ -294,16 +294,25 @@ def main() -> int: if args.prompt_text: prompt_content += args.prompt_text + try: + model_args = {'model_name': args.model_name} if args.model_name else {} + model = ModelFactory.create(args.provider, **model_args) + except ValueError as e: + print(f"Error: {e}") + sys.exit(1) + if args.scope == "image": prompt = {"prompt_content": prompt_content} - request, response = image_processing.process_image(args, prompt, system_instructions, marking_instructions) + request, response = image_processing.process_image( + model, args, prompt, system_instructions, marking_instructions + ) elif args.scope == "text": request, response = text_processing.process_text( - args, prompt_content, system_instructions, marking_instructions + model, args, prompt_content, system_instructions, marking_instructions ) else: request, response = code_processing.process_code( - args, prompt_content, system_instructions, marking_instructions + model, args, prompt_content, system_instructions, marking_instructions ) markdown_template = load_markdown_template(args.output_template) diff --git a/ai_feedback/code_processing.py b/ai_feedback/code_processing.py index 718fce9..ee329cb 100644 --- a/ai_feedback/code_processing.py +++ b/ai_feedback/code_processing.py @@ -1,9 +1,7 @@ import os -import sys from pathlib import Path from typing import Callable, Optional, Tuple -from .models import ModelFactory from .helpers.file_converter import rename_files from .helpers.template_utils import render_prompt_template @@ -11,7 +9,7 @@ def process_code( - args, prompt: str, system_instructions: str, marking_instructions: Optional[str] = None + model, args, prompt: str, system_instructions: str, marking_instructions: Optional[str] = None ) -> Tuple[str, str]: """ Processes assignment files and generates a response using the selected model. 
@@ -68,13 +66,6 @@ def process_code( marking_instructions=marking_instructions, ) - try: - model_args = {'model_name': args.model_name} if args.model_name else {} - model = ModelFactory.create(args.provider, **model_args) - except ValueError as e: - print(f"Error: {e}") - sys.exit(1) - if args.scope == "code": if args.question: request, response = model.generate_response( diff --git a/ai_feedback/image_processing.py b/ai_feedback/image_processing.py index a8ddd54..a0ba93b 100644 --- a/ai_feedback/image_processing.py +++ b/ai_feedback/image_processing.py @@ -5,15 +5,13 @@ from ollama import Image, Message from PIL import Image as PILImage -from .models import ModelFactory -from .helpers.arg_options import Models from .helpers.image_extractor import extract_images, extract_qmd_python_images from .helpers.image_reader import * from .helpers.template_utils import render_prompt_template def process_image( - args, prompt: dict, system_instructions: str, marking_instructions: Optional[str] = None + model, args, prompt: dict, system_instructions: str, marking_instructions: Optional[str] = None ) -> tuple[str, str]: """Generates feedback for an image submission. Returns the LLM prompt delivered and the returned response.""" @@ -84,13 +82,6 @@ def process_image( # Prompt the LLM requests.append(f"{message.content}\n\n{[str(image.value) for image in message.images]}") - try: - model_args = {'model_name': args.model_name} if args.model_name else {} - model = ModelFactory.create(args.provider, **model_args) - except ValueError as e: - print(f"Error: {e}") - sys.exit(1) - args.rendered_prompt = rendered_prompt args.system_instructions = system_instructions responses.append(model.process_image(message, args)) diff --git a/ai_feedback/text_processing.py b/ai_feedback/text_processing.py index ce0e817..8f80cf2 100644 --- a/ai_feedback/text_processing.py +++ b/ai_feedback/text_processing.py @@ -1,13 +1,11 @@ -import sys from pathlib import Path from typing import Optional, Tuple -from .models import ModelFactory from .helpers.template_utils import render_prompt_template def process_text( - args, prompt: str, system_instructions: str, marking_instructions: Optional[str] = None + model, args, prompt: str, system_instructions: str, marking_instructions: Optional[str] = None ) -> Tuple[str, str]: """ Processes text-based assignment files and generates a response using the selected model. 
@@ -48,13 +46,6 @@ def process_text( marking_instructions=marking_instructions, ) - try: - model_args = {'model_name': args.model_name} if args.model_name else {} - model = ModelFactory.create(args.provider, **model_args) - except ValueError as e: - print(f"Error: {e}") - sys.exit(1) - if args.question: request, response = model.generate_response( prompt=rendered_prompt, From 563cbb6110b032a40d61bdf200e971d87cfc57ef Mon Sep 17 00:00:00 2001 From: Naragod Date: Thu, 27 Nov 2025 14:33:22 -0500 Subject: [PATCH 07/10] USTORY-549: Update documentation --- README.md | 44 +++++++++++++++++++--- presentation_materials/Example_commands.md | 3 +- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 41c89b9..671be78 100644 --- a/README.md +++ b/README.md @@ -31,18 +31,19 @@ For the image scope, the program takes up to two files, depending on the prompt | `--submission_type` | Type of submission (from `arg_options.FileType`) | ❌ | | `--prompt` | Pre-defined prompt name or file path to custom prompt file | ❌ **| | `--prompt_text` | String prompt | ❌ ** | -| `--scope` | Processing scope (`image` or `code` or `text`) | ✅ | +| `--scope` | Processing scope (from `arg_options.Scope`) | ✅ | | `--submission` | Submission file path | ✅ | | `--question` | Specific question to evaluate | ❌ | -| `--model` | Model type (from `arg_options.Models`) | ✅ | +| `--provider` | Model provider (available providers from `ModelFactory`) | ✅ | +| `--model_name` | Specific model name to override provider's default model | ❌ | | `--output` | File path for where to record the output | ❌ | | `--solution` | File path for the solution file | ❌ | | `--test_output` | File path for the file containing the results from tests | ❌ | | `--submission_image` | File path for the submission image file | ❌ | | `--solution_image` | File path for the solution image file | ❌ | | `--system_prompt` | Pre-defined system prompt name or file path to custom system prompt | ❌ | -| `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ | -| `--output_template` | Output template file (from `arg_options.OutputTemplate) | ❌ | +| `--llama_mode` | How to invoke llama.cpp (from `arg_options.LlamaMode`) | ❌ | +| `--output_template` | Output template file (from `arg_options.OutputTemplate`) | ❌ | | `--json_schema` | File path to json file for schema for structured output | ❌ | | `--marking_instructions` | File path to marking instructions/rubric | ❌ | | `--model_options` | Comma-separated key-value pairs of model options and their values | ❌ | @@ -145,6 +146,38 @@ System prompts define the AI model's behavior, tone, and approach to providing f ## Marking Instructions The `--marking_instructions` argument accepts a file path to a text file containing rubric or marking instructions. If the prompt template contains a `{marking_instructions}` placeholder, the contents of the file will be inserted at that location in the prompt. +## Providers and Models + +The `--provider` argument specifies which model provider to use. Each provider has a default model that will be used unless you override it with the `--model_name` argument. 
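+
+The same factory that backs these flags can also be driven programmatically; a minimal sketch (the provider key and model name below are only illustrative, and `model_name` may be omitted to fall back to the provider's default):
+
+```python
+from ai_feedback.models import ModelFactory
+
+# Lists the registered provider keys accepted by --provider.
+print(ModelFactory.get_available_providers())
+
+# model_name is optional; omit it to use the provider's default model.
+model = ModelFactory.create("remote", model_name="gpt-oss:120b")
+```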
+ +### Available Providers + +To see all available providers, run: +```bash +python -m ai_feedback --help +``` + +Current providers include: +- `claude` - Uses Claude AI models (requires CLAUDE_API_KEY) +- `codellama` - Uses CodeLlama via Ollama (default: `codellama:latest`) +- `deepseek` - Uses DeepSeek models via Ollama (default: `deepseek-r1:70b`) +- `openai` - Uses OpenAI models (requires OPENAI_API_KEY, default: `gpt-4o`) +- `openai-vector` - Uses OpenAI with vector store functionality (requires OPENAI_API_KEY) +- `remote` - Uses remote API server (default: `gpt-oss:120b`) + +### Using --model_name + +The `--model_name` argument allows you to override the provider's default model. For example: +```bash +# Use Claude provider with a specific model +python -m ai_feedback --provider claude --model_name claude-3-opus-20240229 ... + +# Use DeepSeek provider with a specific Ollama model +python -m ai_feedback --provider deepseek --model_name "deepSeek-R1:70b" ... +``` + +If `--model_name` is not specified, each provider will use its default model. + ## Models The models used can be seen under the ai_feedback/models folder. ### OpenAI Vector Store @@ -275,7 +308,8 @@ python -m ai_feedback \ --submission_image \ --solution_image \ --question \ - --model \ + --provider \ + --model_name \ --output \ --output_template \ --system_prompt \ diff --git a/presentation_materials/Example_commands.md b/presentation_materials/Example_commands.md index 06ee7a3..2a5b97e 100644 --- a/presentation_materials/Example_commands.md +++ b/presentation_materials/Example_commands.md @@ -11,7 +11,8 @@ python -m ai_feedback \ --scope code \ --submission presentation_materials/iris_image_examples/image_test_incorrect/student_submission.ipynb \ --question "4" \ ---model deepSeek-R1:70B \ +--provider deepseek \ +--model_name deepSeek-R1:70B \ --model_options max_tokens=20000 \ --output stdout From aa23b7034978c85e21cb77ac89dc6459a0441580 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 27 Nov 2025 20:00:38 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .gitignore | 2 +- ai_feedback/__main__.py | 2 +- ai_feedback/models/ClaudeModel.py | 2 +- ai_feedback/models/DeepSeekModelModified.py | 11 +++++++---- ai_feedback/models/Model.py | 3 ++- ai_feedback/models/OpenAIModel.py | 4 ++-- ai_feedback/models/RemoteModel.py | 2 +- ai_feedback/models/__init__.py | 16 +++++++--------- 8 files changed, 22 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 94ba97c..849937f 100644 --- a/.gitignore +++ b/.gitignore @@ -174,4 +174,4 @@ cython_debug/ .DS_Store # output -stdout \ No newline at end of file +stdout diff --git a/ai_feedback/__main__.py b/ai_feedback/__main__.py index 1f2f3df..5e65358 100644 --- a/ai_feedback/__main__.py +++ b/ai_feedback/__main__.py @@ -6,9 +6,9 @@ from pathlib import Path from . 
import code_processing, image_processing, text_processing -from .models import ModelFactory from .helpers import arg_options from .helpers.constants import HELP_MESSAGES +from .models import ModelFactory _TYPE_BY_EXTENSION = { '.c': 'C', diff --git a/ai_feedback/models/ClaudeModel.py b/ai_feedback/models/ClaudeModel.py index c4c5fb8..b3f8df0 100644 --- a/ai_feedback/models/ClaudeModel.py +++ b/ai_feedback/models/ClaudeModel.py @@ -1,10 +1,10 @@ import os from pathlib import Path -from ollama import Message from typing import Optional, Tuple import anthropic from dotenv import load_dotenv +from ollama import Message from ..helpers.image_extractor import encode_image from ..helpers.model_options_helpers import cast_to_type, claude_option_schema diff --git a/ai_feedback/models/DeepSeekModelModified.py b/ai_feedback/models/DeepSeekModelModified.py index aa10bdd..d279ec3 100644 --- a/ai_feedback/models/DeepSeekModelModified.py +++ b/ai_feedback/models/DeepSeekModelModified.py @@ -1,7 +1,7 @@ -import os -import sys import json +import os import subprocess +import sys from pathlib import Path from typing import Optional, Tuple @@ -9,11 +9,12 @@ import requests from dotenv import load_dotenv -from .Model import Model from ..helpers.model_options_helpers import cast_to_type, ollama_option_schema +from .Model import Model load_dotenv() + class DeepSeekModelModified(Model): """ Backends: @@ -99,7 +100,9 @@ def generate_response( if self.backend == "ollama": response = self._generate_with_ollama(prompt, system_instructions, model_options, schema) elif self.backend == "llama": - response = self._generate_with_llama(prompt, system_instructions, model_options, schema, effective_llama_mode) + response = self._generate_with_llama( + prompt, system_instructions, model_options, schema, effective_llama_mode + ) else: raise ValueError(f"Unsupported backend: {self.backend}. Use 'ollama' or 'llama'.") diff --git a/ai_feedback/models/Model.py b/ai_feedback/models/Model.py index dfb2d60..28c256d 100644 --- a/ai_feedback/models/Model.py +++ b/ai_feedback/models/Model.py @@ -1,5 +1,6 @@ from typing import Any, Tuple -from ollama import chat, Message + +from ollama import Message, chat """ Parent Class for LLMs. 
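With the imports tidied, the provider surface of this series is complete. The factory's `register` hook exists precisely so that new providers can be added without touching the dispatch code; a hypothetical sketch of doing so (the `EchoModel` class and the `echo` provider key are invented for illustration; only the `ModelFactory` API comes from this series):

```python
from ai_feedback.models import Model, ModelFactory


class EchoModel(Model):
    """Toy provider that simply echoes the prompt back (illustration only)."""

    def generate_response(self, prompt: str, **kwargs):
        return prompt, f"echo: {prompt}"


ModelFactory.register("echo", EchoModel)
assert ModelFactory.is_registered("echo")
print(ModelFactory.get_available_providers())  # now includes "echo"
```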
diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index c466c7c..7cc503a 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -1,18 +1,18 @@ import json import os from pathlib import Path -from ollama import Message from typing import Optional, Tuple -from ..helpers.image_extractor import encode_image import openai from dotenv import load_dotenv +from ollama import Message from ai_feedback.helpers.model_options_helpers import ( cast_to_type, openai_chat_option_schema, ) +from ..helpers.image_extractor import encode_image from .Model import Model load_dotenv() diff --git a/ai_feedback/models/RemoteModel.py b/ai_feedback/models/RemoteModel.py index 4bb148e..a72dc01 100644 --- a/ai_feedback/models/RemoteModel.py +++ b/ai_feedback/models/RemoteModel.py @@ -4,8 +4,8 @@ from typing import Any, Optional, Tuple import requests -from ollama import Message from dotenv import load_dotenv +from ollama import Message from .Model import Model diff --git a/ai_feedback/models/__init__.py b/ai_feedback/models/__init__.py index bf9c35c..476e9ab 100644 --- a/ai_feedback/models/__init__.py +++ b/ai_feedback/models/__init__.py @@ -1,12 +1,13 @@ -from typing import Type, Dict +from typing import Dict, Type -from .Model import Model from .ClaudeModel import ClaudeModel -from .RemoteModel import RemoteModel -from .OpenAIModel import OpenAIModel from .CodeLlamaModel import CodeLlamaModel -from .OpenAIModelVector import OpenAIModelVector from .DeepSeekModelModified import DeepSeekModelModified +from .Model import Model +from .OpenAIModel import OpenAIModel +from .OpenAIModelVector import OpenAIModelVector +from .RemoteModel import RemoteModel + class ModelFactory: """Factory for creating AI model instances with proper dependency injection.""" @@ -54,10 +55,7 @@ def get_model_class(cls, provider: str) -> Type[Model]: """ if provider not in cls._registry: available = ", ".join(sorted(cls._registry.keys())) - raise ValueError( - f"Unknown model provider '{provider}'. " - f"Available providers: {available}" - ) + raise ValueError(f"Unknown model provider '{provider}'. " f"Available providers: {available}") return cls._registry[provider] @classmethod From 8d43f43329827876171c83576eeef3bbf8d7aba9 Mon Sep 17 00:00:00 2001 From: Naragod Date: Thu, 27 Nov 2025 16:42:05 -0500 Subject: [PATCH 09/10] USTORY-549: Update examples and documentation --- .gitignore | 1 + README.md | 158 +++++++++++---------- presentation_materials/Example_commands.md | 52 +++---- 3 files changed, 104 insertions(+), 107 deletions(-) diff --git a/.gitignore b/.gitignore index 849937f..7db1cca 100644 --- a/.gitignore +++ b/.gitignore @@ -175,3 +175,4 @@ cython_debug/ # output stdout +output_images/ \ No newline at end of file diff --git a/README.md b/README.md index 671be78..f281545 100644 --- a/README.md +++ b/README.md @@ -244,15 +244,20 @@ For code, write the heading in a comment line (e.g., ### Question 1 in Python). Matching is case-insensitive and normalizes smart quotes, dashes, and extra whitespace. ## Test Files -- Any subdirectory of /test_submissions can be run locally. More examples can be added to this directory using a similar fashion. 
+Example submission files are available in the `/presentation_materials` directory, including: +- `iris_code_example/` - Python code examples +- `iris_image_examples/` - Jupyter notebook examples with image outputs +- `pdf_example/` - PDF submission examples -## GGR274 Test File Assumptions +See the "Example Commands" section above for tested usage examples. + +## File Structure Assumptions ### Code Scope -To test the program using the GGR274 files, we assume that the test assignment files follow a specific directory structure. Currently, this program has been tested using *Homework 5* of the *GGR274* class at the *University of Toronto*. +When evaluating code submissions, especially Jupyter notebooks, the program expects files to follow specific formatting conventions. ##### Directory Structure -Within the `test_submissions/ggr274_homework5` directory, mock submissions are contained in a separate subdirectories `test_submissions/ggr274_homework5/test#`. The following naming convention is used for the files: +For organized test scenarios, mock submissions can be structured in separate subdirectories. The following naming convention is recommended: - `Homework_5_solution.ipynb` – Instructor-provided solution file - `student_submission.ipynb` – Student's submission file @@ -278,9 +283,10 @@ To ensure proper extraction and evaluation of student responses, the following f ### Image Scope #### Test Files -Mock student submissions are stored in `ggr274_homework5/image_test#`. The following naming convention is used for the files: -- `solution.ipynb` – Instructor-provided solution file +Example image submissions are available in `presentation_materials/iris_image_examples/image_test_*/`. Each directory contains: - `student_submission.ipynb` – Student's submission file +- `student_submission.png` – Generated image output +- `student_submission.txt` – Converted notebook text ##### Notebook Preprocessing To grade a specific question using the `--question` argument, add the tag `markus_question_name: ` to the metadata for the code cell that generates an image to be graded. The previous cell's markdown content will be used as the question's context. @@ -323,68 +329,79 @@ python -m ai_feedback -h ### Example Commands -#### Evaluate cnn_example test using openAI model -```bash -python -m ai_feedback --prompt code_lines --scope code --submission test_submissions/cnn_example/cnn_submission --solution test_submissions/cnn_example/cnn_solution.py --model openai -``` +The following examples demonstrate common usage patterns using files available in the `presentation_materials/` directory. -#### Evaluate cnn_example test using openAI model and custom prompt +#### Evaluate iris code example using DeepSeek model ```bash -python -m ai_feedback --prompt_text "Evaluate the student's code readability." 
--scope code --submission test_submissions/cnn_example/cnn_submission.py --model openai +python -m ai_feedback \ + --prompt code_lines \ + --scope code \ + --submission presentation_materials/iris_code_example/student_submission.py \ + --solution presentation_materials/iris_code_example/instructor_solution.py \ + --provider deepseek \ + --model_name "deepSeek-R1:70b" ``` -#### Evaluate pdf_example test using openAI model +#### Evaluate iris code example with custom prompt text ```bash -python -m ai_feedback --prompt text_pdf_analyze --scope text --submission test_submissions/pdf_example/student_pdf_submission.pdf --model openai +python -m ai_feedback \ + --prompt_text "Evaluate the student's code readability and identify any logical errors." \ + --scope code \ + --submission presentation_materials/iris_code_example/student_submission.py \ + --solution presentation_materials/iris_code_example/instructor_solution.py \ + --provider deepseek ``` -#### Evaluate question1 of test1 of ggr274 homework using DeepSeek model +#### Evaluate PDF example using DeepSeek model ```bash -python -m ai_feedback --prompt code_table \ - --scope code --submission test_submissions/ggr274_homework5/test1/student_submission.ipynb --question 1 --model deepSeek-R1:70B -``` - -#### Evaluate the image for question 5b of ggr274 homework with Llama3.2-vision -```sh -python -m ai_feedback --prompt image_analyze --scope image --solution ./test_submissions/ggr274_homework5/image_test2/student_submission.ipynb --submission_image test_submissions/ggr274_homework5/image_test2/student_submission.png --question "Question 5b" --model llama3.2-vision:90b -``` - -### Evaluate the bfs example with remote model to test_file using the verbose template -```sh -python -m ai_feedback --prompt code_lines --scope code --solution ./test_submissions/bfs_example/bfs_solution.py --submission test_submissions/bfs_example/bfs_submission.py --model remote --output --output test_file --output_template verbose -``` - -#### Evalute the Jupyter notebook of test1 of ggr274 using DeepSeek-v3 via llama.cpp server -```sh -python3 -m ai_feedback --prompt code_table --scope code \ - --submission test_submissions/ggr274_homework5/test1/student_submission.ipynb \ - --solution test_submissions/ggr274_homework5/test1/Homework_5_solution.ipynb \ - --model deepSeek-v3 --llama_mode server -``` - -#### Evalute the Jupyter notebook of test1 of ggr274 using DeepSeek-v3 via llama.cpp cli -```sh -python3 -m ai_feedback --prompt code_table --scope code \ - --submission test_submissions/ggr274_homework5/test1/student_submission.ipynb \ - --solution test_submissions/ggr274_homework5/test1/Homework_5_solution.ipynb \ - --model deepSeek-v3 --llama_mode cli +python -m ai_feedback \ + --prompt text_pdf_analyze \ + --scope text \ + --submission presentation_materials/pdf_example/student_submission.pdf \ + --solution presentation_materials/pdf_example/instructor_solution.pdf \ + --provider deepseek \ + --model_name "deepSeek-R1:70b" ``` - -#### Get annotations for cnn_example test using openAI model +#### Evaluate with custom prompt file path ```bash -python -m ai_feedback --prompt code_annotations --scope code --submission test_submissions/cnn_example/cnn_submission --solution test_submissions/cnn_example/cnn_solution.py --model openai --json_schema ai_feedback/data/schema/code_annotation_schema.json +python -m ai_feedback \ + --prompt ai_feedback/data/prompts/user/code_table.md \ + --scope code \ + --submission presentation_materials/iris_code_example/student_submission.py \ 
+ --solution presentation_materials/iris_code_example/instructor_solution.py \ + --provider codellama \ + --model_name "codellama:latest" ``` -#### Evaluate using custom prompt file path +#### Evaluate with output file and verbose template ```bash -python -m ai_feedback --prompt ai_feedback/data/prompts/user/code_overall.md --scope code --submission test_submissions/csc108/correct_submission/correct_submission.py --solution test_submissions/csc108/solution.py --model codellama:latest +python -m ai_feedback \ + --prompt code_lines \ + --scope code \ + --submission presentation_materials/iris_code_example/student_submission.py \ + --solution presentation_materials/iris_code_example/instructor_solution.py \ + --provider deepseek \ + --output ./feedback_output.md \ + --output_template verbose ``` -### Evaluate using custom model_options + +#### Evaluate with custom model_options (requires OpenAI API key) ```bash -python3 -m ai_feedback --prompt code_table --scope code --submission ../ai-autograding-feedback-eval/test_submissions/108/hard_coding_submission.py --model openai-vector --submission_type python --model_options "max_tokens=1200,temperature=0.4,top_p=0.92" +python -m ai_feedback \ + --prompt code_table \ + --scope code \ + --submission presentation_materials/iris_code_example/student_submission.py \ + --solution presentation_materials/iris_code_example/instructor_solution.py \ + --provider openai-vector \ + --model_options "max_tokens=1200,temperature=0.4,top_p=0.92" ``` +**Note:** Some examples require additional setup: +- OpenAI and Claude providers require API keys in `.env` file +- Image scope examples require vision-capable models (e.g., `llama3.2-vision:90b`, `llava:34b`) +- Remote provider requires a running remote API server + #### Using Ollama In order to run this project on Bigmouth: @@ -448,26 +465,20 @@ Along with any other packages that the submission or solution file uses. 5. Ensure the Timeout is set to 120 seconds or longer. #### Running Python Autotester Examples -##### CNN Example -- Look at the /test_submissions/cnn_example directory for the following files -- Instructor uploads: cnn_solution.py, cnn_test.py, llm_helpers.py, python_tester_llm_code.py files -- Separate test groups for cnn_test.py and python_tester_llm_code.py -- cnn_test.py Autotester package requirements: torch numpy -- python_tester_llm_code.py Autotester package requirements: git+https://github.com/MarkUsProject/ai-autograding-feedback.git#egg=ai_feedback numpy torch -- Student uploads: cnn_submission.pdf - -##### BFS Example -- Look at the /test_submissions/bfs_example directory for the following files -- Instructor uploads: bfs_solution.py, test_bfs.py, llm_helpers.py, python_tester_llm_code.py files -- Separate test groups for test_bfs.py and python_tester_llm_code.py -- python_tester_llm_code.py Autotester package requirements: git+https://github.com/MarkUsProject/ai-autograding-feedback.git#egg=ai_feedback -- Student uploads: bfs_submission.pdf - -##### PDF Example -- Look at the /test_submissions/pdf_example directory for the following files -- Instructor uploads: instructor_pdf_solution.pdf, llm_helpers.py, python_tester_llm_pdf.py files + +These examples describe recommended file structures for MarkUs autotester integration. Refer to `/markus_test_scripts` for the actual test script files. 
+ +##### Code Evaluation Example +- Instructor uploads: solution.py, test_file.py, llm_helpers.py, python_tester_llm_code.py files +- Separate test groups for test_file.py and python_tester_llm_code.py +- test_file.py Autotester package requirements: (depends on submission code) +- python_tester_llm_code.py Autotester package requirements: git+https://github.com/MarkUsProject/ai-autograding-feedback.git#egg=ai_feedback (plus any packages needed by submission) +- Student uploads: submission.py + +##### PDF Evaluation Example +- Instructor uploads: instructor_solution.pdf, llm_helpers.py, python_tester_llm_pdf.py files (from `/markus_test_scripts`) - Autotester package requirements: git+https://github.com/MarkUsProject/ai-autograding-feedback.git#egg=ai_feedback -- Student uploads: student_pdf_submission.pdf +- Student uploads: student_submission.pdf #### Custom Tester Usage 1. Ensure the student has submitted a submission file. @@ -486,11 +497,10 @@ Also pip install other packages that the submission or solution file uses. 5. Ensure the Timeout is set to 120 seconds or longer. 6. Ensure Markus Autotester docker container has the API Keys in an .env file and specified in the docker compose file. -##### GGR274 Test1 Example -- Look at the /test_submissions/ggr274_hw5_custom_tester directory for the following files -- Instructor uploads: Homework_5_solution.ipynb, test_hw5.py, test_output.txt, custom_tester_llm_code.sh, run_hw5_test.sh -- Two separate test groups: one for run_hw5_test.sh, and one for custom_tester_llm_code.sh -- Student uploads: test1_submission.ipynb, test1_submission.txt +##### Jupyter Notebook Custom Tester Example +- Instructor uploads: solution.ipynb, test_script.py, test_output.txt, custom_tester_llm_code.sh (from `/markus_test_scripts`), run_test.sh +- Two separate test groups: one for run_test.sh, and one for custom_tester_llm_code.sh +- Student uploads: submission.ipynb, submission.txt NOTE: if the LLM Test Group appears to be blank/does not turn green, try increasing the timeout. diff --git a/presentation_materials/Example_commands.md b/presentation_materials/Example_commands.md index 2a5b97e..d2acd8f 100644 --- a/presentation_materials/Example_commands.md +++ b/presentation_materials/Example_commands.md @@ -1,9 +1,9 @@ # Use Venv ```sh -source ai-autograding-feedback/LlmVenv/bin/activate +source venv/bin/activate ``` -# Analyze code correctness using Claude +# Analyze code correctness using DeepSeek ```sh python -m ai_feedback \ --submission_type jupyter \ @@ -12,40 +12,25 @@ python -m ai_feedback \ --submission presentation_materials/iris_image_examples/image_test_incorrect/student_submission.ipynb \ --question "4" \ --provider deepseek \ ---model_name deepSeek-R1:70B \ ---model_options max_tokens=20000 \ ---output stdout +--model_name "deepSeek-R1:70b" \ +--model_options max_tokens=5000 ``` # Example Response -## Feedback for Task 4 +Let me identify some mistakes in your submission and explain why they occur: -## Mistake in part a: -```python -df['species name'] = iris.target_names[df['species']] -``` - -This line contains an error because you're trying to use a Series (`df['species']`) as an index for the `iris.target_names` array. You can't directly index a NumPy array with a Series like this. When you try to use a Series to index into an array, Python expects each element of the Series to be a valid index for the array, but it tries to do this all at once rather than element by element. 
- -## Mistake in part b: -```python -boxplots_labelled = df.boxplot( - column=[ - 'sepal length (cm)', - 'sepal width (cm)', - 'petal length (cm)', - 'petal width (cm)'], - by='species', - figsize=(10, 10) -) -``` +1. **Line 25:** `df['species'] = iris.target` + - **Why it's a mistake:** The `iris.target` array contains numerical values (0, 1, 2) representing species, but these numbers are not informative without context. This makes the data harder to interpret for someone unfamiliar with the dataset. + - **Guidance:** Instead of directly assigning numerical values, you should map these values to their corresponding species names using `iris.target_names` first. -There are two issues with this code: -1. You're using `by='species'` when the instructions specifically ask you to use the `'species name'` column for the boxplot grouping -2. You're using `df` instead of `df_labelled` as specified in the instructions +2. **Line 38:** `df['species name'] = iris.target_names[df['species']]` + - **Why it's a mistake:** This line is correct in itself, but it would work better if the `species` column contained meaningful categorical values (like "setosa", "versicolor") instead of numerical values (0, 1, 2). Currently, it maps numbers to names, which works but isn't as intuitive. + - **Guidance:** Consider modifying your earlier code to store species names directly in the `species` column and use this column for mapping. -The boxplot should be grouped by the more informative `'species name'` column so that the x-axis labels show the actual species names rather than numeric codes. +3. **Lines 44-52:** `df.boxplot(...)` + - **Why it's a mistake:** The `by='species'` parameter will create boxplots grouped by numerical values (0, 1, 2) because your `species` column contains numbers. This makes the plot less informative as the x-axis labels won't show the actual species names. + - **Guidance:** Use the `species name` column instead of `species` for grouping to make the boxplots more interpretable. # Analyze image correctness using OpenAI ```sh @@ -53,11 +38,12 @@ python -m ai_feedback \ --submission_type jupyter \ --prompt image_analyze \ --scope image \ ---assignment presentation_materials/iris_image_examples/image_test_incorrect \ +--submission_image presentation_materials/iris_image_examples/image_test_incorrect/student_submission.png \ +--submission presentation_materials/iris_image_examples/image_test_incorrect/student_submission.ipynb \ --question "4" \ ---model openai \ ---output stdout +--provider openai + ``` # Example Response -The graphs in the image do not fully solve the problem as specified. While they do show side-by-side boxplots for sepal lengths, sepal widths, petal lengths, and petal widths, the horizontal axis uses numerical labels (0, 1, 2) instead of the species names. The problem asks for the species names to be used, making the ticks on the horizontal axes informative. +The graphs in the attached image do not fully solve the problem. While they do show side-by-side boxplots for sepal lengths, sepal widths, petal lengths, and petal widths, the x-axis labels use numeric codes (0, 1, 2) instead of the species names. The problem specifies that the ticks on the horizontal axes should be informative by using the species names. 
From 462af78616cc0ed3a741eb56cd76f1bda6d08501 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 27 Nov 2025 21:59:37 +0000
Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 7db1cca..068701e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -175,4 +175,4 @@ cython_debug/
 
 # output
 stdout
-output_images/
\ No newline at end of file
+output_images/