From 2dfebbbf8f120738a43a194ff25928567c6ce5c5 Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Tue, 8 Jul 2025 10:20:25 -0400
Subject: [PATCH 01/26] styles

---
 ai_feedback/__main__.py                 | 7 +++++++
 ai_feedback/code_processing.py          | 2 ++
 ai_feedback/helpers/constants.py        | 1 +
 ai_feedback/image_processing.py         | 1 +
 ai_feedback/models/ClaudeModel.py       | 2 ++
 ai_feedback/models/CodeLlamaModel.py    | 1 +
 ai_feedback/models/DeepSeekModel.py     | 1 +
 ai_feedback/models/DeepSeekV3Model.py   | 1 +
 ai_feedback/models/OpenAIModel.py       | 1 +
 ai_feedback/models/OpenAIModelVector.py | 1 +
 ai_feedback/models/RemoteModel.py       | 1 +
 ai_feedback/text_processing.py          | 2 ++
 12 files changed, 21 insertions(+)

diff --git a/ai_feedback/__main__.py b/ai_feedback/__main__.py
index 57813fb..2ef6a7f 100644
--- a/ai_feedback/__main__.py
+++ b/ai_feedback/__main__.py
@@ -159,6 +159,13 @@ def main() -> int:
         default="cli",
         help=HELP_MESSAGES["llama_mode"],
     )
+    parser.add_argument(
+        "--json_schema",
+        type=str,
+        required=False,
+        default="",
+        help=HELP_MESSAGES["json_schema"],
+    )
 
     args = parser.parse_args()
 
diff --git a/ai_feedback/code_processing.py b/ai_feedback/code_processing.py
index e034882..a10cc2c 100644
--- a/ai_feedback/code_processing.py
+++ b/ai_feedback/code_processing.py
@@ -87,6 +87,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
             question_num=args.question,
             system_instructions=system_instructions,
             llama_mode=args.llama_mode,
+            json_schema=args.json_schema,
         )
     else:
         request, response = model.generate_response(
@@ -96,6 +97,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
             test_output=test_output_file,
             system_instructions=system_instructions,
             llama_mode=args.llama_mode,
+            json_schema=args.json_schema,
         )
 
     return request, response
diff --git a/ai_feedback/helpers/constants.py b/ai_feedback/helpers/constants.py
index ecb716b..a307346 100644
--- a/ai_feedback/helpers/constants.py
+++ b/ai_feedback/helpers/constants.py
@@ -16,4 +16,5 @@
     "submission_image": "The file path for the image file.",
     "solution_image": "The file path to the solution image.",
     "system_prompt": "The specific system instructions to send to the AI Model.",
+    "json_schema": "The file path to a JSON file containing the schema for the AI output.",
 }
diff --git a/ai_feedback/image_processing.py b/ai_feedback/image_processing.py
index 206bc40..a7374ac 100644
--- a/ai_feedback/image_processing.py
+++ b/ai_feedback/image_processing.py
@@ -165,6 +165,7 @@ def process_image(args, prompt: dict, system_instructions: str) -> tuple[str, st
                     system_instructions=system_instructions,
                     question_num=question,
                     submission_image=args.submission_image,
+                    json_schema=args.json_schema,
                 )
                 responses.append(str(response))
             else:
diff --git a/ai_feedback/models/ClaudeModel.py b/ai_feedback/models/ClaudeModel.py
index 584baa9..e3fae4d 100644
--- a/ai_feedback/models/ClaudeModel.py
+++ b/ai_feedback/models/ClaudeModel.py
@@ -29,6 +29,7 @@ def generate_response(
         question_num: Optional[int] = None,
         test_output: Optional[Path] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> Optional[Tuple[str, str]]:
         """
         Generates a response from Claude using the provided prompt and assignment file context.
@@ -42,6 +43,7 @@ def generate_response(
             question_num (Optional[int]): Specific task number to extract from text files.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional json schema to use.
Returns: Optional[Tuple[str, str]]: The original prompt and the model's response, or None if the response is invalid. diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py index 8299348..35e1789 100644 --- a/ai_feedback/models/CodeLlamaModel.py +++ b/ai_feedback/models/CodeLlamaModel.py @@ -26,6 +26,7 @@ def generate_response( test_output: Optional[Path] = None, scope: Optional[str] = None, llama_mode: Optional[str] = None, + json_schema: Optional[str] = None, ) -> Optional[Tuple[str, str]]: """ Generates a response from the CodeLlama model using the provided prompt diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py index ebace4c..2f1fa09 100644 --- a/ai_feedback/models/DeepSeekModel.py +++ b/ai_feedback/models/DeepSeekModel.py @@ -24,6 +24,7 @@ def generate_response( test_output: Optional[Path] = None, scope: Optional[str] = None, llama_mode: Optional[str] = None, + json_schema: Optional[str] = None, ) -> Optional[Tuple[str, str]]: """ Generate a model response using the prompt and assignment files. diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py index 2b0975f..9c7ee31 100644 --- a/ai_feedback/models/DeepSeekV3Model.py +++ b/ai_feedback/models/DeepSeekV3Model.py @@ -31,6 +31,7 @@ def generate_response( question_num: Optional[int] = None, test_output: Optional[Path] = None, llama_mode: Optional[str] = None, + json_schema: Optional[str] = None, ) -> Optional[Tuple[str, str]]: """ Generate a model response using the prompt and assignment files. diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index a765ec6..44b3710 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -30,6 +30,7 @@ def generate_response( test_output: Optional[Path] = None, scope: Optional[str] = None, llama_mode: Optional[str] = None, + json_schema: Optional[str] = None, ) -> Tuple[str, str]: """ Generate a response based on the given prompt and assignment context. diff --git a/ai_feedback/models/OpenAIModelVector.py b/ai_feedback/models/OpenAIModelVector.py index 84d8986..c4a7fab 100644 --- a/ai_feedback/models/OpenAIModelVector.py +++ b/ai_feedback/models/OpenAIModelVector.py @@ -43,6 +43,7 @@ def generate_response( test_output: Optional[Path] = None, scope: Optional[str] = None, llama_mode: Optional[str] = None, + json_schema: Optional[str] = None, ) -> tuple[str, str]: """ Generate a response from the OpenAI model using the provided prompt and assignment files. diff --git a/ai_feedback/models/RemoteModel.py b/ai_feedback/models/RemoteModel.py index 43645fe..d33ba80 100644 --- a/ai_feedback/models/RemoteModel.py +++ b/ai_feedback/models/RemoteModel.py @@ -39,6 +39,7 @@ def generate_response( scope: Optional[str] = None, llama_mode: Optional[str] = None, submission_image: Optional[str] = None, + json_schema: Optional[str] = None, ) -> Optional[Tuple[str, str]]: """ Generate a model response using the prompt and assignment files. 
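For orientation, here is a minimal sketch of how the new `--json_schema` flag threads from the CLI through to `generate_response` in the models updated above. The `StubModel` class is a hypothetical stand-in for the real backends (it is not part of this patch); the argument names come from the diffs.

```python
# Sketch only: a stub backend standing in for the models patched above.
import argparse
from typing import Optional, Tuple


class StubModel:
    def generate_response(
        self,
        prompt: str,
        system_instructions: str = "",
        json_schema: Optional[str] = None,
        **kwargs,
    ) -> Tuple[str, str]:
        # The real models receive the raw file path here and are responsible
        # for loading and parsing it (see the later patches in this series).
        return prompt, f"schema file: {json_schema or '<none>'}"


parser = argparse.ArgumentParser()
parser.add_argument("--json_schema", type=str, required=False, default="")
args = parser.parse_args(["--json_schema", "ai_feedback/data/schema/code_annotation_schema.json"])

request, response = StubModel().generate_response(
    prompt="Review this submission.",
    system_instructions="You are a grader.",
    json_schema=args.json_schema,
)
print(response)
```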
diff --git a/ai_feedback/text_processing.py b/ai_feedback/text_processing.py index 9250090..81b0dd6 100644 --- a/ai_feedback/text_processing.py +++ b/ai_feedback/text_processing.py @@ -60,6 +60,7 @@ def process_text(args, prompt: str, system_instructions: str) -> Tuple[str, str] question_num=args.question, system_instructions=system_instructions, llama_mode=args.llama_mode, + json_schema=args.json_schema, ) else: request, response = model.generate_response( @@ -69,6 +70,7 @@ def process_text(args, prompt: str, system_instructions: str) -> Tuple[str, str] scope=args.scope, system_instructions=system_instructions, llama_mode=args.llama_mode, + json_schema=args.json_schema, ) return request, response From 9a41cdfc1a5c65ac31f1f94b1a08cf6c6a6c9f40 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 11:10:24 -0400 Subject: [PATCH 02/26] styles --- ai_feedback/models/OpenAIModel.py | 45 +++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index 44b3710..b1dbb93 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -1,4 +1,6 @@ +import json import os +import re from pathlib import Path from typing import Optional, Tuple @@ -48,10 +50,19 @@ def generate_response( Returns: Tuple[str, str]: The full prompt and the generated response from OpenAI. """ - response = self._call_openai(prompt, system_instructions) + if json_schema: + schema_path = Path(json_schema) + if not schema_path.exists(): + raise FileNotFoundError(f"JSON schema file not found: {schema_path}") + with open(schema_path, "r", encoding="utf-8") as f: + schema = json.load(f) + else: + schema = None + + response = self._call_openai(prompt, system_instructions, schema) return prompt, response - def _call_openai(self, prompt: str, system_instructions: str) -> str: + def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[dict] = None) -> str: """ Send a prompt to OpenAI's chat completion API and retrieve the generated response. @@ -61,6 +72,36 @@ def _call_openai(self, prompt: str, system_instructions: str) -> str: Returns: str: The model's response text. 
""" + if schema: + function_name = re.sub(r"[^a-zA-Z0-9_-]", "_", schema.get("title", "structured_output")).lower() + + response = self.client.chat.completions.create( + model="gpt-4-turbo", + messages=[ + {"role": "system", "content": system_instructions}, + {"role": "user", "content": prompt}, + ], + tools=[ + { + "type": "function", + "function": { + "name": function_name, + "description": schema.get("description", "Structured response."), + "parameters": schema, + }, + } + ], + tool_choice="required", + temperature=0.5, + max_tokens=1000, + ) + + tool_calls = response.choices[0].message.tool_calls + if tool_calls and tool_calls[0].function.arguments: + return tool_calls[0].function.arguments # still a string + else: + return response.choices[0].message.content # fallback to raw text + response = self.client.chat.completions.create( model="gpt-4-turbo", messages=[ From 4e4e5dd8491357d89fb85abab8b265313ea1309a Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 11:24:33 -0400 Subject: [PATCH 03/26] testing structured output for ollama --- .../data/schema/code_annotation_schema.json | 52 +++++++++++++++++++ ai_feedback/models/DeepSeekModel.py | 7 +++ 2 files changed, 59 insertions(+) create mode 100644 ai_feedback/data/schema/code_annotation_schema.json diff --git a/ai_feedback/data/schema/code_annotation_schema.json b/ai_feedback/data/schema/code_annotation_schema.json new file mode 100644 index 0000000..48a2be9 --- /dev/null +++ b/ai_feedback/data/schema/code_annotation_schema.json @@ -0,0 +1,52 @@ +{ + "title": "Student Code Annotation", + "type": "object", + "description": "List of code annotations describing specific mistakes in the student's code.", + "properties": { + "annotations": { + "type": "array", + "items": { + "type": "object", + "properties": { + "filename": { + "type": "string", + "description": "The name of the student's file where the issue was found." + }, + "content": { + "type": "string", + "description": "A short description of the mistake or issue." + }, + "line_start": { + "type": "integer", + "description": "The starting line number where the issue begins.", + "minimum": 1 + }, + "line_end": { + "type": "integer", + "description": "The ending line number where the issue ends.", + "minimum": 1 + }, + "column_start": { + "type": "integer", + "description": "The starting column position of the mistake.", + "minimum": 0 + }, + "column_end": { + "type": "integer", + "description": "The ending column position of the mistake.", + "minimum": 0 + } + }, + "required": [ + "filename", + "content", + "line_start", + "line_end", + "column_start", + "column_end" + ] + } + } + }, + "required": ["annotations"] +} diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py index 2f1fa09..fbecee4 100644 --- a/ai_feedback/models/DeepSeekModel.py +++ b/ai_feedback/models/DeepSeekModel.py @@ -1,3 +1,4 @@ +import json from pathlib import Path from typing import Optional, Tuple @@ -43,6 +44,11 @@ def generate_response( Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response, or None if the response was invalid. 
""" + if json_schema: + with open(json_schema, "r", encoding="utf-8") as f: + schema = json.load(f) + else: + schema = None response = ollama.chat( model=self.model["model"], @@ -50,6 +56,7 @@ def generate_response( {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], + options={"schema": schema} if schema else None, ) if not response or "message" not in response or "content" not in response["message"]: From 88f10018a8db436095b01cae7e2b13872dcf49bc Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 11:32:01 -0400 Subject: [PATCH 04/26] test --- ai_feedback/models/DeepSeekModel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py index fbecee4..e37cda5 100644 --- a/ai_feedback/models/DeepSeekModel.py +++ b/ai_feedback/models/DeepSeekModel.py @@ -49,7 +49,7 @@ def generate_response( schema = json.load(f) else: schema = None - + print(schema) response = ollama.chat( model=self.model["model"], messages=[ From ae9a93cb59ea1305092479ac114684ada16371a6 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 11:40:29 -0400 Subject: [PATCH 05/26] test --- ai_feedback/models/DeepSeekModel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py index e37cda5..c5eea43 100644 --- a/ai_feedback/models/DeepSeekModel.py +++ b/ai_feedback/models/DeepSeekModel.py @@ -49,14 +49,13 @@ def generate_response( schema = json.load(f) else: schema = None - print(schema) response = ollama.chat( model=self.model["model"], messages=[ {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], - options={"schema": schema} if schema else None, + format=schema, ) if not response or "message" not in response or "content" not in response["message"]: From 106787141bd691458b49fb6ae1003d3a8eab6d91 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 12:11:36 -0400 Subject: [PATCH 06/26] styles --- ai_feedback/models/DeepSeekV3Model.py | 37 +++++++++++++++++---------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py index 9c7ee31..3889cc4 100644 --- a/ai_feedback/models/DeepSeekV3Model.py +++ b/ai_feedback/models/DeepSeekV3Model.py @@ -1,3 +1,4 @@ +import json import os import subprocess import sys @@ -45,18 +46,23 @@ def generate_response( test_output (Optional[Path]): Path Object pointing to the test output file. llama_mode (Optional[str]): Optional mode to invoke llama.cpp in. question_num (Optional[int]): An optional question number to target specific content. - + json_schema (Optional[str]): Optional json schema to use. Returns: Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response, or None if the response was invalid. 
""" + schema = None + if json_schema: + with open(json_schema) as f: + schema = json.load(f) + prompt = f"{system_instructions}\n{prompt}" if llama_mode == 'server': self._ensure_env_vars('LLAMA_SERVER_URL') - response = self._get_response_server(prompt) + response = self._get_response_server(prompt, schema) else: self._ensure_env_vars('LLAMA_MODEL_PATH', 'LLAMA_CLI_PATH') - response = self._get_response_cli(prompt) + response = self._get_response_cli(prompt, schema) response = response.strip() @@ -82,25 +88,29 @@ def _ensure_env_vars(self, *names): if missing: raise RuntimeError(f"Error: Environment variable(s) {', '.join(missing)} not set") - def _get_response_server( - self, - prompt: str, - ) -> str: + def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> str: """ Generate a model response using the prompt Args: prompt (str): The input prompt provided by the user. + schema (Optional[dict]): Optional schema provided by the user. Returns: str: A tuple containing the model response or None if the response was invalid. """ - url = f"{LLAMA_SERVER_URL}/v1/completions" + url = f"{LLAMA_SERVER_URL}/v1/chat/completions" payload = { - "prompt": prompt, + "messages": [ + {"role": "user", "content": prompt}, + ], + "temperature": 0.5, } + if schema: + payload["response_format"] = {"type": "json_schema", "schema": schema} + try: response = requests.post(url, json=payload, timeout=3000) response.raise_for_status() @@ -117,15 +127,13 @@ def _get_response_server( return model_output - def _get_response_cli( - self, - prompt: str, - ) -> str: + def _get_response_cli(self, prompt: str, schema: Optional[dict] = None) -> str: """ Generate a model response using the prompt Args: prompt (str): The input prompt provided by the user. + schema (Optional[dict]): Optional schema provided by the user. Returns: str: The model response or None if the response was invalid. @@ -142,6 +150,9 @@ def _get_response_cli( "--no-display-prompt", ] + if schema: + cmd += ["--json-schema", schema] + try: completed = subprocess.run( cmd, input=prompt.encode(), check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300 From 1609ee6e9121a8ca47e449ed6006db55c6c3e07d Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 12:16:52 -0400 Subject: [PATCH 07/26] updated the models to use json dumps --- ai_feedback/models/DeepSeekV3Model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py index 3889cc4..f5571b2 100644 --- a/ai_feedback/models/DeepSeekV3Model.py +++ b/ai_feedback/models/DeepSeekV3Model.py @@ -99,7 +99,7 @@ def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> st Returns: str: A tuple containing the model response or None if the response was invalid. 
""" - url = f"{LLAMA_SERVER_URL}/v1/chat/completions" + url = f"{LLAMA_SERVER_URL}/v1/completions" payload = { "messages": [ @@ -151,7 +151,7 @@ def _get_response_cli(self, prompt: str, schema: Optional[dict] = None) -> str: ] if schema: - cmd += ["--json-schema", schema] + cmd += ["--json-schema", json.dumps(schema)] try: completed = subprocess.run( From c85bd571017bab7820ba613c9b984e2311ddfd37 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 15:16:19 -0400 Subject: [PATCH 08/26] upgrade OpenAIModel to gpt-4o-mini --- ai_feedback/models/OpenAIModel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index b1dbb93..cb9177d 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -76,7 +76,7 @@ def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[d function_name = re.sub(r"[^a-zA-Z0-9_-]", "_", schema.get("title", "structured_output")).lower() response = self.client.chat.completions.create( - model="gpt-4-turbo", + model="gpt-4o-mini", messages=[ {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, From 38c0326e5bdd2beb843479ca264a2bcce83fc6af Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 15:33:54 -0400 Subject: [PATCH 09/26] updated OpenAIModel to use structured response --- ai_feedback/models/OpenAIModel.py | 36 ++++++------------------------- 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index cb9177d..4492dac 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -72,43 +72,19 @@ def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[d Returns: str: The model's response text. 
""" + response_format = None if schema: - function_name = re.sub(r"[^a-zA-Z0-9_-]", "_", schema.get("title", "structured_output")).lower() - - response = self.client.chat.completions.create( - model="gpt-4o-mini", - messages=[ - {"role": "system", "content": system_instructions}, - {"role": "user", "content": prompt}, - ], - tools=[ - { - "type": "function", - "function": { - "name": function_name, - "description": schema.get("description", "Structured response."), - "parameters": schema, - }, - } - ], - tool_choice="required", - temperature=0.5, - max_tokens=1000, - ) - - tool_calls = response.choices[0].message.tool_calls - if tool_calls and tool_calls[0].function.arguments: - return tool_calls[0].function.arguments # still a string - else: - return response.choices[0].message.content # fallback to raw text + response_format = {"type": "json_schema", "json_schema": schema} response = self.client.chat.completions.create( - model="gpt-4-turbo", + model="gpt-4o-mini-2024-07-18", messages=[ {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], - max_tokens=1000, + response_format=response_format, temperature=0.5, + max_tokens=1000, ) + return response.choices[0].message.content From 19e651e83ebd49e150c9acc984d45ce18c635c0a Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 15:51:34 -0400 Subject: [PATCH 10/26] updated models and annotations to be compatible with new openai model --- .../data/schema/code_annotation_schema.json | 96 ++++++++++--------- ai_feedback/models/CodeLlamaModel.py | 7 ++ ai_feedback/models/DeepSeekModel.py | 1 + 3 files changed, 57 insertions(+), 47 deletions(-) diff --git a/ai_feedback/data/schema/code_annotation_schema.json b/ai_feedback/data/schema/code_annotation_schema.json index 48a2be9..7193602 100644 --- a/ai_feedback/data/schema/code_annotation_schema.json +++ b/ai_feedback/data/schema/code_annotation_schema.json @@ -1,52 +1,54 @@ { - "title": "Student Code Annotation", - "type": "object", + "name": "student_code_annotation", "description": "List of code annotations describing specific mistakes in the student's code.", - "properties": { - "annotations": { - "type": "array", - "items": { - "type": "object", - "properties": { - "filename": { - "type": "string", - "description": "The name of the student's file where the issue was found." + "schema": { + "type": "object", + "properties": { + "annotations": { + "type": "array", + "items": { + "type": "object", + "properties": { + "filename": { + "type": "string", + "description": "The name of the student's file where the issue was found." + }, + "content": { + "type": "string", + "description": "A short description of the mistake or issue." + }, + "line_start": { + "type": "integer", + "description": "The starting line number where the issue begins.", + "minimum": 1 + }, + "line_end": { + "type": "integer", + "description": "The ending line number where the issue ends.", + "minimum": 1 + }, + "column_start": { + "type": "integer", + "description": "The starting column position of the mistake.", + "minimum": 0 + }, + "column_end": { + "type": "integer", + "description": "The ending column position of the mistake.", + "minimum": 0 + } }, - "content": { - "type": "string", - "description": "A short description of the mistake or issue." 
- }, - "line_start": { - "type": "integer", - "description": "The starting line number where the issue begins.", - "minimum": 1 - }, - "line_end": { - "type": "integer", - "description": "The ending line number where the issue ends.", - "minimum": 1 - }, - "column_start": { - "type": "integer", - "description": "The starting column position of the mistake.", - "minimum": 0 - }, - "column_end": { - "type": "integer", - "description": "The ending column position of the mistake.", - "minimum": 0 - } - }, - "required": [ - "filename", - "content", - "line_start", - "line_end", - "column_start", - "column_end" - ] + "required": [ + "filename", + "content", + "line_start", + "line_end", + "column_start", + "column_end" + ] + } } - } - }, - "required": ["annotations"] + }, + "required": ["annotations"] + } } diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py index 35e1789..1ba17a4 100644 --- a/ai_feedback/models/CodeLlamaModel.py +++ b/ai_feedback/models/CodeLlamaModel.py @@ -1,3 +1,4 @@ +import json from pathlib import Path from typing import Optional, Tuple @@ -46,6 +47,11 @@ def generate_response( Optional[Tuple[str, str]]: A tuple of the request and the model's response, or None if no valid response is returned. """ + if json_schema: + with open(json_schema, "r", encoding="utf-8") as f: + schema = json.load(f) + else: + schema = None response = ollama.chat( model=self.model["model"], @@ -53,6 +59,7 @@ def generate_response( {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], + format=schema, ) if not response or "message" not in response or "content" not in response["message"]: diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py index c5eea43..1e87a59 100644 --- a/ai_feedback/models/DeepSeekModel.py +++ b/ai_feedback/models/DeepSeekModel.py @@ -49,6 +49,7 @@ def generate_response( schema = json.load(f) else: schema = None + response = ollama.chat( model=self.model["model"], messages=[ From dea83b98431b5f4ff8bffcc911dc51e5e52078e5 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 16:13:50 -0400 Subject: [PATCH 11/26] styling --- ai_feedback/models/DeepSeekModel.py | 2 +- ai_feedback/models/OpenAIModel.py | 2 +- ai_feedback/models/OpenAIModelVector.py | 25 +++++++++++++++++++++---- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py index 1e87a59..9261bc4 100644 --- a/ai_feedback/models/DeepSeekModel.py +++ b/ai_feedback/models/DeepSeekModel.py @@ -56,7 +56,7 @@ def generate_response( {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], - format=schema, + format=schema['schema'], ) if not response or "message" not in response or "content" not in response["message"]: diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index 4492dac..d325680 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -77,7 +77,7 @@ def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[d response_format = {"type": "json_schema", "json_schema": schema} response = self.client.chat.completions.create( - model="gpt-4o-mini-2024-07-18", + model="gpt-4o-mini", messages=[ {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, diff --git a/ai_feedback/models/OpenAIModelVector.py b/ai_feedback/models/OpenAIModelVector.py index c4a7fab..e7ab685 100644 --- 
a/ai_feedback/models/OpenAIModelVector.py +++ b/ai_feedback/models/OpenAIModelVector.py @@ -1,3 +1,4 @@ +import json import os from pathlib import Path from typing import List, Optional @@ -28,7 +29,7 @@ def __init__(self) -> None: self.vector_store = self.client.vector_stores.create(name="Markus LLM Vector Store") self.model = self.client.beta.assistants.create( name="Markus LLM model", - model="gpt-4-turbo", + model="gpt-4o-mini", tools=[{"type": "file_search"}], tool_resources={"file_search": {"vector_store_ids": [self.vector_store.id]}}, ) @@ -65,6 +66,11 @@ def generate_response( if not self.model: raise RuntimeError("Model was not created successfully.") + schema = None + if json_schema: + with open(json_schema, "r") as f: + schema = json.load(f) + request = "Uploaded Files: " file_ids: List[str] = [] assignment_files = [f for f in (submission_file, solution_file, test_output) if f] @@ -78,7 +84,7 @@ def generate_response( if question_num: prompt += f" Identify and generate a response for the mistakes **only** in task ${question_num}. " - response = self._call_openai(prompt) + response = self._call_openai(prompt, schema) self._cleanup_resources(file_ids) request = f"\n{system_instructions}\n{prompt}" @@ -99,7 +105,7 @@ def _upload_file(self, file_path: Path) -> str: self.client.vector_stores.files.create(vector_store_id=self.vector_store.id, file_id=response.id) return response.id - def _call_openai(self, prompt: str) -> str: + def _call_openai(self, prompt: str, schema: Optional[dict] = None) -> str: """ Send the user prompt to OpenAI's assistant model and retrieve the generated response. @@ -113,7 +119,18 @@ def _call_openai(self, prompt: str) -> str: self.client.beta.threads.messages.create(thread_id=thread.id, role="user", content=prompt) - run = self.client.beta.threads.runs.create(thread_id=thread.id, assistant_id=self.model.id) + response_format = None + if schema: + response_format = { + "type": "json_schema", + "json_schema": schema, + } + + run = self.client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=self.model.id, + **({"response_format": response_format} if response_format else {}), + ) while run.status not in ["completed", "failed"]: run = self.client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id) From 6c8cfb1cb75e086e09e79fc5ef629f85a06f7f3a Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 16:19:54 -0400 Subject: [PATCH 12/26] updated codellama and deepseekv3 to work with new style of annotations --- ai_feedback/models/CodeLlamaModel.py | 2 +- ai_feedback/models/DeepSeekV3Model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py index 1ba17a4..92134f5 100644 --- a/ai_feedback/models/CodeLlamaModel.py +++ b/ai_feedback/models/CodeLlamaModel.py @@ -59,7 +59,7 @@ def generate_response( {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], - format=schema, + format=schema['schema'], ) if not response or "message" not in response or "content" not in response["message"]: diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py index f5571b2..e221b17 100644 --- a/ai_feedback/models/DeepSeekV3Model.py +++ b/ai_feedback/models/DeepSeekV3Model.py @@ -109,7 +109,7 @@ def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> st } if schema: - payload["response_format"] = {"type": "json_schema", "schema": schema} + payload["response_format"] 
= {"type": "json_schema", "schema": schema["schema"]}

From 99af119f91cfad7516eb1335c3c99c9b79a9b00c Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Tue, 8 Jul 2025 16:22:07 -0400
Subject: [PATCH 13/26] updated deepseekv3 cli mode to work with new schema format

---
 ai_feedback/models/DeepSeekV3Model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py
index e221b17..29e5999 100644
--- a/ai_feedback/models/DeepSeekV3Model.py
+++ b/ai_feedback/models/DeepSeekV3Model.py
@@ -151,7 +151,8 @@ def _get_response_cli(self, prompt: str, schema: Optional[dict] = None) -> str:
         ]
 
         if schema:
-            cmd += ["--json-schema", json.dumps(schema)]
+            raw_schema = schema["schema"] if "schema" in schema else schema
+            cmd += ["--json-schema", json.dumps(raw_schema)]
 
         try:
             completed = subprocess.run(

From cd54e693b93fa96a084aaa220cf9e7e743f35e6d Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Tue, 8 Jul 2025 16:28:01 -0400
Subject: [PATCH 14/26] updated deepseekv3 server payload

---
 ai_feedback/models/DeepSeekV3Model.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py
index 29e5999..ac116a8 100644
--- a/ai_feedback/models/DeepSeekV3Model.py
+++ b/ai_feedback/models/DeepSeekV3Model.py
@@ -101,12 +101,7 @@ def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> st
         """
         url = f"{LLAMA_SERVER_URL}/v1/completions"
 
-        payload = {
-            "messages": [
-                {"role": "user", "content": prompt},
-            ],
-            "temperature": 0.5,
-        }
+        payload = {"prompt": prompt, "temperature": 0.7, "max_tokens": 1000}
 
         if schema:

From 46181744dcd345a81ca0547a818a3e7fc7c2fed1 Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Tue, 8 Jul 2025 16:36:34 -0400
Subject: [PATCH 15/26] change how payload receives schema

---
 ai_feedback/models/DeepSeekV3Model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py
index ac116a8..27589e6 100644
--- a/ai_feedback/models/DeepSeekV3Model.py
+++ b/ai_feedback/models/DeepSeekV3Model.py
@@ -104,7 +104,8 @@ def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> st
         payload = {"prompt": prompt, "temperature": 0.7, "max_tokens": 1000}
 
         if schema:
-            payload["response_format"] = {"type": "json_schema", "schema": schema["schema"]}
+            raw_schema = schema.get("schema", schema)
+            payload["json_schema"] = raw_schema
 
         try:
             response = requests.post(url, json=payload, timeout=3000)

From d0a7cefa94d0ad2b47f2cbb717550dcd177f196b Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Tue, 8 Jul 2025 16:58:59 -0400
Subject: [PATCH 16/26] styles

---
 .../integration_test.py                      | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 tests/structured_output_validation_tests/integration_test.py

diff --git a/tests/structured_output_validation_tests/integration_test.py b/tests/structured_output_validation_tests/integration_test.py
new file mode 100644
index 0000000..98d0ede
--- /dev/null
+++ b/tests/structured_output_validation_tests/integration_test.py
@@ -0,0 +1,66 @@
+import json
+import subprocess
+from pathlib import Path
+
+import pytest
+
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+SUBMISSION = BASE_DIR / "test_submissions/cnn_example/cnn_submission.py"
"test_submissions/cnn_example/cnn_submission.py" +SOLUTION = BASE_DIR / "test_submissions/cnn_example/cnn_solution.py" +SCHEMA_PATH = BASE_DIR / "ai_feedback/data/schema/code_annotation_schema.json" + + +def run_cli(model_name: str) -> dict: + command = [ + "python3", + "-m", + "ai_feedback", + "--prompt", + "code_annotation", + "--scope", + "code", + "--submission", + str(SUBMISSION), + "--solution", + str(SOLUTION), + "--model", + model_name, + "--json_schema", + str(SCHEMA_PATH), + ] + result = subprocess.run(command, capture_output=True, text=True) + assert result.returncode == 0, f"{model_name} failed: {result.stderr}" + + output = result.stdout.strip() + json_start = output.find("{") + assert json_start != -1, f"{model_name} output has no JSON object" + + return json.loads(output[json_start:]) + + +def validate_json_schema(result: dict): + assert "annotations" in result, "Missing 'annotations' key" + assert isinstance(result["annotations"], list), "'annotations' must be a list" + for item in result["annotations"]: + assert isinstance(item, dict), "Each annotation must be an object" + for key in ["filename", "content", "line_start", "line_end", "column_start", "column_end"]: + assert key in item, f"Missing key: {key}" + if key in ["filename", "content"]: + assert isinstance(item[key], str), f"{key} must be a string" + else: + assert isinstance(item[key], int), f"{key} must be an integer" + + +@pytest.mark.parametrize( + "model", + [ + "openai", + "openai-vector", + "codellama:latest", + "deepSeek-R1:70B", + "deepSeek-v3", + ], +) +def test_model_outputs_valid_json_schema(model): + result = run_cli(model) + validate_json_schema(result) From 964161bfc45ef8c59973635d2f3f42f183fbccd6 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 17:04:48 -0400 Subject: [PATCH 17/26] updated readme --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 36b93d0..049e35c 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ For the image scope, the program takes up to two files, depending on the prompt | `--system_prompt` | File path for the system instructions prompt | ❌ | | `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ | | `--output_template` | Output template file (from `arg_options.OutputTemplate) | ❌ | +| `--json_schema` | File path to json file for schema for structured output | ❌ | + ** One of either prompt, prompt_custom, or prompt_text must be selected. ## Scope @@ -303,6 +305,11 @@ python3 -m ai_feedback --prompt code_table --scope code \ --model deepSeek-v3 --llama_mode cli ``` +#### Get annotations for cnn_example test using openAI model +```bash +python -m ai_feedback --prompt code_annotations --scope code --submission test_submissions/cnn_example/cnn_submission --solution test_submissions/cnn_example/cnn_solution.py --model openai --json_schema ai_feedback/data/schema/code_annotation_schema.json +``` + #### Using Ollama In order to run this project on Bigmouth: 1. 

From 0e79f7c5e2be0ee1a75e329569c8c03ed6ccd716 Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Tue, 8 Jul 2025 17:05:17 -0400
Subject: [PATCH 18/26] updated readme

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 049e35c..bd380cc 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,6 @@ For the image scope, the program takes up to two files, depending on the prompt
 | `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ |
 | `--output_template` | Output template file (from `arg_options.OutputTemplate) | ❌ |
 | `--json_schema` | File path to a JSON file containing the schema for structured output | ❌ |
-
 ** One of either prompt, prompt_custom, or prompt_text must be selected.
 
 ## Scope

From 18567c2ad951b9fa580f376159f6fbd840e144d5 Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Tue, 8 Jul 2025 17:07:37 -0400
Subject: [PATCH 19/26] updated model descriptions

---
 ai_feedback/models/CodeLlamaModel.py    | 1 +
 ai_feedback/models/DeepSeekModel.py     | 1 +
 ai_feedback/models/DeepSeekV3Model.py   | 1 +
 ai_feedback/models/OpenAIModel.py       | 3 ++-
 ai_feedback/models/OpenAIModelVector.py | 2 ++
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py
index 92134f5..c897a23 100644
--- a/ai_feedback/models/CodeLlamaModel.py
+++ b/ai_feedback/models/CodeLlamaModel.py
@@ -42,6 +42,7 @@ def generate_response(
             question_num (Optional[int]): An optional specific question number to extract content for.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional json schema to use.
 
         Returns:
             Optional[Tuple[str, str]]: A tuple of the request and the model's response,
diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py
index 9261bc4..3069ab8 100644
--- a/ai_feedback/models/DeepSeekModel.py
+++ b/ai_feedback/models/DeepSeekModel.py
@@ -39,6 +39,7 @@ def generate_response(
             question_num (Optional[int]): An optional question number to target specific content.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional json schema to use.
 
         Returns:
             Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py
index 27589e6..e845139 100644
--- a/ai_feedback/models/DeepSeekV3Model.py
+++ b/ai_feedback/models/DeepSeekV3Model.py
@@ -47,6 +47,7 @@ def generate_response(
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
             question_num (Optional[int]): An optional question number to target specific content.
             json_schema (Optional[str]): Optional json schema to use.
+
         Returns:
             Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
             or None if the response was invalid.
diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py
index d325680..c6baa06 100644
--- a/ai_feedback/models/OpenAIModel.py
+++ b/ai_feedback/models/OpenAIModel.py
@@ -46,6 +46,7 @@ def generate_response(
             question_num (Optional[int]): Specific question number to focus on.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional json schema to use.
Returns: Tuple[str, str]: The full prompt and the generated response from OpenAI. @@ -68,7 +69,7 @@ def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[d Args: prompt (str): The fully constructed input prompt including file content. - + schema (Optional[dict]): Optional json schema to use. Returns: str: The model's response text. """ diff --git a/ai_feedback/models/OpenAIModelVector.py b/ai_feedback/models/OpenAIModelVector.py index e7ab685..fd89c54 100644 --- a/ai_feedback/models/OpenAIModelVector.py +++ b/ai_feedback/models/OpenAIModelVector.py @@ -58,6 +58,7 @@ def generate_response( question_num (Optional[int]): An optional question number. system_instructions (str): instructions for the model llama_mode (Optional[str]): Optional mode to invoke llama.cpp in. + json_schema (Optional[str]): Optional json schema to use. Returns: tuple[str, str]: A tuple containing the full system request and the model's text response. @@ -111,6 +112,7 @@ def _call_openai(self, prompt: str, schema: Optional[dict] = None) -> str: Args: prompt (str): The input prompt for the assistant. + schema (Optional[dict]): Optional json schema to use. Returns: str: The assistant's generated response text. From d8f75e4bd97982dbc29525b0978723fc75984ab8 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 17:09:53 -0400 Subject: [PATCH 20/26] updated model descriptions --- ai_feedback/models/OpenAIModel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py index c6baa06..8ecaa5f 100644 --- a/ai_feedback/models/OpenAIModel.py +++ b/ai_feedback/models/OpenAIModel.py @@ -70,6 +70,7 @@ def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[d Args: prompt (str): The fully constructed input prompt including file content. schema (Optional[dict]): Optional json schema to use. + Returns: str: The model's response text. """ From bfb060d82cf5c07229e8bd954dc17d66956be4d4 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Tue, 8 Jul 2025 17:11:09 -0400 Subject: [PATCH 21/26] updated model descriptions --- ai_feedback/models/RemoteModel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_feedback/models/RemoteModel.py b/ai_feedback/models/RemoteModel.py index d33ba80..06ee52d 100644 --- a/ai_feedback/models/RemoteModel.py +++ b/ai_feedback/models/RemoteModel.py @@ -54,6 +54,7 @@ def generate_response( system_instructions (str): instructions for the model llama_mode (Optional[str]): Optional mode to invoke llama.cpp in. submission_image (Optional[str]): An optional path to a submission image file. + json_schema (Optional[str]): An optional json schema to use. 
Returns: Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response, From 64df9d5c2dc7988edcafe7b8161acaa97b85ac0d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:14:56 +0000 Subject: [PATCH 22/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b68b5b..91c8e22 100644 --- a/README.md +++ b/README.md @@ -323,7 +323,7 @@ python3 -m ai_feedback --prompt code_table --scope code \ ```bash python -m ai_feedback --prompt code_annotations --scope code --submission test_submissions/cnn_example/cnn_submission --solution test_submissions/cnn_example/cnn_solution.py --model openai --json_schema ai_feedback/data/schema/code_annotation_schema.json ``` - + #### Evaluate using custom prompt file path ```bash python -m ai_feedback --prompt ai_feedback/data/prompts/user/code_overall.md --scope code --submission test_submissions/csc108/correct_submission/correct_submission.py --solution test_submissions/csc108/solution.py --model codellama:latest From 1fa114646185e1d0f3765a9950b918936ac8e92b Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Wed, 9 Jul 2025 09:28:48 -0400 Subject: [PATCH 23/26] styles --- .../schema_structure_validation.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{structured_output_validation_tests/integration_test.py => local_tests/schema_structure_validation.py} (100%) diff --git a/tests/structured_output_validation_tests/integration_test.py b/tests/local_tests/schema_structure_validation.py similarity index 100% rename from tests/structured_output_validation_tests/integration_test.py rename to tests/local_tests/schema_structure_validation.py From 8b489a8cd1eda2729100902999e019e5d2278f90 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Wed, 9 Jul 2025 09:34:11 -0400 Subject: [PATCH 24/26] added fallback for schema in ollama models --- ai_feedback/models/CodeLlamaModel.py | 2 +- ai_feedback/models/DeepSeekModel.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py index c897a23..b660ec2 100644 --- a/ai_feedback/models/CodeLlamaModel.py +++ b/ai_feedback/models/CodeLlamaModel.py @@ -60,7 +60,7 @@ def generate_response( {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], - format=schema['schema'], + format=schema['schema'] if schema else None, ) if not response or "message" not in response or "content" not in response["message"]: diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py index 3069ab8..c1d937f 100644 --- a/ai_feedback/models/DeepSeekModel.py +++ b/ai_feedback/models/DeepSeekModel.py @@ -57,7 +57,7 @@ def generate_response( {"role": "system", "content": system_instructions}, {"role": "user", "content": prompt}, ], - format=schema['schema'], + format=schema['schema'] if schema else None, ) if not response or "message" not in response or "content" not in response["message"]: From 93dbd4b1e15015e420474538b18180cdfe7a84d7 Mon Sep 17 00:00:00 2001 From: Will Kukkamalla Date: Wed, 9 Jul 2025 09:34:36 -0400 Subject: [PATCH 25/26] updated helper to use json_schema --- tests/test_helper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_helper.py b/tests/test_helper.py index a9ee45e..f00284b 100644 --- 
a/tests/test_helper.py
+++ b/tests/test_helper.py
@@ -73,6 +73,7 @@ def fake_generate_response(
         test_output: Optional[Path] = None,
         scope: Optional[str] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ):
         all_prompts.append((test_name, "OpenAIModel.generate_response", prompt))
         return prompt, f"[MOCKED RESPONSE] \n {prompt}"

From 2b8d924dc8365ff970c4925093eaa70da17aeb93 Mon Sep 17 00:00:00 2001
From: Will Kukkamalla
Date: Wed, 9 Jul 2025 13:53:40 -0400
Subject: [PATCH 26/26] updated models to have consistent json schema file checks

---
 ai_feedback/models/CodeLlamaModel.py    | 5 ++++-
 ai_feedback/models/DeepSeekModel.py     | 5 ++++-
 ai_feedback/models/DeepSeekV3Model.py   | 8 ++++++--
 ai_feedback/models/OpenAIModelVector.py | 8 ++++++--
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py
index b660ec2..01d082f 100644
--- a/ai_feedback/models/CodeLlamaModel.py
+++ b/ai_feedback/models/CodeLlamaModel.py
@@ -49,7 +49,10 @@ def generate_response(
             or None if no valid response is returned.
         """
         if json_schema:
-            with open(json_schema, "r", encoding="utf-8") as f:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
                 schema = json.load(f)
         else:
             schema = None
diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py
index c1d937f..c4562a5 100644
--- a/ai_feedback/models/DeepSeekModel.py
+++ b/ai_feedback/models/DeepSeekModel.py
@@ -46,7 +46,10 @@ def generate_response(
             or None if the response was invalid.
         """
         if json_schema:
-            with open(json_schema, "r", encoding="utf-8") as f:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
                 schema = json.load(f)
         else:
             schema = None
diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py
index e845139..8c91269 100644
--- a/ai_feedback/models/DeepSeekV3Model.py
+++ b/ai_feedback/models/DeepSeekV3Model.py
@@ -52,10 +52,14 @@ def generate_response(
             Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
             or None if the response was invalid.
         """
-        schema = None
         if json_schema:
-            with open(json_schema) as f:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
                 schema = json.load(f)
+        else:
+            schema = None
 
         prompt = f"{system_instructions}\n{prompt}"
         if llama_mode == 'server':
diff --git a/ai_feedback/models/OpenAIModelVector.py b/ai_feedback/models/OpenAIModelVector.py
index fd89c54..b5dd311 100644
--- a/ai_feedback/models/OpenAIModelVector.py
+++ b/ai_feedback/models/OpenAIModelVector.py
@@ -67,10 +67,14 @@ def generate_response(
         if not self.model:
             raise RuntimeError("Model was not created successfully.")
 
-        schema = None
         if json_schema:
-            with open(json_schema, "r") as f:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
                 schema = json.load(f)
+        else:
+            schema = None
 
         request = "Uploaded Files: "
         file_ids: List[str] = []
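One consolidation worth noting: PATCH 26 repeats the same existence-check-and-load block in four model classes. A shared helper along these lines could remove the duplication — a sketch only, assuming nothing about the final module layout (`load_json_schema` does not exist in the codebase):

```python
# Hypothetical helper consolidating the schema-loading block that PATCH 26
# duplicates across CodeLlamaModel, DeepSeekModel, DeepSeekV3Model, and
# OpenAIModelVector.
import json
from pathlib import Path
from typing import Optional


def load_json_schema(json_schema: Optional[str]) -> Optional[dict]:
    """Return the parsed schema dict, or None when no path was supplied."""
    if not json_schema:
        return None
    schema_path = Path(json_schema)
    if not schema_path.exists():
        raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
    with open(schema_path, "r", encoding="utf-8") as f:
        return json.load(f)
```

Each `generate_response` would then reduce its preamble to `schema = load_json_schema(json_schema)`.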
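Separately, the integration test checks the response shape with hand-rolled asserts. If full JSON-Schema validation is ever wanted, something like the following would work — a sketch that assumes the third-party `jsonschema` package, which this series does not add as a dependency. Note the unwrapping of the top-level `"schema"` key, mirroring what the llama.cpp paths do with `schema.get("schema", schema)`:

```python
# Sketch: validating a model response against code_annotation_schema.json
# using the third-party `jsonschema` package (an assumption, not a project
# dependency).
import json

from jsonschema import validate

with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    wrapper = json.load(f)

# The file nests the actual JSON Schema under a "schema" key (OpenAI's
# json_schema response-format wrapper), so unwrap it first.
raw_schema = wrapper.get("schema", wrapper)

response_text = '{"annotations": []}'  # stand-in for a real model response
validate(instance=json.loads(response_text), schema=raw_schema)
print("response conforms to the schema")
```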