Merged

29 commits
2dfebbb
styles
Jul 8, 2025
9a41cdf
styles
Jul 8, 2025
4e4e5dd
testing structured output for ollama
Jul 8, 2025
88f1001
test
Jul 8, 2025
ae9a93c
test
Jul 8, 2025
1067871
styles
Jul 8, 2025
1609ee6
updated the models to use json dumps
Jul 8, 2025
c85bd57
upgrade OpenAIModel to gpt-4o-mini
Jul 8, 2025
38c0326
updated OpenAIModel to use structured response
Jul 8, 2025
19e651e
updated models and annotations to be compatible with new openai model
Jul 8, 2025
dea83b9
styling
Jul 8, 2025
6c8cfb1
updated codellama and deepseekv3 to work with new style of annotations
Jul 8, 2025
99af119
updated deepseekv3 cli mode to work with new schema format
Jul 8, 2025
cd54e69
updated deepseekv3 server payload
Jul 8, 2025
4618174
change how payload recives schema
Jul 8, 2025
d0a7cef
stlyes
Jul 8, 2025
964161b
updated readme
Jul 8, 2025
0e79f7c
updated readme
Jul 8, 2025
18567c2
updated model descriptions
Jul 8, 2025
d8f75e4
updated model descriptions
Jul 8, 2025
bfb060d
updated model descriptions
Jul 8, 2025
6c4d8d8
Merge branch 'main' into add-json-schema
wkukka1 Jul 8, 2025
64df9d5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 8, 2025
1fa1146
styles
Jul 9, 2025
f24a1e0
Merge branch 'add-json-schema' of github.com:wkukka1/ai-autograding-f…
Jul 9, 2025
8b489a8
added fallback for schema in ollama models
Jul 9, 2025
93dbd4b
updated helper to use json_schema
Jul 9, 2025
2b8d924
updated models to have consistent json shcema file checks
Jul 9, 2025
0765342
Merge branch 'main' into add-json-schema
david-yz-liu Jul 10, 2025
7 changes: 7 additions & 0 deletions README.md
@@ -43,6 +43,7 @@ For the image scope, the program takes up to two files, depending on the prompt
| `--system_prompt` | Pre-defined system prompt name or file path to custom system prompt | ❌ |
| `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ |
| `--output_template` | Output template file (from `arg_options.OutputTemplate`) | ❌ |
| `--json_schema` | File path to a JSON schema file used for structured output | ❌ |
** One of either `--prompt` or `--prompt_text` must be selected.

## Scope
@@ -317,6 +318,12 @@ python3 -m ai_feedback --prompt code_table --scope code \
--model deepSeek-v3 --llama_mode cli
```


#### Get annotations for the cnn_example test using the OpenAI model
```bash
python -m ai_feedback --prompt code_annotations --scope code \
--submission test_submissions/cnn_example/cnn_submission --solution test_submissions/cnn_example/cnn_solution.py \
--model openai --json_schema ai_feedback/data/schema/code_annotation_schema.json
```

#### Evaluate using custom prompt file path
```bash
python -m ai_feedback --prompt ai_feedback/data/prompts/user/code_overall.md --scope code --submission test_submissions/csc108/correct_submission/correct_submission.py --solution test_submissions/csc108/solution.py --model codellama:latest
7 changes: 7 additions & 0 deletions ai_feedback/__main__.py
@@ -207,6 +207,13 @@ def main() -> int:
default="cli",
help=HELP_MESSAGES["llama_mode"],
)
parser.add_argument(
"--json_schema",
type=str,
required=False,
default="",
help=HELP_MESSAGES["json_schema"],
)

args = parser.parse_args()

2 changes: 2 additions & 0 deletions ai_feedback/code_processing.py
@@ -87,6 +87,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
question_num=args.question,
system_instructions=system_instructions,
llama_mode=args.llama_mode,
json_schema=args.json_schema,
)
else:
request, response = model.generate_response(
@@ -96,6 +97,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
test_output=test_output_file,
system_instructions=system_instructions,
llama_mode=args.llama_mode,
json_schema=args.json_schema,
)

return request, response
54 changes: 54 additions & 0 deletions ai_feedback/data/schema/code_annotation_schema.json
@@ -0,0 +1,54 @@
{
"name": "student_code_annotation",
"description": "List of code annotations describing specific mistakes in the student's code.",
"schema": {
"type": "object",
"properties": {
"annotations": {
"type": "array",
"items": {
"type": "object",
"properties": {
"filename": {
"type": "string",
"description": "The name of the student's file where the issue was found."
},
"content": {
"type": "string",
"description": "A short description of the mistake or issue."
},
"line_start": {
"type": "integer",
"description": "The starting line number where the issue begins.",
"minimum": 1
},
"line_end": {
"type": "integer",
"description": "The ending line number where the issue ends.",
"minimum": 1
},
"column_start": {
"type": "integer",
"description": "The starting column position of the mistake.",
"minimum": 0
},
"column_end": {
"type": "integer",
"description": "The ending column position of the mistake.",
"minimum": 0
}
},
"required": [
"filename",
"content",
"line_start",
"line_end",
"column_start",
"column_end"
]
}
}
},
"required": ["annotations"]
}
}
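For reference, a minimal sketch of checking a model response against this file, assuming the third-party `jsonschema` package (not part of this PR); note that the actual JSON Schema is nested under the wrapper's `"schema"` key, and the sample annotation below is purely illustrative:

```python
import json

from jsonschema import validate  # assumed dependency for this sketch only

# Load the wrapper file; the raw JSON Schema lives under the "schema" key.
with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    wrapper = json.load(f)

# A hypothetical model response that should satisfy the schema.
sample_response = {
    "annotations": [
        {
            "filename": "cnn_submission.py",
            "content": "Loop variable shadows the built-in name 'sum'.",
            "line_start": 12,
            "line_end": 12,
            "column_start": 4,
            "column_end": 7,
        }
    ]
}

# Raises jsonschema.ValidationError if the response does not conform.
validate(instance=sample_response, schema=wrapper["schema"])
```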
1 change: 1 addition & 0 deletions ai_feedback/helpers/constants.py
@@ -14,5 +14,6 @@
"test_output": "The output of tests from evaluating the assignment.",
"submission_image": "The file path for the image file.",
"solution_image": "The file path to the solution image.",
"json_schema": "file path to a json file that contains the schema for ai output",
"system_prompt": "Pre-defined system prompt name (from ai_feedback/data/prompts/system/) or file path to custom system prompt file.",
}
1 change: 1 addition & 0 deletions ai_feedback/image_processing.py
@@ -165,6 +165,7 @@ def process_image(args, prompt: dict, system_instructions: str) -> tuple[str, st
system_instructions=system_instructions,
question_num=question,
submission_image=args.submission_image,
json_schema=args.json_schema,
)
responses.append(str(response))
else:
2 changes: 2 additions & 0 deletions ai_feedback/models/ClaudeModel.py
@@ -29,6 +29,7 @@ def generate_response(
question_num: Optional[int] = None,
test_output: Optional[Path] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generates a response from Claude using the provided prompt and assignment file context.
@@ -42,6 +43,7 @@
question_num (Optional[int]): Specific task number to extract from text files.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.
Returns:
Optional[Tuple[str, str]]: The original prompt and the model's response, or None if the response is invalid.
9 changes: 9 additions & 0 deletions ai_feedback/models/CodeLlamaModel.py
@@ -1,3 +1,4 @@
import json
from pathlib import Path
from typing import Optional, Tuple

@@ -26,6 +27,7 @@ def generate_response(
test_output: Optional[Path] = None,
scope: Optional[str] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generates a response from the CodeLlama model using the provided prompt
@@ -40,18 +42,25 @@ def generate_response(
question_num (Optional[int]): An optional specific question number to extract content for.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.

Returns:
Optional[Tuple[str, str]]: A tuple of the request and the model's response,
or None if no valid response is returned.
"""
if json_schema:
with open(json_schema, "r", encoding="utf-8") as f:
schema = json.load(f)
else:
schema = None

response = ollama.chat(
model=self.model["model"],
messages=[
{"role": "system", "content": system_instructions},
{"role": "user", "content": prompt},
],
format=schema["schema"] if schema else None,
)

if not response or "message" not in response or "content" not in response["message"]:
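The `format` argument above is how the Ollama-backed models (CodeLlama here, DeepSeek below) request structured output. A minimal standalone sketch of the same call, assuming a local Ollama install with structured-output support, `codellama:latest` pulled, and the schema file added in this PR; the prompt strings are placeholders:

```python
import json

import ollama  # assumes the ollama Python client and a running Ollama daemon

with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    wrapper = json.load(f)

response = ollama.chat(
    model="codellama:latest",
    messages=[
        {"role": "system", "content": "You are a grading assistant."},
        {"role": "user", "content": "List the mistakes in the submitted code."},
    ],
    # Ollama accepts a raw JSON Schema dict here; the wrapper file nests it under "schema".
    format=wrapper["schema"],
)

# The reply content is a JSON string constrained by the schema.
annotations = json.loads(response["message"]["content"])
print(annotations["annotations"])
```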
9 changes: 9 additions & 0 deletions ai_feedback/models/DeepSeekModel.py
@@ -1,3 +1,4 @@
import json
from pathlib import Path
from typing import Optional, Tuple

@@ -24,6 +25,7 @@ def generate_response(
test_output: Optional[Path] = None,
scope: Optional[str] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generate a model response using the prompt and assignment files.
@@ -37,18 +39,25 @@ def generate_response(
question_num (Optional[int]): An optional question number to target specific content.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.

Returns:
Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
or None if the response was invalid.
"""
if json_schema:
with open(json_schema, "r", encoding="utf-8") as f:
schema = json.load(f)
else:
schema = None

response = ollama.chat(
model=self.model["model"],
messages=[
{"role": "system", "content": system_instructions},
{"role": "user", "content": prompt},
],
format=schema["schema"] if schema else None,
)

if not response or "message" not in response or "content" not in response["message"]:
36 changes: 23 additions & 13 deletions ai_feedback/models/DeepSeekV3Model.py
@@ -1,3 +1,4 @@
import json
import os
import subprocess
import sys
@@ -31,6 +32,7 @@ def generate_response(
question_num: Optional[int] = None,
test_output: Optional[Path] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generate a model response using the prompt and assignment files.
@@ -44,18 +46,24 @@ def generate_response(
test_output (Optional[Path]): Path Object pointing to the test output file.
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
question_num (Optional[int]): An optional question number to target specific content.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.

Returns:
Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
or None if the response was invalid.
"""
schema = None
Contributor comment: Let's keep the code structure consistent with the other classes (if-else, etc.). Also keep the open call the same, with the utf-8 encoding.
if json_schema:
with open(json_schema) as f:
schema = json.load(f)

prompt = f"{system_instructions}\n{prompt}"
if llama_mode == 'server':
self._ensure_env_vars('LLAMA_SERVER_URL')
response = self._get_response_server(prompt)
response = self._get_response_server(prompt, schema)
else:
self._ensure_env_vars('LLAMA_MODEL_PATH', 'LLAMA_CLI_PATH')
response = self._get_response_cli(prompt)
response = self._get_response_cli(prompt, schema)

response = response.strip()

@@ -81,24 +89,24 @@ def _ensure_env_vars(self, *names):
if missing:
raise RuntimeError(f"Error: Environment variable(s) {', '.join(missing)} not set")

def _get_response_server(
self,
prompt: str,
) -> str:
def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> str:
"""
Generate a model response using the prompt

Args:
prompt (str): The input prompt provided by the user.
schema (Optional[dict]): Optional schema provided by the user.

Returns:
str: The model's response text.
"""
url = f"{LLAMA_SERVER_URL}/v1/completions"

payload = {
"prompt": prompt,
}
payload = {"prompt": prompt, "temperature": 0.7, "max_tokens": 1000}

if schema:
raw_schema = schema.get("schema", schema)
payload["json_schema"] = raw_schema

try:
response = requests.post(url, json=payload, timeout=3000)
@@ -116,15 +124,13 @@ def _get_response_server(

return model_output

def _get_response_cli(
self,
prompt: str,
) -> str:
def _get_response_cli(self, prompt: str, schema: Optional[dict] = None) -> str:
"""
Generate a model response using the prompt

Args:
prompt (str): The input prompt provided by the user.
schema (Optional[dict]): Optional schema provided by the user.

Returns:
str: The model response or None if the response was invalid.
@@ -141,6 +147,10 @@ def _get_response_cli(
"--no-display-prompt",
]

if schema:
raw_schema = schema["schema"] if "schema" in schema else schema
cmd += ["--json-schema", json.dumps(raw_schema)]

try:
completed = subprocess.run(
cmd, input=prompt.encode(), check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300
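For reference on the server branch, a minimal sketch of the request that `_get_response_server` now builds, assuming a llama.cpp server is reachable at the placeholder URL below (the real code reads `LLAMA_SERVER_URL` from the environment) and using the schema file added in this PR; in CLI mode the same inner schema is instead passed to `llama-cli` via `--json-schema` with `json.dumps`:

```python
import json

import requests

LLAMA_SERVER_URL = "http://localhost:8080"  # placeholder for this sketch

with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    wrapper = json.load(f)

payload = {
    "prompt": "List the mistakes in the submitted code.",
    "temperature": 0.7,
    "max_tokens": 1000,
    # Only the inner JSON Schema is sent; the name/description wrapper is stripped.
    "json_schema": wrapper.get("schema", wrapper),
}

response = requests.post(f"{LLAMA_SERVER_URL}/v1/completions", json=payload, timeout=3000)
response.raise_for_status()
print(response.json())  # the schema-constrained completion is inside the returned JSON
```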
28 changes: 24 additions & 4 deletions ai_feedback/models/OpenAIModel.py
@@ -1,4 +1,6 @@
import json
import os
import re
from pathlib import Path
from typing import Optional, Tuple

@@ -30,6 +32,7 @@ def generate_response(
test_output: Optional[Path] = None,
scope: Optional[str] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Tuple[str, str]:
"""
Generate a response based on the given prompt and assignment context.
@@ -43,30 +46,47 @@ def generate_response(
question_num (Optional[int]): Specific question number to focus on.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.
Returns:
Tuple[str, str]: The full prompt and the generated response from OpenAI.
"""
response = self._call_openai(prompt, system_instructions)
if json_schema:
schema_path = Path(json_schema)
if not schema_path.exists():
Contributor comment: This is a nice check; please add this to all of the classes.
raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
with open(schema_path, "r", encoding="utf-8") as f:
schema = json.load(f)
else:
schema = None

response = self._call_openai(prompt, system_instructions, schema)
return prompt, response

def _call_openai(self, prompt: str, system_instructions: str) -> str:
def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[dict] = None) -> str:
"""
Send a prompt to OpenAI's chat completion API and retrieve the generated response.
Args:
prompt (str): The fully constructed input prompt including file content.
system_instructions (str): Instructions for the model.
schema (Optional[dict]): Optional JSON schema dict used for structured output.
Returns:
str: The model's response text.
"""
response_format = None
if schema:
response_format = {"type": "json_schema", "json_schema": schema}

response = self.client.chat.completions.create(
model="gpt-4-turbo",
model="gpt-4o-mini",
messages=[
{"role": "system", "content": system_instructions},
{"role": "user", "content": prompt},
],
max_tokens=1000,
response_format=response_format,
temperature=0.5,
max_tokens=1000,
)

return response.choices[0].message.content
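For context on the new `response_format` argument, a minimal standalone sketch of the call `_call_openai` now makes, assuming `OPENAI_API_KEY` is set in the environment and using the schema file added in this PR (whose top-level `name`/`schema` keys match what the OpenAI Structured Outputs API expects under `json_schema`); the prompt strings are placeholders:

```python
import json

from openai import OpenAI  # assumes the openai v1 Python client

client = OpenAI()  # reads OPENAI_API_KEY from the environment

with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    schema = json.load(f)  # wrapper with "name", "description", and "schema" keys

response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a grading assistant."},
        {"role": "user", "content": "List the mistakes in the submitted code."},
    ],
    # Unlike the Ollama path, the whole wrapper object is passed, not just the inner schema.
    response_format={"type": "json_schema", "json_schema": schema},
    temperature=0.5,
    max_tokens=1000,
)

annotations = json.loads(response.choices[0].message.content)
print(annotations["annotations"])
```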