diff --git a/README.md b/README.md
index a84cab3..4630c1c 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ For the image scope, the program takes up to two files, depending on the prompt
 | `--system_prompt` | Pre-defined system prompt name or file path to custom system prompt | ❌ |
 | `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ |
 | `--output_template` | Output template file (from `arg_options.OutputTemplate) | ❌ |
+| `--json_schema` | File path to a JSON file containing the schema for structured output | ❌ |
 ** One of either `--prompt` or `--prompt_text` must be selected. If both are provided, `--prompt_text` will be appended to the contents of the file specified by `--prompt`.
 
 ## Scope
@@ -317,6 +318,12 @@ python3 -m ai_feedback --prompt code_table --scope code \
   --model deepSeek-v3 --llama_mode cli
 ```
 
+
+#### Get annotations for cnn_example test using OpenAI model
+```bash
+python -m ai_feedback --prompt code_annotations --scope code --submission test_submissions/cnn_example/cnn_submission --solution test_submissions/cnn_example/cnn_solution.py --model openai --json_schema ai_feedback/data/schema/code_annotation_schema.json
+```
+
 #### Evaluate using custom prompt file path
 ```bash
 python -m ai_feedback --prompt ai_feedback/data/prompts/user/code_overall.md --scope code --submission test_submissions/csc108/correct_submission/correct_submission.py --solution test_submissions/csc108/solution.py --model codellama:latest
diff --git a/ai_feedback/__main__.py b/ai_feedback/__main__.py
index ae0ca8f..b730826 100644
--- a/ai_feedback/__main__.py
+++ b/ai_feedback/__main__.py
@@ -207,6 +207,13 @@ def main() -> int:
         default="cli",
         help=HELP_MESSAGES["llama_mode"],
     )
+    parser.add_argument(
+        "--json_schema",
+        type=str,
+        required=False,
+        default="",
+        help=HELP_MESSAGES["json_schema"],
+    )
 
     args = parser.parse_args()
 
diff --git a/ai_feedback/code_processing.py b/ai_feedback/code_processing.py
index e034882..a10cc2c 100644
--- a/ai_feedback/code_processing.py
+++ b/ai_feedback/code_processing.py
@@ -87,6 +87,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
             question_num=args.question,
             system_instructions=system_instructions,
             llama_mode=args.llama_mode,
+            json_schema=args.json_schema,
         )
     else:
         request, response = model.generate_response(
@@ -96,6 +97,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
             test_output=test_output_file,
             system_instructions=system_instructions,
             llama_mode=args.llama_mode,
+            json_schema=args.json_schema,
         )
 
     return request, response
diff --git a/ai_feedback/data/schema/code_annotation_schema.json b/ai_feedback/data/schema/code_annotation_schema.json
new file mode 100644
index 0000000..7193602
--- /dev/null
+++ b/ai_feedback/data/schema/code_annotation_schema.json
@@ -0,0 +1,54 @@
+{
+  "name": "student_code_annotation",
+  "description": "List of code annotations describing specific mistakes in the student's code.",
+  "schema": {
+    "type": "object",
+    "properties": {
+      "annotations": {
+        "type": "array",
+        "items": {
+          "type": "object",
+          "properties": {
+            "filename": {
+              "type": "string",
+              "description": "The name of the student's file where the issue was found."
+            },
+            "content": {
+              "type": "string",
+              "description": "A short description of the mistake or issue."
+            },
+            "line_start": {
+              "type": "integer",
+              "description": "The starting line number where the issue begins.",
+              "minimum": 1
+            },
+            "line_end": {
+              "type": "integer",
+              "description": "The ending line number where the issue ends.",
+              "minimum": 1
+            },
+            "column_start": {
+              "type": "integer",
+              "description": "The starting column position of the mistake.",
+              "minimum": 0
+            },
+            "column_end": {
+              "type": "integer",
+              "description": "The ending column position of the mistake.",
+              "minimum": 0
+            }
+          },
+          "required": [
+            "filename",
+            "content",
+            "line_start",
+            "line_end",
+            "column_start",
+            "column_end"
+          ]
+        }
+      }
+    },
+    "required": ["annotations"]
+  }
+}
diff --git a/ai_feedback/helpers/constants.py b/ai_feedback/helpers/constants.py
index 592e16b..0896d65 100644
--- a/ai_feedback/helpers/constants.py
+++ b/ai_feedback/helpers/constants.py
@@ -14,5 +14,6 @@
     "test_output": "The output of tests from evaluating the assignment.",
     "submission_image": "The file path for the image file.",
     "solution_image": "The file path to the solution image.",
+    "json_schema": "The file path to a JSON file containing the schema for structured model output.",
     "system_prompt": "Pre-defined system prompt name (from ai_feedback/data/prompts/system/) or file path to custom system prompt file.",
 }
diff --git a/ai_feedback/image_processing.py b/ai_feedback/image_processing.py
index 206bc40..a7374ac 100644
--- a/ai_feedback/image_processing.py
+++ b/ai_feedback/image_processing.py
@@ -165,6 +165,7 @@ def process_image(args, prompt: dict, system_instructions: str) -> tuple[str, st
                 system_instructions=system_instructions,
                 question_num=question,
                 submission_image=args.submission_image,
+                json_schema=args.json_schema,
             )
             responses.append(str(response))
         else:
diff --git a/ai_feedback/models/ClaudeModel.py b/ai_feedback/models/ClaudeModel.py
index 584baa9..e3fae4d 100644
--- a/ai_feedback/models/ClaudeModel.py
+++ b/ai_feedback/models/ClaudeModel.py
@@ -29,6 +29,7 @@ def generate_response(
         question_num: Optional[int] = None,
         test_output: Optional[Path] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> Optional[Tuple[str, str]]:
         """
         Generates a response from Claude using the provided prompt and assignment file context.
@@ -42,6 +43,7 @@ def generate_response(
             question_num (Optional[int]): Specific task number to extract from text files.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional file path to a JSON schema for structured output.
 
         Returns:
             Optional[Tuple[str, str]]: The original prompt and the model's response, or None if the response is invalid.
diff --git a/ai_feedback/models/CodeLlamaModel.py b/ai_feedback/models/CodeLlamaModel.py
index 8299348..01d082f 100644
--- a/ai_feedback/models/CodeLlamaModel.py
+++ b/ai_feedback/models/CodeLlamaModel.py
@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 from typing import Optional, Tuple
 
@@ -26,6 +27,7 @@ def generate_response(
         test_output: Optional[Path] = None,
         scope: Optional[str] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> Optional[Tuple[str, str]]:
         """
         Generates a response from the CodeLlama model using the provided prompt
@@ -40,11 +42,20 @@ def generate_response(
             question_num (Optional[int]): An optional specific question number to extract content for.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional file path to a JSON schema for structured output.
 
         Returns:
             Optional[Tuple[str, str]]: A tuple of the request and the model's response,
             or None if no valid response is returned.
         """
+        if json_schema:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
+                schema = json.load(f)
+        else:
+            schema = None
 
         response = ollama.chat(
             model=self.model["model"],
@@ -52,6 +63,7 @@
                 {"role": "system", "content": system_instructions},
                 {"role": "user", "content": prompt},
             ],
+            format=schema['schema'] if schema else None,
         )
 
         if not response or "message" not in response or "content" not in response["message"]:
diff --git a/ai_feedback/models/DeepSeekModel.py b/ai_feedback/models/DeepSeekModel.py
index ebace4c..c4562a5 100644
--- a/ai_feedback/models/DeepSeekModel.py
+++ b/ai_feedback/models/DeepSeekModel.py
@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 from typing import Optional, Tuple
 
@@ -24,6 +25,7 @@ def generate_response(
         test_output: Optional[Path] = None,
         scope: Optional[str] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> Optional[Tuple[str, str]]:
         """
         Generate a model response using the prompt and assignment files.
@@ -37,11 +39,20 @@ def generate_response(
             question_num (Optional[int]): An optional question number to target specific content.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional file path to a JSON schema for structured output.
 
         Returns:
             Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
             or None if the response was invalid.
         """
+        if json_schema:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
+                schema = json.load(f)
+        else:
+            schema = None
 
         response = ollama.chat(
             model=self.model["model"],
@@ -49,6 +60,7 @@
                 {"role": "system", "content": system_instructions},
                 {"role": "user", "content": prompt},
             ],
+            format=schema['schema'] if schema else None,
        )
 
         if not response or "message" not in response or "content" not in response["message"]:
diff --git a/ai_feedback/models/DeepSeekV3Model.py b/ai_feedback/models/DeepSeekV3Model.py
index 2b0975f..8c91269 100644
--- a/ai_feedback/models/DeepSeekV3Model.py
+++ b/ai_feedback/models/DeepSeekV3Model.py
@@ -1,3 +1,4 @@
+import json
 import os
 import subprocess
 import sys
@@ -31,6 +32,7 @@ def generate_response(
         question_num: Optional[int] = None,
         test_output: Optional[Path] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> Optional[Tuple[str, str]]:
         """
         Generate a model response using the prompt and assignment files.
@@ -44,18 +46,28 @@ def generate_response(
             test_output (Optional[Path]): Path Object pointing to the test output file.
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
             question_num (Optional[int]): An optional question number to target specific content.
+            json_schema (Optional[str]): Optional file path to a JSON schema for structured output.
 
         Returns:
             Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
             or None if the response was invalid.
         """
+        if json_schema:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
+                schema = json.load(f)
+        else:
+            schema = None
+
         prompt = f"{system_instructions}\n{prompt}"
 
         if llama_mode == 'server':
             self._ensure_env_vars('LLAMA_SERVER_URL')
-            response = self._get_response_server(prompt)
+            response = self._get_response_server(prompt, schema)
         else:
             self._ensure_env_vars('LLAMA_MODEL_PATH', 'LLAMA_CLI_PATH')
-            response = self._get_response_cli(prompt)
+            response = self._get_response_cli(prompt, schema)
 
         response = response.strip()
@@ -81,24 +93,24 @@ def _ensure_env_vars(self, *names):
         if missing:
             raise RuntimeError(f"Error: Environment variable(s) {', '.join(missing)} not set")
 
-    def _get_response_server(
-        self,
-        prompt: str,
-    ) -> str:
+    def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> str:
         """
         Generate a model response using the prompt
 
         Args:
             prompt (str): The input prompt provided by the user.
+            schema (Optional[dict]): Optional schema provided by the user.
 
         Returns:
             str: A tuple containing the model response or None if the response was invalid.
         """
         url = f"{LLAMA_SERVER_URL}/v1/completions"
 
-        payload = {
-            "prompt": prompt,
-        }
+        payload = {"prompt": prompt, "temperature": 0.7, "max_tokens": 1000}
+
+        if schema:
+            raw_schema = schema.get("schema", schema)
+            payload["json_schema"] = raw_schema
 
         try:
             response = requests.post(url, json=payload, timeout=3000)
@@ -116,15 +128,13 @@ def _get_response_server(
 
         return model_output
 
-    def _get_response_cli(
-        self,
-        prompt: str,
-    ) -> str:
+    def _get_response_cli(self, prompt: str, schema: Optional[dict] = None) -> str:
         """
         Generate a model response using the prompt
 
         Args:
             prompt (str): The input prompt provided by the user.
+            schema (Optional[dict]): Optional schema provided by the user.
 
         Returns:
             str: The model response or None if the response was invalid.
@@ -141,6 +151,10 @@ def _get_response_cli(
             "--no-display-prompt",
         ]
 
+        if schema:
+            raw_schema = schema["schema"] if "schema" in schema else schema
+            cmd += ["--json-schema", json.dumps(raw_schema)]
+
         try:
             completed = subprocess.run(
                 cmd, input=prompt.encode(), check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300
diff --git a/ai_feedback/models/OpenAIModel.py b/ai_feedback/models/OpenAIModel.py
index a765ec6..8ecaa5f 100644
--- a/ai_feedback/models/OpenAIModel.py
+++ b/ai_feedback/models/OpenAIModel.py
@@ -1,4 +1,6 @@
+import json
 import os
+import re
 from pathlib import Path
 from typing import Optional, Tuple
 
@@ -30,6 +32,7 @@ def generate_response(
         test_output: Optional[Path] = None,
         scope: Optional[str] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> Tuple[str, str]:
         """
         Generate a response based on the given prompt and assignment context.
@@ -43,30 +46,47 @@ def generate_response(
             question_num (Optional[int]): Specific question number to focus on.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional file path to a JSON schema for structured output.
 
         Returns:
             Tuple[str, str]: The full prompt and the generated response from OpenAI.
         """
-        response = self._call_openai(prompt, system_instructions)
+        if json_schema:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
+                schema = json.load(f)
+        else:
+            schema = None
+
+        response = self._call_openai(prompt, system_instructions, schema)
         return prompt, response
 
-    def _call_openai(self, prompt: str, system_instructions: str) -> str:
+    def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[dict] = None) -> str:
         """
         Send a prompt to OpenAI's chat completion API and retrieve the generated response.
 
         Args:
             prompt (str): The fully constructed input prompt including file content.
+            schema (Optional[dict]): Optional json schema to use.
 
         Returns:
             str: The model's response text.
         """
+        response_format = None
+        if schema:
+            response_format = {"type": "json_schema", "json_schema": schema}
+
         response = self.client.chat.completions.create(
-            model="gpt-4-turbo",
+            model="gpt-4o-mini",
             messages=[
                 {"role": "system", "content": system_instructions},
                 {"role": "user", "content": prompt},
             ],
-            max_tokens=1000,
+            response_format=response_format,
             temperature=0.5,
+            max_tokens=1000,
         )
+
         return response.choices[0].message.content
diff --git a/ai_feedback/models/OpenAIModelVector.py b/ai_feedback/models/OpenAIModelVector.py
index 84d8986..b5dd311 100644
--- a/ai_feedback/models/OpenAIModelVector.py
+++ b/ai_feedback/models/OpenAIModelVector.py
@@ -1,3 +1,4 @@
+import json
 import os
 from pathlib import Path
 from typing import List, Optional
@@ -28,7 +29,7 @@ def __init__(self) -> None:
         self.vector_store = self.client.vector_stores.create(name="Markus LLM Vector Store")
         self.model = self.client.beta.assistants.create(
             name="Markus LLM model",
-            model="gpt-4-turbo",
+            model="gpt-4o-mini",
             tools=[{"type": "file_search"}],
             tool_resources={"file_search": {"vector_store_ids": [self.vector_store.id]}},
         )
@@ -43,6 +44,7 @@ def generate_response(
         test_output: Optional[Path] = None,
         scope: Optional[str] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> tuple[str, str]:
         """
         Generate a response from the OpenAI model using the provided prompt and assignment files.
@@ -56,6 +58,7 @@ def generate_response(
             question_num (Optional[int]): An optional question number.
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
+            json_schema (Optional[str]): Optional file path to a JSON schema for structured output.
 
         Returns:
             tuple[str, str]: A tuple containing the full system request and the model's text response.
@@ -64,6 +67,15 @@
         if not self.model:
             raise RuntimeError("Model was not created successfully.")
 
+        if json_schema:
+            schema_path = Path(json_schema)
+            if not schema_path.exists():
+                raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
+            with open(schema_path, "r", encoding="utf-8") as f:
+                schema = json.load(f)
+        else:
+            schema = None
+
         request = "Uploaded Files: "
         file_ids: List[str] = []
         assignment_files = [f for f in (submission_file, solution_file, test_output) if f]
@@ -77,7 +89,7 @@
         if question_num:
             prompt += f" Identify and generate a response for the mistakes **only** in task ${question_num}. "
 
-        response = self._call_openai(prompt)
+        response = self._call_openai(prompt, schema)
         self._cleanup_resources(file_ids)
 
         request = f"\n{system_instructions}\n{prompt}"
@@ -98,12 +110,13 @@ def _upload_file(self, file_path: Path) -> str:
         self.client.vector_stores.files.create(vector_store_id=self.vector_store.id, file_id=response.id)
         return response.id
 
-    def _call_openai(self, prompt: str) -> str:
+    def _call_openai(self, prompt: str, schema: Optional[dict] = None) -> str:
         """
         Send the user prompt to OpenAI's assistant model and retrieve the generated response.
 
         Args:
             prompt (str): The input prompt for the assistant.
+            schema (Optional[dict]): Optional json schema to use.
 
         Returns:
             str: The assistant's generated response text.
@@ -112,7 +125,18 @@
 
         self.client.beta.threads.messages.create(thread_id=thread.id, role="user", content=prompt)
 
-        run = self.client.beta.threads.runs.create(thread_id=thread.id, assistant_id=self.model.id)
+        response_format = None
+        if schema:
+            response_format = {
+                "type": "json_schema",
+                "json_schema": schema,
+            }
+
+        run = self.client.beta.threads.runs.create(
+            thread_id=thread.id,
+            assistant_id=self.model.id,
+            **({"response_format": response_format} if response_format else {}),
+        )
 
         while run.status not in ["completed", "failed"]:
             run = self.client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
diff --git a/ai_feedback/models/RemoteModel.py b/ai_feedback/models/RemoteModel.py
index 43645fe..06ee52d 100644
--- a/ai_feedback/models/RemoteModel.py
+++ b/ai_feedback/models/RemoteModel.py
@@ -39,6 +39,7 @@ def generate_response(
         scope: Optional[str] = None,
         llama_mode: Optional[str] = None,
         submission_image: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ) -> Optional[Tuple[str, str]]:
         """
         Generate a model response using the prompt and assignment files.
@@ -53,6 +54,7 @@ def generate_response(
             system_instructions (str): instructions for the model
             llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
             submission_image (Optional[str]): An optional path to a submission image file.
+            json_schema (Optional[str]): An optional file path to a JSON schema for structured output.
 
         Returns:
             Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
diff --git a/ai_feedback/text_processing.py b/ai_feedback/text_processing.py
index 9250090..81b0dd6 100644
--- a/ai_feedback/text_processing.py
+++ b/ai_feedback/text_processing.py
@@ -60,6 +60,7 @@ def process_text(args, prompt: str, system_instructions: str) -> Tuple[str, str]
             question_num=args.question,
             system_instructions=system_instructions,
             llama_mode=args.llama_mode,
+            json_schema=args.json_schema,
         )
     else:
         request, response = model.generate_response(
@@ -69,6 +70,7 @@ def process_text(args, prompt: str, system_instructions: str) -> Tuple[str, str]
             scope=args.scope,
             system_instructions=system_instructions,
             llama_mode=args.llama_mode,
+            json_schema=args.json_schema,
         )
 
     return request, response
diff --git a/tests/local_tests/schema_structure_validation.py b/tests/local_tests/schema_structure_validation.py
new file mode 100644
index 0000000..98d0ede
--- /dev/null
+++ b/tests/local_tests/schema_structure_validation.py
@@ -0,0 +1,66 @@
+import json
+import subprocess
+from pathlib import Path
+
+import pytest
+
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+SUBMISSION = BASE_DIR / "test_submissions/cnn_example/cnn_submission.py"
+SOLUTION = BASE_DIR / "test_submissions/cnn_example/cnn_solution.py"
+SCHEMA_PATH = BASE_DIR / "ai_feedback/data/schema/code_annotation_schema.json"
+
+
+def run_cli(model_name: str) -> dict:
+    command = [
+        "python3",
+        "-m",
+        "ai_feedback",
+        "--prompt",
+        "code_annotation",
+        "--scope",
+        "code",
+        "--submission",
+        str(SUBMISSION),
+        "--solution",
+        str(SOLUTION),
+        "--model",
+        model_name,
+        "--json_schema",
+        str(SCHEMA_PATH),
+    ]
+    result = subprocess.run(command, capture_output=True, text=True)
+    assert result.returncode == 0, f"{model_name} failed: {result.stderr}"
+
+    output = result.stdout.strip()
+    json_start = output.find("{")
+    assert json_start != -1, f"{model_name} output has no JSON object"
+
+    return json.loads(output[json_start:])
+
+
+def validate_json_schema(result: dict):
+    assert "annotations" in result, "Missing 'annotations' key"
+    assert isinstance(result["annotations"], list), "'annotations' must be a list"
+    for item in result["annotations"]:
+        assert isinstance(item, dict), "Each annotation must be an object"
+        for key in ["filename", "content", "line_start", "line_end", "column_start", "column_end"]:
+            assert key in item, f"Missing key: {key}"
+            if key in ["filename", "content"]:
+                assert isinstance(item[key], str), f"{key} must be a string"
+            else:
+                assert isinstance(item[key], int), f"{key} must be an integer"
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "openai",
+        "openai-vector",
+        "codellama:latest",
+        "deepSeek-R1:70B",
+        "deepSeek-v3",
+    ],
+)
+def test_model_outputs_valid_json_schema(model):
+    result = run_cli(model)
+    validate_json_schema(result)
diff --git a/tests/test_helper.py b/tests/test_helper.py
index a9ee45e..f00284b 100644
--- a/tests/test_helper.py
+++ b/tests/test_helper.py
@@ -73,6 +73,7 @@ def fake_generate_response(
         test_output: Optional[Path] = None,
         scope: Optional[str] = None,
         llama_mode: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ):
         all_prompts.append((test_name, "OpenAIModel.generate_response", prompt))
         return prompt, f"[MOCKED RESPONSE] \n {prompt}"