Merged

29 commits
2dfebbb
styles
Jul 8, 2025
9a41cdf
styles
Jul 8, 2025
4e4e5dd
testing structured output for ollama
Jul 8, 2025
88f1001
test
Jul 8, 2025
ae9a93c
test
Jul 8, 2025
1067871
styles
Jul 8, 2025
1609ee6
updated the models to use json dumps
Jul 8, 2025
c85bd57
upgrade OpenAIModel to gpt-4o-mini
Jul 8, 2025
38c0326
updated OpenAIModel to use structured response
Jul 8, 2025
19e651e
updated models and annotations to be compatible with new openai model
Jul 8, 2025
dea83b9
styling
Jul 8, 2025
6c8cfb1
updated codellama and deepseekv3 to work with new style of annotations
Jul 8, 2025
99af119
updated deepseekv3 cli mode to work with new schema format
Jul 8, 2025
cd54e69
updated deepseekv3 server payload
Jul 8, 2025
4618174
change how payload recives schema
Jul 8, 2025
d0a7cef
stlyes
Jul 8, 2025
964161b
updated readme
Jul 8, 2025
0e79f7c
updated readme
Jul 8, 2025
18567c2
updated model descriptions
Jul 8, 2025
d8f75e4
updated model descriptions
Jul 8, 2025
bfb060d
updated model descriptions
Jul 8, 2025
6c4d8d8
Merge branch 'main' into add-json-schema
wkukka1 Jul 8, 2025
64df9d5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 8, 2025
1fa1146
styles
Jul 9, 2025
f24a1e0
Merge branch 'add-json-schema' of github.com:wkukka1/ai-autograding-f…
Jul 9, 2025
8b489a8
added fallback for schema in ollama models
Jul 9, 2025
93dbd4b
updated helper to use json_schema
Jul 9, 2025
2b8d924
updated models to have consistent json shcema file checks
Jul 9, 2025
0765342
Merge branch 'main' into add-json-schema
david-yz-liu Jul 10, 2025
7 changes: 7 additions & 0 deletions README.md
@@ -43,6 +43,7 @@ For the image scope, the program takes up to two files, depending on the prompt
| `--system_prompt` | Pre-defined system prompt name or file path to custom system prompt | ❌ |
| `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ |
| `--output_template` | Output template file (from `arg_options.OutputTemplate`) | ❌ |
| `--json_schema` | File path to a JSON schema file used for structured output | ❌ |
** One of either `--prompt` or `--prompt_text` must be selected.

## Scope
@@ -317,6 +318,12 @@ python3 -m ai_feedback --prompt code_table --scope code \
--model deepSeek-v3 --llama_mode cli
```


#### Get annotations for the cnn_example test using the OpenAI model
```bash
python -m ai_feedback --prompt code_annotations --scope code \
--submission test_submissions/cnn_example/cnn_submission --solution test_submissions/cnn_example/cnn_solution.py \
--model openai --json_schema ai_feedback/data/schema/code_annotation_schema.json
```

#### Evaluate using custom prompt file path
```bash
python -m ai_feedback --prompt ai_feedback/data/prompts/user/code_overall.md --scope code --submission test_submissions/csc108/correct_submission/correct_submission.py --solution test_submissions/csc108/solution.py --model codellama:latest
7 changes: 7 additions & 0 deletions ai_feedback/__main__.py
@@ -207,6 +207,13 @@ def main() -> int:
default="cli",
help=HELP_MESSAGES["llama_mode"],
)
parser.add_argument(
"--json_schema",
type=str,
required=False,
default="",
help=HELP_MESSAGES["json_schema"],
)

args = parser.parse_args()

2 changes: 2 additions & 0 deletions ai_feedback/code_processing.py
@@ -87,6 +87,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
question_num=args.question,
system_instructions=system_instructions,
llama_mode=args.llama_mode,
json_schema=args.json_schema,
)
else:
request, response = model.generate_response(
@@ -96,6 +97,7 @@ def process_code(args, prompt: str, system_instructions: str) -> Tuple[str, str]
test_output=test_output_file,
system_instructions=system_instructions,
llama_mode=args.llama_mode,
json_schema=args.json_schema,
)

return request, response
54 changes: 54 additions & 0 deletions ai_feedback/data/schema/code_annotation_schema.json
@@ -0,0 +1,54 @@
{
"name": "student_code_annotation",
"description": "List of code annotations describing specific mistakes in the student's code.",
"schema": {
"type": "object",
"properties": {
"annotations": {
"type": "array",
"items": {
"type": "object",
"properties": {
"filename": {
"type": "string",
"description": "The name of the student's file where the issue was found."
},
"content": {
"type": "string",
"description": "A short description of the mistake or issue."
},
"line_start": {
"type": "integer",
"description": "The starting line number where the issue begins.",
"minimum": 1
},
"line_end": {
"type": "integer",
"description": "The ending line number where the issue ends.",
"minimum": 1
},
"column_start": {
"type": "integer",
"description": "The starting column position of the mistake.",
"minimum": 0
},
"column_end": {
"type": "integer",
"description": "The ending column position of the mistake.",
"minimum": 0
}
},
"required": [
"filename",
"content",
"line_start",
"line_end",
"column_start",
"column_end"
]
}
}
},
"required": ["annotations"]
}
}
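For reference, a minimal sketch of checking a model response against this file, assuming the third-party `jsonschema` package (not part of this PR); note that the actual JSON Schema is nested under the wrapper's `"schema"` key, and the sample annotation below is purely illustrative:

```python
import json

from jsonschema import validate  # assumed dependency for this sketch only

# Load the wrapper file; the raw JSON Schema lives under the "schema" key.
with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    wrapper = json.load(f)

# A hypothetical model response that should satisfy the schema.
sample_response = {
    "annotations": [
        {
            "filename": "cnn_submission.py",
            "content": "Loop variable shadows the built-in name 'sum'.",
            "line_start": 12,
            "line_end": 12,
            "column_start": 4,
            "column_end": 7,
        }
    ]
}

# Raises jsonschema.ValidationError if the response does not conform.
validate(instance=sample_response, schema=wrapper["schema"])
```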
1 change: 1 addition & 0 deletions ai_feedback/helpers/constants.py
@@ -14,5 +14,6 @@
"test_output": "The output of tests from evaluating the assignment.",
"submission_image": "The file path for the image file.",
"solution_image": "The file path to the solution image.",
"json_schema": "file path to a json file that contains the schema for ai output",
"system_prompt": "Pre-defined system prompt name (from ai_feedback/data/prompts/system/) or file path to custom system prompt file.",
}
1 change: 1 addition & 0 deletions ai_feedback/image_processing.py
@@ -165,6 +165,7 @@ def process_image(args, prompt: dict, system_instructions: str) -> tuple[str, st
system_instructions=system_instructions,
question_num=question,
submission_image=args.submission_image,
json_schema=args.json_schema,
)
responses.append(str(response))
else:
2 changes: 2 additions & 0 deletions ai_feedback/models/ClaudeModel.py
@@ -29,6 +29,7 @@ def generate_response(
question_num: Optional[int] = None,
test_output: Optional[Path] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generates a response from Claude using the provided prompt and assignment file context.
@@ -42,6 +43,7 @@
question_num (Optional[int]): Specific task number to extract from text files.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.
Returns:
Optional[Tuple[str, str]]: The original prompt and the model's response, or None if the response is invalid.
9 changes: 9 additions & 0 deletions ai_feedback/models/CodeLlamaModel.py
@@ -1,3 +1,4 @@
import json
from pathlib import Path
from typing import Optional, Tuple

@@ -26,6 +27,7 @@ def generate_response(
test_output: Optional[Path] = None,
scope: Optional[str] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generates a response from the CodeLlama model using the provided prompt
@@ -40,18 +42,25 @@ def generate_response(
question_num (Optional[int]): An optional specific question number to extract content for.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.

Returns:
Optional[Tuple[str, str]]: A tuple of the request and the model's response,
or None if no valid response is returned.
"""
if json_schema:
with open(json_schema, "r", encoding="utf-8") as f:
schema = json.load(f)
else:
schema = None

response = ollama.chat(
model=self.model["model"],
messages=[
{"role": "system", "content": system_instructions},
{"role": "user", "content": prompt},
],
format=schema["schema"] if schema else None,
)

if not response or "message" not in response or "content" not in response["message"]:
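The `format` argument above is how the Ollama-backed models (CodeLlama here, DeepSeek below) request structured output. A minimal standalone sketch of the same call, assuming a local Ollama install with structured-output support, `codellama:latest` pulled, and the schema file added in this PR; the prompt strings are placeholders:

```python
import json

import ollama  # assumes the ollama Python client and a running Ollama daemon

with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    wrapper = json.load(f)

response = ollama.chat(
    model="codellama:latest",
    messages=[
        {"role": "system", "content": "You are a grading assistant."},
        {"role": "user", "content": "List the mistakes in the submitted code."},
    ],
    # Ollama accepts a raw JSON Schema dict here; the wrapper file nests it under "schema".
    format=wrapper["schema"],
)

# The reply content is a JSON string constrained by the schema.
annotations = json.loads(response["message"]["content"])
print(annotations["annotations"])
```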
9 changes: 9 additions & 0 deletions ai_feedback/models/DeepSeekModel.py
@@ -1,3 +1,4 @@
import json
from pathlib import Path
from typing import Optional, Tuple

@@ -24,6 +25,7 @@ def generate_response(
test_output: Optional[Path] = None,
scope: Optional[str] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generate a model response using the prompt and assignment files.
@@ -37,18 +39,25 @@ def generate_response(
question_num (Optional[int]): An optional question number to target specific content.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.

Returns:
Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
or None if the response was invalid.
"""
if json_schema:
with open(json_schema, "r", encoding="utf-8") as f:
schema = json.load(f)
else:
schema = None

response = ollama.chat(
model=self.model["model"],
messages=[
{"role": "system", "content": system_instructions},
{"role": "user", "content": prompt},
],
format=schema["schema"] if schema else None,
)

if not response or "message" not in response or "content" not in response["message"]:
36 changes: 23 additions & 13 deletions ai_feedback/models/DeepSeekV3Model.py
@@ -1,3 +1,4 @@
import json
import os
import subprocess
import sys
@@ -31,6 +32,7 @@ def generate_response(
question_num: Optional[int] = None,
test_output: Optional[Path] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Optional[Tuple[str, str]]:
"""
Generate a model response using the prompt and assignment files.
@@ -44,18 +46,24 @@ def generate_response(
test_output (Optional[Path]): Path Object pointing to the test output file.
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
question_num (Optional[int]): An optional question number to target specific content.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.

Returns:
Optional[Tuple[str, str]]: A tuple containing the prompt and the model's response,
or None if the response was invalid.
"""
schema = None
Contributor comment: Let's keep the code structure consistent with the other classes (if-else, etc.). Also keep the open call the same, with the utf-8 encoding.
if json_schema:
with open(json_schema) as f:
schema = json.load(f)

prompt = f"{system_instructions}\n{prompt}"
if llama_mode == 'server':
self._ensure_env_vars('LLAMA_SERVER_URL')
response = self._get_response_server(prompt)
response = self._get_response_server(prompt, schema)
else:
self._ensure_env_vars('LLAMA_MODEL_PATH', 'LLAMA_CLI_PATH')
response = self._get_response_cli(prompt)
response = self._get_response_cli(prompt, schema)

response = response.strip()

@@ -81,24 +89,24 @@ def _ensure_env_vars(self, *names):
if missing:
raise RuntimeError(f"Error: Environment variable(s) {', '.join(missing)} not set")

def _get_response_server(
self,
prompt: str,
) -> str:
def _get_response_server(self, prompt: str, schema: Optional[dict] = None) -> str:
"""
Generate a model response using the prompt

Args:
prompt (str): The input prompt provided by the user.
schema (Optional[dict]): Optional schema provided by the user.

Returns:
str: The model's response text.
"""
url = f"{LLAMA_SERVER_URL}/v1/completions"

payload = {
"prompt": prompt,
}
payload = {"prompt": prompt, "temperature": 0.7, "max_tokens": 1000}

if schema:
raw_schema = schema.get("schema", schema)
payload["json_schema"] = raw_schema

try:
response = requests.post(url, json=payload, timeout=3000)
@@ -116,15 +124,13 @@ def _get_response_server(

return model_output

def _get_response_cli(
self,
prompt: str,
) -> str:
def _get_response_cli(self, prompt: str, schema: Optional[dict] = None) -> str:
"""
Generate a model response using the prompt

Args:
prompt (str): The input prompt provided by the user.
schema (Optional[dict]): Optional schema provided by the user.

Returns:
str: The model response or None if the response was invalid.
@@ -141,6 +147,10 @@ def _get_response_cli(
"--no-display-prompt",
]

if schema:
raw_schema = schema["schema"] if "schema" in schema else schema
cmd += ["--json-schema", json.dumps(raw_schema)]

try:
completed = subprocess.run(
cmd, input=prompt.encode(), check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300
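For reference on the server branch, a minimal sketch of the request that `_get_response_server` now builds, assuming a llama.cpp server is reachable at the placeholder URL below (the real code reads `LLAMA_SERVER_URL` from the environment) and using the schema file added in this PR; in CLI mode the same inner schema is instead passed to `llama-cli` via `--json-schema` with `json.dumps`:

```python
import json

import requests

LLAMA_SERVER_URL = "http://localhost:8080"  # placeholder for this sketch

with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    wrapper = json.load(f)

payload = {
    "prompt": "List the mistakes in the submitted code.",
    "temperature": 0.7,
    "max_tokens": 1000,
    # Only the inner JSON Schema is sent; the name/description wrapper is stripped.
    "json_schema": wrapper.get("schema", wrapper),
}

response = requests.post(f"{LLAMA_SERVER_URL}/v1/completions", json=payload, timeout=3000)
response.raise_for_status()
print(response.json())  # the schema-constrained completion is inside the returned JSON
```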
28 changes: 24 additions & 4 deletions ai_feedback/models/OpenAIModel.py
@@ -1,4 +1,6 @@
import json
import os
import re
from pathlib import Path
from typing import Optional, Tuple

@@ -30,6 +32,7 @@ def generate_response(
test_output: Optional[Path] = None,
scope: Optional[str] = None,
llama_mode: Optional[str] = None,
json_schema: Optional[str] = None,
) -> Tuple[str, str]:
"""
Generate a response based on the given prompt and assignment context.
@@ -43,30 +46,47 @@ def generate_response(
question_num (Optional[int]): Specific question number to focus on.
system_instructions (str): instructions for the model
llama_mode (Optional[str]): Optional mode to invoke llama.cpp in.
json_schema (Optional[str]): Optional path to a JSON schema file for structured output.
Returns:
Tuple[str, str]: The full prompt and the generated response from OpenAI.
"""
response = self._call_openai(prompt, system_instructions)
if json_schema:
schema_path = Path(json_schema)
if not schema_path.exists():
Contributor comment: This is a nice check; please add this to all of the classes.
raise FileNotFoundError(f"JSON schema file not found: {schema_path}")
with open(schema_path, "r", encoding="utf-8") as f:
schema = json.load(f)
else:
schema = None

response = self._call_openai(prompt, system_instructions, schema)
return prompt, response

def _call_openai(self, prompt: str, system_instructions: str) -> str:
def _call_openai(self, prompt: str, system_instructions: str, schema: Optional[dict] = None) -> str:
"""
Send a prompt to OpenAI's chat completion API and retrieve the generated response.
Args:
prompt (str): The fully constructed input prompt including file content.
system_instructions (str): Instructions for the model.
schema (Optional[dict]): Optional JSON schema dict used for structured output.
Returns:
str: The model's response text.
"""
response_format = None
if schema:
response_format = {"type": "json_schema", "json_schema": schema}

response = self.client.chat.completions.create(
model="gpt-4-turbo",
model="gpt-4o-mini",
messages=[
{"role": "system", "content": system_instructions},
{"role": "user", "content": prompt},
],
max_tokens=1000,
response_format=response_format,
temperature=0.5,
max_tokens=1000,
)

return response.choices[0].message.content
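For context on the new `response_format` argument, a minimal standalone sketch of the call `_call_openai` now makes, assuming `OPENAI_API_KEY` is set in the environment and using the schema file added in this PR (whose top-level `name`/`schema` keys match what the OpenAI Structured Outputs API expects under `json_schema`); the prompt strings are placeholders:

```python
import json

from openai import OpenAI  # assumes the openai v1 Python client

client = OpenAI()  # reads OPENAI_API_KEY from the environment

with open("ai_feedback/data/schema/code_annotation_schema.json", encoding="utf-8") as f:
    schema = json.load(f)  # wrapper with "name", "description", and "schema" keys

response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a grading assistant."},
        {"role": "user", "content": "List the mistakes in the submitted code."},
    ],
    # Unlike the Ollama path, the whole wrapper object is passed, not just the inner schema.
    response_format={"type": "json_schema", "json_schema": schema},
    temperature=0.5,
    max_tokens=1000,
)

annotations = json.loads(response.choices[0].message.content)
print(annotations["annotations"])
```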