Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 70 additions & 32 deletions ai_feedback/helpers/template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def render_prompt_template(
if question_num is not None:
template_data['file_contents'] = _get_question_contents([submission, solution], question_num)
else:
template_data['file_contents'] = gather_file_contents([submission, solution, test_output])
template_data['file_contents'] = gather_xml_file_contents(submission, solution, test_output)

# Handle image placeholders with context-aware replacement
if '{submission_image}' in prompt_content and 'submission_image' not in template_data:
Expand Down Expand Up @@ -84,47 +84,80 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp
return "\n".join(references)


def gather_file_contents(assignment_files: List[Optional[Path]]) -> str:
"""Generate file contents with line numbers for prompt templates.
def gather_xml_file_contents(
submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None
) -> str:
"""Generate file contents with XML tags for prompt templates.

Args:
assignment_files (list[str]): List of file paths to process
submission (Path): Student's submission file path
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be Optional[Path] for all these args

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

solution (Path, optional): Instructor's solution file path
test_output (Path, optional): Student's test output file path

Returns:
str: File contents formatted with line numbers
str: File contents formatted with XML tags and line numbers
"""
file_contents = ""

for file_path in assignment_files:
if not file_path:
continue
filename = os.path.basename(file_path)

try:
# Handle PDF files separately
if filename.lower().endswith('.pdf'):
text_content = extract_pdf_text(file_path)
lines = text_content.split('\n')
else:
# Handle regular text files
with open(file_path, "r", encoding="utf-8") as file:
lines = file.readlines()

# Common processing for both file types
file_contents += f"=== {filename} ===\n"
file_contents += _format_file_with_xml_tag(submission, "submission")

if solution:
file_contents += _format_file_with_xml_tag(solution, "solution")

if test_output:
file_contents += _format_file_with_xml_tag(test_output, "test_output")

return file_contents


def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
"""Format a single file with XML tags and line numbers.

Args:
file_path (Path): Path to the file to format
tag_name (str): The XML tag name (submission, solution, test_output)

Returns:
str: Formatted file content with XML tags
"""
if not file_path:
return ""

filename = os.path.basename(file_path)
content = ""

try:
# Handle PDF files separately
if filename.lower().endswith('.pdf'):
text_content = extract_pdf_text(file_path)
content += f"<{tag_name} file=\"{filename}\">\n"
lines = text_content.split('\n')
for i, line in enumerate(lines, start=1):
stripped_line = line.rstrip('\n').rstrip()
stripped_line = line.rstrip()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated logic for wrapping lines with XML tags, both for PDFs and regular text files. Consider refactoring the repeated line formatting into a helper function—e.g., _wrap_lines_with_xml(lines, tag_name, filename)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was actually part of the original code, I’ve moved it into a helper function already. I’ll work on simplifying the logic further. Thanks for pointing that out!

if stripped_line.strip():
file_contents += f"(Line {i}) {stripped_line}\n"
content += f"(Line {i}) {stripped_line}\n"
else:
file_contents += f"(Line {i}) \n"
file_contents += "\n"
content += f"(Line {i}) \n"
content += f"</{tag_name}>\n\n"
else:
# Handle regular text files
with open(file_path, "r", encoding="utf-8") as file:
lines = file.readlines()

except Exception as e:
print(f"Error reading file {filename}: {e}")
continue
content += f"<{tag_name} file=\"{filename}\">\n"
for i, line in enumerate(lines, start=1):
stripped_line = line.rstrip("\n")
if stripped_line.strip():
content += f"(Line {i}) {stripped_line}\n"
else:
content += f"(Line {i}) {line}"
content += f"</{tag_name}>\n\n"

return file_contents
except Exception as e:
print(f"Error reading file {filename}: {e}")
return ""

return content


def extract_pdf_text(pdf_path: str) -> str:
Expand Down Expand Up @@ -233,6 +266,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:

Args:
assignment_files (List[Optional[Path]]): List of Path or None objects to parse.
Expected order: [submission, solution]
question_num (int): The target task number to extract.

Returns:
Expand All @@ -244,7 +278,9 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
file_contents = ""
task_found = False

for file_path in assignment_files:
semantic_tags = ["submission", "solution"]

for index, file_path in enumerate(assignment_files):
if (
not file_path
or file_path.suffix != '.txt'
Expand All @@ -266,9 +302,11 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
task_content = task_match.group(1).strip()
task_found = True

file_contents += f"\n\n---\n### {file_path}\n\n"
tag_name = semantic_tags[index] if index < len(semantic_tags) else "file"
file_contents += f"<{tag_name} file=\"{file_path.name}\">\n"
file_contents += intro_content + "\n\n" if intro_content else ""
file_contents += task_content + "\n\n"
file_contents += f"</{tag_name}>\n\n"

if not task_found:
print(f"Task {question_num} not found in any assignment file.")
Expand Down
79 changes: 70 additions & 9 deletions tests/open_ai_model_tests/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@


def test_cnn_example_openai_stdout(capsys, mock_and_capture):
"""
Example 1:
"""Example 1:
Evaluate cnn_example test using openAI model and print to stdout.
python -m ai_feedback --prompt code_lines --scope code \
--submission test_submissions/cnn_example/cnn_submission \
Expand All @@ -30,13 +29,12 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture):

assert "Compare the student's code and solution code. For each mistake" in output
assert "(Line 1) import numpy as np" in output
assert "=== cnn_submission.py ===" in output
assert "=== cnn_solution.py ===" in output
assert '<submission file="cnn_submission.py">' in output
assert '<solution file="cnn_solution.py">' in output


def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture):
"""
Example 2:
"""Example 2:
Evaluate cnn_example test using openAI model and a custom prompt text, printing to stdout.
python -m ai_feedback --prompt_text "Evaluate the student's code readability." \
--scope code \
Expand All @@ -58,13 +56,12 @@ def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture):
]
output = run_cli_and_capture(args, capsys)
assert "Evaluate the student's code readability." in output
assert "=== cnn_submission.py ===" in output
assert '<submission file="cnn_submission.py">' in output
assert "(Line 1) import numpy as np" in output


def test_pdf_example_openai_direct(capsys, mock_and_capture):
"""
Example 3:
"""Example 3:
Evaluate pdf_example test using openAI model and direct output mode.
python -m ai_feedback --prompt text_pdf_analyze --scope text \
--submission test_submissions/pdf_example/student_pdf_submission.pdf \
Expand All @@ -86,3 +83,67 @@ def test_pdf_example_openai_direct(capsys, mock_and_capture):
assert "Does the student correctly respond to the question, and meet all the" in output
assert "student_pdf_submission.pdf" in output
assert "Normalization allows each feature to have an equal influence on the mode" in output


def test_xml_formatting_code_scope(capsys, mock_and_capture):
"""
Test XML formatting for file contents in code scope.
Verifies that file contents use XML tags while file references remain plain text.
"""
parent = Path(__file__).parent.parent.parent

args = [
"--prompt_text",
"File references: {file_references}\n\nFile contents:\n{file_contents}",
"--scope",
"code",
"--submission",
str(parent / "test_submissions/csc108/correct_submission/correct_submission.py"),
"--solution",
str(parent / "test_submissions/csc108/solution.py"),
"--model",
"openai",
]
output = run_cli_and_capture(args, capsys)

assert "The student's submission file is correct_submission.py." in output
assert "The instructor's solution file is solution.py." in output

assert '<submission file="correct_submission.py">' in output
assert '</submission>' in output
assert '<solution file="solution.py">' in output
assert '</solution>' in output

assert "(Line 1) def fizzbuzz(n: int) -> list:" in output


def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture):
"""
Test XML formatting for file contents in text scope with all file types.
Verifies submission, solution, and test_output files all use XML formatting.
"""
parent = Path(__file__).parent.parent.parent

args = [
"--prompt_text",
"File references: {file_references}\n\nFile contents:\n{file_contents}",
"--submission_type",
"python",
"--scope",
"text",
"--submission",
str(parent / "test_submissions/ggr274_homework5/test1/student_submission.txt"),
"--solution",
str(parent / "test_submissions/ggr274_homework5/test1/Homework_5_solution.txt"),
"--model",
"openai",
]
output = run_cli_and_capture(args, capsys)

assert "The student's submission file is student_submission.txt." in output
assert "The instructor's solution file is Homework_5_solution.txt." in output

assert '<submission file="student_submission.txt">' in output
assert '</submission>' in output
assert '<solution file="Homework_5_solution.txt">' in output
assert '</solution>' in output