diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 42374a6..feb7d5f 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -42,7 +42,7 @@ def render_prompt_template( if question_num is not None: template_data['file_contents'] = _get_question_contents([submission, solution], question_num) else: - template_data['file_contents'] = gather_file_contents([submission, solution, test_output]) + template_data['file_contents'] = gather_xml_file_contents(submission, solution, test_output) # Handle image placeholders with context-aware replacement if '{submission_image}' in prompt_content and 'submission_image' not in template_data: @@ -64,11 +64,13 @@ def render_prompt_template( return prompt_content.format(**template_data) -def gather_file_references(submission: Path, solution: Optional[Path], test_output: Optional[Path]) -> str: +def gather_file_references( + submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None +) -> str: """Generate file reference descriptions for prompt templates. Args: - submission (Path): Student's submission file path + submission (Path, optional): Student's submission file path solution (Path, optional): Instructor's solution file path test_output (Path, optional): Student's test output file path @@ -76,7 +78,8 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp str: Descriptions like "The instructor's solution file..." """ references: List[str] = [] - references.append(f"The student's submission file is {submission.name}.") + if submission: + references.append(f"The student's submission file is {submission.name}.") if solution: references.append(f"The instructor's solution file is {solution.name}.") if test_output: @@ -84,49 +87,88 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp return "\n".join(references) -def gather_file_contents(assignment_files: List[Optional[Path]]) -> str: - """Generate file contents with line numbers for prompt templates. +def gather_xml_file_contents( + submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None +) -> str: + """Generate file contents with XML tags for prompt templates. Args: - assignment_files (list[str]): List of file paths to process + submission (Path, optional): Student's submission file path + solution (Path, optional): Instructor's solution file path + test_output (Path, optional): Student's test output file path Returns: - str: File contents formatted with line numbers + str: File contents formatted with XML tags and line numbers """ file_contents = "" - for file_path in assignment_files: - if not file_path: - continue - filename = os.path.basename(file_path) - - try: - # Handle PDF files separately - if filename.lower().endswith('.pdf'): - text_content = extract_pdf_text(file_path) - lines = text_content.split('\n') - else: - # Handle regular text files - with open(file_path, "r", encoding="utf-8") as file: - lines = file.readlines() - - # Common processing for both file types - file_contents += f"=== {filename} ===\n" - for i, line in enumerate(lines, start=1): - stripped_line = line.rstrip('\n').rstrip() - if stripped_line.strip(): - file_contents += f"(Line {i}) {stripped_line}\n" - else: - file_contents += f"(Line {i}) \n" - file_contents += "\n" - - except Exception as e: - print(f"Error reading file {filename}: {e}") - continue + if submission: + file_contents += _format_file_with_xml_tag(submission, "submission") + + if solution: + file_contents += _format_file_with_xml_tag(solution, "solution") + + if test_output: + file_contents += _format_file_with_xml_tag(test_output, "test_output") return file_contents +def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: + """Format a single file with XML tags and line numbers. + + Args: + file_path (Path): Path to the file to format + tag_name (str): The XML tag name (submission, solution, test_output) + + Returns: + str: Formatted file content with XML tags + """ + if not file_path: + return "" + + filename = os.path.basename(file_path) + + try: + # Handle PDF files separately + if filename.lower().endswith('.pdf'): + text_content = extract_pdf_text(file_path) + return f"<{tag_name} filename=\"{filename}\">\n{text_content}\n\n\n" + else: + # Handle regular text files + with open(file_path, "r", encoding="utf-8") as file: + lines = file.readlines() + return _wrap_lines_with_xml(lines, tag_name, filename) + + except Exception as e: + print(f"Error reading file {filename}: {e}") + return "" + + +def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str) -> str: + """Wrap lines with XML tags and add line numbers. + + Args: + lines (List[str]): List of lines to format + tag_name (str): The XML tag name (submission, solution, test_output) + filename (str): The filename to include in the XML tag + + Returns: + str: Formatted content with XML tags and line numbers + """ + content = f"<{tag_name} filename=\"{filename}\">\n" + + for i, line in enumerate(lines, start=1): + stripped_line = line.rstrip("\n") + if stripped_line.strip(): + content += f"(Line {i}) {stripped_line}\n" + else: + content += f"(Line {i}) {line}" + + content += f"\n\n" + return content + + def extract_pdf_text(pdf_path: str) -> str: """Extract text content from a PDF file. @@ -233,6 +275,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: Args: assignment_files (List[Optional[Path]]): List of Path or None objects to parse. + Expected order: [submission, solution] question_num (int): The target task number to extract. Returns: @@ -244,7 +287,9 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: file_contents = "" task_found = False - for file_path in assignment_files: + semantic_tags = ["submission", "solution"] + + for index, file_path in enumerate(assignment_files): if ( not file_path or file_path.suffix != '.txt' @@ -266,9 +311,11 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: task_content = task_match.group(1).strip() task_found = True - file_contents += f"\n\n---\n### {file_path}\n\n" + tag_name = semantic_tags[index] if index < len(semantic_tags) else "file" + file_contents += f"<{tag_name} filename=\"{file_path.name}\">\n" file_contents += intro_content + "\n\n" if intro_content else "" file_contents += task_content + "\n\n" + file_contents += f"\n\n" if not task_found: print(f"Task {question_num} not found in any assignment file.") diff --git a/tests/open_ai_model_tests/integration_test.py b/tests/open_ai_model_tests/integration_test.py index 46f63ef..832f0f0 100644 --- a/tests/open_ai_model_tests/integration_test.py +++ b/tests/open_ai_model_tests/integration_test.py @@ -4,8 +4,7 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture): - """ - Example 1: + """Example 1: Evaluate cnn_example test using openAI model and print to stdout. python -m ai_feedback --prompt code_lines --scope code \ --submission test_submissions/cnn_example/cnn_submission \ @@ -30,13 +29,12 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture): assert "Compare the student's code and solution code. For each mistake" in output assert "(Line 1) import numpy as np" in output - assert "=== cnn_submission.py ===" in output - assert "=== cnn_solution.py ===" in output + assert '' in output + assert '' in output def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture): - """ - Example 2: + """Example 2: Evaluate cnn_example test using openAI model and a custom prompt text, printing to stdout. python -m ai_feedback --prompt_text "Evaluate the student's code readability." \ --scope code \ @@ -58,13 +56,12 @@ def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture): ] output = run_cli_and_capture(args, capsys) assert "Evaluate the student's code readability." in output - assert "=== cnn_submission.py ===" in output + assert '' in output assert "(Line 1) import numpy as np" in output def test_pdf_example_openai_direct(capsys, mock_and_capture): - """ - Example 3: + """Example 3: Evaluate pdf_example test using openAI model and direct output mode. python -m ai_feedback --prompt text_pdf_analyze --scope text \ --submission test_submissions/pdf_example/student_pdf_submission.pdf \ @@ -86,3 +83,67 @@ def test_pdf_example_openai_direct(capsys, mock_and_capture): assert "Does the student correctly respond to the question, and meet all the" in output assert "student_pdf_submission.pdf" in output assert "Normalization allows each feature to have an equal influence on the mode" in output + + +def test_xml_formatting_code_scope(capsys, mock_and_capture): + """ + Test XML formatting for file contents in code scope. + Verifies that file contents use XML tags while file references remain plain text. + """ + parent = Path(__file__).parent.parent.parent + + args = [ + "--prompt_text", + "File references: {file_references}\n\nFile contents:\n{file_contents}", + "--scope", + "code", + "--submission", + str(parent / "test_submissions/csc108/correct_submission/correct_submission.py"), + "--solution", + str(parent / "test_submissions/csc108/solution.py"), + "--model", + "openai", + ] + output = run_cli_and_capture(args, capsys) + + assert "The student's submission file is correct_submission.py." in output + assert "The instructor's solution file is solution.py." in output + + assert '' in output + assert '' in output + assert '' in output + assert '' in output + + assert "(Line 1) def fizzbuzz(n: int) -> list:" in output + + +def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture): + """ + Test XML formatting for file contents in text scope with all file types. + Verifies submission, solution, and test_output files all use XML formatting. + """ + parent = Path(__file__).parent.parent.parent + + args = [ + "--prompt_text", + "File references: {file_references}\n\nFile contents:\n{file_contents}", + "--submission_type", + "python", + "--scope", + "text", + "--submission", + str(parent / "test_submissions/ggr274_homework5/test1/student_submission.txt"), + "--solution", + str(parent / "test_submissions/ggr274_homework5/test1/Homework_5_solution.txt"), + "--model", + "openai", + ] + output = run_cli_and_capture(args, capsys) + + assert "The student's submission file is student_submission.txt." in output + assert "The instructor's solution file is Homework_5_solution.txt." in output + + assert '' in output + assert '' in output + assert '' in output + assert '' in output