From b25e216debbad0cc61af76c562ca98eb6d042575 Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Tue, 8 Jul 2025 16:13:02 -0400 Subject: [PATCH 01/12] Copy changes of template_utils from xml-tag branch --- ai_feedback/helpers/template_utils.py | 100 +++++++++++++++++--------- 1 file changed, 68 insertions(+), 32 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 42374a6..d87cd6e 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -42,7 +42,7 @@ def render_prompt_template( if question_num is not None: template_data['file_contents'] = _get_question_contents([submission, solution], question_num) else: - template_data['file_contents'] = gather_file_contents([submission, solution, test_output]) + template_data['file_contents'] = gather_xml_file_contents(submission, solution, test_output) # Handle image placeholders with context-aware replacement if '{submission_image}' in prompt_content and 'submission_image' not in template_data: @@ -84,47 +84,78 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp return "\n".join(references) -def gather_file_contents(assignment_files: List[Optional[Path]]) -> str: - """Generate file contents with line numbers for prompt templates. +def gather_xml_file_contents(submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str: + """Generate file contents with XML tags for prompt templates. 
Args: - assignment_files (list[str]): List of file paths to process + submission (Path): Student's submission file path + solution (Path, optional): Instructor's solution file path + test_output (Path, optional): Student's test output file path Returns: - str: File contents formatted with line numbers + str: File contents formatted with XML tags and line numbers """ file_contents = "" + + file_contents += _format_file_with_xml_tag(submission, "submission") - for file_path in assignment_files: - if not file_path: - continue - filename = os.path.basename(file_path) - - try: - # Handle PDF files separately - if filename.lower().endswith('.pdf'): - text_content = extract_pdf_text(file_path) - lines = text_content.split('\n') - else: - # Handle regular text files - with open(file_path, "r", encoding="utf-8") as file: - lines = file.readlines() - - # Common processing for both file types - file_contents += f"=== {filename} ===\n" + if solution: + file_contents += _format_file_with_xml_tag(solution, "solution") + + if test_output: + file_contents += _format_file_with_xml_tag(test_output, "test_output") + + return file_contents + + +def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: + """Format a single file with XML tags and line numbers. 
+ + Args: + file_path (Path): Path to the file to format + tag_name (str): The XML tag name (submission, solution, test_output) + + Returns: + str: Formatted file content with XML tags + """ + if not file_path: + return "" + + filename = os.path.basename(file_path) + content = "" + + try: + # Handle PDF files separately + if filename.lower().endswith('.pdf'): + text_content = extract_pdf_text(file_path) + content += f"<{tag_name} file=\"{filename}\">\n" + lines = text_content.split('\n') for i, line in enumerate(lines, start=1): - stripped_line = line.rstrip('\n').rstrip() + stripped_line = line.rstrip() if stripped_line.strip(): - file_contents += f"(Line {i}) {stripped_line}\n" + content += f"(Line {i}) {stripped_line}\n" else: - file_contents += f"(Line {i}) \n" - file_contents += "\n" + content += f"(Line {i}) \n" + content += f"\n\n" + else: + # Handle regular text files + with open(file_path, "r", encoding="utf-8") as file: + lines = file.readlines() - except Exception as e: - print(f"Error reading file {filename}: {e}") - continue + content += f"<{tag_name} file=\"{filename}\">\n" + for i, line in enumerate(lines, start=1): + stripped_line = line.rstrip("\n") + if stripped_line.strip(): + content += f"(Line {i}) {stripped_line}\n" + else: + content += f"(Line {i}) {line}" + content += f"\n\n" - return file_contents + except Exception as e: + print(f"Error reading file {filename}: {e}") + return "" + + return content def extract_pdf_text(pdf_path: str) -> str: @@ -233,6 +264,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: Args: assignment_files (List[Optional[Path]]): List of Path or None objects to parse. + Expected order: [submission, solution] question_num (int): The target task number to extract. 
Returns: @@ -243,8 +275,10 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: """ file_contents = "" task_found = False + + semantic_tags = ["submission", "solution"] - for file_path in assignment_files: + for index, file_path in enumerate(assignment_files): if ( not file_path or file_path.suffix != '.txt' @@ -266,9 +300,11 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: task_content = task_match.group(1).strip() task_found = True - file_contents += f"\n\n---\n### {file_path}\n\n" + tag_name = semantic_tags[index] if index < len(semantic_tags) else "file" + file_contents += f"<{tag_name} file=\"{file_path.name}\">\n" file_contents += intro_content + "\n\n" if intro_content else "" file_contents += task_content + "\n\n" + file_contents += f"\n\n" if not task_found: print(f"Task {question_num} not found in any assignment file.") From 9852a58286ba6e02a75e8942d27beeaa8fac72e6 Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Tue, 8 Jul 2025 16:14:51 -0400 Subject: [PATCH 02/12] Copy changes of integration_tests from xml-tag branch --- tests/open_ai_model_tests/integration_test.py | 79 ++++++++++++++++--- 1 file changed, 70 insertions(+), 9 deletions(-) diff --git a/tests/open_ai_model_tests/integration_test.py b/tests/open_ai_model_tests/integration_test.py index 46f63ef..b0a34a6 100644 --- a/tests/open_ai_model_tests/integration_test.py +++ b/tests/open_ai_model_tests/integration_test.py @@ -4,8 +4,7 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture): - """ - Example 1: + """Example 1: Evaluate cnn_example test using openAI model and print to stdout. python -m ai_feedback --prompt code_lines --scope code \ --submission test_submissions/cnn_example/cnn_submission \ @@ -30,13 +29,12 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture): assert "Compare the student's code and solution code. 
For each mistake" in output assert "(Line 1) import numpy as np" in output - assert "=== cnn_submission.py ===" in output - assert "=== cnn_solution.py ===" in output + assert '' in output + assert '' in output def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture): - """ - Example 2: + """Example 2: Evaluate cnn_example test using openAI model and a custom prompt text, printing to stdout. python -m ai_feedback --prompt_text "Evaluate the student's code readability." \ --scope code \ @@ -58,13 +56,12 @@ def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture): ] output = run_cli_and_capture(args, capsys) assert "Evaluate the student's code readability." in output - assert "=== cnn_submission.py ===" in output + assert '' in output assert "(Line 1) import numpy as np" in output def test_pdf_example_openai_direct(capsys, mock_and_capture): - """ - Example 3: + """Example 3: Evaluate pdf_example test using openAI model and direct output mode. python -m ai_feedback --prompt text_pdf_analyze --scope text \ --submission test_submissions/pdf_example/student_pdf_submission.pdf \ @@ -86,3 +83,67 @@ def test_pdf_example_openai_direct(capsys, mock_and_capture): assert "Does the student correctly respond to the question, and meet all the" in output assert "student_pdf_submission.pdf" in output assert "Normalization allows each feature to have an equal influence on the mode" in output + + +def test_xml_formatting_code_scope(capsys, mock_and_capture): + """ + Test XML formatting for file contents in code scope. + Verifies that file contents use XML tags while file references remain plain text. 
+ """ + parent = Path(__file__).parent.parent.parent + + args = [ + "--prompt_text", + "File references: {file_references}\n\nFile contents:\n{file_contents}", + "--scope", + "code", + "--submission", + str(parent / "test_submissions/csc108/correct_submission/correct_submission.py"), + "--solution", + str(parent / "test_submissions/csc108/solution.py"), + "--model", + "openai" + ] + output = run_cli_and_capture(args, capsys) + + assert "The student's submission file is correct_submission.py." in output + assert "The instructor's solution file is solution.py." in output + + assert '' in output + assert '' in output + assert '' in output + assert '' in output + + assert "(Line 1) def fizzbuzz(n: int) -> list:" in output + + +def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture): + """ + Test XML formatting for file contents in text scope with all file types. + Verifies submission, solution, and test_output files all use XML formatting. + """ + parent = Path(__file__).parent.parent.parent + + args = [ + "--prompt_text", + "File references: {file_references}\n\nFile contents:\n{file_contents}", + "--submission_type", + "python", + "--scope", + "text", + "--submission", + str(parent / "test_submissions/ggr274_homework5/test1/student_submission.txt"), + "--solution", + str(parent / "test_submissions/ggr274_homework5/test1/Homework_5_solution.txt"), + "--model", + "openai" + ] + output = run_cli_and_capture(args, capsys) + + assert "The student's submission file is student_submission.txt." in output + assert "The instructor's solution file is Homework_5_solution.txt." 
in output + + assert '' in output + assert '' in output + assert '' in output + assert '' in output From b95fb53835997769fd32afeea261ccb4bba94312 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:07:11 +0000 Subject: [PATCH 03/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ai_feedback/helpers/template_utils.py | 20 ++++++++++--------- tests/open_ai_model_tests/integration_test.py | 18 ++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index d87cd6e..867b667 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -84,19 +84,21 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp return "\n".join(references) -def gather_xml_file_contents(submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str: +def gather_xml_file_contents( + submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None +) -> str: """Generate file contents with XML tags for prompt templates. 
Args: submission (Path): Student's submission file path - solution (Path, optional): Instructor's solution file path + solution (Path, optional): Instructor's solution file path test_output (Path, optional): Student's test output file path Returns: str: File contents formatted with XML tags and line numbers """ file_contents = "" - + file_contents += _format_file_with_xml_tag(submission, "submission") if solution: @@ -104,26 +106,26 @@ def gather_xml_file_contents(submission: Path, solution: Optional[Path] = None, if test_output: file_contents += _format_file_with_xml_tag(test_output, "test_output") - + return file_contents def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: """Format a single file with XML tags and line numbers. - + Args: file_path (Path): Path to the file to format tag_name (str): The XML tag name (submission, solution, test_output) - + Returns: str: Formatted file content with XML tags """ if not file_path: return "" - + filename = os.path.basename(file_path) content = "" - + try: # Handle PDF files separately if filename.lower().endswith('.pdf'): @@ -275,7 +277,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: """ file_contents = "" task_found = False - + semantic_tags = ["submission", "solution"] for index, file_path in enumerate(assignment_files): diff --git a/tests/open_ai_model_tests/integration_test.py b/tests/open_ai_model_tests/integration_test.py index b0a34a6..86e53f2 100644 --- a/tests/open_ai_model_tests/integration_test.py +++ b/tests/open_ai_model_tests/integration_test.py @@ -95,20 +95,20 @@ def test_xml_formatting_code_scope(capsys, mock_and_capture): args = [ "--prompt_text", "File references: {file_references}\n\nFile contents:\n{file_contents}", - "--scope", + "--scope", "code", "--submission", str(parent / "test_submissions/csc108/correct_submission/correct_submission.py"), "--solution", str(parent / "test_submissions/csc108/solution.py"), "--model", - "openai" + "openai", 
] output = run_cli_and_capture(args, capsys) - + assert "The student's submission file is correct_submission.py." in output assert "The instructor's solution file is solution.py." in output - + assert '' in output assert '' in output assert '' in output @@ -130,19 +130,19 @@ def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture): "--submission_type", "python", "--scope", - "text", + "text", "--submission", str(parent / "test_submissions/ggr274_homework5/test1/student_submission.txt"), "--solution", str(parent / "test_submissions/ggr274_homework5/test1/Homework_5_solution.txt"), "--model", - "openai" + "openai", ] output = run_cli_and_capture(args, capsys) - + assert "The student's submission file is student_submission.txt." in output - assert "The instructor's solution file is Homework_5_solution.txt." in output - + assert "The instructor's solution file is Homework_5_solution.txt." in output + assert '' in output assert '' in output assert '' in output From 654a8eff09336b56475d6d2c6ad85608e435349b Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Wed, 9 Jul 2025 10:06:46 -0400 Subject: [PATCH 04/12] Make submission optional --- ai_feedback/helpers/template_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index d87cd6e..223f1ec 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -64,11 +64,11 @@ def render_prompt_template( return prompt_content.format(**template_data) -def gather_file_references(submission: Path, solution: Optional[Path], test_output: Optional[Path]) -> str: +def gather_file_references(submission: Optional[Path], solution: Optional[Path], test_output: Optional[Path]) -> str: """Generate file reference descriptions for prompt templates. 
Args: - submission (Path): Student's submission file path + submission (Path, optional): Student's submission file path solution (Path, optional): Instructor's solution file path test_output (Path, optional): Student's test output file path @@ -76,7 +76,8 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp str: Descriptions like "The instructor's solution file..." """ references: List[str] = [] - references.append(f"The student's submission file is {submission.name}.") + if submission: + references.append(f"The student's submission file is {submission.name}.") if solution: references.append(f"The instructor's solution file is {solution.name}.") if test_output: From c35dbe0f55d815a23fabf38dc6e74112d1d8c629 Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Wed, 9 Jul 2025 10:11:47 -0400 Subject: [PATCH 05/12] Make submission optional --- ai_feedback/helpers/template_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 99a6ba6..22291ef 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -86,12 +86,12 @@ def gather_file_references(submission: Optional[Path], solution: Optional[Path], def gather_xml_file_contents( - submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None + submission: Optional[Path], solution: Optional[Path] = None, test_output: Optional[Path] = None ) -> str: """Generate file contents with XML tags for prompt templates. 
Args: - submission (Path): Student's submission file path + submission (Path, optional): Student's submission file path solution (Path, optional): Instructor's solution file path test_output (Path, optional): Student's test output file path @@ -100,7 +100,8 @@ def gather_xml_file_contents( """ file_contents = "" - file_contents += _format_file_with_xml_tag(submission, "submission") + if submission: + file_contents += _format_file_with_xml_tag(submission, "submission") if solution: file_contents += _format_file_with_xml_tag(solution, "solution") From ce8ba7b0d6f39dfb213f09bd99ca2b2431e1ef9a Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Wed, 9 Jul 2025 10:19:25 -0400 Subject: [PATCH 06/12] Add helper func to reduce duplicated logic --- ai_feedback/helpers/template_utils.py | 54 +++++++++++++++++---------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 22291ef..39c5312 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -112,6 +112,38 @@ def gather_xml_file_contents( return file_contents +def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str: + """Wrap lines with XML tags and add line numbers. 
+ + Args: + lines (List[str]): List of lines to format + tag_name (str): The XML tag name (submission, solution, test_output) + filename (str): The filename to include in the XML tag + is_pdf (bool): Whether this is PDF content (affects empty line handling) + + Returns: + str: Formatted content with XML tags and line numbers + """ + content = f"<{tag_name} file=\"{filename}\">\n" + + for i, line in enumerate(lines, start=1): + if is_pdf: + stripped_line = line.rstrip() + if stripped_line.strip(): + content += f"(Line {i}) {stripped_line}\n" + else: + content += f"(Line {i}) \n" + else: + stripped_line = line.rstrip("\n") + if stripped_line.strip(): + content += f"(Line {i}) {stripped_line}\n" + else: + content += f"(Line {i}) {line}" + + content += f"\n\n" + return content + + def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: """Format a single file with XML tags and line numbers. @@ -126,41 +158,23 @@ def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: return "" filename = os.path.basename(file_path) - content = "" try: # Handle PDF files separately if filename.lower().endswith('.pdf'): text_content = extract_pdf_text(file_path) - content += f"<{tag_name} file=\"{filename}\">\n" lines = text_content.split('\n') - for i, line in enumerate(lines, start=1): - stripped_line = line.rstrip() - if stripped_line.strip(): - content += f"(Line {i}) {stripped_line}\n" - else: - content += f"(Line {i}) \n" - content += f"\n\n" + return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=True) else: # Handle regular text files with open(file_path, "r", encoding="utf-8") as file: lines = file.readlines() - - content += f"<{tag_name} file=\"{filename}\">\n" - for i, line in enumerate(lines, start=1): - stripped_line = line.rstrip("\n") - if stripped_line.strip(): - content += f"(Line {i}) {stripped_line}\n" - else: - content += f"(Line {i}) {line}" - content += f"\n\n" + return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=False) 
except Exception as e: print(f"Error reading file {filename}: {e}") return "" - return content - def extract_pdf_text(pdf_path: str) -> str: """Extract text content from a PDF file. From 2f2a01c82c40c96d30e2edf3d59affc43ada66aa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 14:19:59 +0000 Subject: [PATCH 07/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ai_feedback/helpers/template_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 39c5312..e2c29c8 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -114,18 +114,18 @@ def gather_xml_file_contents( def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str: """Wrap lines with XML tags and add line numbers. 
- + Args: lines (List[str]): List of lines to format tag_name (str): The XML tag name (submission, solution, test_output) filename (str): The filename to include in the XML tag is_pdf (bool): Whether this is PDF content (affects empty line handling) - + Returns: str: Formatted content with XML tags and line numbers """ content = f"<{tag_name} file=\"{filename}\">\n" - + for i, line in enumerate(lines, start=1): if is_pdf: stripped_line = line.rstrip() @@ -139,7 +139,7 @@ def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: content += f"(Line {i}) {stripped_line}\n" else: content += f"(Line {i}) {line}" - + content += f"\n\n" return content From 7902ecbe33abcba36130d9c7d186b82b4332180b Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Wed, 9 Jul 2025 10:40:12 -0400 Subject: [PATCH 08/12] Fix Optional format --- ai_feedback/helpers/template_utils.py | 68 +++++++++++++-------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 39c5312..9cd54bb 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -64,7 +64,7 @@ def render_prompt_template( return prompt_content.format(**template_data) -def gather_file_references(submission: Optional[Path], solution: Optional[Path], test_output: Optional[Path]) -> str: +def gather_file_references(submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str: """Generate file reference descriptions for prompt templates. 
Args: @@ -86,7 +86,7 @@ def gather_file_references(submission: Optional[Path], solution: Optional[Path], def gather_xml_file_contents( - submission: Optional[Path], solution: Optional[Path] = None, test_output: Optional[Path] = None + submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None ) -> str: """Generate file contents with XML tags for prompt templates. @@ -112,38 +112,6 @@ def gather_xml_file_contents( return file_contents -def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str: - """Wrap lines with XML tags and add line numbers. - - Args: - lines (List[str]): List of lines to format - tag_name (str): The XML tag name (submission, solution, test_output) - filename (str): The filename to include in the XML tag - is_pdf (bool): Whether this is PDF content (affects empty line handling) - - Returns: - str: Formatted content with XML tags and line numbers - """ - content = f"<{tag_name} file=\"{filename}\">\n" - - for i, line in enumerate(lines, start=1): - if is_pdf: - stripped_line = line.rstrip() - if stripped_line.strip(): - content += f"(Line {i}) {stripped_line}\n" - else: - content += f"(Line {i}) \n" - else: - stripped_line = line.rstrip("\n") - if stripped_line.strip(): - content += f"(Line {i}) {stripped_line}\n" - else: - content += f"(Line {i}) {line}" - - content += f"\n\n" - return content - - def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: """Format a single file with XML tags and line numbers. @@ -176,6 +144,38 @@ def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: return "" +def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str: + """Wrap lines with XML tags and add line numbers. 
+ + Args: + lines (List[str]): List of lines to format + tag_name (str): The XML tag name (submission, solution, test_output) + filename (str): The filename to include in the XML tag + is_pdf (bool): Whether this is PDF content (affects empty line handling) + + Returns: + str: Formatted content with XML tags and line numbers + """ + content = f"<{tag_name} file=\"{filename}\">\n" + + for i, line in enumerate(lines, start=1): + if is_pdf: + stripped_line = line.rstrip() + if stripped_line.strip(): + content += f"(Line {i}) {stripped_line}\n" + else: + content += f"(Line {i}) \n" + else: + stripped_line = line.rstrip("\n") + if stripped_line.strip(): + content += f"(Line {i}) {stripped_line}\n" + else: + content += f"(Line {i}) {line}" + + content += f"\n\n" + return content + + def extract_pdf_text(pdf_path: str) -> str: """Extract text content from a PDF file. From a14c3b539e005d554d3c8c77403b85241ce976a3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 14:45:33 +0000 Subject: [PATCH 09/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ai_feedback/helpers/template_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 47bdf11..e9db6b3 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -64,7 +64,9 @@ def render_prompt_template( return prompt_content.format(**template_data) -def gather_file_references(submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str: +def gather_file_references( + submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None +) -> str: """Generate file reference descriptions for prompt templates. 
Args: From c83d5562ca238fc693d9c17f353d1863403901d6 Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Thu, 10 Jul 2025 09:32:44 -0400 Subject: [PATCH 10/12] Update attribute name --- ai_feedback/helpers/template_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index 47bdf11..f3b478c 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -156,7 +156,7 @@ def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: Returns: str: Formatted content with XML tags and line numbers """ - content = f"<{tag_name} file=\"{filename}\">\n" + content = f"<{tag_name} filename=\"{filename}\">\n" for i, line in enumerate(lines, start=1): if is_pdf: @@ -319,7 +319,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: task_found = True tag_name = semantic_tags[index] if index < len(semantic_tags) else "file" - file_contents += f"<{tag_name} file=\"{file_path.name}\">\n" + file_contents += f"<{tag_name} filename=\"{file_path.name}\">\n" file_contents += intro_content + "\n\n" if intro_content else "" file_contents += task_content + "\n\n" file_contents += f"\n\n" From c6544576a881add9c617bcf48c06caad2d505ef6 Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Thu, 10 Jul 2025 09:33:03 -0400 Subject: [PATCH 11/12] Update tests accordingly --- tests/open_ai_model_tests/integration_test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/open_ai_model_tests/integration_test.py b/tests/open_ai_model_tests/integration_test.py index 86e53f2..832f0f0 100644 --- a/tests/open_ai_model_tests/integration_test.py +++ b/tests/open_ai_model_tests/integration_test.py @@ -29,8 +29,8 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture): assert "Compare the student's code and solution code. 
For each mistake" in output assert "(Line 1) import numpy as np" in output - assert '' in output - assert '' in output + assert '' in output + assert '' in output def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture): @@ -56,7 +56,7 @@ def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture): ] output = run_cli_and_capture(args, capsys) assert "Evaluate the student's code readability." in output - assert '' in output + assert '' in output assert "(Line 1) import numpy as np" in output @@ -109,9 +109,9 @@ def test_xml_formatting_code_scope(capsys, mock_and_capture): assert "The student's submission file is correct_submission.py." in output assert "The instructor's solution file is solution.py." in output - assert '' in output + assert '' in output assert '' in output - assert '' in output + assert '' in output assert '' in output assert "(Line 1) def fizzbuzz(n: int) -> list:" in output @@ -143,7 +143,7 @@ def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture): assert "The student's submission file is student_submission.txt." in output assert "The instructor's solution file is Homework_5_solution.txt." 
in output - assert '' in output + assert '' in output assert '' in output - assert '' in output + assert '' in output assert '' in output From 2da992108e9eeea2e0611698b660c8dbcfc9d281 Mon Sep 17 00:00:00 2001 From: Rolland-He Date: Thu, 10 Jul 2025 09:57:35 -0400 Subject: [PATCH 12/12] Remove logic of line numbering for pdf --- ai_feedback/helpers/template_utils.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py index e6dc67c..feb7d5f 100644 --- a/ai_feedback/helpers/template_utils.py +++ b/ai_feedback/helpers/template_utils.py @@ -133,27 +133,25 @@ def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: # Handle PDF files separately if filename.lower().endswith('.pdf'): text_content = extract_pdf_text(file_path) - lines = text_content.split('\n') - return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=True) + return f"<{tag_name} filename=\"{filename}\">\n{text_content}\n\n\n" else: # Handle regular text files with open(file_path, "r", encoding="utf-8") as file: lines = file.readlines() - return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=False) + return _wrap_lines_with_xml(lines, tag_name, filename) except Exception as e: print(f"Error reading file {filename}: {e}") return "" -def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str: +def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str) -> str: """Wrap lines with XML tags and add line numbers. 
Args: lines (List[str]): List of lines to format tag_name (str): The XML tag name (submission, solution, test_output) filename (str): The filename to include in the XML tag - is_pdf (bool): Whether this is PDF content (affects empty line handling) Returns: str: Formatted content with XML tags and line numbers @@ -161,18 +159,11 @@ def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: content = f"<{tag_name} filename=\"{filename}\">\n" for i, line in enumerate(lines, start=1): - if is_pdf: - stripped_line = line.rstrip() - if stripped_line.strip(): - content += f"(Line {i}) {stripped_line}\n" - else: - content += f"(Line {i}) \n" + stripped_line = line.rstrip("\n") + if stripped_line.strip(): + content += f"(Line {i}) {stripped_line}\n" else: - stripped_line = line.rstrip("\n") - if stripped_line.strip(): - content += f"(Line {i}) {stripped_line}\n" - else: - content += f"(Line {i}) {line}" + content += f"(Line {i}) {line}" content += f"\n\n" return content