From b25e216debbad0cc61af76c562ca98eb6d042575 Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Tue, 8 Jul 2025 16:13:02 -0400
Subject: [PATCH 01/12] Copy changes of template_utils from xml-tag branch

---
 ai_feedback/helpers/template_utils.py | 100 +++++++++++++++++---------
 1 file changed, 68 insertions(+), 32 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index 42374a6..d87cd6e 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -42,7 +42,7 @@ def render_prompt_template(
     if question_num is not None:
         template_data['file_contents'] = _get_question_contents([submission, solution], question_num)
     else:
-        template_data['file_contents'] = gather_file_contents([submission, solution, test_output])
+        template_data['file_contents'] = gather_xml_file_contents(submission, solution, test_output)
 
     # Handle image placeholders with context-aware replacement
     if '{submission_image}' in prompt_content and 'submission_image' not in template_data:
@@ -84,47 +84,78 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp
     return "\n".join(references)
 
 
-def gather_file_contents(assignment_files: List[Optional[Path]]) -> str:
-    """Generate file contents with line numbers for prompt templates.
+def gather_xml_file_contents(submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str:
+    """Generate file contents with XML tags for prompt templates.
 
     Args:
-        assignment_files (list[str]): List of file paths to process
+        submission (Path): Student's submission file path
+        solution (Path, optional): Instructor's solution file path  
+        test_output (Path, optional): Student's test output file path
 
     Returns:
-        str: File contents formatted with line numbers
+        str: File contents formatted with XML tags and line numbers
     """
     file_contents = ""
+    
+    file_contents += _format_file_with_xml_tag(submission, "submission")
 
-    for file_path in assignment_files:
-        if not file_path:
-            continue
-        filename = os.path.basename(file_path)
-
-        try:
-            # Handle PDF files separately
-            if filename.lower().endswith('.pdf'):
-                text_content = extract_pdf_text(file_path)
-                lines = text_content.split('\n')
-            else:
-                # Handle regular text files
-                with open(file_path, "r", encoding="utf-8") as file:
-                    lines = file.readlines()
-
-            # Common processing for both file types
-            file_contents += f"=== {filename} ===\n"
+    if solution:
+        file_contents += _format_file_with_xml_tag(solution, "solution")
+
+    if test_output:
+        file_contents += _format_file_with_xml_tag(test_output, "test_output")
+    
+    return file_contents
+
+
+def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
+    """Format a single file with XML tags and line numbers.
+    
+    Args:
+        file_path (Path): Path to the file to format
+        tag_name (str): The XML tag name (submission, solution, test_output)
+    
+    Returns:
+        str: Formatted file content with XML tags
+    """
+    if not file_path:
+        return ""
+        
+    filename = os.path.basename(file_path)
+    content = ""
+    
+    try:
+        # Handle PDF files separately
+        if filename.lower().endswith('.pdf'):
+            text_content = extract_pdf_text(file_path)
+            content += f"<{tag_name} file=\"{filename}\">\n"
+            lines = text_content.split('\n')
             for i, line in enumerate(lines, start=1):
-                stripped_line = line.rstrip('\n').rstrip()
+                stripped_line = line.rstrip()
                 if stripped_line.strip():
-                    file_contents += f"(Line {i}) {stripped_line}\n"
+                    content += f"(Line {i}) {stripped_line}\n"
                 else:
-                    file_contents += f"(Line {i}) \n"
-            file_contents += "\n"
+                    content += f"(Line {i}) \n"
+            content += f"</{tag_name}>\n\n"
+        else:
+            # Handle regular text files
+            with open(file_path, "r", encoding="utf-8") as file:
+                lines = file.readlines()
 
-        except Exception as e:
-            print(f"Error reading file {filename}: {e}")
-            continue
+            content += f"<{tag_name} file=\"{filename}\">\n"
+            for i, line in enumerate(lines, start=1):
+                stripped_line = line.rstrip("\n")
+                if stripped_line.strip():
+                    content += f"(Line {i}) {stripped_line}\n"
+                else:
+                    content += f"(Line {i}) {line}"
+            content += f"</{tag_name}>\n\n"
 
-    return file_contents
+    except Exception as e:
+        print(f"Error reading file {filename}: {e}")
+        return ""
+
+    return content
 
 
 def extract_pdf_text(pdf_path: str) -> str:
@@ -233,6 +264,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
 
     Args:
         assignment_files (List[Optional[Path]]): List of Path or None objects to parse.
+            Expected order: [submission, solution]
         question_num (int): The target task number to extract.
 
     Returns:
@@ -243,8 +275,10 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
     """
     file_contents = ""
     task_found = False
+    
+    semantic_tags = ["submission", "solution"]
 
-    for file_path in assignment_files:
+    for index, file_path in enumerate(assignment_files):
         if (
             not file_path
             or file_path.suffix != '.txt'
@@ -266,9 +300,11 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
             task_content = task_match.group(1).strip()
             task_found = True
 
-        file_contents += f"\n\n---\n### {file_path}\n\n"
+        tag_name = semantic_tags[index] if index < len(semantic_tags) else "file"
+        file_contents += f"<{tag_name} file=\"{file_path.name}\">\n"
         file_contents += intro_content + "\n\n" if intro_content else ""
         file_contents += task_content + "\n\n"
+        file_contents += f"</{tag_name}>\n\n"
 
     if not task_found:
         print(f"Task {question_num} not found in any assignment file.")

From 9852a58286ba6e02a75e8942d27beeaa8fac72e6 Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Tue, 8 Jul 2025 16:14:51 -0400
Subject: [PATCH 02/12] Copy changes of integration_tests from xml-tag branch

---
 tests/open_ai_model_tests/integration_test.py | 79 ++++++++++++++++---
 1 file changed, 70 insertions(+), 9 deletions(-)

diff --git a/tests/open_ai_model_tests/integration_test.py b/tests/open_ai_model_tests/integration_test.py
index 46f63ef..b0a34a6 100644
--- a/tests/open_ai_model_tests/integration_test.py
+++ b/tests/open_ai_model_tests/integration_test.py
@@ -4,8 +4,7 @@
 
 
 def test_cnn_example_openai_stdout(capsys, mock_and_capture):
-    """
-    Example 1:
+    """Example 1:
     Evaluate cnn_example test using openAI model and print to stdout.
     python -m ai_feedback --prompt code_lines --scope code \
         --submission test_submissions/cnn_example/cnn_submission \
@@ -30,13 +29,12 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture):
 
     assert "Compare the student's code and solution code. For each mistake" in output
     assert "(Line 1) import numpy as np" in output
-    assert "=== cnn_submission.py ===" in output
-    assert "=== cnn_solution.py ===" in output
+    assert '<submission file="cnn_submission.py">' in output
+    assert '<solution file="cnn_solution.py">' in output
 
 
 def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture):
-    """
-    Example 2:
+    """Example 2:
     Evaluate cnn_example test using openAI model and a custom prompt text, printing to stdout.
     python -m ai_feedback --prompt_text "Evaluate the student's code readability." \
         --scope code \
@@ -58,13 +56,12 @@ def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture):
     ]
     output = run_cli_and_capture(args, capsys)
     assert "Evaluate the student's code readability." in output
-    assert "=== cnn_submission.py ===" in output
+    assert '<submission file="cnn_submission.py">' in output
     assert "(Line 1) import numpy as np" in output
 
 
 def test_pdf_example_openai_direct(capsys, mock_and_capture):
-    """
-    Example 3:
+    """Example 3:
     Evaluate pdf_example test using openAI model and direct output mode.
     python -m ai_feedback --prompt text_pdf_analyze --scope text \
         --submission test_submissions/pdf_example/student_pdf_submission.pdf \
@@ -86,3 +83,67 @@ def test_pdf_example_openai_direct(capsys, mock_and_capture):
     assert "Does the student correctly respond to the question, and meet all the" in output
     assert "student_pdf_submission.pdf" in output
     assert "Normalization allows each feature to have an equal influence on the mode" in output
+
+
+def test_xml_formatting_code_scope(capsys, mock_and_capture):
+    """
+    Test XML formatting for file contents in code scope.
+    Verifies that file contents use XML tags while file references remain plain text.
+    """
+    parent = Path(__file__).parent.parent.parent
+
+    args = [
+        "--prompt_text",
+        "File references: {file_references}\n\nFile contents:\n{file_contents}",
+        "--scope", 
+        "code",
+        "--submission",
+        str(parent / "test_submissions/csc108/correct_submission/correct_submission.py"),
+        "--solution",
+        str(parent / "test_submissions/csc108/solution.py"),
+        "--model",
+        "openai"
+    ]
+    output = run_cli_and_capture(args, capsys)
+    
+    assert "The student's submission file is correct_submission.py." in output
+    assert "The instructor's solution file is solution.py." in output
+    
+    assert '<submission file="correct_submission.py">' in output
+    assert '</submission>' in output
+    assert '<solution file="solution.py">' in output
+    assert '</solution>' in output
+
+    assert "(Line 1) def fizzbuzz(n: int) -> list:" in output
+
+
+def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture):
+    """
+    Test XML formatting for file contents in text scope with all file types.
+    Verifies submission, solution, and test_output files all use XML formatting.
+    """
+    parent = Path(__file__).parent.parent.parent
+
+    args = [
+        "--prompt_text",
+        "File references: {file_references}\n\nFile contents:\n{file_contents}",
+        "--submission_type",
+        "python",
+        "--scope",
+        "text", 
+        "--submission",
+        str(parent / "test_submissions/ggr274_homework5/test1/student_submission.txt"),
+        "--solution",
+        str(parent / "test_submissions/ggr274_homework5/test1/Homework_5_solution.txt"),
+        "--model",
+        "openai"
+    ]
+    output = run_cli_and_capture(args, capsys)
+    
+    assert "The student's submission file is student_submission.txt." in output
+    assert "The instructor's solution file is Homework_5_solution.txt." in output  
+    
+    assert '<submission file="student_submission.txt">' in output
+    assert '</submission>' in output
+    assert '<solution file="Homework_5_solution.txt">' in output
+    assert '</solution>' in output

From b95fb53835997769fd32afeea261ccb4bba94312 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 8 Jul 2025 21:07:11 +0000
Subject: [PATCH 03/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ai_feedback/helpers/template_utils.py         | 20 ++++++++++---------
 tests/open_ai_model_tests/integration_test.py | 18 ++++++++---------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index d87cd6e..867b667 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -84,19 +84,21 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp
     return "\n".join(references)
 
 
-def gather_xml_file_contents(submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str:
+def gather_xml_file_contents(
+    submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None
+) -> str:
     """Generate file contents with XML tags for prompt templates.
 
     Args:
         submission (Path): Student's submission file path
-        solution (Path, optional): Instructor's solution file path  
+        solution (Path, optional): Instructor's solution file path
         test_output (Path, optional): Student's test output file path
 
     Returns:
         str: File contents formatted with XML tags and line numbers
     """
     file_contents = ""
-    
+
     file_contents += _format_file_with_xml_tag(submission, "submission")
 
     if solution:
@@ -104,26 +106,26 @@ def gather_xml_file_contents(submission: Path, solution: Optional[Path] = None,
 
     if test_output:
         file_contents += _format_file_with_xml_tag(test_output, "test_output")
-    
+
     return file_contents
 
 
 def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
     """Format a single file with XML tags and line numbers.
-    
+
     Args:
         file_path (Path): Path to the file to format
         tag_name (str): The XML tag name (submission, solution, test_output)
-    
+
     Returns:
         str: Formatted file content with XML tags
     """
     if not file_path:
         return ""
-        
+
     filename = os.path.basename(file_path)
     content = ""
-    
+
     try:
         # Handle PDF files separately
         if filename.lower().endswith('.pdf'):
@@ -275,7 +277,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
     """
     file_contents = ""
     task_found = False
-    
+
     semantic_tags = ["submission", "solution"]
 
     for index, file_path in enumerate(assignment_files):
diff --git a/tests/open_ai_model_tests/integration_test.py b/tests/open_ai_model_tests/integration_test.py
index b0a34a6..86e53f2 100644
--- a/tests/open_ai_model_tests/integration_test.py
+++ b/tests/open_ai_model_tests/integration_test.py
@@ -95,20 +95,20 @@ def test_xml_formatting_code_scope(capsys, mock_and_capture):
     args = [
         "--prompt_text",
         "File references: {file_references}\n\nFile contents:\n{file_contents}",
-        "--scope", 
+        "--scope",
         "code",
         "--submission",
         str(parent / "test_submissions/csc108/correct_submission/correct_submission.py"),
         "--solution",
         str(parent / "test_submissions/csc108/solution.py"),
         "--model",
-        "openai"
+        "openai",
     ]
     output = run_cli_and_capture(args, capsys)
-    
+
     assert "The student's submission file is correct_submission.py." in output
     assert "The instructor's solution file is solution.py." in output
-    
+
     assert '<submission file="correct_submission.py">' in output
     assert '</submission>' in output
     assert '<solution file="solution.py">' in output
@@ -130,19 +130,19 @@ def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture):
         "--submission_type",
         "python",
         "--scope",
-        "text", 
+        "text",
         "--submission",
         str(parent / "test_submissions/ggr274_homework5/test1/student_submission.txt"),
         "--solution",
         str(parent / "test_submissions/ggr274_homework5/test1/Homework_5_solution.txt"),
         "--model",
-        "openai"
+        "openai",
     ]
     output = run_cli_and_capture(args, capsys)
-    
+
     assert "The student's submission file is student_submission.txt." in output
-    assert "The instructor's solution file is Homework_5_solution.txt." in output  
-    
+    assert "The instructor's solution file is Homework_5_solution.txt." in output
+
     assert '<submission file="student_submission.txt">' in output
     assert '</submission>' in output
     assert '<solution file="Homework_5_solution.txt">' in output

From 654a8eff09336b56475d6d2c6ad85608e435349b Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Wed, 9 Jul 2025 10:06:46 -0400
Subject: [PATCH 04/12] Make submission optional in gather_file_references

---
 ai_feedback/helpers/template_utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index d87cd6e..223f1ec 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -64,11 +64,11 @@ def render_prompt_template(
     return prompt_content.format(**template_data)
 
 
-def gather_file_references(submission: Path, solution: Optional[Path], test_output: Optional[Path]) -> str:
+def gather_file_references(submission: Optional[Path], solution: Optional[Path], test_output: Optional[Path]) -> str:
     """Generate file reference descriptions for prompt templates.
 
     Args:
-        submission (Path): Student's submission file path
+        submission (Path, optional): Student's submission file path
         solution (Path, optional): Instructor's solution file path
         test_output (Path, optional): Student's test output file path
 
@@ -76,7 +76,8 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp
         str: Descriptions like "The instructor's solution file..."
     """
     references: List[str] = []
-    references.append(f"The student's submission file is {submission.name}.")
+    if submission:
+        references.append(f"The student's submission file is {submission.name}.")
     if solution:
         references.append(f"The instructor's solution file is {solution.name}.")
     if test_output:

From c35dbe0f55d815a23fabf38dc6e74112d1d8c629 Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Wed, 9 Jul 2025 10:11:47 -0400
Subject: [PATCH 05/12] Make submission optional in gather_xml_file_contents

---
 ai_feedback/helpers/template_utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index 99a6ba6..22291ef 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -86,12 +86,12 @@ def gather_file_references(submission: Optional[Path], solution: Optional[Path],
 
 
 def gather_xml_file_contents(
-    submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None
+    submission: Optional[Path], solution: Optional[Path] = None, test_output: Optional[Path] = None
 ) -> str:
     """Generate file contents with XML tags for prompt templates.
 
     Args:
-        submission (Path): Student's submission file path
+        submission (Path, optional): Student's submission file path
         solution (Path, optional): Instructor's solution file path
         test_output (Path, optional): Student's test output file path
 
@@ -100,7 +100,8 @@ def gather_xml_file_contents(
     """
     file_contents = ""
 
-    file_contents += _format_file_with_xml_tag(submission, "submission")
+    if submission:
+        file_contents += _format_file_with_xml_tag(submission, "submission")
 
     if solution:
         file_contents += _format_file_with_xml_tag(solution, "solution")

From ce8ba7b0d6f39dfb213f09bd99ca2b2431e1ef9a Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Wed, 9 Jul 2025 10:19:25 -0400
Subject: [PATCH 06/12] Add helper func to reduce duplicated logic

---
 ai_feedback/helpers/template_utils.py | 54 +++++++++++++++++----------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index 22291ef..39c5312 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -112,6 +112,38 @@ def gather_xml_file_contents(
     return file_contents
 
 
+def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str:
+    """Wrap lines with XML tags and add line numbers.
+    
+    Args:
+        lines (List[str]): List of lines to format
+        tag_name (str): The XML tag name (submission, solution, test_output)
+        filename (str): The filename to include in the XML tag
+        is_pdf (bool): Whether this is PDF content (affects empty line handling)
+    
+    Returns:
+        str: Formatted content with XML tags and line numbers
+    """
+    content = f"<{tag_name} file=\"{filename}\">\n"
+    
+    for i, line in enumerate(lines, start=1):
+        if is_pdf:
+            stripped_line = line.rstrip()
+            if stripped_line.strip():
+                content += f"(Line {i}) {stripped_line}\n"
+            else:
+                content += f"(Line {i}) \n"
+        else:
+            stripped_line = line.rstrip("\n")
+            if stripped_line.strip():
+                content += f"(Line {i}) {stripped_line}\n"
+            else:
+                content += f"(Line {i}) {line}"
+    
+    content += f"</{tag_name}>\n\n"
+    return content
+
+
 def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
     """Format a single file with XML tags and line numbers.
 
@@ -126,41 +158,23 @@ def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
         return ""
 
     filename = os.path.basename(file_path)
-    content = ""
 
     try:
         # Handle PDF files separately
         if filename.lower().endswith('.pdf'):
             text_content = extract_pdf_text(file_path)
-            content += f"<{tag_name} file=\"{filename}\">\n"
             lines = text_content.split('\n')
-            for i, line in enumerate(lines, start=1):
-                stripped_line = line.rstrip()
-                if stripped_line.strip():
-                    content += f"(Line {i}) {stripped_line}\n"
-                else:
-                    content += f"(Line {i}) \n"
-            content += f"</{tag_name}>\n\n"
+            return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=True)
         else:
             # Handle regular text files
             with open(file_path, "r", encoding="utf-8") as file:
                 lines = file.readlines()
-
-            content += f"<{tag_name} file=\"{filename}\">\n"
-            for i, line in enumerate(lines, start=1):
-                stripped_line = line.rstrip("\n")
-                if stripped_line.strip():
-                    content += f"(Line {i}) {stripped_line}\n"
-                else:
-                    content += f"(Line {i}) {line}"
-            content += f"</{tag_name}>\n\n"
+            return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=False)
 
     except Exception as e:
         print(f"Error reading file {filename}: {e}")
         return ""
 
-    return content
-
 
 def extract_pdf_text(pdf_path: str) -> str:
     """Extract text content from a PDF file.

From 2f2a01c82c40c96d30e2edf3d59affc43ada66aa Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 9 Jul 2025 14:19:59 +0000
Subject: [PATCH 07/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ai_feedback/helpers/template_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index 39c5312..e2c29c8 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -114,18 +114,18 @@ def gather_xml_file_contents(
 
 def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str:
     """Wrap lines with XML tags and add line numbers.
-    
+
     Args:
         lines (List[str]): List of lines to format
         tag_name (str): The XML tag name (submission, solution, test_output)
         filename (str): The filename to include in the XML tag
         is_pdf (bool): Whether this is PDF content (affects empty line handling)
-    
+
     Returns:
         str: Formatted content with XML tags and line numbers
     """
     content = f"<{tag_name} file=\"{filename}\">\n"
-    
+
     for i, line in enumerate(lines, start=1):
         if is_pdf:
             stripped_line = line.rstrip()
@@ -139,7 +139,7 @@ def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf:
                 content += f"(Line {i}) {stripped_line}\n"
             else:
                 content += f"(Line {i}) {line}"
-    
+
     content += f"</{tag_name}>\n\n"
     return content
 

From 7902ecbe33abcba36130d9c7d186b82b4332180b Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Wed, 9 Jul 2025 10:40:12 -0400
Subject: [PATCH 08/12] Default Optional parameters to None; move _wrap_lines_with_xml below its caller

---
 ai_feedback/helpers/template_utils.py | 68 +++++++++++++--------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index 39c5312..9cd54bb 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -64,7 +64,7 @@ def render_prompt_template(
     return prompt_content.format(**template_data)
 
 
-def gather_file_references(submission: Optional[Path], solution: Optional[Path], test_output: Optional[Path]) -> str:
+def gather_file_references(submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str:
     """Generate file reference descriptions for prompt templates.
 
     Args:
@@ -86,7 +86,7 @@ def gather_file_references(submission: Optional[Path], solution: Optional[Path],
 
 
 def gather_xml_file_contents(
-    submission: Optional[Path], solution: Optional[Path] = None, test_output: Optional[Path] = None
+    submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None
 ) -> str:
     """Generate file contents with XML tags for prompt templates.
 
@@ -112,38 +112,6 @@ def gather_xml_file_contents(
     return file_contents
 
 
-def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str:
-    """Wrap lines with XML tags and add line numbers.
-    
-    Args:
-        lines (List[str]): List of lines to format
-        tag_name (str): The XML tag name (submission, solution, test_output)
-        filename (str): The filename to include in the XML tag
-        is_pdf (bool): Whether this is PDF content (affects empty line handling)
-    
-    Returns:
-        str: Formatted content with XML tags and line numbers
-    """
-    content = f"<{tag_name} file=\"{filename}\">\n"
-    
-    for i, line in enumerate(lines, start=1):
-        if is_pdf:
-            stripped_line = line.rstrip()
-            if stripped_line.strip():
-                content += f"(Line {i}) {stripped_line}\n"
-            else:
-                content += f"(Line {i}) \n"
-        else:
-            stripped_line = line.rstrip("\n")
-            if stripped_line.strip():
-                content += f"(Line {i}) {stripped_line}\n"
-            else:
-                content += f"(Line {i}) {line}"
-    
-    content += f"</{tag_name}>\n\n"
-    return content
-
-
 def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
     """Format a single file with XML tags and line numbers.
 
@@ -176,6 +144,38 @@ def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
         return ""
 
 
+def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str:
+    """Wrap lines with XML tags and add line numbers.
+    
+    Args:
+        lines (List[str]): List of lines to format
+        tag_name (str): The XML tag name (submission, solution, test_output)
+        filename (str): The filename to include in the XML tag
+        is_pdf (bool): Whether this is PDF content (affects empty line handling)
+    
+    Returns:
+        str: Formatted content with XML tags and line numbers
+    """
+    content = f"<{tag_name} file=\"{filename}\">\n"
+    
+    for i, line in enumerate(lines, start=1):
+        if is_pdf:
+            stripped_line = line.rstrip()
+            if stripped_line.strip():
+                content += f"(Line {i}) {stripped_line}\n"
+            else:
+                content += f"(Line {i}) \n"
+        else:
+            stripped_line = line.rstrip("\n")
+            if stripped_line.strip():
+                content += f"(Line {i}) {stripped_line}\n"
+            else:
+                content += f"(Line {i}) {line}"
+    
+    content += f"</{tag_name}>\n\n"
+    return content
+
+
 def extract_pdf_text(pdf_path: str) -> str:
     """Extract text content from a PDF file.
 

From a14c3b539e005d554d3c8c77403b85241ce976a3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 9 Jul 2025 14:45:33 +0000
Subject: [PATCH 09/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ai_feedback/helpers/template_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index 47bdf11..e9db6b3 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -64,7 +64,9 @@ def render_prompt_template(
     return prompt_content.format(**template_data)
 
 
-def gather_file_references(submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None) -> str:
+def gather_file_references(
+    submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None
+) -> str:
     """Generate file reference descriptions for prompt templates.
 
     Args:

From c83d5562ca238fc693d9c17f353d1863403901d6 Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Thu, 10 Jul 2025 09:32:44 -0400
Subject: [PATCH 10/12] Rename XML tag attribute from file to filename

---
 ai_feedback/helpers/template_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index 47bdf11..f3b478c 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -156,7 +156,7 @@ def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf:
     Returns:
         str: Formatted content with XML tags and line numbers
     """
-    content = f"<{tag_name} file=\"{filename}\">\n"
+    content = f"<{tag_name} filename=\"{filename}\">\n"
 
     for i, line in enumerate(lines, start=1):
         if is_pdf:
@@ -319,7 +319,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
             task_found = True
 
         tag_name = semantic_tags[index] if index < len(semantic_tags) else "file"
-        file_contents += f"<{tag_name} file=\"{file_path.name}\">\n"
+        file_contents += f"<{tag_name} filename=\"{file_path.name}\">\n"
         file_contents += intro_content + "\n\n" if intro_content else ""
         file_contents += task_content + "\n\n"
         file_contents += f"</{tag_name}>\n\n"

From c6544576a881add9c617bcf48c06caad2d505ef6 Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Thu, 10 Jul 2025 09:33:03 -0400
Subject: [PATCH 11/12] Update integration tests to expect the filename attribute

---
 tests/open_ai_model_tests/integration_test.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/open_ai_model_tests/integration_test.py b/tests/open_ai_model_tests/integration_test.py
index 86e53f2..832f0f0 100644
--- a/tests/open_ai_model_tests/integration_test.py
+++ b/tests/open_ai_model_tests/integration_test.py
@@ -29,8 +29,8 @@ def test_cnn_example_openai_stdout(capsys, mock_and_capture):
 
     assert "Compare the student's code and solution code. For each mistake" in output
     assert "(Line 1) import numpy as np" in output
-    assert '<submission file="cnn_submission.py">' in output
-    assert '<solution file="cnn_solution.py">' in output
+    assert '<submission filename="cnn_submission.py">' in output
+    assert '<solution filename="cnn_solution.py">' in output
 
 
 def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture):
@@ -56,7 +56,7 @@ def test_cnn_example_custom_prompt_stdout(capsys, mock_and_capture):
     ]
     output = run_cli_and_capture(args, capsys)
     assert "Evaluate the student's code readability." in output
-    assert '<submission file="cnn_submission.py">' in output
+    assert '<submission filename="cnn_submission.py">' in output
     assert "(Line 1) import numpy as np" in output
 
 
@@ -109,9 +109,9 @@ def test_xml_formatting_code_scope(capsys, mock_and_capture):
     assert "The student's submission file is correct_submission.py." in output
     assert "The instructor's solution file is solution.py." in output
 
-    assert '<submission file="correct_submission.py">' in output
+    assert '<submission filename="correct_submission.py">' in output
     assert '</submission>' in output
-    assert '<solution file="solution.py">' in output
+    assert '<solution filename="solution.py">' in output
     assert '</solution>' in output
 
     assert "(Line 1) def fizzbuzz(n: int) -> list:" in output
@@ -143,7 +143,7 @@ def test_xml_formatting_text_scope_with_test_output(capsys, mock_and_capture):
     assert "The student's submission file is student_submission.txt." in output
     assert "The instructor's solution file is Homework_5_solution.txt." in output
 
-    assert '<submission file="student_submission.txt">' in output
+    assert '<submission filename="student_submission.txt">' in output
     assert '</submission>' in output
-    assert '<solution file="Homework_5_solution.txt">' in output
+    assert '<solution filename="Homework_5_solution.txt">' in output
     assert '</solution>' in output

From 2da992108e9eeea2e0611698b660c8dbcfc9d281 Mon Sep 17 00:00:00 2001
From: Rolland-He <rolland.he@mail.utoronto.ca>
Date: Thu, 10 Jul 2025 09:57:35 -0400
Subject: [PATCH 12/12] Remove line-numbering logic for PDF content

---
 ai_feedback/helpers/template_utils.py | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/ai_feedback/helpers/template_utils.py b/ai_feedback/helpers/template_utils.py
index e6dc67c..feb7d5f 100644
--- a/ai_feedback/helpers/template_utils.py
+++ b/ai_feedback/helpers/template_utils.py
@@ -133,27 +133,25 @@ def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str:
         # Handle PDF files separately
         if filename.lower().endswith('.pdf'):
             text_content = extract_pdf_text(file_path)
-            lines = text_content.split('\n')
-            return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=True)
+            return f"<{tag_name} filename=\"{filename}\">\n{text_content}\n</{tag_name}>\n\n"
         else:
             # Handle regular text files
             with open(file_path, "r", encoding="utf-8") as file:
                 lines = file.readlines()
-            return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=False)
+            return _wrap_lines_with_xml(lines, tag_name, filename)
 
     except Exception as e:
         print(f"Error reading file {filename}: {e}")
         return ""
 
 
-def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str:
+def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str) -> str:
     """Wrap lines with XML tags and add line numbers.
 
     Args:
         lines (List[str]): List of lines to format
         tag_name (str): The XML tag name (submission, solution, test_output)
         filename (str): The filename to include in the XML tag
-        is_pdf (bool): Whether this is PDF content (affects empty line handling)
 
     Returns:
         str: Formatted content with XML tags and line numbers
@@ -161,18 +159,11 @@ def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf:
     content = f"<{tag_name} filename=\"{filename}\">\n"
 
     for i, line in enumerate(lines, start=1):
-        if is_pdf:
-            stripped_line = line.rstrip()
-            if stripped_line.strip():
-                content += f"(Line {i}) {stripped_line}\n"
-            else:
-                content += f"(Line {i}) \n"
+        stripped_line = line.rstrip("\n")
+        if stripped_line.strip():
+            content += f"(Line {i}) {stripped_line}\n"
         else:
-            stripped_line = line.rstrip("\n")
-            if stripped_line.strip():
-                content += f"(Line {i}) {stripped_line}\n"
-            else:
-                content += f"(Line {i}) {line}"
+            content += f"(Line {i}) {line}"
 
     content += f"</{tag_name}>\n\n"
     return content