@@ -42,7 +42,7 @@ def render_prompt_template(
4242 if question_num is not None :
4343 template_data ['file_contents' ] = _get_question_contents ([submission , solution ], question_num )
4444 else :
45- template_data ['file_contents' ] = gather_file_contents ([ submission , solution , test_output ] )
45+ template_data ['file_contents' ] = gather_xml_file_contents ( submission , solution , test_output )
4646
4747 # Handle image placeholders with context-aware replacement
4848 if '{submission_image}' in prompt_content and 'submission_image' not in template_data :
@@ -64,69 +64,111 @@ def render_prompt_template(
6464 return prompt_content .format (** template_data )
6565
6666
def gather_file_references(
    submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None
) -> str:
    """Build one descriptive sentence per provided file for prompt templates.

    Each argument that is supplied contributes a single sentence naming the
    file (only its base name, not the full path). Arguments that are omitted
    or falsy contribute nothing.

    Args:
        submission (Path, optional): Student's submission file path
        solution (Path, optional): Instructor's solution file path
        test_output (Path, optional): Student's test output file path

    Returns:
        str: The sentences joined by newlines, in the order submission,
            solution, test output; an empty string when no file is given.
    """
    # The order of this table fixes the order of the sentences in the output.
    sentence_templates = (
        (submission, "The student's submission file is {name}."),
        (solution, "The instructor's solution file is {name}."),
        (test_output, "The student's test output file is {name}."),
    )
    return "\n".join(
        template.format(name=path.name)
        for path, template in sentence_templates
        if path
    )
8588
8689
def gather_xml_file_contents(
    submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None
) -> str:
    """Concatenate the XML-tagged contents of the provided assignment files.

    Every file that is supplied is rendered by ``_format_file_with_xml_tag``
    under a tag naming its role; omitted or falsy arguments are skipped.

    Args:
        submission (Path, optional): Student's submission file path
        solution (Path, optional): Instructor's solution file path
        test_output (Path, optional): Student's test output file path

    Returns:
        str: The rendered sections in the order submission, solution,
            test output; an empty string when no file is given.
    """
    # Pair each path with the tag describing its role; the table order is
    # the order in which the sections appear in the result.
    tagged_paths = (
        (submission, "submission"),
        (solution, "solution"),
        (test_output, "test_output"),
    )
    return "".join(
        _format_file_with_xml_tag(path, tag)
        for path, tag in tagged_paths
        if path
    )
128115
129116
117+ def _format_file_with_xml_tag (file_path : Path , tag_name : str ) -> str :
118+ """Format a single file with XML tags and line numbers.
119+
120+ Args:
121+ file_path (Path): Path to the file to format
122+ tag_name (str): The XML tag name (submission, solution, test_output)
123+
124+ Returns:
125+ str: Formatted file content with XML tags
126+ """
127+ if not file_path :
128+ return ""
129+
130+ filename = os .path .basename (file_path )
131+
132+ try :
133+ # Handle PDF files separately
134+ if filename .lower ().endswith ('.pdf' ):
135+ text_content = extract_pdf_text (file_path )
136+ return f"<{ tag_name } filename=\" { filename } \" >\n { text_content } \n </{ tag_name } >\n \n "
137+ else :
138+ # Handle regular text files
139+ with open (file_path , "r" , encoding = "utf-8" ) as file :
140+ lines = file .readlines ()
141+ return _wrap_lines_with_xml (lines , tag_name , filename )
142+
143+ except Exception as e :
144+ print (f"Error reading file { filename } : { e } " )
145+ return ""
146+
147+
148+ def _wrap_lines_with_xml (lines : List [str ], tag_name : str , filename : str ) -> str :
149+ """Wrap lines with XML tags and add line numbers.
150+
151+ Args:
152+ lines (List[str]): List of lines to format
153+ tag_name (str): The XML tag name (submission, solution, test_output)
154+ filename (str): The filename to include in the XML tag
155+
156+ Returns:
157+ str: Formatted content with XML tags and line numbers
158+ """
159+ content = f"<{ tag_name } filename=\" { filename } \" >\n "
160+
161+ for i , line in enumerate (lines , start = 1 ):
162+ stripped_line = line .rstrip ("\n " )
163+ if stripped_line .strip ():
164+ content += f"(Line { i } ) { stripped_line } \n "
165+ else :
166+ content += f"(Line { i } ) { line } "
167+
168+ content += f"</{ tag_name } >\n \n "
169+ return content
170+
171+
130172def extract_pdf_text (pdf_path : str ) -> str :
131173 """Extract text content from a PDF file.
132174
@@ -233,6 +275,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
233275
234276 Args:
235277 assignment_files (List[Optional[Path]]): List of Path or None objects to parse.
278+ Expected order: [submission, solution]
236279 question_num (int): The target task number to extract.
237280
238281 Returns:
@@ -244,7 +287,9 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
244287 file_contents = ""
245288 task_found = False
246289
247- for file_path in assignment_files :
290+ semantic_tags = ["submission" , "solution" ]
291+
292+ for index , file_path in enumerate (assignment_files ):
248293 if (
249294 not file_path
250295 or file_path .suffix != '.txt'
@@ -266,9 +311,11 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num:
266311 task_content = task_match .group (1 ).strip ()
267312 task_found = True
268313
269- file_contents += f"\n \n ---\n ### { file_path } \n \n "
314+ tag_name = semantic_tags [index ] if index < len (semantic_tags ) else "file"
315+ file_contents += f"<{ tag_name } filename=\" { file_path .name } \" >\n "
270316 file_contents += intro_content + "\n \n " if intro_content else ""
271317 file_contents += task_content + "\n \n "
318+ file_contents += f"</{ tag_name } >\n \n "
272319
273320 if not task_found :
274321 print (f"Task { question_num } not found in any assignment file." )
0 commit comments