-
Notifications
You must be signed in to change notification settings - Fork 3
Wrap file contents in XML tags #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
b25e216
9852a58
b95fb53
654a8ef
73b482b
c35dbe0
ce8ba7b
2f2a01c
7902ecb
10e5804
a14c3b5
c83d556
c654457
2b8ee04
2da9921
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ def render_prompt_template( | |
| if question_num is not None: | ||
| template_data['file_contents'] = _get_question_contents([submission, solution], question_num) | ||
| else: | ||
| template_data['file_contents'] = gather_file_contents([submission, solution, test_output]) | ||
| template_data['file_contents'] = gather_xml_file_contents(submission, solution, test_output) | ||
|
|
||
| # Handle image placeholders with context-aware replacement | ||
| if '{submission_image}' in prompt_content and 'submission_image' not in template_data: | ||
|
|
@@ -64,69 +64,120 @@ def render_prompt_template( | |
| return prompt_content.format(**template_data) | ||
|
|
||
|
|
||
| def gather_file_references(submission: Path, solution: Optional[Path], test_output: Optional[Path]) -> str: | ||
| def gather_file_references( | ||
| submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None | ||
| ) -> str: | ||
| """Generate file reference descriptions for prompt templates. | ||
|
|
||
| Args: | ||
| submission (Path): Student's submission file path | ||
| submission (Path, optional): Student's submission file path | ||
| solution (Path, optional): Instructor's solution file path | ||
| test_output (Path, optional): Student's test output file path | ||
|
|
||
| Returns: | ||
| str: Descriptions like "The instructor's solution file..." | ||
| """ | ||
| references: List[str] = [] | ||
| references.append(f"The student's submission file is {submission.name}.") | ||
| if submission: | ||
| references.append(f"The student's submission file is {submission.name}.") | ||
| if solution: | ||
| references.append(f"The instructor's solution file is {solution.name}.") | ||
| if test_output: | ||
| references.append(f"The student's test output file is {test_output.name}.") | ||
| return "\n".join(references) | ||
|
|
||
|
|
||
| def gather_file_contents(assignment_files: List[Optional[Path]]) -> str: | ||
| """Generate file contents with line numbers for prompt templates. | ||
| def gather_xml_file_contents( | ||
| submission: Optional[Path] = None, solution: Optional[Path] = None, test_output: Optional[Path] = None | ||
| ) -> str: | ||
| """Generate file contents with XML tags for prompt templates. | ||
|
|
||
| Args: | ||
| assignment_files (list[str]): List of file paths to process | ||
| submission (Path, optional): Student's submission file path | ||
| solution (Path, optional): Instructor's solution file path | ||
| test_output (Path, optional): Student's test output file path | ||
|
|
||
| Returns: | ||
| str: File contents formatted with line numbers | ||
| str: File contents formatted with XML tags and line numbers | ||
| """ | ||
| file_contents = "" | ||
|
|
||
| for file_path in assignment_files: | ||
| if not file_path: | ||
| continue | ||
| filename = os.path.basename(file_path) | ||
| if submission: | ||
| file_contents += _format_file_with_xml_tag(submission, "submission") | ||
|
|
||
| try: | ||
| # Handle PDF files separately | ||
| if filename.lower().endswith('.pdf'): | ||
| text_content = extract_pdf_text(file_path) | ||
| lines = text_content.split('\n') | ||
| else: | ||
| # Handle regular text files | ||
| with open(file_path, "r", encoding="utf-8") as file: | ||
| lines = file.readlines() | ||
|
|
||
| # Common processing for both file types | ||
| file_contents += f"=== {filename} ===\n" | ||
| for i, line in enumerate(lines, start=1): | ||
| stripped_line = line.rstrip('\n').rstrip() | ||
| if stripped_line.strip(): | ||
| file_contents += f"(Line {i}) {stripped_line}\n" | ||
| else: | ||
| file_contents += f"(Line {i}) \n" | ||
| file_contents += "\n" | ||
|
|
||
| except Exception as e: | ||
| print(f"Error reading file {filename}: {e}") | ||
| continue | ||
| if solution: | ||
| file_contents += _format_file_with_xml_tag(solution, "solution") | ||
|
|
||
| if test_output: | ||
| file_contents += _format_file_with_xml_tag(test_output, "test_output") | ||
|
|
||
| return file_contents | ||
|
|
||
|
|
||
| def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: | ||
| """Format a single file with XML tags and line numbers. | ||
|
|
||
| Args: | ||
| file_path (Path): Path to the file to format | ||
| tag_name (str): The XML tag name (submission, solution, test_output) | ||
|
|
||
| Returns: | ||
| str: Formatted file content with XML tags | ||
| """ | ||
| if not file_path: | ||
| return "" | ||
|
|
||
| filename = os.path.basename(file_path) | ||
|
|
||
| try: | ||
| # Handle PDF files separately | ||
| if filename.lower().endswith('.pdf'): | ||
| text_content = extract_pdf_text(file_path) | ||
| lines = text_content.split('\n') | ||
| return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=True) | ||
| else: | ||
| # Handle regular text files | ||
| with open(file_path, "r", encoding="utf-8") as file: | ||
| lines = file.readlines() | ||
| return _wrap_lines_with_xml(lines, tag_name, filename, is_pdf=False) | ||
|
|
||
| except Exception as e: | ||
| print(f"Error reading file {filename}: {e}") | ||
| return "" | ||
|
|
||
|
|
||
| def _wrap_lines_with_xml(lines: List[str], tag_name: str, filename: str, is_pdf: bool = False) -> str: | ||
| """Wrap lines with XML tags and add line numbers. | ||
|
|
||
| Args: | ||
| lines (List[str]): List of lines to format | ||
| tag_name (str): The XML tag name (submission, solution, test_output) | ||
| filename (str): The filename to include in the XML tag | ||
| is_pdf (bool): Whether this is PDF content (affects empty line handling) | ||
|
|
||
| Returns: | ||
| str: Formatted content with XML tags and line numbers | ||
| """ | ||
| content = f"<{tag_name} file=\"{filename}\">\n" | ||
|
|
||
| for i, line in enumerate(lines, start=1): | ||
| if is_pdf: | ||
|
||
| stripped_line = line.rstrip() | ||
| if stripped_line.strip(): | ||
| content += f"(Line {i}) {stripped_line}\n" | ||
| else: | ||
| content += f"(Line {i}) \n" | ||
| else: | ||
| stripped_line = line.rstrip("\n") | ||
| if stripped_line.strip(): | ||
| content += f"(Line {i}) {stripped_line}\n" | ||
| else: | ||
| content += f"(Line {i}) {line}" | ||
|
|
||
| content += f"</{tag_name}>\n\n" | ||
| return content | ||
|
|
||
|
|
||
| def extract_pdf_text(pdf_path: str) -> str: | ||
| """Extract text content from a PDF file. | ||
|
|
||
|
|
@@ -233,6 +284,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: | |
|
|
||
| Args: | ||
| assignment_files (List[Optional[Path]]): List of Path or None objects to parse. | ||
| Expected order: [submission, solution] | ||
| question_num (int): The target task number to extract. | ||
|
|
||
| Returns: | ||
|
|
@@ -244,7 +296,9 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: | |
| file_contents = "" | ||
| task_found = False | ||
|
|
||
| for file_path in assignment_files: | ||
| semantic_tags = ["submission", "solution"] | ||
|
|
||
| for index, file_path in enumerate(assignment_files): | ||
| if ( | ||
| not file_path | ||
| or file_path.suffix != '.txt' | ||
|
|
@@ -266,9 +320,11 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: | |
| task_content = task_match.group(1).strip() | ||
| task_found = True | ||
|
|
||
| file_contents += f"\n\n---\n### {file_path}\n\n" | ||
| tag_name = semantic_tags[index] if index < len(semantic_tags) else "file" | ||
| file_contents += f"<{tag_name} file=\"{file_path.name}\">\n" | ||
| file_contents += intro_content + "\n\n" if intro_content else "" | ||
| file_contents += task_content + "\n\n" | ||
| file_contents += f"</{tag_name}>\n\n" | ||
|
|
||
| if not task_found: | ||
| print(f"Task {question_num} not found in any assignment file.") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use the attribute name `filename` instead of `file`, here and throughout all XML tags that have to do with files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.