-
Notifications
You must be signed in to change notification settings - Fork 3
Wrap file contents in XML tags #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
b25e216
9852a58
b95fb53
654a8ef
73b482b
c35dbe0
ce8ba7b
2f2a01c
7902ecb
10e5804
a14c3b5
c83d556
c654457
2b8ee04
2da9921
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ def render_prompt_template( | |
| if question_num is not None: | ||
| template_data['file_contents'] = _get_question_contents([submission, solution], question_num) | ||
| else: | ||
| template_data['file_contents'] = gather_file_contents([submission, solution, test_output]) | ||
| template_data['file_contents'] = gather_xml_file_contents(submission, solution, test_output) | ||
|
|
||
| # Handle image placeholders with context-aware replacement | ||
| if '{submission_image}' in prompt_content and 'submission_image' not in template_data: | ||
|
|
@@ -84,47 +84,80 @@ def gather_file_references(submission: Path, solution: Optional[Path], test_outp | |
| return "\n".join(references) | ||
|
|
||
|
|
||
| def gather_file_contents(assignment_files: List[Optional[Path]]) -> str: | ||
| """Generate file contents with line numbers for prompt templates. | ||
| def gather_xml_file_contents( | ||
| submission: Path, solution: Optional[Path] = None, test_output: Optional[Path] = None | ||
| ) -> str: | ||
| """Generate file contents with XML tags for prompt templates. | ||
|
|
||
| Args: | ||
| assignment_files (list[str]): List of file paths to process | ||
| submission (Path): Student's submission file path | ||
| solution (Path, optional): Instructor's solution file path | ||
| test_output (Path, optional): Student's test output file path | ||
|
|
||
| Returns: | ||
| str: File contents formatted with line numbers | ||
| str: File contents formatted with XML tags and line numbers | ||
| """ | ||
| file_contents = "" | ||
|
|
||
| for file_path in assignment_files: | ||
| if not file_path: | ||
| continue | ||
| filename = os.path.basename(file_path) | ||
|
|
||
| try: | ||
| # Handle PDF files separately | ||
| if filename.lower().endswith('.pdf'): | ||
| text_content = extract_pdf_text(file_path) | ||
| lines = text_content.split('\n') | ||
| else: | ||
| # Handle regular text files | ||
| with open(file_path, "r", encoding="utf-8") as file: | ||
| lines = file.readlines() | ||
|
|
||
| # Common processing for both file types | ||
| file_contents += f"=== {filename} ===\n" | ||
| file_contents += _format_file_with_xml_tag(submission, "submission") | ||
|
|
||
| if solution: | ||
| file_contents += _format_file_with_xml_tag(solution, "solution") | ||
|
|
||
| if test_output: | ||
| file_contents += _format_file_with_xml_tag(test_output, "test_output") | ||
|
|
||
| return file_contents | ||
|
|
||
|
|
||
| def _format_file_with_xml_tag(file_path: Path, tag_name: str) -> str: | ||
| """Format a single file with XML tags and line numbers. | ||
|
|
||
| Args: | ||
| file_path (Path): Path to the file to format | ||
| tag_name (str): The XML tag name (submission, solution, test_output) | ||
|
|
||
| Returns: | ||
| str: Formatted file content with XML tags | ||
| """ | ||
| if not file_path: | ||
| return "" | ||
|
|
||
| filename = os.path.basename(file_path) | ||
| content = "" | ||
|
|
||
| try: | ||
| # Handle PDF files separately | ||
| if filename.lower().endswith('.pdf'): | ||
| text_content = extract_pdf_text(file_path) | ||
| content += f"<{tag_name} file=\"{filename}\">\n" | ||
| lines = text_content.split('\n') | ||
| for i, line in enumerate(lines, start=1): | ||
| stripped_line = line.rstrip('\n').rstrip() | ||
| stripped_line = line.rstrip() | ||
|
||
| if stripped_line.strip(): | ||
| file_contents += f"(Line {i}) {stripped_line}\n" | ||
| content += f"(Line {i}) {stripped_line}\n" | ||
| else: | ||
| file_contents += f"(Line {i}) \n" | ||
| file_contents += "\n" | ||
| content += f"(Line {i}) \n" | ||
| content += f"</{tag_name}>\n\n" | ||
| else: | ||
| # Handle regular text files | ||
| with open(file_path, "r", encoding="utf-8") as file: | ||
| lines = file.readlines() | ||
|
|
||
| except Exception as e: | ||
| print(f"Error reading file {filename}: {e}") | ||
| continue | ||
| content += f"<{tag_name} file=\"{filename}\">\n" | ||
| for i, line in enumerate(lines, start=1): | ||
| stripped_line = line.rstrip("\n") | ||
| if stripped_line.strip(): | ||
| content += f"(Line {i}) {stripped_line}\n" | ||
| else: | ||
| content += f"(Line {i}) {line}" | ||
| content += f"</{tag_name}>\n\n" | ||
|
|
||
| return file_contents | ||
| except Exception as e: | ||
| print(f"Error reading file {filename}: {e}") | ||
| return "" | ||
|
|
||
| return content | ||
|
|
||
|
|
||
| def extract_pdf_text(pdf_path: str) -> str: | ||
|
|
@@ -233,6 +266,7 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: | |
|
|
||
| Args: | ||
| assignment_files (List[Optional[Path]]): List of Path or None objects to parse. | ||
| Expected order: [submission, solution] | ||
| question_num (int): The target task number to extract. | ||
|
|
||
| Returns: | ||
|
|
@@ -244,7 +278,9 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: | |
| file_contents = "" | ||
| task_found = False | ||
|
|
||
| for file_path in assignment_files: | ||
| semantic_tags = ["submission", "solution"] | ||
|
|
||
| for index, file_path in enumerate(assignment_files): | ||
| if ( | ||
| not file_path | ||
| or file_path.suffix != '.txt' | ||
|
|
@@ -266,9 +302,11 @@ def _get_question_contents(assignment_files: List[Optional[Path]], question_num: | |
| task_content = task_match.group(1).strip() | ||
| task_found = True | ||
|
|
||
| file_contents += f"\n\n---\n### {file_path}\n\n" | ||
| tag_name = semantic_tags[index] if index < len(semantic_tags) else "file" | ||
| file_contents += f"<{tag_name} file=\"{file_path.name}\">\n" | ||
| file_contents += intro_content + "\n\n" if intro_content else "" | ||
| file_contents += task_content + "\n\n" | ||
| file_contents += f"</{tag_name}>\n\n" | ||
|
|
||
| if not task_found: | ||
| print(f"Task {question_num} not found in any assignment file.") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All of these args should be typed as Optional[Path].
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.