|
| 1 | +import chardet |
| 2 | +from file_processing.errors import FileProcessingFailedError |
| 3 | +from file_processing.file_processor_strategy import FileProcessorStrategy |
| 4 | + |
class JavaFileProcessor(FileProcessorStrategy):
    """
    Processor for handling Java files (.java), extracting source code metadata and content.

    Attributes:
        metadata (dict): Contains extracted metadata such as 'text', 'encoding',
            'num_lines', 'num_characters', 'num_methods', and 'num_classes'.
            Holds only a 'message' entry when the processor is created with
            ``open_file=False``.
    """

    # Regex heuristics used for counting; they work on raw text (not a parsed
    # syntax tree), so the resulting counts are approximations.
    _METHOD_PATTERN = r'\b(public|private|protected|static|\s)+\s+\w+\s+\w+\s*\([^\)]*\)\s*\{'
    _CLASS_PATTERN = r'\b(class|interface|enum)\s+\w+'

    # Used when chardet cannot determine an encoding (it reports None then),
    # so that reading and saving never depend on the platform locale default.
    _FALLBACK_ENCODING = 'utf-8'

    def __init__(self, file_path: str, open_file: bool = True) -> None:
        """
        Initializes the JavaFileProcessor with the specified file path.

        Args:
            file_path (str): Path to the Java file to process.
            open_file (bool): Indicates whether to open and process the file immediately.

        Sets:
            metadata (dict): Populated with 'message' if `open_file` is False,
                otherwise starts out empty.
        """
        super().__init__(file_path, open_file)
        self.metadata = {'message': 'File was not opened'} if not open_file else {}

    def process(self) -> None:
        """
        Extracts metadata from the Java source file if it is open and accessible.

        Metadata extracted includes the source text, file encoding, number of lines,
        number of characters, number of methods, and number of classes. Does nothing
        when the processor was created with ``open_file=False``.

        Raises:
            FileProcessingFailedError: If an error occurs during Java file processing.
        """
        if not self.open_file:
            return

        import re

        try:
            # Read the raw bytes first so the encoding can be detected; the
            # context manager guarantees the handle is closed.
            with open(self.file_path, 'rb') as raw_file:
                raw_data = raw_file.read()

            # chardet reports None when it cannot decide (e.g. an empty file).
            encoding = chardet.detect(raw_data)['encoding'] or self._FALLBACK_ENCODING

            with open(self.file_path, 'r', encoding=encoding) as f:
                text = f.read()

            method_pattern = re.compile(self._METHOD_PATTERN, re.MULTILINE)
            class_pattern = re.compile(self._CLASS_PATTERN, re.MULTILINE)

            self.metadata.update({
                'text': text,
                'encoding': encoding,
                'num_lines': len(text.splitlines()),
                'num_characters': len(text),
                'num_methods': len(method_pattern.findall(text)),
                'num_classes': len(class_pattern.findall(text)),
            })
        except Exception as e:
            # Convert any failure into the package's error type, keeping the
            # original exception attached as the cause.
            raise FileProcessingFailedError(
                f"Error encountered while processing {self.file_path}: {e}"
            ) from e

    def save(self, output_path: str = None) -> None:
        """
        Saves the processed Java file to the specified output path with current metadata.

        The text stored in ``metadata['text']`` is written using the encoding
        stored in ``metadata['encoding']``.

        Args:
            output_path (str): Path to save the processed Java file. If None, overwrites the original file.

        Raises:
            FileProcessingFailedError: If the file has not been processed yet, or
                if an error occurs while saving the Java file.
        """
        save_path = output_path or self.file_path

        # Fail with a clear message (and before touching the target file) when
        # there is nothing to write, e.g. the processor was never run.
        if 'text' not in self.metadata or 'encoding' not in self.metadata:
            raise FileProcessingFailedError(
                f"Error encountered while saving file {self.file_path} to {save_path}: "
                "file has not been processed, no text or encoding available"
            )

        try:
            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
                f.write(self.metadata['text'])
        except Exception as e:
            raise FileProcessingFailedError(
                f"Error encountered while saving file {self.file_path} to {save_path}: {e}"
            ) from e
0 commit comments