hc-sc-ocdo-bdpd
diff --git a/‎file_processing/file.py‎
Lines changed: 6 additions & 0 deletions b/‎file_processing/file.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎file_processing/processors/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎file_processing/processors/__init__.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎file_processing/processors/cpp_processor.py‎
Lines changed: 57 additions & 0 deletions b/‎file_processing/processors/cpp_processor.py‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎file_processing/processors/go_processor.py‎
Lines changed: 57 additions & 0 deletions b/‎file_processing/processors/go_processor.py‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎file_processing/processors/h_processor.py‎
Lines changed: 75 additions & 0 deletions b/‎file_processing/processors/h_processor.py‎
Lines changed: 75 additions & 0 deletions
diff --git a/‎file_processing/processors/js_processor.py‎
Lines changed: 70 additions & 0 deletions b/‎file_processing/processors/js_processor.py‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎file_processing/processors/rb_processor.py‎
Lines changed: 57 additions & 0 deletions b/‎file_processing/processors/rb_processor.py‎
Lines changed: 57 additions & 0 deletions
@@ -30,10 +30,15 @@ class File:
     TRANSCRIPTION_APPLICABLE_EXTENSIONS = {".mp3", ".wav", ".mp4", ".flac", ".aiff", ".ogg"}
 
     PROCESSORS = {
+        ".cpp": processors.CppFileProcessor,
+        ".cc": processors.CppFileProcessor,
         ".csv": processors.CsvFileProcessor,
+        ".js": processors.JsFileProcessor,
         ".txt": processors.TextFileProcessor,
         ".pdf": processors.PdfFileProcessor,
         ".docx": processors.DocxFileProcessor,
+        ".h": processors.HFileProcessor,
+        ".go": processors.GoFileProcessor,
         ".msg": processors.MsgFileProcessor,
         ".pptx": processors.PptxFileProcessor,
         ".rtf": processors.RtfFileProcessor,
@@ -50,6 +55,7 @@ class File:
         ".wav": processors.AudioFileProcessor,
         ".mp4": processors.AudioFileProcessor,
         ".flac": processors.AudioFileProcessor,
+        ".rb": processors.RbFileProcessor,
         ".aiff": processors.AudioFileProcessor,
         ".ogg": processors.AudioFileProcessor,
         ".py": processors.PyFileProcessor,
 
@@ -17,6 +17,7 @@
 from .txt_processor import TextFileProcessor
 from .pdf_processor import PdfFileProcessor
 from .docx_processor import DocxFileProcessor
+from .h_processor import HFileProcessor
 from .msg_processor import MsgFileProcessor
 from .png_processor import PngFileProcessor
 from .xlsx_processor import XlsxFileProcessor
@@ -41,3 +42,7 @@
 from .exe_processor import ExeFileProcessor
 from .whl_processor import WhlFileProcessor
 from .java_processor import JavaFileProcessor
+from .go_processor import GoFileProcessor
+from .rb_processor import RbFileProcessor
+from .cpp_processor import CppFileProcessor
+from .js_processor import JsFileProcessor
@@ -0,0 +1,57 @@
+import chardet
+import re
+from file_processing.errors import FileProcessingFailedError
+from file_processing.file_processor_strategy import FileProcessorStrategy
+
+class CppFileProcessor(FileProcessorStrategy):
+    """
+    Processor for handling C++ source files (.cpp, .cc), extracting metadata and content.
+
+    Attributes:
+        metadata (dict): Contains metadata such as 'text', 'encoding', 'num_lines',
+                         'num_functions', 'num_classes', and 'num_comments'.
+    """
+
+    def __init__(self, file_path: str, open_file: bool = True) -> None:
+        super().__init__(file_path, open_file)
+        self.metadata = {'message': 'File was not opened'} if not open_file else {}
+
+    def process(self) -> None:
+        if not self.open_file:
+            return
+        try:
+            raw_data = open(self.file_path, 'rb').read()
+            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
+
+            with open(self.file_path, 'r', encoding=encoding, errors='replace') as f:
+                text = f.read()
+
+                num_lines = len(text.splitlines())
+
+                num_functions = len(re.findall(r'\b[a-zA-Z_]\w*\s+[a-zA-Z_]\w*\s*\(.*?\)\s*\{', text))
+                num_classes = len(re.findall(r'\bclass\s+\w+', text))
+                num_comments = len(re.findall(r'(//[^\n]*|/\*.*?\*/)', text, re.DOTALL))
+
+                self.metadata.update({
+                    'text': text,
+                    'encoding': encoding,
+                    'num_lines': num_lines,
+                    'num_functions': num_functions,
+                    'num_classes': num_classes,
+                    'num_comments': num_comments
+                })
+
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error processing {self.file_path}: {e}"
+            )
+
+    def save(self, output_path: str = None) -> None:
+        try:
+            save_path = output_path or self.file_path
+            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
+                f.write(self.metadata['text'])
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error saving {self.file_path} to {save_path}: {e}"
+            )
@@ -0,0 +1,57 @@
+import chardet
+import re
+from file_processing.errors import FileProcessingFailedError
+from file_processing.file_processor_strategy import FileProcessorStrategy
+
+class GoFileProcessor(FileProcessorStrategy):
+    """
+    Processor for handling Go source files (.go), extracting metadata and content.
+
+    Attributes:
+        metadata (dict): Contains metadata such as 'text', 'encoding', 'num_lines',
+                         'num_functions', 'num_structs', and 'num_interfaces'.
+    """
+
+    def __init__(self, file_path: str, open_file: bool = True) -> None:
+        super().__init__(file_path, open_file)
+        self.metadata = {'message': 'File was not opened'} if not open_file else {}
+
+    def process(self) -> None:
+        if not self.open_file:
+            return
+        try:
+            raw_data = open(self.file_path, 'rb').read()
+            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
+
+            with open(self.file_path, 'r', encoding=encoding) as f:
+                text = f.read()
+
+                num_lines = len(text.splitlines())
+
+                num_functions = len(re.findall(r'\bfunc\s+\w+\s*\(', text))
+                num_structs = len(re.findall(r'\btype\s+\w+\s+struct\s*\{', text))
+                num_interfaces = len(re.findall(r'\btype\s+\w+\s+interface\s*\{', text))
+
+                self.metadata.update({
+                    'text': text,
+                    'encoding': encoding,
+                    'num_lines': num_lines,
+                    'num_functions': num_functions,
+                    'num_structs': num_structs,
+                    'num_interfaces': num_interfaces
+                })
+
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error processing {self.file_path}: {e}"
+            )
+
+    def save(self, output_path: str = None) -> None:
+        try:
+            save_path = output_path or self.file_path
+            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
+                f.write(self.metadata['text'])
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error saving {self.file_path} to {save_path}: {e}"
+            )
@@ -0,0 +1,75 @@
+import chardet
+import re
+from file_processing.errors import FileProcessingFailedError
+from file_processing.file_processor_strategy import FileProcessorStrategy
+
+class HFileProcessor(FileProcessorStrategy):
+    """
+    Processor for handling C/C++ header files (.h), extracting metadata and content.
+
+    Attributes:
+        metadata (dict): Contains metadata such as:
+            'text' (str): Full file content as string.
+            'encoding' (str): Detected file encoding.
+            'num_lines' (int): Total number of lines.
+            'num_includes' (int): Count of preprocessor #include statements.
+            'num_macros' (int): Count of #define statements.
+            'num_structs' (int): Count of struct definitions.
+            'num_classes' (int): Count of class definitions (common in C++ headers).
+            'num_comments' (int): Count of single-line and multi-line comments.
+    """
+
+    def __init__(self, file_path: str, open_file: bool = True) -> None:
+        super().__init__(file_path, open_file)
+        self.metadata = {'message': 'File was not opened'} if not open_file else {}
+
+    def process(self) -> None:
+        if not self.open_file:
+            return
+        try:
+            raw_data = open(self.file_path, 'rb').read()
+            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
+
+            with open(self.file_path, 'r', encoding=encoding, errors='replace') as f:
+                text = f.read()
+
+            num_lines = len(text.splitlines())
+
+            # Regex patterns:
+            include_pattern = re.compile(r'^\s*#\s*include\s+["<].*[">]', re.MULTILINE)
+            macro_pattern = re.compile(r'^\s*#\s*define\s+\w+', re.MULTILINE)
+            struct_pattern = re.compile(r'\bstruct\s+\w+', re.MULTILINE)
+            class_pattern = re.compile(r'\bclass\s+\w+', re.MULTILINE)
+            comment_pattern = re.compile(r'(//[^\n]*|/\*.*?\*/)', re.DOTALL)
+
+            num_includes = len(include_pattern.findall(text))
+            num_macros = len(macro_pattern.findall(text))
+            num_structs = len(struct_pattern.findall(text))
+            num_classes = len(class_pattern.findall(text))
+            num_comments = len(comment_pattern.findall(text))
+
+            self.metadata.update({
+                'text': text,
+                'encoding': encoding,
+                'num_lines': num_lines,
+                'num_includes': num_includes,
+                'num_macros': num_macros,
+                'num_structs': num_structs,
+                'num_classes': num_classes,
+                'num_comments': num_comments,
+            })
+
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error processing {self.file_path}: {e}"
+            )
+
+    def save(self, output_path: str = None) -> None:
+        try:
+            save_path = output_path or self.file_path
+            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
+                f.write(self.metadata['text'])
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error saving {self.file_path} to {save_path}: {e}"
+            )
@@ -0,0 +1,70 @@
+import chardet
+import re
+from file_processing.errors import FileProcessingFailedError
+from file_processing.file_processor_strategy import FileProcessorStrategy
+
+class JsFileProcessor(FileProcessorStrategy):
+    """
+    Processor for handling JavaScript (.js) source files, extracting metadata and content.
+
+    Attributes:
+        metadata (dict): Contains metadata such as 'text', 'encoding', 'num_lines',
+                         'num_functions', 'num_classes', and 'num_comments'.
+    """
+
+    def __init__(self, file_path: str, open_file: bool = True) -> None:
+        super().__init__(file_path, open_file)
+        self.metadata = {'message': 'File was not opened'} if not open_file else {}
+
+    def process(self) -> None:
+        """Extracts JavaScript file metadata if open_file is True."""
+        if not self.open_file:
+            return
+        try:
+            raw_data = open(self.file_path, 'rb').read()
+            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
+
+            # Read using the detected (or fallback) encoding
+            with open(self.file_path, 'r', encoding=encoding, errors='replace') as f:
+                text = f.read()
+
+            num_lines = len(text.splitlines())
+
+            # Basic regex patterns:
+            # Matches 'function foo(...) { ... }' or 'export function foo(...) { ... }'
+            function_pattern = re.compile(r'\b(function|export\s+function)\s+[A-Za-z_]\w*\s*\(.*?\)\s*\{')
+
+            # Matches 'class SomeClass { ... }'
+            class_pattern = re.compile(r'\bclass\s+[A-Za-z_]\w*\s*\{?')
+
+            # Matches single-line (// ...) and multi-line (/* ... */) comments
+            comment_pattern = re.compile(r'(//[^\n]*|/\*.*?\*/)', re.DOTALL)
+
+            num_functions = len(function_pattern.findall(text))
+            num_classes = len(class_pattern.findall(text))
+            num_comments = len(comment_pattern.findall(text))
+
+            self.metadata.update({
+                'text': text,
+                'encoding': encoding,
+                'num_lines': num_lines,
+                'num_functions': num_functions,
+                'num_classes': num_classes,
+                'num_comments': num_comments
+            })
+
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error processing {self.file_path}: {e}"
+            )
+
+    def save(self, output_path: str = None) -> None:
+        """Saves the JavaScript file to the specified path."""
+        try:
+            save_path = output_path or self.file_path
+            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
+                f.write(self.metadata['text'])
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error saving {self.file_path} to {save_path}: {e}"
+            )
@@ -0,0 +1,57 @@
+import chardet
+import re
+from file_processing.errors import FileProcessingFailedError
+from file_processing.file_processor_strategy import FileProcessorStrategy
+
+class RbFileProcessor(FileProcessorStrategy):
+    """
+    Processor for handling Ruby (.rb) source files, extracting metadata and content.
+
+    Attributes:
+        metadata (dict): Contains metadata such as 'text', 'encoding', 'num_lines',
+                         'num_methods', 'num_classes', and 'num_modules'.
+    """
+
+    def __init__(self, file_path: str, open_file: bool = True) -> None:
+        super().__init__(file_path, open_file)
+        self.metadata = {'message': 'File was not opened'} if not open_file else {}
+
+    def process(self) -> None:
+        if not self.open_file:
+            return
+        try:
+            raw_data = open(self.file_path, 'rb').read()
+            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
+
+            with open(self.file_path, 'r', encoding=encoding) as f:
+                text = f.read()
+
+                num_lines = len(text.splitlines())
+
+                num_methods = len(re.findall(r'^\s*def\s+\w+', text, re.MULTILINE))
+                num_classes = len(re.findall(r'^\s*class\s+\w+', text, re.MULTILINE))
+                num_modules = len(re.findall(r'^\s*module\s+\w+', text, re.MULTILINE))
+
+                self.metadata.update({
+                    'text': text,
+                    'encoding': encoding,
+                    'num_lines': num_lines,
+                    'num_methods': num_methods,
+                    'num_classes': num_classes,
+                    'num_modules': num_modules
+                })
+
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error processing {self.file_path}: {e}"
+            )
+
+    def save(self, output_path: str = None) -> None:
+        try:
+            save_path = output_path or self.file_path
+            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
+                f.write(self.metadata['text'])
+        except Exception as e:
+            raise FileProcessingFailedError(
+                f"Error saving {self.file_path} to {save_path}: {e}"
+            )