1+ import chardet
2+ import re
3+ from file_processing .errors import FileProcessingFailedError
4+ from file_processing .file_processor_strategy import FileProcessorStrategy
5+
class HFileProcessor(FileProcessorStrategy):
    """
    Processor for C/C++ header files (.h) that extracts text and simple counts.

    The counts are regex-based heuristics, not the result of parsing C/C++:
    matches inside string literals or comments are counted as well.

    Attributes:
        metadata (dict): Empty until ``process()`` succeeds, or
            ``{'message': 'File was not opened'}`` when ``open_file`` is False.
            After a successful ``process()`` it contains:
            'text' (str): Full file content decoded with the detected encoding.
            'encoding' (str): Encoding reported by chardet ('utf-8' fallback).
            'num_lines' (int): Number of lines per ``str.splitlines()``.
            'num_includes' (int): Lines matching ``#include "..."`` / ``<...>``.
            'num_macros' (int): Lines matching ``#define NAME``.
            'num_structs' (int): Occurrences of ``struct NAME`` (definitions,
                forward declarations and usages alike).
            'num_classes' (int): Occurrences of ``class NAME``.
            'num_comments' (int): ``//`` line comments plus ``/* ... */`` blocks.
    """

    # Compiled once per class instead of on every process() call.
    _INCLUDE_PATTERN = re.compile(r'^\s*#\s*include\s+["<].*[">]', re.MULTILINE)
    _MACRO_PATTERN = re.compile(r'^\s*#\s*define\s+\w+', re.MULTILINE)
    _STRUCT_PATTERN = re.compile(r'\bstruct\s+\w+', re.MULTILINE)
    _CLASS_PATTERN = re.compile(r'\bclass\s+\w+', re.MULTILINE)
    # DOTALL lets a /* ... */ block span several lines.
    _COMMENT_PATTERN = re.compile(r'(//[^\n]*|/\*.*?\*/)', re.DOTALL)

    def __init__(self, file_path: str, open_file: bool = True) -> None:
        """
        Store the path and initialise ``metadata``.

        Args:
            file_path: Path of the header file to handle.
            open_file: When False, ``process()`` does nothing and ``metadata``
                only carries a 'message' entry saying the file was not opened.
        """
        super().__init__(file_path, open_file)
        self.metadata = {'message': 'File was not opened'} if not open_file else {}

    def process(self) -> None:
        """
        Read the file, detect its encoding and fill ``metadata`` with counts.

        Does nothing when ``self.open_file`` is falsy.

        Raises:
            FileProcessingFailedError: If reading, encoding detection or
                decoding fails for any reason; the original exception is
                chained as ``__cause__``.
        """
        if not self.open_file:
            return
        try:
            # Read raw bytes for detection; the context manager guarantees the
            # handle is closed (the previous bare open().read() leaked it).
            with open(self.file_path, 'rb') as raw_file:
                raw_data = raw_file.read()
            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

            # Re-open in text mode so newline translation matches what a
            # normal text read of the file would give.
            with open(self.file_path, 'r', encoding=encoding, errors='replace') as f:
                text = f.read()

            self.metadata.update({
                'text': text,
                'encoding': encoding,
                'num_lines': len(text.splitlines()),
                'num_includes': len(self._INCLUDE_PATTERN.findall(text)),
                'num_macros': len(self._MACRO_PATTERN.findall(text)),
                'num_structs': len(self._STRUCT_PATTERN.findall(text)),
                'num_classes': len(self._CLASS_PATTERN.findall(text)),
                'num_comments': len(self._COMMENT_PATTERN.findall(text)),
            })

        except Exception as e:
            raise FileProcessingFailedError(
                f"Error processing {self.file_path} : {e} "
            ) from e

    def save(self, output_path: str = None) -> None:
        """
        Write the stored text back to disk using the stored encoding.

        Args:
            output_path: Destination path; when omitted or empty the original
                ``file_path`` is overwritten.

        Raises:
            FileProcessingFailedError: If ``metadata`` has no 'text'/'encoding'
                (e.g. ``process()`` was never run), if the text cannot be
                encoded, or if writing fails; the original exception is
                chained as ``__cause__``.
        """
        # Resolved outside the try so the error message can always use it.
        save_path = output_path or self.file_path
        try:
            text = self.metadata['text']
            encoding = self.metadata['encoding']
            # Validate encodability BEFORE opening with 'w': opening truncates
            # the target, so an encode failure mid-write would destroy the
            # existing file (possible because process() decodes with
            # errors='replace', which can introduce U+FFFD).
            text.encode(encoding)
            with open(save_path, 'w', encoding=encoding) as f:
                f.write(text)
        except Exception as e:
            raise FileProcessingFailedError(
                f"Error saving {self.file_path} to {save_path} : {e} "
            ) from e
0 commit comments