Skip to content

Commit 0d0f4f5

Browse files
committed
h
1 parent 71ff580 commit 0d0f4f5

4 files changed

Lines changed: 129 additions & 0 deletions

File tree

file_processing/file.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class File:
3737
".txt": processors.TextFileProcessor,
3838
".pdf": processors.PdfFileProcessor,
3939
".docx": processors.DocxFileProcessor,
40+
".h": processors.HFileProcessor,
4041
".go": processors.GoFileProcessor,
4142
".msg": processors.MsgFileProcessor,
4243
".pptx": processors.PptxFileProcessor,

file_processing/processors/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from .txt_processor import TextFileProcessor
1818
from .pdf_processor import PdfFileProcessor
1919
from .docx_processor import DocxFileProcessor
20+
from .h_processor import HFileProcessor
2021
from .msg_processor import MsgFileProcessor
2122
from .png_processor import PngFileProcessor
2223
from .xlsx_processor import XlsxFileProcessor
file_processing/processors/h_processor.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import chardet
2+
import re
3+
from file_processing.errors import FileProcessingFailedError
4+
from file_processing.file_processor_strategy import FileProcessorStrategy
5+
6+
class HFileProcessor(FileProcessorStrategy):
    """
    Processor for handling C/C++ header files (.h), extracting metadata and content.

    The counts below come from simple regular-expression matching on the raw
    text, not from a C/C++ parser, so they are heuristic: for example a
    ``//`` inside a string literal is counted as a comment, and
    ``struct foo`` used as a type in a declaration is counted as a struct.

    Attributes:
        metadata (dict): Empty until ``process()`` succeeds, after which it
            contains:
            'text' (str): Full file content as string.
            'encoding' (str): Detected file encoding ('utf-8' if detection fails).
            'num_lines' (int): Total number of lines.
            'num_includes' (int): Count of preprocessor #include statements.
            'num_macros' (int): Count of #define statements.
            'num_structs' (int): Count of ``struct <name>`` occurrences.
            'num_classes' (int): Count of ``class <name>`` occurrences
                (common in C++ headers).
            'num_comments' (int): Count of single-line and multi-line comments.
            When the processor is created with ``open_file=False`` it holds
            only 'message' (str) stating that the file was not opened.
    """

    # Compiled once at class-definition time instead of on every process() call.
    _INCLUDE_PATTERN = re.compile(r'^\s*#\s*include\s+["<].*[">]', re.MULTILINE)
    _MACRO_PATTERN = re.compile(r'^\s*#\s*define\s+\w+', re.MULTILINE)
    _STRUCT_PATTERN = re.compile(r'\bstruct\s+\w+', re.MULTILINE)
    _CLASS_PATTERN = re.compile(r'\bclass\s+\w+', re.MULTILINE)
    # DOTALL lets the lazy /* ... */ alternative span multiple lines.
    _COMMENT_PATTERN = re.compile(r'(//[^\n]*|/\*.*?\*/)', re.DOTALL)

    def __init__(self, file_path: str, open_file: bool = True) -> None:
        """
        Initialize the processor.

        Args:
            file_path: Path of the header file to process.
            open_file: When False, the file is never read and ``metadata``
                only carries a 'message' entry saying so.
        """
        super().__init__(file_path, open_file)
        self.metadata = {'message': 'File was not opened'} if not open_file else {}

    def process(self) -> None:
        """
        Read the file, detect its encoding and populate ``metadata``.

        Does nothing when the processor was created with ``open_file=False``.

        Raises:
            FileProcessingFailedError: If the file cannot be read or decoded;
                the underlying exception is attached as ``__cause__``.
        """
        if not self.open_file:
            return
        try:
            # Read the raw bytes inside a context manager so the handle is
            # closed deterministically rather than whenever it is collected.
            with open(self.file_path, 'rb') as raw_file:
                raw_data = raw_file.read()
            # chardet reports None for input it cannot classify (e.g. an
            # empty file); fall back to UTF-8 in that case.
            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

            # Re-read in text mode so newline translation matches what
            # save() later writes back; undecodable bytes are replaced
            # instead of aborting the whole file.
            with open(self.file_path, 'r', encoding=encoding, errors='replace') as f:
                text = f.read()

            self.metadata.update({
                'text': text,
                'encoding': encoding,
                'num_lines': len(text.splitlines()),
                'num_includes': len(self._INCLUDE_PATTERN.findall(text)),
                'num_macros': len(self._MACRO_PATTERN.findall(text)),
                'num_structs': len(self._STRUCT_PATTERN.findall(text)),
                'num_classes': len(self._CLASS_PATTERN.findall(text)),
                'num_comments': len(self._COMMENT_PATTERN.findall(text)),
            })

        except Exception as e:
            # Chain the original exception so its traceback is preserved.
            raise FileProcessingFailedError(
                f"Error processing {self.file_path}: {e}"
            ) from e

    def save(self, output_path: str = None) -> None:
        """
        Write the processed text back to disk using the detected encoding.

        Args:
            output_path: Destination path. When omitted (or empty), the
                original ``file_path`` is overwritten.

        Raises:
            FileProcessingFailedError: If the destination cannot be written,
                or if ``metadata`` lacks 'text'/'encoding' because the file
                was never processed; the underlying exception is attached as
                ``__cause__``.
        """
        # Resolved outside the try block so the name is always bound when
        # the error message below is formatted.
        save_path = output_path or self.file_path
        try:
            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
                f.write(self.metadata['text'])
        except Exception as e:
            raise FileProcessingFailedError(
                f"Error saving {self.file_path} to {save_path}: {e}"
            ) from e

tests/unit/test_h_processor.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import os
2+
import pytest
3+
from unittest.mock import patch
4+
from file_processing.file import File
5+
from file_processing.errors import FileProcessingFailedError
6+
from file_processing_test_data import get_test_files_path
7+
8+
# Directory holding the sample header files used by the tests below.
test_files_path = get_test_files_path()

# Expected metadata for each sample header. Column order:
# (file_name, encoding, num_lines, num_includes, num_macros, num_structs, num_classes, num_comments)
values = [
    ('internal.h', 'ascii', 202, 5, 6, 4, 0, 23),
    ('rtp_av1.h', 'ascii', 132, 2, 19, 0, 0, 13),
    ('wglew.h', 'ascii', 958, 1, 356, 2, 0, 77),
    ('wglext.h', 'ascii', 696, 1, 235, 1, 0, 27),
]
17+
18+
@pytest.mark.parametrize(
    "file_name, encoding, num_lines, num_includes, num_macros, num_structs, num_classes, num_comments",
    values,
)
def test_h_metadata_extraction(file_name, encoding, num_lines, num_includes,
                               num_macros, num_structs, num_classes, num_comments):
    """Check every metadata field extracted from a sample .h file."""
    processed = File(str(test_files_path / file_name))
    extracted = processed.processor.metadata

    # Checked in a fixed order so the first mismatching field is the one reported.
    expectations = (
        ('encoding', encoding),
        ('num_lines', num_lines),
        ('num_includes', num_includes),
        ('num_macros', num_macros),
        ('num_structs', num_structs),
        ('num_classes', num_classes),
        ('num_comments', num_comments),
    )
    for field, wanted in expectations:
        assert extracted[field] == wanted
36+
37+
@pytest.mark.parametrize("file_name", [entry[0] for entry in values])
def test_h_invalid_save_location(file_name):
    """Saving into a directory that does not exist must raise the processing error."""
    source = File(str(test_files_path / file_name))
    # The parent folder is absent, so opening the target for writing fails.
    unreachable_target = '/non_existent_folder/' + file_name
    with pytest.raises(FileProcessingFailedError):
        source.save(unreachable_target)
45+
46+
@pytest.mark.parametrize("file_name", [entry[0] for entry in values])
def test_h_processor_open_file_false(file_name):
    """Constructing a File with open_file=False must never call open()."""
    target = str(test_files_path / file_name)
    # Patch the builtin so any attempt to open the file is recorded.
    with patch("builtins.open") as open_spy:
        File(target, open_file=False)
        open_spy.assert_not_called()

0 commit comments

Comments
 (0)