Skip to content

Commit 320f03d

Browse files
authored
Merge pull request #218 from hc-sc-ocdo-bdpd/java
Java
2 parents 5ae5b5c + c6b2f88 commit 320f03d

4 files changed

Lines changed: 138 additions & 0 deletions

File tree

file_processing/file.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class File:
4141
".xml": processors.XmlFileProcessor,
4242
".png": processors.PngFileProcessor,
4343
".xlsx": processors.XlsxFileProcessor,
44+
".java": processors.JavaFileProcessor,
4445
".jpeg": processors.JpegFileProcessor,
4546
".jpg": processors.JpegFileProcessor,
4647
".json": processors.JsonFileProcessor,

file_processing/processors/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,4 @@
4040
from .gguf_processor import GgufFileProcessor
4141
from .exe_processor import ExeFileProcessor
4242
from .whl_processor import WhlFileProcessor
43+
from .java_processor import JavaFileProcessor
file_processing/processors/java_processor.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import chardet
2+
from file_processing.errors import FileProcessingFailedError
3+
from file_processing.file_processor_strategy import FileProcessorStrategy
4+
5+
class JavaFileProcessor(FileProcessorStrategy):
    """
    Processor for handling Java files (.java), extracting source code metadata and content.

    Attributes:
        metadata (dict): Contains extracted metadata such as 'text', 'encoding',
            'num_lines', 'num_characters', 'num_methods', and 'num_classes'.
            Holds only a 'message' entry when the processor is created with
            `open_file=False`.
    """

    def __init__(self, file_path: str, open_file: bool = True) -> None:
        """
        Initializes the JavaFileProcessor with the specified file path.

        Args:
            file_path (str): Path to the Java file to process.
            open_file (bool): Indicates whether to open and process the file immediately.

        Sets:
            metadata (dict): Populated with 'message' if `open_file` is False,
                otherwise starts out empty.
        """
        super().__init__(file_path, open_file)
        self.metadata = {'message': 'File was not opened'} if not open_file else {}

    def process(self) -> None:
        """
        Extracts metadata from the Java source file if it is open and accessible.

        Metadata extracted includes the source text, file encoding, number of lines,
        number of characters, number of methods, and number of classes. The method
        and class counts are regex heuristics applied to the raw text, so matches
        inside comments or string literals are counted as well.

        Does nothing when `self.open_file` is falsy.

        Raises:
            FileProcessingFailedError: If an error occurs during Java file processing.
                The original exception is attached as the cause.
        """
        if not self.open_file:
            return

        import re

        try:
            # Read the raw bytes inside a context manager so the handle is
            # always closed, even if detection or decoding fails afterwards.
            with open(self.file_path, 'rb') as raw_file:
                raw_data = raw_file.read()
            encoding = chardet.detect(raw_data)['encoding']

            # Re-read in text mode so newline handling matches what callers
            # (and the expected character counts) rely on.
            with open(self.file_path, 'r', encoding=encoding) as f:
                text = f.read()

            num_lines = len(text.splitlines())
            num_characters = len(text)

            # Simple method/class counting using regex patterns
            method_pattern = re.compile(r'\b(public|private|protected|static|\s)+\s+\w+\s+\w+\s*\([^\)]*\)\s*\{', re.MULTILINE)
            class_pattern = re.compile(r'\b(class|interface|enum)\s+\w+', re.MULTILINE)

            num_methods = len(method_pattern.findall(text))
            num_classes = len(class_pattern.findall(text))

            self.metadata.update({
                'text': text,
                'encoding': encoding,
                'num_lines': num_lines,
                'num_characters': num_characters,
                'num_methods': num_methods,
                'num_classes': num_classes,
            })
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise FileProcessingFailedError(
                f"Error encountered while processing {self.file_path}: {e}"
            ) from e

    def save(self, output_path: str = None) -> None:
        """
        Saves the processed Java file to the specified output path with current metadata.

        Writes `metadata['text']` using `metadata['encoding']`. If those entries
        are missing (for example, the file was never processed), the resulting
        error is reported as a FileProcessingFailedError.

        Args:
            output_path (str): Path to save the processed Java file. If None, overwrites the original file.

        Raises:
            FileProcessingFailedError: If an error occurs while saving the Java file.
                The original exception is attached as the cause.
        """
        # Resolved before the try block so the error message below can always
        # reference it.
        save_path = output_path or self.file_path
        try:
            with open(save_path, 'w', encoding=self.metadata['encoding']) as f:
                f.write(self.metadata['text'])
        except Exception as e:
            raise FileProcessingFailedError(
                f"Error encountered while saving file {self.file_path} to {save_path}: {e}"
            ) from e

tests/unit/test_java_processor.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import os
2+
import pytest
3+
from unittest.mock import patch
4+
from file_processing.file import File
5+
from file_processing.errors import FileProcessingFailedError
6+
from file_processing_test_data import get_test_files_path
7+
8+
# Get the directory where test files are stored
9+
test_files_path = get_test_files_path()
10+
11+
# Expected metadata for each Java fixture, one tuple per file:
# (filename, text length, encoding, num_lines, num_methods, num_classes)
values = [
    ('DataTypes.java', 59831, 'ascii', 1409, 70, 17),
    ('ExecutionJobVertex.java', 27219, 'ascii', 695, 24, 2),
    ('TopicAdmin.java', 39062, 'ascii', 770, 25, 3),
    ('Utils.java', 4721, 'ascii', 106, 7, 1),
]
18+
19+
@pytest.mark.parametrize(
    "file_name, text_length, encoding, num_lines, num_methods, num_classes",
    values,
)
def test_java_metadata_extraction(file_name, text_length, encoding, num_lines, num_methods, num_classes):
    """Checks every extracted metadata field against the expected values for a fixture file."""
    processed = File(test_files_path / file_name)

    # Each field is asserted separately so a failure names the exact metric.
    assert len(processed.processor.metadata['text']) == text_length
    assert processed.processor.metadata['encoding'] == encoding
    assert processed.processor.metadata['num_lines'] == num_lines
    assert processed.processor.metadata['num_methods'] == num_methods
    assert processed.processor.metadata['num_classes'] == num_classes
33+
34+
@pytest.mark.parametrize("file_name", [entry[0] for entry in values])
def test_java_invalid_save_location(file_name):
    """Verifies that saving into a directory that does not exist raises FileProcessingFailedError."""
    processed = File(test_files_path / file_name)
    unreachable_target = '/non_existent_folder/' + file_name

    with pytest.raises(FileProcessingFailedError):
        processed.save(unreachable_target)
41+
42+
@pytest.mark.parametrize("file_name", [entry[0] for entry in values])
def test_java_processor_open_file_false(file_name):
    """Verifies that constructing a File with open_file=False never calls the builtin open()."""
    target_path = test_files_path / file_name

    # Replace builtins.open with a mock so any attempted read is recorded.
    with patch("builtins.open") as open_spy:
        File(target_path, open_file=False)
        open_spy.assert_not_called()

0 commit comments

Comments
 (0)