22
33import re
44import sys
5+ import xml .etree .ElementTree as ET
56from collections import Counter
7+ from pathlib import Path
68from typing import Dict , List , Tuple
79
8- import xml .etree .ElementTree as ET
# Markers delimiting the doc-to-pixels log section; extract_container_output()
# looks for them in a test case's captured "system-out" text.
DOC_TO_PIXELS_LOG_START = "----- DOC TO PIXELS LOG START -----"
DOC_TO_PIXELS_LOG_END = "----- DOC TO PIXELS LOG END -----"

# Lines that is_expected_line() classifies as expected output. The report
# matches these against *scrubbed* lines (see scrub_container_line), which is
# why numbers appear as the placeholder "X" (e.g. "page X/X").
EXPECTED_PATTERNS = [
    re.compile(r"^Converting page X/X to pixels$"),
    re.compile(r"^Converting page X/X from pixels to searchable PDF$"),
    re.compile(r"^Converting to PDF using LibreOffice$"),
    re.compile(r"^Converted document to pixels$"),
    re.compile(r"^Safe PDF created$"),
    re.compile(r"^Compressing PDF$"),
    re.compile(r"^Merging X pages into a single PDF$"),
    re.compile(r"^Calculating number of pages$"),
    re.compile(r"^\[COMMAND\].*$"),
    re.compile(r"^Result: (SUCCESS|FAILURE)$"),
    re.compile(r"^pdfinfo:$"),
    re.compile(r"^pdftoppm: Syntax Error.*$"),
    re.compile(r"^convert /tmp/input_file as a .*$"),
    re.compile(r"^time=.*msg=\"forwarding signal.*"),
    re.compile(r"^time=.*msg=\"Waiting for container.*"),
    re.compile(r"^Installing LibreOffice extension.*$"),
    re.compile(r"^Archive:.*$"),
    re.compile(r"^ extracting:.*$"),
    re.compile(r"^ inflating:.*$"),
    # Empty lines are always considered expected.
    re.compile(r"^$"),
]
35+
36+
def scrub_container_line(line: str) -> str:
    """Replace variable data in a container log line with the placeholder ``X``.

    Three substitutions run in a fixed order: standalone runs of six or more
    hex digits, then ``YYYY-MM-DDTHH:MM:SS`` timestamps, then every remaining
    run of decimal digits. Identical messages that differ only in such values
    therefore collapse to the same string.

    Args:
        line: A single line of container output.

    Returns:
        The line with all matched spans replaced by ``X``.
    """
    substitutions = (
        r"\b[0-9a-fA-F]{6,}\b",
        r"\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}",
        r"\d+",
    )
    scrubbed = line
    # Order matters: the catch-all digit pattern must run last.
    for pattern in substitutions:
        scrubbed = re.sub(pattern, "X", scrubbed)
    return scrubbed
942
1043
11- # Pattern to scrub variable data (dates, hex IDs, numbers) for grouping
12- VARIABLE_PATTERN = re .compile (
13- r"\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?\b"
14- r"|\b[0-9a-f]{8,}\b"
15- r"|\b\d{4}/\d{2}/\d{2}\b"
16- r"|\b\d{2}:\d{2}:\d{2}\b"
17- r'|(?<=file\s)\S+\.pdf'
18- r"|\bpage\s+\d+"
19- r"|\bpages\s+\d+"
20- )
def is_expected_line(line: str) -> bool:
    """Tell whether a line matches one of the ``EXPECTED_PATTERNS``.

    Args:
        line: A log line to classify.

    Returns:
        ``True`` as soon as any pattern matches at the start of ``line``,
        ``False`` when none does.
    """
    for pattern in EXPECTED_PATTERNS:
        if pattern.match(line):
            return True
    return False
2146
2247
23- def scrub_text (text : str ) -> str :
24- """Replace variable data with placeholders for better grouping."""
25- return VARIABLE_PATTERN .sub ("X" , text )
def is_blank_line(line: str) -> bool:
    """Return ``True`` only when ``line`` is exactly the empty string.

    Whitespace-only lines are not considered blank.
    """
    empty = ""
    return line == empty
2650
2751
2852def parse_junit (xml_file : str ) -> ET .Element :
@@ -31,52 +55,99 @@ def parse_junit(xml_file: str) -> ET.Element:
3155
3256
def count_results(root: ET.Element) -> Dict[str, int]:
    """Sum the result counters of every test suite in a JUnit report.

    Args:
        root: Root element of the parsed JUnit XML document.

    Returns:
        A dict with the keys ``errors``, ``failures``, ``skipped`` and
        ``tests``, each holding the total over all ``<testsuite>`` elements.
        Attributes missing from a suite count as 0.

    Raises:
        ValueError: If a counter attribute is present but not an integer.
    """
    suites = root.findall("testsuite")
    # findall() only inspects direct children, so a report whose root is
    # itself a <testsuite> (no <testsuites> wrapper) would otherwise yield
    # all-zero totals.
    if not suites and root.tag == "testsuite":
        suites = [root]

    totals = {"errors": 0, "failures": 0, "skipped": 0, "tests": 0}
    for suite in suites:
        for key in totals:
            totals[key] += int(suite.attrib.get(key, "0"))
    return totals
4373
4474
45- def get_test_overview (root : ET .Element ) -> List [Tuple [str , str ]]:
46- testsuite = root .find ("testsuite" )
47- results = []
48- if testsuite is not None :
49- for testcase in testsuite .findall ("testcase" ):
50- name = testcase .attrib .get ("name" , "unknown" )
51- classname = testcase .attrib .get ("classname" , "" )
52- full_name = f"{ classname } ::{ name } " if classname else name
53- failure = testcase .find ("failure" )
54- error = testcase .find ("error" )
55- if failure is not None :
56- status = "FAIL"
57- elif error is not None :
58- status = "ERROR"
59- else :
60- status = "PASS"
61- results .append ((full_name , status ))
62- return results
63-
64-
65- def get_container_outputs (root : ET .Element ) -> List [str ]:
66- outputs = []
67- testsuite = root .find ("testsuite" )
68- if testsuite is not None :
69- for testcase in testsuite .findall ("testcase" ):
70- for child in ("failure" , "error" ):
71- elem = testcase .find (child )
72- if elem is not None and elem .text :
73- outputs .append (scrub_text (elem .text .strip ()))
74- return outputs
def get_test_cases(root: ET.Element) -> List[ET.Element]:
    """Collect every ``<testcase>`` element from all test suites in a report.

    Args:
        root: Root element of the parsed JUnit XML document.

    Returns:
        The test cases in document order, suite by suite. Empty when the
        report contains no suites.
    """
    suites = root.findall("testsuite")
    # findall() only inspects direct children; fall back to the root when the
    # report is a bare <testsuite> without a <testsuites> wrapper.
    if not suites and root.tag == "testsuite":
        suites = [root]
    return [case for suite in suites for case in suite.findall("testcase")]
80+
81+
def get_test_status(testcase: ET.Element) -> str:
    """Classify a test case as ``"FAIL"``, ``"ERROR"`` or ``"PASS"``.

    A ``<failure>`` child takes precedence over an ``<error>`` child; a test
    case with neither is reported as ``"PASS"``.
    """
    # Checked in priority order: failure first, then error.
    for child_tag, status in (("failure", "FAIL"), ("error", "ERROR")):
        if testcase.find(child_tag) is not None:
            return status
    return "PASS"
88+
89+
def get_extension(name: str) -> str:
    """Extract the file extension from the bracketed part of a test name.

    The first ``[...]`` group in ``name`` is interpreted as a path and its
    final suffix (without the leading dot) is returned.

    Returns:
        The extension, or ``"none"`` when there is no bracketed part or the
        bracketed path has no suffix.
    """
    bracketed = re.search(r"\[([^\]]+)\]", name)
    if bracketed is None:
        return "none"
    suffix = Path(bracketed.group(1)).suffix.lstrip(".")
    return suffix or "none"
96+
97+
def get_size_bucket(name: str) -> str:
    """Map a test name to a file-size bucket label.

    The bucket is chosen by the first directory marker found in ``name``;
    names containing none of the markers fall into ``"unknown"``.
    """
    # (marker, label) pairs, checked in this order; the first hit wins.
    buckets = (
        ("10K_docs", "0KB - 10KB"),
        ("100K_docs", "10KB - 100KB"),
        ("10M_docs", "100KB - 10MB"),
        ("100M_docs", "10MB - 100MB"),
    )
    for marker, label in buckets:
        if marker in name:
            return label
    return "unknown"
108+
109+
def extract_captured_text(testcase: ET.Element, tag: str) -> str:
    """Return the captured text stored under a child element of a test case.

    The child's text is scanned line by line. Everything after the first
    header line (one containing both ``"Captured"`` and ``"---"``) is kept;
    any later header lines are dropped from the result.

    Args:
        testcase: The ``<testcase>`` element to inspect.
        tag: Name of the child element to read, e.g. ``"system-out"``.

    Returns:
        The captured lines joined with newlines, or ``""`` when the child is
        missing, has no text, or contains no header line.
    """
    elem = testcase.find(tag)
    if elem is None or not elem.text:
        return ""

    content_lines = []
    in_content = False
    # Split on a bare newline: splitting on newline-plus-space would leave
    # ordinary (unindented) lines glued to the header and discard them.
    for line in elem.text.split("\n"):
        if "Captured" in line and "---" in line:
            in_content = True
            continue
        if in_content:
            content_lines.append(line)
    return "\n".join(content_lines)
126+
127+
def extract_container_output(testcase: ET.Element) -> str:
    """Return the container log portion of a test case's captured stdout.

    When the captured ``system-out`` text contains both doc-to-pixels log
    markers, only the text between the first START marker and the next END
    marker is returned. Otherwise the whole captured text is returned.

    Args:
        testcase: The ``<testcase>`` element to inspect.

    Returns:
        The stripped log text, or ``""`` when nothing was captured.
    """
    output = extract_captured_text(testcase, "system-out")
    if DOC_TO_PIXELS_LOG_START in output and DOC_TO_PIXELS_LOG_END in output:
        # partition() never fails to unpack: if the only END marker precedes
        # the START marker, everything after START is returned instead of
        # raising ValueError as a two-value split() unpack would.
        _, _, rest = output.partition(DOC_TO_PIXELS_LOG_START)
        log, _, _ = rest.partition(DOC_TO_PIXELS_LOG_END)
        return log.strip()
    return output.strip()
137+
138+
def get_container_lines(testcase: ET.Element) -> List[str]:
    """Return a test case's container output as a list of lines.

    Args:
        testcase: The ``<testcase>`` element to inspect.

    Returns:
        One entry per line of container output with trailing whitespace
        removed, or an empty list when there is no output.
    """
    output = extract_container_output(testcase)
    if not output:
        return []
    # Split on a bare newline; splitting on newline-plus-space would only
    # break before indented lines and merge everything else into one entry.
    return [line.rstrip() for line in output.split("\n")]
75144
76145
77146def generate_report (xml_file : str ) -> str :
78147 root = parse_junit (xml_file )
79148 results = count_results (root )
149+ test_cases = get_test_cases (root )
150+
80151 total = results ["tests" ]
81152 failures = results ["failures" ]
82153 errors = results ["errors" ]
@@ -87,31 +158,43 @@ def generate_report(xml_file: str) -> str:
87158 lines .append ("==== RESULTS SUMMARY ===" )
88159 lines .append (f" errors: { errors } " )
89160 lines .append (f" failures: { failures } " )
161+ lines .append (f" successes: { total - errors - failures - skipped } " )
90162 lines .append (f" skipped: { skipped } " )
91163 lines .append (f" tests: { total } " )
92164 lines .append (f" failure rate: { failure_rate } " )
93165 lines .append ("" )
94166 lines .append ("" )
95167
96- # Test overview
97- overview = get_test_overview (root )
98- pass_count = sum (1 for _ , s in overview if s == "PASS" )
99- fail_count = sum (1 for _ , s in overview if s in ("FAIL" , "ERROR" ))
168+ ext_counter = Counter ()
169+ size_counter : Dict [str , int ] = {}
170+ for tc in test_cases :
171+ name = tc .attrib .get ("name" , "" )
172+ ext = get_extension (name )
173+ size_bucket = get_size_bucket (name )
174+ ext_counter [ext ] += 1
175+ size_counter [size_bucket ] = size_counter .get (size_bucket , 0 ) + 1
176+
100177 lines .append ("=== TEST OVERVIEW ===" )
101- lines .append (f" Total: { len (overview )} Passed: { pass_count } Failed: { fail_count } " )
102- if fail_count > 0 :
103- lines .append ("" )
104- lines .append (" Failures:" )
105- for name , status in overview :
106- if status in ("FAIL" , "ERROR" ):
107- lines .append (f" [{ status } ] { name } " )
178+ lines .append ("" )
179+ lines .append (" Extensions breakdown (All available tests)" )
180+ for ext , count in ext_counter .most_common ():
181+ lines .append (f" { count :>8} { ext } " )
182+ lines .append ("" )
183+ lines .append (" File sizes breakdown (All available tests)" )
184+ for bucket in ["0KB - 10KB" , "10KB - 100KB" , "100KB - 10MB" , "10MB - 100MB" ]:
185+ count = size_counter .get (bucket , 0 )
186+ lines .append (f" { bucket } { count } " )
108187 lines .append ("" )
109188 lines .append ("" )
110189
111- # Most common container output
112- outputs = get_container_outputs (root )
113- if outputs :
114- counter = Counter (outputs )
190+ all_lines : List [str ] = []
191+ for tc in test_cases :
192+ all_lines .extend (get_container_lines (tc ))
193+
194+ if all_lines :
195+ scrubbed = [scrub_container_line (line ) for line in all_lines ]
196+ filtered = [l for l in scrubbed if not is_expected_line (l )]
197+ counter = Counter (filtered )
115198 lines .append ("=== MOST COMMON CONTAINER OUTPUT ===" )
116199 lines .append ("" )
117200 lines .append (" Top 30:" )
@@ -120,18 +203,69 @@ def generate_report(xml_file: str) -> str:
120203 lines .append ("" )
121204 lines .append ("" )
122205
123- # Failure reasons
206+ fail_lines : List [str ] = []
207+ for tc in test_cases :
208+ if get_test_status (tc ) in ("FAIL" , "ERROR" ):
209+ fail_lines .extend (get_container_lines (tc ))
210+
211+ if fail_lines :
212+ scrubbed = [scrub_container_line (line ) for line in fail_lines ]
213+ filtered = [l for l in scrubbed if not is_expected_line (l )]
214+ counter = Counter (filtered )
124215 lines .append ("=== FAILURE REASONS ===" )
125216 lines .append ("" )
126217 lines .append (" All failures:" )
127218 for output , count in counter .most_common ():
128- lines .append (f" { count :>5} { output [:120 ]} " )
219+ lines .append (f" { count :>5} { output } " )
220+ lines .append ("" )
221+ lines .append ("" )
129222
130- # Timeouts (not directly in JUnit, but useful)
223+ timeout_files : List [str ] = []
224+ for tc in test_cases :
225+ output = extract_captured_text (tc , "system-out" )
226+ if "TIMEOUT EXCEEDED" in output :
227+ m = re .search (r"'(.*?)'" , output )
228+ if m :
229+ timeout_files .append (m .group (1 ))
230+
231+ lines .append ("=== TIMEOUTS ===" )
232+ lines .append ("" )
233+ if timeout_files :
234+ lines .append (f" Summary: { len (timeout_files )} " )
235+ lines .append ("" )
236+ lines .append (" Affected files:" )
237+ for f in timeout_files :
238+ lines .append (f" - { f } " )
239+ else :
240+ lines .append (" Summary: 0" )
241+ lines .append ("" )
242+ lines .append (" Affected files:" )
243+ lines .append ("" )
244+ lines .append ("" )
245+
246+ failed_entries : List [Tuple [str , List [str ]]] = []
247+ for tc in test_cases :
248+ if get_test_status (tc ) in ("FAIL" , "ERROR" ):
249+ name = tc .attrib .get ("name" , "" )
250+ m = re .search (r"\[([^\]]+)\]" , name )
251+ fname = m .group (1 ) if m else name
252+ container_lines = get_container_lines (tc )
253+ scrubbed = [scrub_container_line (l ) for l in container_lines ]
254+ filtered = [l for l in scrubbed if not is_expected_line (l )]
255+ preview = filtered [:3 ]
256+ failed_entries .append ((fname , preview ))
257+
258+ lines .append ("=== FAILED FILES ===" )
259+ lines .append ("" )
260+ if failed_entries :
261+ for fname , preview in sorted (failed_entries , key = lambda x : x [0 ]):
262+ lines .append (f" - { fname } " )
263+ for pline in preview :
264+ lines .append (f" { pline } " )
265+ else :
266+ lines .append (" (none)" )
131267 lines .append ("" )
132268 lines .append ("" )
133- lines .append ("=== TIMEOUTS ===" )
134- lines .append (" (Not available from JUnit XML)" )
135269
136270 return "\n " .join (lines )
137271
0 commit comments