1+ """
2+ Utility functions for BladeLogic Analysis Agent.
3+ Pattern-based extraction for BladeLogic objects (Jobs, Packages, Policies, Scripts).
4+ """
5+ import re
6+ import logging
7+ from typing import Optional , Dict , Any , List
8+ import uuid
9+
10+ # Create a simple exception class since shared.exceptions might not exist yet
class JSONParseError(Exception):
    """Raised when JSON content cannot be parsed.

    Declared locally as a plain ``Exception`` subclass so this module does
    not depend on a shared exceptions package being available.
    """
14+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
16+
class BladeLogicExtractor:
    """Extract BladeLogic information using pattern matching.

    Every method is a ``@staticmethod``; the class is only a namespace.
    Matching is purely textual (substring checks and regular expressions),
    so all results are best-effort heuristics.
    """

    # (object type, indicators) pairs tested against the lower-cased content
    # in this order; the first type with any matching indicator wins.
    _CONTENT_INDICATORS = (
        ("JOB", ("blcli job", "nexec", "blcli_execute", "job create", "job run")),
        ("PACKAGE", ("blpackage", "package create", "software package", "depot object")),
        ("POLICY", ("blpolicy", "compliance policy", "policy create", "compliance rule")),
        # NOTE: "nexec -f" can never decide the result because the JOB
        # indicator "nexec" is a substring of it and is tested first. It is
        # kept so the indicator table matches the original definition.
        ("SCRIPT", ("#!/bin/nsh", "nsh -c", "blcli ", "nexec -f")),
    )

    # Regexes per object type; capture group 1 is the object name.
    _NAME_PATTERNS = {
        "JOB": (
            r'job\s+create\s+["\']([^"\']+)["\']',
            r'JobName[:\s=]+["\']?([^"\'\n]+)["\']?',
            r'blcli\s+job\s+["\']([^"\']+)["\']',
        ),
        "PACKAGE": (
            r'package\s+create\s+["\']([^"\']+)["\']',
            r'PackageName[:\s=]+["\']?([^"\'\n]+)["\']?',
            r'blpackage\s+["\']([^"\']+)["\']',
        ),
        "POLICY": (
            r'policy\s+create\s+["\']([^"\']+)["\']',
            r'PolicyName[:\s=]+["\']?([^"\'\n]+)["\']?',
            r'blpolicy\s+["\']([^"\']+)["\']',
        ),
        "SCRIPT": (
            r'# Script:\s*([^\n]+)',
            r'# Name:\s*([^\n]+)',
            r'echo\s+["\']Script:\s*([^"\']+)["\']',
        ),
    }

    _DESCRIPTION_PATTERNS = (
        r'Description[:\s=]+["\']?([^"\'\n]+)["\']?',
        r'# Description:\s*([^\n]+)',
        r'# Purpose:\s*([^\n]+)',
        r'echo\s+["\']Description:\s*([^"\']+)["\']',
    )

    _VERSION_PATTERNS = (
        r'Version[:\s=]+["\']?([^"\'\n]+)["\']?',
        r'# Version:\s*([^\n]+)',
        r'blcli.*version\s+([\d.]+)',
    )

    _AUTHOR_PATTERNS = (
        r'Author[:\s=]+["\']?([^"\'\n]+)["\']?',
        r'# Author:\s*([^\n]+)',
        r'# Created by:\s*([^\n]+)',
    )

    # Platform label -> case-insensitive regex of keywords implying it.
    _PLATFORM_PATTERNS = {
        'Windows': r'(?i)(windows|win32|win64|microsoft)',
        'Linux': r'(?i)(linux|rhel|centos|ubuntu|debian|suse)',
        'AIX': r'(?i)(aix|unix)',
        'Solaris': r'(?i)(solaris|sunos)',
        'HPUX': r'(?i)(hpux|hp-ux)',
    }

    # Operation bucket -> regexes whose capture group 1 is the operand.
    # Key order defines the key order of the returned dictionary.
    _OPERATION_PATTERNS = {
        "services": (
            r'service\s+([\w.-]+)\s+(?:start|stop|restart|enable|disable)',
            r'systemctl\s+(?:start|stop|restart|enable|disable)\s+([\w.-]+)',
            r'net\s+(?:start|stop)\s+([\w.-]+)',
        ),
        "packages": (
            r'(?:yum|apt-get|rpm)\s+install\s+([\w.-]+)',
            r'msiexec.*["\']([^"\']+\.msi)["\']',
            r'software\s+package\s+["\']([^"\']+)["\']',
        ),
        "files": (
            r'(?:copy|cp|move|mv)\s+["\']?([^"\'\s]+)["\']?',
            r'echo\s+.*>\s*["\']?([^"\'\s]+)["\']?',
            r'blcli\s+file\s+["\']([^"\']+)["\']',
        ),
        "commands": (
            r'nexec\s+-c\s+["\']([^"\']+)["\']',
            r'blcli_execute\s+["\']([^"\']+)["\']',
            r'system\s+["\']([^"\']+)["\']',
        ),
        # No patterns are defined for policies, so this bucket is always
        # returned empty; the key is kept for a stable result shape.
        "policies": (),
    }

    # Upper bound on the number of entries kept per operation bucket.
    _MAX_OPERATIONS = 10

    @staticmethod
    def detect_bladelogic_type(content: str, filename: str = "") -> str:
        """Detect the BladeLogic object type of a file.

        The filename is consulted first; only when it gives no hint is the
        content scanned for known indicator substrings.

        Args:
            content: Text of the file.
            filename: Optional file name used as the primary hint.

        Returns:
            One of ``"JOB"``, ``"PACKAGE"``, ``"POLICY"``, ``"SCRIPT"`` or
            ``"UNKNOWN"``.
        """
        if filename:
            fname_lower = filename.lower()
            if "job" in fname_lower:
                return "JOB"
            if "package" in fname_lower or "pkg" in fname_lower:
                return "PACKAGE"
            if "policy" in fname_lower or "pol" in fname_lower:
                return "POLICY"
            if fname_lower.endswith(('.nsh', '.sh')):
                return "SCRIPT"

        # Lower-case once so every indicator check is case-insensitive.
        content_lower = content.lower()
        for object_type, indicators in BladeLogicExtractor._CONTENT_INDICATORS:
            if any(indicator in content_lower for indicator in indicators):
                return object_type

        return "UNKNOWN"

    @staticmethod
    def extract_bladelogic_metadata(content: str, object_type: str) -> Dict[str, Any]:
        """Extract metadata from BladeLogic content.

        Args:
            content: Text of the file.
            object_type: Type label used to select the name patterns.

        Returns:
            A dictionary with any of the keys ``name``, ``description``,
            ``version``, ``author`` and ``target_platforms``. Keys whose
            value is empty or ``None`` are omitted.
        """
        metadata = {
            "name": BladeLogicExtractor._extract_object_name(content, object_type),
            "description": BladeLogicExtractor._extract_description(content),
            "version": BladeLogicExtractor._extract_version(content),
            "author": BladeLogicExtractor._extract_author(content),
            "target_platforms": BladeLogicExtractor._extract_target_platforms(content),
        }
        return {key: value for key, value in metadata.items() if value}

    @staticmethod
    def _first_match(patterns, content: str) -> Optional[str]:
        """Return stripped group 1 of the first pattern that matches.

        Patterns are tried in order with ``re.IGNORECASE``; ``None`` is
        returned when none of them matches.
        """
        for pattern in patterns:
            match = re.search(pattern, content, re.IGNORECASE)
            if match:
                return match.group(1).strip()
        return None

    @staticmethod
    def _extract_object_name(content: str, object_type: str) -> Optional[str]:
        """Extract the object name using the patterns for ``object_type``.

        Returns ``None`` for an unrecognised type or when nothing matches.
        """
        patterns = BladeLogicExtractor._NAME_PATTERNS.get(object_type, ())
        return BladeLogicExtractor._first_match(patterns, content)

    @staticmethod
    def _extract_description(content: str) -> Optional[str]:
        """Extract a description line from the content, if present."""
        return BladeLogicExtractor._first_match(
            BladeLogicExtractor._DESCRIPTION_PATTERNS, content
        )

    @staticmethod
    def _extract_version(content: str) -> Optional[str]:
        """Extract version information from the content, if present."""
        return BladeLogicExtractor._first_match(
            BladeLogicExtractor._VERSION_PATTERNS, content
        )

    @staticmethod
    def _extract_author(content: str) -> Optional[str]:
        """Extract author information from the content, if present."""
        return BladeLogicExtractor._first_match(
            BladeLogicExtractor._AUTHOR_PATTERNS, content
        )

    @staticmethod
    def _extract_target_platforms(content: str) -> List[str]:
        """Return the platform labels whose keywords occur in the content.

        Labels appear in the order they are declared in the pattern table.
        """
        return [
            platform
            for platform, pattern in BladeLogicExtractor._PLATFORM_PATTERNS.items()
            if re.search(pattern, content)
        ]

    @staticmethod
    def extract_bladelogic_operations(content: str, object_type: str) -> Dict[str, List[str]]:
        """Extract operations performed by a BladeLogic object.

        Args:
            content: Text of the file.
            object_type: Accepted for interface compatibility; it does not
                influence which patterns are applied.

        Returns:
            A dictionary with the keys ``services``, ``packages``,
            ``files``, ``commands`` and ``policies``. Each value is a list
            of stripped, de-duplicated matches in first-seen order, capped
            at ten entries. ``policies`` is always empty.
        """
        operations: Dict[str, List[str]] = {}
        for key, patterns in BladeLogicExtractor._OPERATION_PATTERNS.items():
            found: List[str] = []
            for pattern in patterns:
                found.extend(re.findall(pattern, content, re.IGNORECASE))

            # dict.fromkeys de-duplicates while preserving first-seen order.
            cleaned = dict.fromkeys(op.strip() for op in found if op.strip())
            operations[key] = list(cleaned)[:BladeLogicExtractor._MAX_OPERATIONS]

        return operations
258+
class BladeLogicValidator:
    """Structural checks for BladeLogic input payloads."""

    @staticmethod
    def validate_bladelogic_input(bladelogic_data: Dict[str, Any]) -> None:
        """Check that ``bladelogic_data`` has the expected shape.

        The payload must be a dictionary with a ``"files"`` entry that is a
        non-empty dictionary mapping non-blank string filenames to string
        contents.

        Raises:
            ValueError: On the first structural problem found.
        """
        if not isinstance(bladelogic_data, dict):
            raise ValueError("BladeLogic data must be a dictionary")
        if "files" not in bladelogic_data:
            raise ValueError("BladeLogic data must contain 'files' key")

        file_map = bladelogic_data["files"]
        has_entries = isinstance(file_map, dict) and bool(file_map)
        if not has_entries:
            raise ValueError("Files must be a non-empty dictionary")

        # Entries are checked one at a time, name before content.
        for filename, content in file_map.items():
            name_is_usable = isinstance(filename, str) and bool(filename.strip())
            if not name_is_usable:
                raise ValueError(f"Invalid filename: { filename } ")
            if not isinstance(content, str):
                raise ValueError(f"File content must be string for { filename } ")
281+
def create_correlation_id() -> str:
    """Return a short random identifier for request tracking.

    The identifier is the first eight hexadecimal characters of a freshly
    generated random UUID (version 4).
    """
    return uuid.uuid4().hex[:8]
286+
def format_bladelogic_for_analysis(bladelogic_data: Dict[str, Any]) -> str:
    """Format BladeLogic files as one text document for LLM analysis.

    The output starts with a header line naming the object, followed by one
    section per file. Each section has a banner line with the file name and
    its detected type, then the stripped file content and a blank line.

    Args:
        bladelogic_data: Payload with a ``"files"`` mapping of filename to
            content and an optional ``"name"`` (defaults to ``"unknown"``).

    Returns:
        The formatted, newline-separated text.

    Raises:
        ValueError: If the payload fails
            ``BladeLogicValidator.validate_bladelogic_input``.
    """
    BladeLogicValidator.validate_bladelogic_input(bladelogic_data)

    files = bladelogic_data["files"]
    object_name = bladelogic_data.get("name", "unknown")

    formatted_parts = [f"BladeLogic Object: { object_name } ", ""]

    for filename, content in files.items():
        object_type = BladeLogicExtractor.detect_bladelogic_type(content, filename)
        formatted_parts.extend([
            f"=== File: { filename } (Type: { object_type } ) ===",
            content.strip(),
            "",
        ])

    # Join with a real newline; an escaped backslash here would emit the
    # separator as visible text and collapse the document onto one line.
    return "\n".join(formatted_parts)
0 commit comments