-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxml_control_merger.py
More file actions
303 lines (238 loc) · 11.2 KB
/
Copy pathxml_control_merger.py
File metadata and controls
303 lines (238 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#!/usr/bin/env python3
"""
XML Control Merger Script - Text-based
This script processes XML files as text and copies missing CONTROL structures
from the ORIG file into the TARGET file to create a complete OUT file.
It preserves the original formatting and CDATA sections.
Usage:
python xml_control_merger.py <ORIG_FILE> <TARGET_FILE> <OUT_FILE>
python xml_control_merger.py --verbose <ORIG_FILE> <TARGET_FILE> <OUT_FILE>
Example:
python xml_control_merger.py CIS_W2022.xml TEST_W2022.xml out.xml
python xml_control_merger.py --verbose CIS_W2022.xml TEST_W2022.xml out.xml
"""
import sys
import re
import argparse
import copy
def parse_arguments():
    """Build the command-line interface and return the parsed arguments.

    The returned namespace exposes the three required positionals
    ``orig_file``, ``ziel_file`` (the TARGET file) and ``out_file``, plus
    the boolean ``verbose`` flag (``--verbose`` / ``-v``).
    """
    cli = argparse.ArgumentParser(
        description='XML Control Merger - Copies missing CONTROL structures between XML files (text-based)'
    )
    # Positionals are registered in the order they must appear on the
    # command line.
    positionals = (
        ('orig_file', 'Path to the ORIG XML file'),
        ('ziel_file', 'Path to the TARGET XML file'),
        ('out_file', 'Path to the output XML file'),
    )
    for dest, help_text in positionals:
        cli.add_argument(dest, help=help_text)
    cli.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Shows detailed information about inserted CONTROL IDs and sections',
    )
    return cli.parse_args()
def extract_control_blocks(text):
    """Return every ``<CONTROL>...</CONTROL>`` span in *text*, in document order.

    Matching is non-greedy, so each span ends at the first ``</CONTROL>``
    that follows its opening tag. ``re.DOTALL`` lets a block span several
    lines. The returned strings include the surrounding tags.
    """
    block_re = re.compile(r'<CONTROL>(.*?)</CONTROL>', re.DOTALL)
    return [hit.group(0) for hit in block_re.finditer(text)]
def get_control_id(control_text):
    """Return the digits of the first ``<ID>n</ID>`` tag in *control_text*.

    The ID is returned as a string. ``None`` is returned when the text
    contains no ``<ID>`` tag holding only digits.
    """
    found = re.search(r'<ID>(\d+)</ID>', control_text)
    if found is None:
        return None
    return found.group(1)
def find_section_controls(text, section_number):
    """Return the CONTROL blocks of the SECTION numbered *section_number*.

    Only the first ``<SECTION>`` whose opening tag is followed (after
    optional whitespace) by ``<NUMBER>section_number</NUMBER>`` is
    inspected; it ends at the first ``</SECTION>`` after that.

    Args:
        text: The XML document as a string.
        section_number: The section number (string or int). It is matched
            literally, so regex metacharacters in it have no special
            meaning.

    Returns:
        A list of ``<CONTROL>...</CONTROL>`` strings in document order, or
        an empty list when no such section exists.
    """
    # Escape the number so that e.g. "1.5" cannot match "1x5".
    number = re.escape(str(section_number))
    section_match = re.search(
        rf'<SECTION>\s*<NUMBER>{number}</NUMBER>.*?</SECTION>',
        text,
        re.DOTALL,
    )
    if section_match is None:
        return []
    return extract_control_blocks(section_match.group(0))
def find_controls_total_in_section(text, section_number):
    """Return the ``total`` attribute of the CONTROLS tag in a SECTION.

    Only the first ``<SECTION>`` whose opening tag is followed (after
    optional whitespace) by ``<NUMBER>section_number</NUMBER>`` is
    inspected.

    Args:
        text: The XML document as a string.
        section_number: The section number (string or int). It is matched
            literally, so regex metacharacters in it have no special
            meaning.

    Returns:
        The value of the first ``<CONTROLS total="n">`` in that section as
        an ``int``, or ``None`` when the section or the tag is missing.
    """
    # Escape the number so that e.g. "1.5" cannot match "1x5".
    number = re.escape(str(section_number))
    section_match = re.search(
        rf'<SECTION>\s*<NUMBER>{number}</NUMBER>.*?</SECTION>',
        text,
        re.DOTALL,
    )
    if section_match is None:
        return None
    total_match = re.search(r'<CONTROLS total="(\d+)">', section_match.group(0))
    return int(total_match.group(1)) if total_match else None
def update_controls_total_in_section(text, section_number, new_total):
    """Return *text* with the CONTROLS ``total`` of a SECTION set to *new_total*.

    Every ``<SECTION>`` whose opening tag is followed (after optional
    whitespace) by ``<NUMBER>section_number</NUMBER>`` is rewritten, and
    inside each one every ``<CONTROLS total="n">`` tag is replaced.

    Args:
        text: The XML document as a string.
        section_number: The section number (string or int). It is matched
            literally, so regex metacharacters in it have no special
            meaning.
        new_total: The value to write into the ``total`` attribute.

    Returns:
        The updated document; identical to *text* when nothing matched.
    """
    # Escape the number so that e.g. "1.5" cannot match "1x5".
    number = re.escape(str(section_number))
    section_re = re.compile(
        rf'<SECTION>\s*<NUMBER>{number}</NUMBER>.*?</SECTION>', re.DOTALL)
    new_tag = f'<CONTROLS total="{new_total}">'

    def _rewrite_total(section_match):
        # A callable replacement inserts new_tag verbatim; a plain string
        # would be subject to backslash/group template expansion.
        return re.sub(
            r'<CONTROLS total="\d+">',
            lambda _m: new_tag,
            section_match.group(0),
        )

    return section_re.sub(_rewrite_total, text)
def modify_control_disable(control_text):
    """Return *control_text* with its IS_CONTROL_DISABLE flag set to 1.

    Only the CDATA form with a numeric value is rewritten, i.e.
    ``<IS_CONTROL_DISABLE><![CDATA[n]]></IS_CONTROL_DISABLE>``. Every such
    occurrence is replaced; text without one is returned unchanged.
    """
    disabled_tag = '<IS_CONTROL_DISABLE><![CDATA[1]]></IS_CONTROL_DISABLE>'
    flag_re = re.compile(
        r'<IS_CONTROL_DISABLE><!\[CDATA\[\d+\]\]></IS_CONTROL_DISABLE>')
    return flag_re.sub(disabled_tag, control_text)
def find_control_position_in_orig(orig_controls, control_id):
    """Return the index of the first control in *orig_controls* with *control_id*.

    IDs are compared with the value returned by ``get_control_id``.
    Returns ``-1`` when no control carries that ID.
    """
    return next(
        (index for index, block in enumerate(orig_controls)
         if get_control_id(block) == control_id),
        -1,
    )
def insert_control_in_section(text, section_number, control_text, position):
    """Insert *control_text* into the SECTION numbered *section_number*.

    Every ``<SECTION>`` whose opening tag is followed (after optional
    whitespace) by ``<NUMBER>section_number</NUMBER>`` is rewritten.
    Placement depends on *position*, counted over the section's existing
    ``<CONTROL>...</CONTROL>`` blocks:

    * negative: the section is left unchanged;
    * at or beyond the number of existing blocks: after the last
      ``</CONTROL>``, or, when there is none, directly after the opening
      ``<CONTROLS total="...">`` tag;
    * ``0``: directly after the opening ``<CONTROLS total="...">`` tag;
    * otherwise: immediately before the block currently at that index.

    When the required ``<CONTROLS total="...">`` tag is absent the section
    is left unchanged.

    Args:
        text: The XML document as a string.
        section_number: The section number (string or int). It is matched
            literally, so regex metacharacters in it have no special
            meaning.
        control_text: The complete ``<CONTROL>...</CONTROL>`` text to add.
        position: Zero-based index at which the control should appear.

    Returns:
        The updated document; identical to *text* when nothing matched.
    """
    closing_tag = '</CONTROL>'
    # Escape the number so that e.g. "1.5" cannot match "1x5".
    number = re.escape(str(section_number))
    section_re = re.compile(
        rf'<SECTION>\s*<NUMBER>{number}</NUMBER>.*?</SECTION>', re.DOTALL)
    control_re = re.compile(r'<CONTROL>.*?</CONTROL>', re.DOTALL)

    def _after_controls_tag(section_text):
        # Place the control right behind the opening <CONTROLS total="..."> tag.
        tag_start = section_text.find('<CONTROLS total="')
        if tag_start == -1:
            return section_text
        tag_end = section_text.find('>', tag_start) + 1
        return (section_text[:tag_end] +
                '\n ' + control_text +
                section_text[tag_end:])

    def _insert(section_match):
        section_text = section_match.group(0)
        if position < 0:
            # No block can sit at a negative index; leave the section alone.
            return section_text
        existing = list(control_re.finditer(section_text))
        if position >= len(existing):
            # Append behind the last closing tag, if there is one.
            last_close = section_text.rfind(closing_tag)
            if last_close == -1:
                return _after_controls_tag(section_text)
            cut = last_close + len(closing_tag)
            return (section_text[:cut] +
                    '\n ' + control_text +
                    section_text[cut:])
        if position == 0:
            return _after_controls_tag(section_text)
        # Insert in front of the block that currently occupies the index.
        cut = existing[position].start()
        return (section_text[:cut] +
                control_text + '\n ' +
                section_text[cut:])

    return section_re.sub(_insert, text)
def merge_controls(orig_text, ziel_text, verbose=False):
    """Copy CONTROL blocks that exist in ORIG but not in TARGET into TARGET.

    For every distinct ``<NUMBER>n</NUMBER>`` value found in *ziel_text*,
    the CONTROL IDs of that section in both documents are compared. Each
    control missing from the target is copied from the original, passed
    through ``modify_control_disable`` and inserted at the index it has in
    the original section. The section's ``<CONTROLS total="...">`` is then
    set to the resulting number of CONTROL blocks. Progress is printed to
    standard output.

    Args:
        orig_text: Content of the ORIG XML file.
        ziel_text: Content of the TARGET XML file.
        verbose: When true, print a summary of all inserted controls (or a
            note that nothing was inserted) at the end.

    Returns:
        The merged document text.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so a
    # number that occurs more than once is not processed (and its missing
    # controls inserted) a second time.
    section_numbers = list(dict.fromkeys(
        re.findall(r'<NUMBER>(\d+)</NUMBER>', ziel_text)))
    result_text = ziel_text
    inserted_controls = []  # (section number, control ID) pairs

    for section_num in section_numbers:
        print(f"Processing section {section_num}...")
        orig_controls = find_section_controls(orig_text, section_num)
        ziel_controls = find_section_controls(ziel_text, section_num)

        orig_ids = {get_control_id(
            control) for control in orig_controls if get_control_id(control)}
        ziel_ids = {get_control_id(
            control) for control in ziel_controls if get_control_id(control)}
        missing_ids = orig_ids - ziel_ids

        if not missing_ids:
            print(
                f" No missing CONTROL elements in section {section_num}")
            continue

        print(
            f" {len(missing_ids)} missing CONTROL elements found: {missing_ids}")

        # Walk the ORIG controls in document order instead of iterating the
        # set: set order is arbitrary, and inserting a later control before
        # an earlier one would place it at the wrong spot.
        pending = set(missing_ids)
        for position, control in enumerate(orig_controls):
            control_id = get_control_id(control)
            if control_id not in pending:
                continue
            # Only the first ORIG control carrying a given ID is copied.
            pending.discard(control_id)
            modified_control = modify_control_disable(control)
            result_text = insert_control_in_section(
                result_text, section_num, modified_control, position)
            inserted_controls.append((section_num, control_id))
            print(
                f" CONTROL {control_id} was copied and inserted at position {position}")

        # Recount after all insertions so the total attribute stays in sync.
        final_controls = find_section_controls(result_text, section_num)
        result_text = update_controls_total_in_section(
            result_text, section_num, len(final_controls))
        print(f" CONTROLS total updated to {len(final_controls)}")

    if verbose and inserted_controls:
        print("\n" + "="*60)
        print("SUMMARY OF INSERTED CONTROL ELEMENTS:")
        print("="*60)
        for section_num, control_id in inserted_controls:
            print(f" Section {section_num}: CONTROL ID {control_id}")
        print(f"\nTotal inserted: {len(inserted_controls)} CONTROL elements")
        print("="*60)
    elif verbose and not inserted_controls:
        print("\n" + "="*60)
        print("NO CONTROL ELEMENTS INSERTED")
        print("All CONTROL elements from the ORIG file are already present in the TARGET file.")
        print("="*60)
    return result_text
def main():
    """Command-line entry point: read both inputs, merge them, write the result.

    Reads the ORIG and TARGET files as UTF-8 text, runs ``merge_controls``
    on them and writes the merged text to the output path. Any failure is
    reported on standard output and the process exits with status 1.
    """
    args = parse_arguments()
    try:
        print(f"Loading ORIG file: {args.orig_file}")
        with open(args.orig_file, encoding='utf-8') as orig_handle:
            orig_text = orig_handle.read()

        print(f"Loading TARGET file: {args.ziel_file}")
        with open(args.ziel_file, encoding='utf-8') as target_handle:
            target_text = target_handle.read()

        # The output starts out as the unmodified TARGET text.
        print("Creating 1:1 copy of the TARGET file as OUT file...")

        print("Performing CONTROL merge...")
        merged_text = merge_controls(orig_text, target_text, args.verbose)

        print(f"Saving output file: {args.out_file}")
        with open(args.out_file, 'w', encoding='utf-8') as out_handle:
            out_handle.write(merged_text)

        print("Processing completed successfully!")
    except FileNotFoundError as e:
        print(f"Error: File not found - {e}")
        raise SystemExit(1)
    except Exception as e:
        # Top-level boundary: report anything unexpected and exit non-zero.
        print(f"Unexpected error - {e}")
        raise SystemExit(1)
# Run the merger only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()