|
10 | 10 | from urllib3 import disable_warnings |
11 | 11 | import yaml |
12 | 12 | from pathlib import Path |
13 | | - |
| 13 | +from urllib.parse import urlparse, urlunparse, unquote |
14 | 14 |
|
15 | 15 | def load_environment_variables(): |
16 | 16 | """Load required environment variables for Splunk connection.""" |
@@ -114,6 +114,70 @@ def send_data_to_splunk(file_path, splunk_host, hec_token, event_host_uuid, |
114 | 114 | except Exception as e: |
115 | 115 | print(f":x: Error sending {file_path} to Splunk HEC: {e}") |
116 | 116 |
|
def parse_old_attack_yml_data_file(yml_file_path,
                                   index_override,
                                   source_override,
                                   sourcetype_override,
                                   host_uuid):
    """Parse a legacy attack-data YAML file into the (id, datasets, defaults) shape.

    Older attack-data YAML files carry a ``dataset`` field (a GitHub URL, or a
    list of URLs) instead of a structured ``datasets`` list. This synthesizes
    the modern structure from the first URL, using the caller-supplied
    overrides for index/source/sourcetype.

    Args:
        yml_file_path: Path to the legacy YAML file.
        index_override: Splunk index to use; required (empty/None aborts).
        source_override: Splunk source to use; required (empty/None aborts).
        sourcetype_override: Splunk sourcetype to use; required (empty/None aborts).
        host_uuid: Identifier returned as the file id.

    Returns:
        Tuple of (file_id, datasets_list, default_metadata_dict), or
        (None, [], {}) when overrides are missing or parsing fails.
    """
    print("Processing old attack data yml file")
    # All three overrides are mandatory for synthesizing dataset metadata;
    # `not x` also rejects None, not just "".
    if not index_override or not source_override or not sourcetype_override:
        return None, [], {}

    try:
        with open(yml_file_path, 'r') as file:
            data = yaml.safe_load(file)

        file_id = host_uuid

        # 'dataset' may be a single URL string or a list of URLs; we only
        # use the first entry. Anything else (missing key, empty list, other
        # types) is an explicit error rather than an unbound-name crash.
        d = data.get('dataset')
        if isinstance(d, list) and d:
            dataset_val = d[0]
        elif isinstance(d, str):
            dataset_val = d
        else:
            raise ValueError(f"Missing or unsupported 'dataset' field in {yml_file_path}")

        p = urlparse(dataset_val)
        if not p.scheme or not p.netloc:
            raise ValueError(f"Unsupported GitHub URL format: {dataset_val}")

        # Derive the dataset name from the URL *path* (not the raw URL, which
        # could carry a query string), dropping the file extension.
        name_value = os.path.basename(p.path).split(".")[0]

        # Keep everything after the first 'master' segment as the repo-relative
        # path; partition() tolerates repeated occurrences, unlike a two-way
        # split unpack.
        _, sep, path_value = p.path.partition("master")
        if not sep:
            raise ValueError(f"Unsupported GitHub URL format: {dataset_val}")

        # Synthesized modern structure, e.g.:
        # "datasets": [
        #   {
        #     "name": "windows-sysmon_creddump",
        #     "path": "/datasets/attack_techniques/T1003.001/atomic_red_team/windows-sysmon_creddump.log",
        #     "sourcetype": "XmlWinEventLog",
        #     "source": "XmlWinEventLog:Microsoft-Windows-Sysmon/Operational"
        #   }
        # ]
        datasets = [
            {
                "name": name_value,
                "path": path_value,
                "sourcetype": sourcetype_override,
                "source": source_override,
            }
        ]

        # Default metadata comes straight from the overrides for legacy files.
        return file_id, datasets, {
            'index': index_override,
            'source': source_override,
            'sourcetype': sourcetype_override,
        }

    except Exception as e:
        # Best-effort parser: report and signal "no valid data" to the caller.
        print(f"Error parsing {yml_file_path}: {e}")
        return None, [], {}
117 | 181 |
|
118 | 182 | def main(): |
119 | 183 | parser = argparse.ArgumentParser( |
@@ -205,8 +269,12 @@ def main(): |
205 | 269 | file_id, datasets, defaults = parse_data_yml(yml_file) |
206 | 270 |
|
207 | 271 | if not file_id or not datasets: |
208 | | - print(f"Skipping {yml_file} - no valid data found") |
209 | | - continue |
| 272 | + |
| 273 | + file_id, datasets, defaults = parse_old_attack_yml_data_file(yml_file, args.index_override, args.source_override, args.sourcetype_override, args.host_uuid) |
| 274 | + |
| 275 | + if not file_id or not datasets: |
| 276 | + print(f"Skipping {yml_file} - no valid data found") |
| 277 | + continue |
210 | 278 |
|
211 | 279 | # Use the id from YAML file as host field (unless user provided one) |
212 | 280 | event_host_uuid = args.host_uuid or file_id |
|
0 commit comments