|
10 | 10 | from urllib3 import disable_warnings |
11 | 11 | import yaml |
12 | 12 | from pathlib import Path |
13 | | - |
| 13 | +from urllib.parse import urlparse, urlunparse, unquote |
14 | 14 |
|
15 | 15 | def load_environment_variables(): |
16 | 16 | """Load required environment variables for Splunk connection.""" |
@@ -114,6 +114,70 @@ def send_data_to_splunk(file_path, splunk_host, hec_token, event_host_uuid, |
114 | 114 | except Exception as e: |
115 | 115 | print(f":x: Error sending {file_path} to Splunk HEC: {e}") |
116 | 116 |
|
def parse_old_attack_yml_data_file(yml_file_path,
                                   index_override,
                                   source_override,
                                   sourcetype_override,
                                   host_uuid):
    """Parse a legacy attack-data YAML file into the (id, datasets, defaults) shape.

    Older attack-data YAML files carry a ``dataset`` field (a GitHub URL, or a
    list of URLs) instead of a structured ``datasets`` list. This synthesizes
    the modern structure from the first URL, using the caller-supplied
    overrides for index/source/sourcetype.

    Args:
        yml_file_path: Path to the legacy YAML file.
        index_override: Splunk index to use; required (empty/None aborts).
        source_override: Splunk source to use; required (empty/None aborts).
        sourcetype_override: Splunk sourcetype to use; required (empty/None aborts).
        host_uuid: Identifier returned as the file id.

    Returns:
        Tuple of (file_id, datasets_list, default_metadata_dict), or
        (None, [], {}) when overrides are missing or parsing fails.
    """
    print("Processing old attack data yml file")
    # All three overrides are mandatory for synthesizing dataset metadata;
    # `not x` also rejects None, not just "".
    if not index_override or not source_override or not sourcetype_override:
        return None, [], {}

    try:
        with open(yml_file_path, 'r') as file:
            data = yaml.safe_load(file)

        file_id = host_uuid

        # 'dataset' may be a single URL string or a list of URLs; we only
        # use the first entry. Anything else (missing key, empty list, other
        # types) is an explicit error rather than an unbound-name crash.
        d = data.get('dataset')
        if isinstance(d, list) and d:
            dataset_val = d[0]
        elif isinstance(d, str):
            dataset_val = d
        else:
            raise ValueError(f"Missing or unsupported 'dataset' field in {yml_file_path}")

        p = urlparse(dataset_val)
        if not p.scheme or not p.netloc:
            raise ValueError(f"Unsupported GitHub URL format: {dataset_val}")

        # Derive the dataset name from the URL *path* (not the raw URL, which
        # could carry a query string), dropping the file extension.
        name_value = os.path.basename(p.path).split(".")[0]

        # Keep everything after the first 'master' segment as the repo-relative
        # path; partition() tolerates repeated occurrences, unlike a two-way
        # split unpack.
        _, sep, path_value = p.path.partition("master")
        if not sep:
            raise ValueError(f"Unsupported GitHub URL format: {dataset_val}")

        # Synthesized modern structure, e.g.:
        # "datasets": [
        #   {
        #     "name": "windows-sysmon_creddump",
        #     "path": "/datasets/attack_techniques/T1003.001/atomic_red_team/windows-sysmon_creddump.log",
        #     "sourcetype": "XmlWinEventLog",
        #     "source": "XmlWinEventLog:Microsoft-Windows-Sysmon/Operational"
        #   }
        # ]
        datasets = [
            {
                "name": name_value,
                "path": path_value,
                "sourcetype": sourcetype_override,
                "source": source_override,
            }
        ]

        # Default metadata comes straight from the overrides for legacy files.
        return file_id, datasets, {
            'index': index_override,
            'source': source_override,
            'sourcetype': sourcetype_override,
        }

    except Exception as e:
        # Best-effort parser: report and signal "no valid data" to the caller.
        print(f"Error parsing {yml_file_path}: {e}")
        return None, [], {}
117 | 181 |
|
118 | 182 | def main(): |
119 | 183 | parser = argparse.ArgumentParser( |
@@ -205,8 +269,12 @@ def main(): |
205 | 269 | file_id, datasets, defaults = parse_data_yml(yml_file) |
206 | 270 |
|
207 | 271 | if not file_id or not datasets: |
208 | | - print(f"Skipping {yml_file} - no valid data found") |
209 | | - continue |
| 272 | + |
| 273 | + file_id, datasets, defaults = parse_old_attack_yml_data_file(yml_file, args.index_override, args.source_override, args.sourcetype_override, args.host_uuid) |
| 274 | + |
| 275 | + if not file_id or not datasets: |
| 276 | + print(f"Skipping {yml_file} - no valid data found") |
| 277 | + continue |
210 | 278 |
|
211 | 279 | # Use the id from YAML file as host field (unless user provided one) |
212 | 280 | event_host_uuid = args.host_uuid or file_id |
|
0 commit comments