Merge pull request #686 from karle-nishi/add-query-tags

sathiish-kumar · web-flow · commit 11b5785e9ff8 · 2023-05-09T10:55:24.000-07:00
Added query tags depending upon Simple Replay or Node Config
diff --git a/src/SimpleReplay/replay.py b/src/SimpleReplay/replay.py
@@ -68,8 +68,12 @@
 
 g_is_serverless = False
 
-g_serverless_cluster_endpoint_pattern = r"(.+)\.(.+)\.(.+).redshift-serverless(-dev)?\.amazonaws\.com:[0-9]{4,5}\/(.)+"
-g_cluster_endpoint_pattern = r"(.+)\.(.+)\.(.+).redshift(-serverless)?\.amazonaws\.com:[0-9]{4,5}\/(.)+"
+g_serverless_cluster_endpoint_pattern = (
+    r"(.+)\.(.+)\.(.+).redshift-serverless(-dev)?\.amazonaws\.com:[0-9]{4,5}\/(.)+"
+)
+g_cluster_endpoint_pattern = (
+    r"(.+)\.(.+)\.(.+).redshift(-serverless)?\.amazonaws\.com:[0-9]{4,5}\/(.)+"
+)
 
 
 class ConnectionLog:
@@ -393,11 +397,15 @@ def save_query_stats(self, starttime, endtime, xid, query_idx):
                 )
 
     def get_tagged_sql(self, query_text, idx, transaction, connection):
-        json_tags = {
-            "xid": transaction.xid,
-            "query_idx": idx,
-            "replay_start": g_replay_timestamp.isoformat(),
-        }
+        """Return query_text prefixed with a SQL comment holding JSON replay tags."""
+        # Build the tags unconditionally so json_tags is always bound, even
+        # when source_tag is missing or empty in the config.
+        json_tags = {
+            "xid": transaction.xid,
+            "query_idx": idx,
+            "replay_start": g_replay_timestamp.isoformat(),
+            "source": g_config.get("source_tag") or "SimpleReplay",
+        }
         return "/* {} */ {}".format(json.dumps(json_tags), query_text)
 
     def execute_transaction(self, transaction, connection):
@@ -2262,7 +2270,7 @@ def init_manager():
     logger.info(
         f"Replay finished in {datetime.datetime.now(tz=datetime.timezone.utc) - g_replay_timestamp}."
     )
-    
+
     if g_config.get("analysis_iam_role") and g_config.get("analysis_output"):
         try:
             run_replay_analysis(
diff --git a/src/SimpleReplay/replay.yaml b/src/SimpleReplay/replay.yaml
@@ -1,14 +1,16 @@
 # Optional - Custom identifier for this replay run
 tag: ""
 
+# Optional - Value written to the "source" field of the tag comment prepended to each replayed query
+source_tag: ""
+
 # Directory location of extracted workload, relative to current directory
 workload_location: ""
 
 # Endpoint and username of target cluster to replay queries on
 target_cluster_endpoint: "host:port/database"
 target_cluster_region: ""
 master_username: ""
-
 # NLB or NAT endpoint for Simple Replay to connect to. This NLB or NAT should have connectivity to target_cluster_endpoint
 nlb_nat_dns: ""
 
@@ -42,7 +44,7 @@ replay_output: ""
 analysis_output: ""
 
 # Optional - Leaving this blank means UNLOADs will not be replayed. IAM role for UNLOADs to be performed with.
-unload_iam_role: ""
+unload_iam_role:  ""
 
 # Optional - Leaving this blank means analysis will not be run. IAM role for analysis needs UNLOAD access.
 analysis_iam_role: ""
@@ -71,7 +73,7 @@ filters:
 ##
 
 # Set the amount of logging
-log_level: "info"
+log_level: "INFO"
 
 # number of proceses to use to parallelize the work. If omitted or null, uses
 # one process per cpu - 1 
@@ -95,4 +97,4 @@ split_multi: true
 
 # In case of Serverless, set up a secret to store admin username and password. Specify the name of the secret below
 # Note: This admin username maps to the username specified as `master_username` in this file.  This will be updated to `admin_username` in a future release.
-secret_name: ""
+secret_name: ""