2222 "alias" : "Engagement Rate" ,
2323}
2424
25- METRIC_ENGAGED_SESSIONS = {
26- "id" : "engagedSessions" ,
27- "alias" : "Engaged Sessions" ,
28- }
29-
3025# Regex matching page paths that are clearly not real pages (bot probes,
3126# broken markdown links, asset requests, etc.).
3227SUSPICIOUS_PAGE_PATH_RE = re .compile (
@@ -49,11 +44,13 @@ def event_key(event):
4944 return event .get ("key" , event ["event_name" ])
5045
5146
52- def _count_events (event_name , params , page_path_regex = None ):
53- """Fetch event count, optionally filtered by page path regex."""
47+ def _count_events (event_name , params , page_path_regex = None , click_url_regex = None ):
48+ """Fetch event count, optionally filtered by page path and/or click URL regex."""
5449 dimensions = [DIMENSION_EVENT_NAME ]
5550 if page_path_regex :
5651 dimensions .append (DIMENSION_PAGE_PATH )
52+ if click_url_regex :
53+ dimensions .append (DIMENSION_CUSTOM_URL )
5754
5855 df = get_data_df_from_fields (
5956 [METRIC_EVENT_COUNT ],
@@ -66,27 +63,29 @@ def _count_events(event_name, params, page_path_regex=None):
6663 return 0
6764
6865 if page_path_regex :
69- pattern = re .compile (page_path_regex )
70- mask = df [DIMENSION_PAGE_PATH ["alias" ]].str .match (pattern , na = False )
71- return int (df .loc [mask , METRIC_EVENT_COUNT ["alias" ]].sum ())
66+ df = df [df [DIMENSION_PAGE_PATH ["alias" ]].str .match (page_path_regex , na = False )]
67+
68+ if click_url_regex :
69+ df = df [df [DIMENSION_CUSTOM_URL ["alias" ]].str .contains (click_url_regex , na = False )]
7270
7371 return int (df [METRIC_EVENT_COUNT ["alias" ]].sum ())
7472
7573
76- def get_custom_event_change (event_name , params_current , params_prior , page_path_regex = None ):
74+ def get_custom_event_change (event_name , params_current , params_prior , page_path_regex = None , click_url_regex = None ):
7775 """Fetch a custom event count with month-over-month change.
7876
7977 Args:
8078 event_name: GA4 event name (e.g., "chat_submitted").
8179 params_current: Analytics params for the current period.
8280 params_prior: Analytics params for the prior period.
8381 page_path_regex: Optional regex to filter by page path.
82+ click_url_regex: Optional regex to filter by click URL.
8483
8584 Returns:
8685 Dict with "current", "prior", and "change" keys.
8786 """
88- current_count = _count_events (event_name , params_current , page_path_regex )
89- prior_count = _count_events (event_name , params_prior , page_path_regex )
87+ current_count = _count_events (event_name , params_current , page_path_regex , click_url_regex )
88+ prior_count = _count_events (event_name , params_prior , page_path_regex , click_url_regex )
9089
9190 change = None
9291 if prior_count > 0 :
@@ -95,21 +94,26 @@ def get_custom_event_change(event_name, params_current, params_prior, page_path_
9594 return {"current" : current_count , "prior" : prior_count , "change" : change }
9695
9796
98- def get_event_detail_table (event_name , params , page_path_regex = None ):
97+ def get_event_detail_table (event_name , params , page_path_regex = None , click_url_regex = None ):
9998 """Fetch event details broken down by page path and entity name.
10099
101100 Args:
102101 event_name: GA4 event name.
103102 params: Analytics params for the period.
104103 page_path_regex: Optional regex to filter by page path.
104+ click_url_regex: Optional regex to filter by click URL.
105105
106106 Returns:
107107 List of dicts with "page_path", "entity_name", and "count" keys,
108- sorted by count descending.
108+ sorted by count descending. Includes "click_url" when click_url_regex is used.
109109 """
110+ dimensions = [DIMENSION_EVENT_NAME , DIMENSION_PAGE_PATH , DIMENSION_ENTITY_NAME ]
111+ if click_url_regex :
112+ dimensions .append (DIMENSION_CUSTOM_URL )
113+
110114 df = get_data_df_from_fields (
111115 [METRIC_EVENT_COUNT ],
112- [ DIMENSION_EVENT_NAME , DIMENSION_PAGE_PATH , DIMENSION_ENTITY_NAME ] ,
116+ dimensions ,
113117 dimension_filter = f"eventName=={ event_name } " ,
114118 ** params ,
115119 )
@@ -118,14 +122,25 @@ def get_event_detail_table(event_name, params, page_path_regex=None):
118122 return []
119123
120124 if page_path_regex :
121- pattern = re .compile (page_path_regex )
122- df = df [df [DIMENSION_PAGE_PATH ["alias" ]].str .match (pattern , na = False )]
125+ df = df [df [DIMENSION_PAGE_PATH ["alias" ]].str .match (page_path_regex , na = False )]
126+
127+ if click_url_regex :
128+ df = df [df [DIMENSION_CUSTOM_URL ["alias" ]].str .contains (click_url_regex , na = False )]
123129
124130 if len (df ) == 0 :
125131 return []
126132
127- result = df [[DIMENSION_PAGE_PATH ["alias" ], DIMENSION_ENTITY_NAME ["alias" ], METRIC_EVENT_COUNT ["alias" ]]].copy ()
128- result .columns = ["page_path" , "entity_name" , "count" ]
133+ export_cols = [DIMENSION_PAGE_PATH ["alias" ], DIMENSION_ENTITY_NAME ["alias" ]]
134+ output_names = ["page_path" , "entity_name" ]
135+ if click_url_regex :
136+ export_cols .append (DIMENSION_CUSTOM_URL ["alias" ])
137+ output_names .append ("click_url" )
138+ export_cols .append (METRIC_EVENT_COUNT ["alias" ])
139+ output_names .append ("count" )
140+
141+ result = df [export_cols ].copy ()
142+ result .columns = output_names
143+ result ["count" ] = result ["count" ].astype (int )
129144 result = result .sort_values ("count" , ascending = False )
130145 return result .to_dict (orient = "records" )
131146
@@ -396,15 +411,13 @@ def fetch_data(
396411
397412 print ("Fetching sessions and engagement data..." )
398413 df_sessions_current = get_data_df_from_fields (
399- [METRIC_SESSIONS , METRIC_ENGAGED_SESSIONS , METRIC_ENGAGEMENT_RATE ], [], ** params ,
414+ [METRIC_SESSIONS , METRIC_ENGAGEMENT_RATE ], [], ** params ,
400415 )
401416 df_sessions_prior = get_data_df_from_fields (
402- [METRIC_SESSIONS , METRIC_ENGAGED_SESSIONS , METRIC_ENGAGEMENT_RATE ], [], ** params_prior ,
417+ [METRIC_SESSIONS , METRIC_ENGAGEMENT_RATE ], [], ** params_prior ,
403418 )
404419 sessions_current = int (df_sessions_current [METRIC_SESSIONS ["alias" ]].sum ()) if len (df_sessions_current ) > 0 else 0
405420 sessions_prior = int (df_sessions_prior [METRIC_SESSIONS ["alias" ]].sum ()) if len (df_sessions_prior ) > 0 else 0
406- engaged_sessions_current = int (df_sessions_current [METRIC_ENGAGED_SESSIONS ["alias" ]].sum ()) if len (df_sessions_current ) > 0 else 0
407- engaged_sessions_prior = int (df_sessions_prior [METRIC_ENGAGED_SESSIONS ["alias" ]].sum ()) if len (df_sessions_prior ) > 0 else 0
408421 engagement_current = float (df_sessions_current [METRIC_ENGAGEMENT_RATE ["alias" ]].mean ()) if len (df_sessions_current ) > 0 else 0
409422 engagement_prior = float (df_sessions_prior [METRIC_ENGAGEMENT_RATE ["alias" ]].mean ()) if len (df_sessions_prior ) > 0 else 0
410423
@@ -440,10 +453,6 @@ def fetch_data(
440453 "current" : sessions_current ,
441454 "prior" : sessions_prior ,
442455 },
443- "engaged_sessions" : {
444- "current" : engaged_sessions_current ,
445- "prior" : engaged_sessions_prior ,
446- },
447456 "engagement_rate" : {
448457 "current" : engagement_current ,
449458 "prior" : engagement_prior ,
@@ -470,12 +479,14 @@ def fetch_data(
470479 data [f"event_{ key } " ] = get_custom_event_change (
471480 event ["event_name" ], params , params_prior ,
472481 page_path_regex = event .get ("page_path_regex" ),
482+ click_url_regex = event .get ("click_url_regex" ),
473483 )
474484 if event .get ("detail_table" ):
475485 print (f"Fetching { event ['label' ]} detail table..." )
476486 data [f"event_{ key } _detail" ] = get_event_detail_table (
477487 event ["event_name" ], params ,
478488 page_path_regex = event .get ("page_path_regex" ),
489+ click_url_regex = event .get ("click_url_regex" ),
479490 )
480491
481492 if event_charts :
0 commit comments