From e3fa8c11dce18ad362ddaef99e06f9dd1c0622ae Mon Sep 17 00:00:00 2001 From: Alan Guo Date: Fri, 2 May 2025 12:47:53 -0700 Subject: [PATCH 1/2] fix grafana dashboards dropdowns for data and train dashboard Signed-off-by: Alan Guo --- .../dashboards/data_grafana_dashboard_base.json | 12 ++++++------ .../dashboards/train_grafana_dashboard_base.json | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/ray/dashboard/modules/metrics/dashboards/data_grafana_dashboard_base.json b/python/ray/dashboard/modules/metrics/dashboards/data_grafana_dashboard_base.json index 2d8fe951d0ecb..dea96d4513b25 100644 --- a/python/ray/dashboard/modules/metrics/dashboards/data_grafana_dashboard_base.json +++ b/python/ray/dashboard/modules/metrics/dashboards/data_grafana_dashboard_base.json @@ -46,7 +46,7 @@ "selected": false }, "datasource": "${datasource}", - "definition": "label_values(ray_data_allocated_bytes{{{global_filters}}}, SessionName)", + "definition": "query_result(count by (SessionName)(last_over_time(ray_data_output_bytes{{{global_filters}}}[$__range])))", "description": "Filter queries to specific ray sessions.", "error": null, "hide": 0, @@ -56,11 +56,11 @@ "name": "SessionName", "options": [], "query": { - "query": "label_values(ray_data_allocated_bytes{{{global_filters}}}, SessionName)", + "query": "query_result(count by (SessionName)(last_over_time(ray_data_output_bytes{{{global_filters}}}[$__range])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": "", + "regex": "{SessionName=\"(?<value>.*)\".*", "skipUrlSync": false, "sort": 2, "tagValuesQuery": "", @@ -81,7 +81,7 @@ ] }, "datasource": "${datasource}", - "definition": "label_values(ray_data_allocated_bytes{{{global_filters}}}, dataset)", + "definition": "query_result(count by (dataset)(last_over_time(ray_data_output_bytes{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "description": null, "error": null, "hide": 0, @@ -91,11 +91,11 @@ "name": "DatasetID", 
"options": [], "query": { - "query": "label_values(ray_data_allocated_bytes{{{global_filters}}}, dataset)", + "query": "query_result(count by (dataset)(last_over_time(ray_data_output_bytes{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "refId": "Prometheus-Dataset-Variable-Query" }, "refresh": 2, - "regex": "", + "regex": "{dataset=\"(?.*)\".*", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", diff --git a/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json b/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json index 7aeb1764a4a81..02bd8db817afc 100644 --- a/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json +++ b/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json @@ -46,7 +46,7 @@ "selected": false }, "datasource": "${datasource}", - "definition": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, SessionName)", + "definition": "query_result(count by (SessionName)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "description": "Filter queries to specific ray sessions.", "error": null, "hide": 0, @@ -56,11 +56,11 @@ "name": "SessionName", "options": [], "query": { - "query": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, SessionName)", + "query": "query_result(count by (SessionName)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": "", + "regex": "{SessionName=\"(?.*)\".*", "skipUrlSync": false, "sort": 2, "tagValuesQuery": "", @@ -75,7 +75,7 @@ "selected": false }, "datasource": "${datasource}", - "definition": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, ray_train_run_name)", + "definition": "query_result(count by 
(ray_train_run_name)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "description": "Filter queries to specific ray sessions.", "error": null, "hide": 0, @@ -85,11 +85,11 @@ "name": "TrainRunName", "options": [], "query": { - "query": "label_values(ray_train_report_total_blocked_time_s{{{global_filters}}}, ray_train_run_name)", + "query": "query_result(count by (ray_train_run_name)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": "", + "regex": "{ray_train_run_name=\"(?<value>.*)\".*", "skipUrlSync": false, "sort": 2, "tagValuesQuery": "", From 2f73cde9a170ed0ac2f77c537aa2354ddbc2b79a Mon Sep 17 00:00:00 2001 From: Alan Guo Date: Fri, 2 May 2025 15:28:26 -0700 Subject: [PATCH 2/2] fixup Signed-off-by: Alan Guo --- .../dashboards/train_grafana_dashboard_base.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json b/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json index 02bd8db817afc..04651a75fb186 100644 --- a/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json +++ b/python/ray/dashboard/modules/metrics/dashboards/train_grafana_dashboard_base.json @@ -46,7 +46,7 @@ "selected": false }, "datasource": "${datasource}", - "definition": "query_result(count by (SessionName)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", + "definition": "query_result(count by (SessionName)(last_over_time(ray_train_worker_group_start_total_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "description": "Filter queries to specific ray sessions.", "error": null, "hide": 0, @@ -56,7 +56,7 @@ "name": "SessionName", "options": [], "query": { - 
"query": "query_result(count by (SessionName)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", + "query": "query_result(count by (SessionName)(last_over_time(ray_train_worker_group_start_total_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -75,8 +75,8 @@ "selected": false }, "datasource": "${datasource}", - "definition": "query_result(count by (ray_train_run_name)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", - "description": "Filter queries to specific ray sessions.", + "definition": "query_result(count by (ray_train_run_name)(last_over_time(ray_train_worker_group_start_total_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", + "description": "Filter queries to specific Ray Train run names.", "error": null, "hide": 0, "includeAll": true, @@ -85,7 +85,7 @@ "name": "TrainRunName", "options": [], "query": { - "query": "query_result(count by (ray_train_run_name)(last_over_time(ray_train_report_total_blocked_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", + "query": "query_result(count by (ray_train_run_name)(last_over_time(ray_train_worker_group_start_total_time_s{{SessionName=~\"$SessionName\",{global_filters}}}[$__range])))", "refId": "StandardVariableQuery" }, "refresh": 2,