Skip to content

Commit 6d04f89

Browse files
authored
Merge pull request #2780 from amnonh/ethtool
scylla-os: Add ethtool section
2 parents 1268a1a + b4af3c8 commit 6d04f89

File tree

2 files changed

+108
-0
lines changed

2 files changed

+108
-0
lines changed

grafana/scylla-os.template.json

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,94 @@
585585
],
586586
"title": "New row"
587587
},
588+
{
589+
"class": "row",
590+
"dashversion":[">2025.4"],
591+
"panels": [
592+
{
593+
"collapsed": true,
594+
"datasource": null,
595+
"id": "auto",
596+
"gridPos": {
597+
"h": 1,
598+
"w": 24
599+
},
600+
"panels": [],
601+
"title": "ethtool limitations - only applicable if ethtool is enabled on nodetool_exporter",
602+
"type": "row"
603+
}
604+
]
605+
},
606+
{
607+
"class": "row",
608+
"dashversion":[">2025.4"],
609+
"panels": [
610+
{
611+
"class": "graph_panel",
612+
"span": 3,
613+
"description": "Outbound bandwidth allowance exceed events per second — the rate at which the NIC attempts to transmit above its allowed egress bandwidth limit (often indicating throttling/shaping).",
614+
"targets": [
615+
{
616+
"expr": "sum(rate(node_ethtool_bw_out_allowance_exceeded{instance=~\"[[node]]\",cluster=\"$cluster\", dc=~\"$dc\"}[3m])) by ([[by]])",
617+
"intervalFactor": 1,
618+
"legendFormat": "",
619+
"metric": "",
620+
"refId": "A",
621+
"step": 1
622+
}
623+
],
624+
"title": "Bandwidth-Out allowance exceeded"
625+
},
626+
{
627+
"class": "graph_panel",
628+
"span": 3,
629+
"description": "Inbound bandwidth allowance exceed events per second — the rate at which the NIC receives above its allowed ingress bandwidth limit (often indicating ingress shaping/throttling).",
630+
"targets": [
631+
{
632+
"expr": "sum(rate(node_ethtool_bw_in_allowance_exceeded{instance=~\"[[node]]\",cluster=\"$cluster\", dc=~\"$dc\"}[3m])) by ([[by]])",
633+
"intervalFactor": 1,
634+
"legendFormat": "",
635+
"metric": "",
636+
"refId": "A",
637+
"step": 1
638+
}
639+
],
640+
"title": "Bandwidth-In allowance exceeded"
641+
},
642+
{
643+
"class": "graph_panel",
644+
"span": 3,
645+
"description": "Conntrack allowance exceed events per second — the rate at which the system/NIC exceeds its allowed connection-tracking (conntrack) limit, often indicating connection pressure and potential drops or throttling.",
646+
"targets": [
647+
{
648+
"expr": "sum(rate(node_ethtool_conntrack_allowance_exceeded{instance=~\"[[node]]\",cluster=\"$cluster\", dc=~\"$dc\"}[3m])) by ([[by]])",
649+
"intervalFactor": 1,
650+
"legendFormat": "",
651+
"metric": "",
652+
"refId": "A",
653+
"step": 1
654+
}
655+
],
656+
"title": "Conntrack allowance exceed"
657+
},
658+
{
659+
"class": "graph_panel",
660+
"span": 3,
661+
"description": "Link-local allowance exceed events per second — the rate at which the NIC exceeds its allowed link-local traffic limit (e.g., ARP/ND or other local control traffic), often indicating local-network chatter and possible throttling",
662+
"targets": [
663+
{
664+
"expr": "sum(rate(node_ethtool_linklocal_allowance_exceeded{instance=~\"[[node]]\",cluster=\"$cluster\", dc=~\"$dc\"}[3m])) by ([[by]])",
665+
"intervalFactor": 1,
666+
"legendFormat": "",
667+
"metric": "",
668+
"refId": "A",
669+
"step": 1
670+
}
671+
],
672+
"title": "Link-local allowance exceed"
673+
}
674+
]
675+
},
588676
{
589677
"class": "monitoring_version_row"
590678
}

prometheus/prom_rules/prometheus.latency.rules.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,26 @@ groups:
552552
labels:
553553
by: "cluster"
554554
dd: "1"
555+
- record: node_ethtool_bw_in_allowance_exceeded_total
556+
expr: sum(node_ethtool_bw_in_allowance_exceeded) by (cluster)
557+
labels:
558+
by: "cluster"
559+
dd: "1"
560+
- record: node_ethtool_bw_out_allowance_exceeded_total
561+
expr: sum(node_ethtool_bw_out_allowance_exceeded) by (cluster)
562+
labels:
563+
by: "cluster"
564+
dd: "1"
565+
- record: node_ethtool_conntrack_allowance_exceeded_total
566+
expr: sum(node_ethtool_conntrack_allowance_exceeded) by (cluster)
567+
labels:
568+
by: "cluster"
569+
dd: "1"
570+
- record: node_ethtool_linklocal_allowance_exceeded_total
571+
expr: sum(node_ethtool_linklocal_allowance_exceeded) by (cluster)
572+
labels:
573+
by: "cluster"
574+
dd: "1"
555575
- record: wlatencyp99
556576
expr: scylla_storage_proxy_coordinator_write_latency_summary{quantile="0.99"}
557577
- record: rlatencyp99

0 commit comments

Comments
 (0)