diff --git a/.gitignore b/.gitignore index 7532db7..f1e44aa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ build dist -.vscode \ No newline at end of file +.vscode +dashboards/openstack-libvirt-dashboard/vendor +dashboards/openstack-libvirt-dashboard/jsonnetfile.lock.json \ No newline at end of file diff --git a/dashboards/openstack-libvirt-dashboard/jsonnetfile.json b/dashboards/openstack-libvirt-dashboard/jsonnetfile.json new file mode 100644 index 0000000..c5938f1 --- /dev/null +++ b/dashboards/openstack-libvirt-dashboard/jsonnetfile.json @@ -0,0 +1,15 @@ +{ + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v11.4.0" + } + }, + "version": "main" + } + ], + "legacyImports": true +} diff --git a/dashboards/openstack-libvirt-dashboard/lib/query.jsonnet b/dashboards/openstack-libvirt-dashboard/lib/query.jsonnet new file mode 100644 index 0000000..a04a248 --- /dev/null +++ b/dashboards/openstack-libvirt-dashboard/lib/query.jsonnet @@ -0,0 +1,124 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet'; + +{ + power_state: g.query.prometheus.new('${datasource}', 'libvirt_domain_info_state{domain="$vm_id"}'), + cpu_allocated: g.query.prometheus.new('${datasource}', 'libvirt_domain_vcpu_current{domain="$vm_id"}'), + memory_allocated: g.query.prometheus.new('${datasource}', 'libvirt_domain_memory_stats_maximum_bytes{domain="$vm_id"}'), + disk_count: g.query.prometheus.new('${datasource}', 'count(libvirt_domain_block_stats_info{domain="$vm_id"})'), + network_port_count: g.query.prometheus.new('${datasource}', 'count(libvirt_domain_interface_stats_info{domain="$vm_id"})'), + + cpu_usage_percentage: g.query.prometheus.new( + '${datasource}', + '(rate(libvirt_domain_info_cpu_time_seconds_total{domain="$vm_id"}[5m])/libvirt_domain_vcpu_current{domain="$vm_id"}) * 100' + ) + g.query.prometheus.withLegendFormat('{{domain}}'), + 
cpu_steal_pecentage: g.query.prometheus.new( + '${datasource}', + '(sum by (domain) (rate(libvirt_domain_vcpu_delay_seconds_total{domain="$vm_id"}[5m]))/sum by (domain) (rate(libvirt_domain_vcpu_time_seconds_total{domain="$vm_id"}[5m]))) * 100' + ) + g.query.prometheus.withLegendFormat('{{domain}}'), + + memory_usage_percentage: g.query.prometheus.new( + '${datasource}', + 'libvirt_domain_memory_stats_used_percent{domain="$vm_id"}' + ) + g.query.prometheus.withLegendFormat('{{domain}}'), + + memory_swap_bytes: g.query.prometheus.new( + '${datasource}', + 'sum by (domain)(\n rate(libvirt_domain_memory_stats_swap_in_bytes{domain="$vm_id"}[2m])\n+\n rate(libvirt_domain_memory_stats_swap_out_bytes{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{domain}}'), + + storage_iops_total: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_(read|write)_requests_total", domain="$vm_id"} > 0\n )[2m:30s]\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_iops_read: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_read_requests_total", domain="$vm_id"} > 0\n )[2m:30s]\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_iops_write: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_write_requests_total", domain="$vm_id"} > 0\n )[2m:30s]\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_throughput_total: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_(read|write)_bytes_total", domain="$vm_id"} > 0\n )[2m:30s]\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_throughput_read: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by 
(domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_read_bytes_total", domain="$vm_id"} > 0\n )[2m:30s]\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_throughput_write: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_write_bytes_total", domain="$vm_id"} > 0\n )[2m:30s]\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_latency_read: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_read_time_seconds_total", domain="$vm_id"} > 0\n )[2m:30s]\n)\n/\nrate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_read_requests_total", domain="$vm_id"} > 0\n )[2m:30s]\n) * 1000' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_latency_write: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_write_time_seconds_total", domain="$vm_id"} > 0\n )[2m:30s]\n)\n/\nrate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_write_requests_total", domain="$vm_id"} > 0\n )[2m:30s]\n) * 1000' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + storage_average_block_size: g.query.prometheus.new( + '${datasource}', + 'rate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_(read|write)_bytes_total", domain="$vm_id"} > 0\n )[2m:30s]\n)\n/\nrate(\n sum by (domain, target_device) (\n {__name__=~"libvirt_domain_block_stats_(read|write)_requests_total", domain="$vm_id"} > 0\n )[2m:30s]\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + newtork_throughput_total: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_bytes_total{domain="$vm_id"}[2m])\n+\n 
rate(libvirt_domain_interface_stats_transmit_bytes_total{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + newtork_throughput_receive: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_bytes_total{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + newtork_throughput_transmit: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_bytes_total{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + network_packet_total: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_packets_total{domain="$vm_id"}[2m])\n+\n rate(libvirt_domain_interface_stats_receive_packets_total{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + network_packet_receive: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_packets_total{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + network_packet_transmit: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_packets_total{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + network_errors_receive: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_errors_total{domain="$vm_id"}[2m])\n)\n' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + network_errors_transmit: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_errors_total{domain="$vm_id"}[2m])\n)\n' + ) + 
g.query.prometheus.withLegendFormat('{{target_device}}'), + + network_drops_receive: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_drops_total{domain="$vm_id"}[2m])\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + + network_drops_transmit: g.query.prometheus.new( + '${datasource}', + 'sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_drops_total{domain="$vm_id"}[2m])\n)' + ) + g.query.prometheus.withLegendFormat('{{target_device}}'), + +} diff --git a/dashboards/openstack-libvirt-dashboard/lib/templates.jsonnet b/dashboards/openstack-libvirt-dashboard/lib/templates.jsonnet new file mode 100644 index 0000000..9c5f85d --- /dev/null +++ b/dashboards/openstack-libvirt-dashboard/lib/templates.jsonnet @@ -0,0 +1,47 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet'; + +{ + // Stat panel template for this dashboard: '${datasource}' Prometheus datasource, area graph mode, 'lastNotNull' reducer; noThresholds=true forces a single green threshold step + vmStatPanel(title, targets, description='', unit=null, colorMode='value', mappings=[], noThresholds=false):: + local base = g.panel.stat.new(title) + + g.panel.stat.queryOptions.withDatasource('prometheus', '${datasource}') + + g.panel.stat.queryOptions.withTargets(targets) + + g.panel.stat.options.withGraphMode('area') + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.standardOptions.withNoValue('Requires VM to be powered-on'); + + // Manual field config for thresholds; '+:' merges into the fieldConfig built by base (keeps its noValue text) instead of replacing it + local thresholdConfig = if noThresholds then + { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + // A single step (value 0) = one solid color, no threshold bands + steps: [{ color: 'green', value: 0 }], + }, + }, + }, + } + else + {}; + + base + + g.panel.stat.options.withColorMode(colorMode) + + thresholdConfig // Apply the manual config here + + (if description != '' then g.panel.stat.panelOptions.withDescription(description) else {}) + + (if unit != null then
g.panel.stat.standardOptions.withUnit(unit) else {}) + + (if std.length(mappings) > 0 then g.panel.stat.standardOptions.withMappings(mappings) else {}), + + // Time series panel template: '${datasource}' Prometheus datasource, list legend at the bottom, multi-series tooltip, 'No Data' placeholder. NOTE(review): noThresholds is accepted but never read in this body - confirm whether callers rely on it + timeSeriesPanel(title, targets, unit=null, description='', noThresholds=false):: + g.panel.timeSeries.new(title) + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${datasource}') + + g.panel.timeSeries.queryOptions.withTargets(targets) + + g.panel.timeSeries.options.legend.withDisplayMode('list') + + g.panel.timeSeries.options.legend.withPlacement('bottom') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.standardOptions.withNoValue('No Data') + + (if unit != null then g.panel.timeSeries.standardOptions.withUnit(unit) else {}) + + (if description != '' then g.panel.timeSeries.panelOptions.withDescription(description) else {}), +} diff --git a/dashboards/openstack-libvirt-dashboard/lib/variables.jsonnet b/dashboards/openstack-libvirt-dashboard/lib/variables.jsonnet new file mode 100644 index 0000000..92d1a12 --- /dev/null +++ b/dashboards/openstack-libvirt-dashboard/lib/variables.jsonnet @@ -0,0 +1,25 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet'; + +{ + // datasource: + // g.dashboard.variable.datasource.new('datasource', 'prometheus') + // + g.dashboard.variable.datasource.generalOptions.withLabel('DataSource'), + + project: + g.dashboard.variable.query.new('project_name', 'label_values(libvirt_domain_openstack_info, project_name)') + + g.dashboard.variable.query.withDatasource('prometheus', '${datasource}') + + g.dashboard.variable.query.refresh.onLoad() + + g.dashboard.variable.query.generalOptions.withLabel('Project Name'), + + vmName: + g.dashboard.variable.query.new('vm_name', 'label_values(libvirt_domain_openstack_info{project_name="$project_name"}, instance_name)') + + g.dashboard.variable.query.withDatasource('prometheus', '${datasource}') + + g.dashboard.variable.query.refresh.onLoad() + +
g.dashboard.variable.query.generalOptions.withLabel('VM Name'), + + vmId: + g.dashboard.variable.query.new('vm_id', 'label_values(libvirt_domain_openstack_info{instance_name="$vm_name", project_name="$project_name"}, instance_id)') + + g.dashboard.variable.query.withDatasource('prometheus', '${datasource}') + + g.dashboard.variable.query.refresh.onLoad() + + g.dashboard.variable.query.generalOptions.withLabel('VM ID'), +} diff --git a/dashboards/openstack-libvirt-dashboard/libvirt-openstack.json b/dashboards/openstack-libvirt-dashboard/libvirt-openstack.json new file mode 100644 index 0000000..6ff4703 --- /dev/null +++ b/dashboards/openstack-libvirt-dashboard/libvirt-openstack.json @@ -0,0 +1,1533 @@ +{ + "__inputs": [ + { + "description": "Select your Prometheus datasource", + "label": "Prometheus", + "name": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "9.0.0" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + } + ], + "description": "Comprehensive monitoring for Libvirt virtual machines running on OpenStack. Visualizes CPU, memory, disk I/O, and network traffic per instance. Requires the inovex/prometheus-libvirt-exporter to be installed and scraping metrics.", + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows the current power state of the VM. 
Requires the VM to be powered-on to return a value.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "1": { + "color": "green", + "text": "ON" + }, + "5": { + "color": "red", + "text": "OFF" + } + }, + "type": "value" + } + ], + "noValue": "Requires VM to be powered-on" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "libvirt_domain_info_state{domain=\"$vm_id\"}" + } + ], + "title": "VM • Power State", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows the current number of allocated vCPUs for the VM.", + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "fixed", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "libvirt_domain_vcpu_current{domain=\"$vm_id\"}" + } + ], + "title": "CPU • Allocated vCPUs", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows the current allocated memory for the VM.", + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "fixed", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + 
"pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "libvirt_domain_memory_stats_maximum_bytes{domain=\"$vm_id\"}" + } + ], + "title": "Memory • Allocated RAM", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows the number of disks attached to the VM.", + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 5, + "options": { + "colorMode": "fixed", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(libvirt_domain_block_stats_info{domain=\"$vm_id\"})" + } + ], + "title": "Storage • Disk Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows the number of network ports attached to the VM.", + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "fixed", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(libvirt_domain_interface_stats_info{domain=\"$vm_id\"})" + } + ], + "title": "Network • Port Count", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 7, + "title": "CPU Information", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": 
"Percentage of allocated CPU capacity actively used by the VM.\n\n- 0–80% → Normal
\n- 80-90% → Heavy load
\n- 90% sustained → CPU pressure likely\n\nHigh utilization + scheduling delay indicates host contention.", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 3 + }, + "id": 8, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(rate(libvirt_domain_info_cpu_time_seconds_total{domain=\"$vm_id\"}[5m])/libvirt_domain_vcpu_current{domain=\"$vm_id\"}) * 100", + "legendFormat": "{{domain}}" + } + ], + "title": "CPU • Usage %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Time the VM was runnable but not scheduled by the hypervisor.\n\n- 0–1% → Healthy
\n- 1–3% → Light contention
\n- 3–5% → Moderate contention
\n- 5% sustained → Requires Operator attention
\n\nHigh delay + high utilization = hypervisor contention.", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 3 + }, + "id": 9, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum by (domain) (rate(libvirt_domain_vcpu_delay_seconds_total{domain=\"$vm_id\"}[5m]))/sum by (domain) (rate(libvirt_domain_vcpu_time_seconds_total{domain=\"$vm_id\"}[5m]))) * 100", + "legendFormat": "{{domain}}" + } + ], + "title": "CPU • Steal Time %", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 10, + "title": "Memory Information", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Percentage of allocated memory currently used inside the guest OS.", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 11, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${datasource}" + }, + "expr": "libvirt_domain_memory_stats_used_percent{domain=\"$vm_id\"}", + "legendFormat": "{{domain}}" + } + ], + "title": "Memory • Usage %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Combined rate of data swapped in from disk and swapped out to disk, in bytes per second.\n\nSustained non-zero values indicate active memory pressure inside the guest.\n\n- 0 → No swap activity
\n- <1 MB/s → Light pressure\n- 1–10 MB/s sustained → Moderate pressure\n- 10 MB/s sustained → Likely performance impact", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1048576 + }, + { + "color": "red", + "value": 10485760 + } + ] + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 12, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain)(\n rate(libvirt_domain_memory_stats_swap_in_bytes{domain=\"$vm_id\"}[2m])\n+\n rate(libvirt_domain_memory_stats_swap_out_bytes{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{domain}}" + } + ], + "title": "Memory • Total Swap Activity", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 13, + "title": "Storage Information", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Total read + write operations per second per disk.\n\nHigh IOPS alone is fine.\nHigh IOPS + increasing latency indicates storage saturation.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "ops/s" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 14, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n 
{__name__=~\"libvirt_domain_block_stats_(read|write)_requests_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Total IOPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of read operations per second across all VM disks.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "ops/s" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 7 + }, + "id": 15, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_read_requests_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Read IOPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of write operations per second across all VM disks.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "ops/s" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 7 + }, + "id": 16, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_write_requests_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Write IOPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Combined read and write data rate 
(bytes/sec).\n\nHigh throughput + high latency suggests backend limits / qos limits.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "binBps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 15 + }, + "id": 17, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_(read|write)_bytes_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Total Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Read throughput in bytes per second across all VM disks.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "binBps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 15 + }, + "id": 18, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_read_bytes_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Read Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Write throughput in bytes per second across all VM disks.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "binBps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 15 + }, + "id": 19, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + 
"tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_write_bytes_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Write Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Average time to complete read operations.\n\n- <5 ms → Excellent
\n- 5–25 ms → Acceptable
\n- 25-35 ms → Workload may be impacted
\n- 35 ms sustained → Backend pressure / Reached QOS Limits\n", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "lightgreen", + "value": 5 + }, + { + "color": "orange", + "value": 25 + }, + { + "color": "red", + "value": 35 + } + ] + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 23 + }, + "id": 20, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_read_time_seconds_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)\n/\nrate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_read_requests_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n) * 1000", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Read Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Average time to complete write operations.\n\n- <8 ms → Excellent
\n- 8–25 ms → Acceptable
\n- 25-35 ms → Workload may be impacted
\n- 35 ms sustained → Backend pressure / Reached QOS Limits\n", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "lightgreen", + "value": 8 + }, + { + "color": "orange", + "value": 25 + }, + { + "color": "red", + "value": 35 + } + ] + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 23 + }, + "id": 21, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_write_time_seconds_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)\n/\nrate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_write_requests_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n) * 1000", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Write Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Average size per I/O operation.\n\nSmall values → IOPS-heavy workload
\nLarge values → throughput-heavy workload", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 23 + }, + "id": 22, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_(read|write)_bytes_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)\n/\nrate(\n sum by (domain, target_device) (\n {__name__=~\"libvirt_domain_block_stats_(read|write)_requests_total\", domain=\"$vm_id\"} > 0\n )[2m:30s]\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Storage • Average Block Size", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 23, + "title": "Network Information", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Total network throughput in bytes per second across all VM interfaces.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "binBps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 25 + }, + "id": 24, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_bytes_total{domain=\"$vm_id\"}[2m])\n+\n rate(libvirt_domain_interface_stats_transmit_bytes_total{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Total Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "${datasource}" + }, + "description": "Network receive throughput in bytes per second across all VM interfaces.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "binBps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 25 + }, + "id": 25, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_bytes_total{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Receive Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Network transmit throughput in bytes per second across all VM interfaces.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "binBps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 25 + }, + "id": 26, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_bytes_total{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Transmit Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Total network packets per second across all VM interfaces.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "pps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 33 + }, + "id": 27, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + 
"tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_packets_total{domain=\"$vm_id\"}[2m])\n+\n rate(libvirt_domain_interface_stats_receive_packets_total{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Total Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Network receive packets per second across all VM interfaces.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "pps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 33 + }, + "id": 28, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_packets_total{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Receive Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Network transmit packets per second across all VM interfaces.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "pps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 33 + }, + "id": 29, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_packets_total{domain=\"$vm_id\"}[2m])\n)\n", 
+ "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Transmit Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Inbound packet errors per second.\n\nThese indicate corrupted frames, checksum failures, or driver-level issues.\n\n⚠ Sustained non-zero values are abnormal.\nEven 0.1 errors/sec sustained is worth investigation.", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "eps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 30, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_errors_total{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Receive Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Outbound packet errors per second.\n\nThese indicate corrupted frames, checksum failures, or driver-level issues.\n\n⚠ Sustained non-zero values are abnormal.\nEven 0.1 errors/sec sustained is worth investigation.", + "fieldConfig": { + "defaults": { + "custom": { + "thresholdsStyle": { + "mode": "area" + } + }, + "noValue": "No Data", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "eps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 31, + "options": { + "legend": { + 
"displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_errors_total{domain=\"$vm_id\"}[2m])\n)\n", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Transmit Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Inbound packets dropped per second.\n\nCommon causes:
\n- Buffer exhaustion
\n- VM CPU not processing packets fast enough
\n- Host networking pressure
\n\nDrops + high CPU delay = likely host contention.", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "dps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 32, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_receive_drops_total{domain=\"$vm_id\"}[2m])\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Receive Drops", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Outbound packets dropped per second.\n\nPotential Causes:\n- Egress shaping\n- Queue limits\n- Host network congestion", + "fieldConfig": { + "defaults": { + "noValue": "No Data", + "unit": "dps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 33, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (domain, target_device)(\n rate(libvirt_domain_interface_stats_transmit_drops_total{domain=\"$vm_id\"}[2m])\n)", + "legendFormat": "{{target_device}}" + } + ], + "title": "Network • Transmit Drops", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [ + "libvirt", + "inovex", + "openstack" + ], + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "label": "Project Name", + "name": "project_name", + "query": "label_values(libvirt_domain_openstack_info, project_name)", + "refresh": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + 
"label": "VM Name", + "name": "vm_name", + "query": "label_values(libvirt_domain_openstack_info{project_name=\"$project_name\"}, instance_name)", + "refresh": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "label": "VM ID", + "name": "vm_id", + "query": "label_values(libvirt_domain_openstack_info{instance_name=\"$vm_name\", project_name=\"$project_name\"}, instance_id)", + "refresh": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "utc", + "title": "Libvirt Dashboard for Openstack" +} diff --git a/dashboards/openstack-libvirt-dashboard/main.jsonnet b/dashboards/openstack-libvirt-dashboard/main.jsonnet new file mode 100644 index 0000000..76c19a3 --- /dev/null +++ b/dashboards/openstack-libvirt-dashboard/main.jsonnet @@ -0,0 +1,411 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet'; +local query = import 'lib/query.jsonnet'; +local templates = import 'lib/templates.jsonnet'; +local vars = import 'lib/variables.jsonnet'; + + +local summaryPanels = { + powerState: templates.vmStatPanel( + title='VM • Power State', + targets=[query.power_state], + description='Shows the current power state of the VM. 
Requires the VM to be powered-on to return a value.', + unit=null, + colorMode='value', + mappings=[ + { + type: 'value', + options: { + '1': { color: 'green', text: 'ON' }, + '5': { color: 'red', text: 'OFF' }, + }, + }, + ] + ), + + cpuAllocated: templates.vmStatPanel( + title='CPU • Allocated vCPUs', + targets=[query.cpu_allocated], + description='Shows the current number of allocated vCPUs for the VM.', + unit=null, + colorMode='fixed', + mappings=[], + noThresholds=true + ), + + memoryAllocated: templates.vmStatPanel( + title='Memory • Allocated RAM', + targets=[query.memory_allocated], + description='Shows the current allocated memory for the VM.', + unit='bytes', + colorMode='fixed', + mappings=[], + noThresholds=true + ), + + diskCount: templates.vmStatPanel( + title='Storage • Disk Count', + targets=[query.disk_count], + description='Shows the number of disks attached to the VM.', + unit=null, + colorMode='fixed', + mappings=[], + noThresholds=true + ), + + networkPortCount: templates.vmStatPanel( + title='Network • Port Count', + targets=[query.network_port_count], + description='Shows the number of network ports attached to the VM.', + unit=null, + colorMode='fixed', + mappings=[], + noThresholds=true + ), +}; + +local cpuPanel = { + cpuUsage: templates.timeSeriesPanel( + title='CPU • Usage %', + targets=[query.cpu_usage_percentage], + unit='percent', + description='Percentage of allocated CPU capacity actively used by the VM.\n\n- 0–80% → Normal
\n- 80-90% → Heavy load
\n- 90% sustained → CPU pressure likely\n\nHigh utilization + scheduling delay indicates host contention.', + ) { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', value: null }, + { color: 'yellow', value: 80 }, + { color: 'red', value: 90 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, + cpuSteal: templates.timeSeriesPanel( + title='CPU • Steal Time %', + targets=[query.cpu_steal_pecentage], + unit='percent', + description='Time the VM was runnable but not scheduled by the hypervisor.\n\n- 0–1% → Healthy
\n- 1–3% → Light contention
\n- 3–5% → Moderate contention
\n- 5% sustained → Requires Operator attention
\n\nHigh delay + high utilization = hypervisor contention.', + ) { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', value: null }, + { color: 'yellow', value: 1 }, + { color: 'orange', value: 3 }, + { color: 'red', value: 5 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, +}; + + +local memoryPanel = { + memoryUsage: templates.timeSeriesPanel( + title='Memory • Usage %', + targets=[query.memory_usage_percentage], + unit='percent', + description='Percentage of allocated memory currently used inside the guest OS.', + ) + { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', value: null }, + { color: 'yellow', value: 80 }, + { color: 'red', value: 90 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, + memorySwap: templates.timeSeriesPanel( + title='Memory • Total Swap Activity', + targets=[query.memory_swap_bytes], /* query sums the swap-in and swap-out rates */ + unit='binBps', + description='Combined swap-in and swap-out data rate in bytes per second.\n\nSustained non-zero values indicate active memory pressure inside the guest.\n\n- 0 → No swap activity
\n- <1 MB/s → Light pressure\n- 1–10 MB/s sustained → Moderate pressure\n- 10 MB/s sustained → Likely performance impact', + ) + { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', value: null }, + { color: 'orange', value: 1024 * 1024 }, + { color: 'red', value: 10 * 1024 * 1024 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, +}; + +local storagePanel = { + iopsTotal: templates.timeSeriesPanel( + title='Storage • Total IOPS', + targets=[query.storage_iops_total], + unit='ops/s', + description='Total read + write operations per second per disk.\n\nHigh IOPS alone is fine.\nHigh IOPS + increasing latency indicates storage saturation.', + ), + iopsRead: templates.timeSeriesPanel( + title='Storage • Read IOPS', + targets=[query.storage_iops_read], + unit='ops/s', + description='Number of read operations per second across all VM disks.', + ), + iopsWrite: templates.timeSeriesPanel( + title='Storage • Write IOPS', + targets=[query.storage_iops_write], + unit='ops/s', + description='Number of write operations per second across all VM disks.', + ), + throughputTotal: templates.timeSeriesPanel( + title='Storage • Total Throughput', + targets=[query.storage_throughput_total], + unit='binBps', + description='Combined read and write data rate (bytes/sec).\n\nHigh throughput + high latency suggests backend limits / qos limits.', + ), + throughputRead: templates.timeSeriesPanel( + title='Storage • Read Throughput', + targets=[query.storage_throughput_read], + unit='binBps', + description='Read throughput in bytes per second across all VM disks.', + ), + throughputWrite: templates.timeSeriesPanel( + title='Storage • Write Throughput', + targets=[query.storage_throughput_write], + unit='binBps', + description='Write throughput in bytes per second across all VM disks.', + ), + latencyRead: templates.timeSeriesPanel( + title='Storage • Read Latency', + targets=[query.storage_latency_read], + 
unit='ms', + description='Average time to complete read operations.\n\n- <5 ms → Excellent
\n- 5–25 ms → Acceptable
\n- 25-35 ms → Workload may be impacted
\n- 35 ms sustained → Backend pressure / Reached QOS Limits\n', + ) + { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', value: null }, + { color: 'lightgreen', value: 5 }, + { color: 'orange', value: 25 }, + { color: 'red', value: 35 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, + latencyWrite: templates.timeSeriesPanel( + title='Storage • Write Latency', + targets=[query.storage_latency_write], + unit='ms', + description='Average time to complete write operations.\n\n- <8 ms → Excellent
\n- 8–25 ms → Acceptable
\n- 25-35 ms → Workload may be impacted
\n- 35 ms sustained → Backend pressure / Reached QOS Limits\n', + ) + { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', value: null }, + { color: 'lightgreen', value: 8 }, + { color: 'orange', value: 25 }, + { color: 'red', value: 35 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, + averageBlockSize: templates.timeSeriesPanel( + title='Storage • Average Block Size', + targets=[query.storage_average_block_size], + unit='bytes', + description='Average size per I/O operation.\n\nSmall values → IOPS-heavy workload
\nLarge values → throughput-heavy workload', + ), +}; + + +local networkPanel = { + networkThroughputTotal: templates.timeSeriesPanel( + title='Network • Total Throughput', + targets=[query.newtork_throughput_total], + unit='binBps', + description='Total network throughput in bytes per second across all VM interfaces.', + ), + networkThroughputReceive: templates.timeSeriesPanel( + title='Network • Receive Throughput', + targets=[query.newtork_throughput_receive], + unit='binBps', + description='Network receive throughput in bytes per second across all VM interfaces.', + ), + networkThroughputTransmit: templates.timeSeriesPanel( + title='Network • Transmit Throughput', + targets=[query.newtork_throughput_transmit], + unit='binBps', + description='Network transmit throughput in bytes per second across all VM interfaces.', + ), + networkPacketTotal: templates.timeSeriesPanel( + title='Network • Total Packets', + targets=[query.network_packet_total], + unit='pps', + description='Total network packets per second across all VM interfaces.', + ), + networkPacketReceive: templates.timeSeriesPanel( + title='Network • Receive Packets', + targets=[query.network_packet_receive], + unit='pps', + description='Network receive packets per second across all VM interfaces.', + ), + networkPacketTransmit: templates.timeSeriesPanel( + title='Network • Transmit Packets', + targets=[query.network_packet_transmit], + unit='pps', + description='Network transmit packets per second across all VM interfaces.', + ), + networkErrorsReceive: templates.timeSeriesPanel( + title='Network • Receive Errors', + targets=[query.network_errors_receive], + unit='eps', + description='Inbound packet errors per second.\n\nThese indicate corrupted frames, checksum failures, or driver-level issues.\n\n⚠ Sustained non-zero values are abnormal.\nEven 0.1 errors/sec sustained is worth investigation.', + ) + { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', 
value: null }, + { color: 'red', value: 1 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, + networkErrorsTransmit: templates.timeSeriesPanel( + title='Network • Transmit Errors', + targets=[query.network_errors_transmit], + unit='eps', + description='Outbound packet errors per second.\n\nThese indicate corrupted frames, checksum failures, or driver-level issues.\n\n⚠ Sustained non-zero values are abnormal.\nEven 0.1 errors/sec sustained is worth investigation.', + ) + { + fieldConfig+: { + defaults+: { + thresholds: { + mode: 'absolute', + steps: [ + { color: 'green', value: null }, + { color: 'red', value: 1 }, + ], + }, + custom: { + thresholdsStyle: { mode: 'area' }, + }, + }, + }, + }, + networkDropsReceive: templates.timeSeriesPanel( + title='Network • Receive Drops', + targets=[query.network_drops_receive], + unit='dps', + description='Inbound packets dropped per second.\n\nCommon causes:
\n- Buffer exhaustion
\n- VM CPU not processing packets fast enough
\n- Host networking pressure
\n\nDrops + high CPU delay = likely host contention.', + ), + networkDropsTransmit: templates.timeSeriesPanel( + title='Network • Transmit Drops', + targets=[query.network_drops_transmit], + unit='dps', + description='Outbound packets dropped per second.\n\nPotential Causes:\n- Egress shaping\n- Queue limits\n- Host network congestion', + ), +}; + +g.dashboard.new('Libvirt Dashboard for Openstack') ++ g.dashboard.withDescription( + 'Comprehensive monitoring for Libvirt virtual machines running on OpenStack. ' + + 'Visualizes CPU, memory, disk I/O, and network traffic per instance. ' + + 'Requires the inovex/prometheus-libvirt-exporter to be installed and scraping metrics.' +) ++ g.dashboard.withTags(['libvirt', 'inovex', 'openstack']) ++ g.dashboard.graphTooltip.withSharedCrosshair() ++ g.dashboard.withVariables([vars.project, vars.vmName, vars.vmId]) ++ g.dashboard.withPanels([ + g.panel.row.new('Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } }, + summaryPanels.powerState { gridPos: { x: 0, y: 1, w: 4, h: 4 } }, + summaryPanels.cpuAllocated { gridPos: { x: 4, y: 1, w: 4, h: 4 } }, + summaryPanels.memoryAllocated { gridPos: { x: 8, y: 1, w: 4, h: 4 } }, + summaryPanels.diskCount { gridPos: { x: 12, y: 1, w: 4, h: 4 } }, + summaryPanels.networkPortCount { gridPos: { x: 16, y: 1, w: 4, h: 4 } }, + + g.panel.row.new('CPU Information') + { gridPos: { x: 0, y: 5, w: 24, h: 1 } }, /* first free row below the stat panels (y 1-4) */ + cpuPanel.cpuUsage { gridPos: { x: 0, y: 6, w: 12, h: 10 } }, + cpuPanel.cpuSteal { gridPos: { x: 12, y: 6, w: 12, h: 10 } }, + + g.panel.row.new('Memory Information') + { gridPos: { x: 0, y: 16, w: 24, h: 1 } }, /* first free row below the CPU panels (y 6-15) */ + memoryPanel.memoryUsage { gridPos: { x: 0, y: 17, w: 12, h: 10 } }, + memoryPanel.memorySwap { gridPos: { x: 12, y: 17, w: 12, h: 10 } }, + + g.panel.row.new('Storage Information') + { gridPos: { x: 0, y: 27, w: 24, h: 1 } }, /* first free row below the memory panels (y 17-26) */ + storagePanel.iopsTotal { gridPos: { x: 0, y: 28, w: 8, h: 8 } }, + storagePanel.iopsRead { gridPos: { x: 8, y: 28, w: 8, h: 8 } }, + storagePanel.iopsWrite {
gridPos: { x: 16, y: 28, w: 8, h: 8 } }, + storagePanel.throughputTotal { gridPos: { x: 0, y: 36, w: 8, h: 8 } }, + storagePanel.throughputRead { gridPos: { x: 8, y: 36, w: 8, h: 8 } }, + storagePanel.throughputWrite { gridPos: { x: 16, y: 36, w: 8, h: 8 } }, + storagePanel.latencyRead { gridPos: { x: 0, y: 44, w: 8, h: 8 } }, + storagePanel.latencyWrite { gridPos: { x: 8, y: 44, w: 8, h: 8 } }, + storagePanel.averageBlockSize { gridPos: { x: 16, y: 44, w: 8, h: 8 } }, + + g.panel.row.new('Network Information') + { gridPos: { x: 0, y: 52, w: 24, h: 1 } }, /* first free row below the storage panels (y 28-51) */ + networkPanel.networkThroughputTotal { gridPos: { x: 0, y: 53, w: 8, h: 8 } }, + networkPanel.networkThroughputReceive { gridPos: { x: 8, y: 53, w: 8, h: 8 } }, + networkPanel.networkThroughputTransmit { gridPos: { x: 16, y: 53, w: 8, h: 8 } }, + networkPanel.networkPacketTotal { gridPos: { x: 0, y: 61, w: 8, h: 8 } }, + networkPanel.networkPacketReceive { gridPos: { x: 8, y: 61, w: 8, h: 8 } }, + networkPanel.networkPacketTransmit { gridPos: { x: 16, y: 61, w: 8, h: 8 } }, + networkPanel.networkErrorsReceive { gridPos: { x: 0, y: 69, w: 12, h: 8 } }, + networkPanel.networkErrorsTransmit { gridPos: { x: 12, y: 69, w: 12, h: 8 } }, + networkPanel.networkDropsReceive { gridPos: { x: 0, y: 77, w: 12, h: 8 } }, + networkPanel.networkDropsTransmit { gridPos: { x: 12, y: 77, w: 12, h: 8 } }, + +]) + { + __inputs: [ + { + name: 'datasource', + label: 'Prometheus', + description: 'Select your Prometheus datasource', + type: 'datasource', + pluginId: 'prometheus', + pluginName: 'Prometheus', + }, + ], + // Good practice to declare requirements for templates + __requires: [ + { type: 'grafana', id: 'grafana', name: 'Grafana', version: '9.0.0' }, + { type: 'datasource', id: 'prometheus', name: 'Prometheus', version: '1.0.0' }, + ], +}