nicholasdille
diff --git a/‎000_introduction/02_bio.md
Lines changed: 1 addition & 1 deletion b/‎000_introduction/02_bio.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎100_monitoring/prometheus/application-level.md
Lines changed: 22 additions & 11 deletions b/‎100_monitoring/prometheus/application-level.md
Lines changed: 22 additions & 11 deletions
diff --git a/‎100_monitoring/prometheus/cadvisor/compose.yaml
Lines changed: 0 additions & 3 deletions b/‎100_monitoring/prometheus/cadvisor/compose.yaml
Lines changed: 0 additions & 3 deletions
diff --git a/‎100_monitoring/prometheus/cluster_scraping.drawio.svg
Lines changed: 166 additions & 109 deletions b/‎100_monitoring/prometheus/cluster_scraping.drawio.svg
Lines changed: 166 additions & 109 deletions
diff --git a/‎100_monitoring/prometheus/container.md
Lines changed: 23 additions & 13 deletions b/‎100_monitoring/prometheus/container.md
Lines changed: 23 additions & 13 deletions
diff --git a/‎100_monitoring/prometheus/grafana.md
Lines changed: 4 additions & 4 deletions b/‎100_monitoring/prometheus/grafana.md
Lines changed: 4 additions & 4 deletions
diff --git a/‎100_monitoring/prometheus/host.md
Lines changed: 16 additions & 36 deletions b/‎100_monitoring/prometheus/host.md
Lines changed: 16 additions & 36 deletions
diff --git a/‎100_monitoring/prometheus/kube-state-metrics.md
Lines changed: 26 additions & 5 deletions b/‎100_monitoring/prometheus/kube-state-metrics.md
Lines changed: 26 additions & 5 deletions
diff --git a/‎100_monitoring/prometheus/metrics-server.md
Lines changed: 8 additions & 10 deletions b/‎100_monitoring/prometheus/metrics-server.md
Lines changed: 8 additions & 10 deletions
diff --git a/‎100_monitoring/prometheus/metrics.md
Lines changed: 2 additions & 2 deletions b/‎100_monitoring/prometheus/metrics.md
Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
 - <span class="fa-li"><img src="images/TraefikLabs-icon-white.svg" style="height: 1em;" /></span> [tr&aelig;fik Ambassador][5] since 2021
 - <span class="fa-li"><i class="fa fa-briefcase"></i></span> [Haufe Group][6] since 2016
 - <span class="fa-li"><i class="fa fa-person-chalkboard"></i></span> Self-employed [trainer][7] since 2020
-- <span class="fa-li"><i class="fa fa-person-chalkboard"></i></span> Initiator/maintainer of uniget[12] since 2023
+- <span class="fa-li"><i class="fa fa-user-helmet-safety"></i></span> Initiator/maintainer of [uniget][12] since 2021
 
 <!-- .element: class="fa-ul" style="line-height: 175%;" -->
 
 
@@ -1,12 +1,12 @@
 ## Application Level Monitoring
 
-XXX many apps ship with exporters
+Many apps ship with integrated exporters
 
-XXX many FOSS services have an exporter
+Many FOSS services have an exporter
 
-XXX collection just like system services
+Collection works just like for system services
 
-XXX if not, use special exporters
+If nothing is available, use generic exporters
 
 ### `blackbox_exporter` [](https://github.com/prometheus/blackbox_exporter)
 
@@ -16,13 +16,17 @@ Probing of endpoints over HTTP, HTTPS, DNS, TCP, ICMP and gRPC
 
 Scraping of remote JSON by JSONPath [](https://goessner.net/articles/JsonPath/)
 
+Alternative: JSON API datasource [](https://grafana.com/grafana/plugins/marcusolsson-json-datasource/)
+
 ---
 
-## Application Location
+## Application on the network
+
+XXX datacenters, firewalls, policies
 
-XXX network
+Check whether scraping is possible
 
-XXX datacenters, firewalls, policies, pull vs. push
+Otherwise push metrics to gateway:
 
 ### `pushgateway` [](https://github.com/prometheus/pushgateway)
 
@@ -47,14 +51,21 @@ When resources on a node are depleted:
 
 ### How pods are "chosen"
 
-Pods have a quiality-of-service based on resource requests and limits [](https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/)
+Pods have a quality-of-service based on resource requests and limits [](https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/)
 
 - Best effort: No container has resource requests or limits
 - Burstable: At least one container has resource requests or limits
 - Guaranteed: All containers have identical resource requests and limits
 
 Scheduling uses resource requests to find suitable node
 
-Notes:
-Check pods for QoS
-`kubectl get pods -A -o json | jq -r '.items[] | "\(.metadata.name): \(.status.qosClass)"'`
+---
+
+## Pod Quality-of-Service
+
+### Check QoS
+
+```bash
+kubectl get pods --all-namespaces --output=json \
+| jq --raw-output '.items[] | "\(.metadata.name): \(.status.qosClass)"'
+```
@@ -22,14 +22,11 @@ services:
     container_name: cadvisor
     command:
     - --docker="unix:///var/run/docker.sock"
-    #- --containerd="unix:///var/run/docker/containerd/containerd.sock"
-    #- --containerd-namespace=docker
     ports:
     - 8080:8080
     volumes:
     - /:/rootfs:ro
     - /var/run:/var/run:rw
-    #- /var/run/docker/containerd/containerd.sock:/var/run/docker/containerd/containerd.sock
     - /sys:/sys:ro
     - /var/lib/docker/:/var/lib/docker:ro
 
 
@@ -30,21 +30,21 @@ cat "/sys/fs/cgroup/memory/docker/${ID}/memory.usage_in_bytes"
 
 ## Container metrics in Kubernetes
 
-Remember: `kubelet` is responsible for maintaining pods/containers on a node
+`kubelet` is responsible for maintaining pods/containers on a node
 
-kubelet offers metrics
+### Metrics...
 
-kubelet ships with cadvisor [](https://github.com/google/cadvisor)
+...are offered by `kubelet` as well
 
-Published under `/metrics/cadvisor/`
+`kubelet` ships with cadvisor [](https://github.com/google/cadvisor)
 
----
+Published under `/metrics/cadvisor/`
 
-## Demo: cadvisor with Docker
+### Demo: cadvisor with Docker
 
-XXX
+Run `cadvisor` in `compose`
 
-XXX docker-exporter?
+XXX docker-exporter https://github.com/0xERR0R/dex
 
 ---
 
@@ -72,6 +72,7 @@ kubeletctl \
     --token ${TOKEN} \
     metrics cadvisor | less
 ```
+<!-- .element: style="width: 46em;" -->
 
 ---
 
@@ -99,12 +100,15 @@ curl -skH "Authorization: Bearer ${TOKEN}" \
     "https://${IP}:10250/metrics/cadvisor" \
 | grep container_memory_usage_bytes | grep kube-proxy
 ```
+<!-- .element: style="width: 46em;" -->
 
 ---
 
 ## OpenMetrics 1/
 
-"...today's de-facto standard for transmitting cloud-native metrics at scale." [](https://openmetrics.io/)
+"...today's de-facto standard for transmitting cloud-native metrics at scale."
+
+Specification [](https://openmetrics.io/)
 
 ### Types
 
@@ -114,7 +118,7 @@ curl -skH "Authorization: Bearer ${TOKEN}" \
 - <span class="fa-li"><i class="fa-duotone fa-chart-column"></i></span> Histogram
 - <span class="fa-li"><i class="fa-duotone fa-ball-pile"></i></span> and more [](https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#metric-types)
 
-<!-- .element: class="fa-ul" -->
+<!-- .element: class="fa-ul" style="line-height: 1.5em;" -->
 
 ### Metadata
 
@@ -146,12 +150,19 @@ go_goroutines 69
 # HELP process_cpu_seconds Total user and system CPU time spent in seconds.
 process_cpu_seconds_total 4.20072246e+06
 ```
+<!-- .element: style="width: 47em;" -->
 
 ---
 
 ## OpenMetrics
 
-Metrics in Kubernetes have labels for:
+Format:
+
+```plaintext
+name{labels} value [timestamp]
+```
+
+Labels provide context for...
 
 - Namespace name
 - Pod name
@@ -163,7 +174,6 @@ For example:
 container_memory_usage_bytes{
     namespace="kube-system",
     pod="kube-proxy-68mp4",
-    container="kube-proxy",
-    # ...
+    container="kube-proxy"
 } 1.4917632e+07 1669235346213
 ```
@@ -4,18 +4,18 @@ Grafana is the most prominent tool to query, visualize and alert on metrics
 
 Supports many datasources including Prometheus
 
-Support datasource-specific query language
+Supports datasource-specific query languages
 
 Prometheus community offers pre-created dashboards [](https://github.com/kubernetes-monitoring/kubernetes-mixin)
 
 ### Demo
 
 Quick intro to UI [](http://grafana.inmylab.de)
 
-Graph for pod memory
-
-Graph for pod CPU (usage)
+Graph for pod memory and CPU (usage)
 
 Graph for node memory
 
 Count running pods
+
+Add variable for namespace and pod name
@@ -4,6 +4,8 @@ Can containers use all resources? Yes, but they should not!
 
 Some reservations are necessary [](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/)
 
+Capacity must be divided between system, cluster and containers
+
 ![](100_monitoring/prometheus/reservations.drawio.svg) <!-- .element: style="float: right; width: 40%;" -->
 
 ### Operating system
@@ -14,44 +16,25 @@ Reserved for system services
 
 Reserved for cluster components
 
-### Further resources
-
-Instance calculator for cloud providers [](https://learnk8s.io/kubernetes-instance-calculator)
+### Allocatable resources
 
-Read reservations from managed cluster [](https://github.com/learnk8s/kubernetes-resource-inspector)
+`Allocatable = Capacity - System - Kubernetes`
 
 ---
 
-## CPU Reservations in Managed Kubernetes
+## Reservations in Managed Kubernetes
 
-Major cloud providers agree
+Overview of AWS, Azure and Google Cloud [](https://learnk8s.io/allocatable-resources)
 
-XXX link to docs and rules
-
-| Cores | Reservation    | Cumulative | Efficiency |
-|-------|---------------:|-----------:|-----------:|
-| 1     |   60m          | 60m        | 94.0%      |
-| 2     | + 10m          | 70m        | 96.5%      |
-| 4     | + 10m          | 80m        | 98.0%      |
-| 8     | + 10m          | 90m        | 99.0%      |
-
----
+Larger VMs have less overhead
 
-## Memory reservations in Managed Kubernetes
+More VMs provide more availability
 
-Most major cloud providers agree
+### Further reading
 
-AWS uses: 255MiB + 11MiB * MAX_PODS
-
-XXX link to docs and rules
+Instance calculator for cloud providers [](https://learnk8s.io/kubernetes-instance-calculator)
 
-| Memory | Reservation | Cumulative | Efficiency |
-|--------|------------:|-----------:|-----------:|
-| 0      |   255MiB    | 255MiB     |            |
-| 4GiB   | + 800MiB    | 1055MiB    | 73.7%      |
-| 8GiB   | + 800GiB    | 1855MiB    | 76,8%      |
-| 112GiB | + 672MiB    | 2527MiB    | 97.7%      |
-| 128GiB | + 256MiB    | 2783MiB    | 97.8%      |
+Read reservations from managed cluster [](https://github.com/learnk8s/kubernetes-resource-inspector)
 
 ---
 
@@ -86,27 +69,24 @@ nodes:
 
 ## Host metrics collection
 
-node-exporter [](https://github.com/prometheus/node_exporter) collects host metrics...
+`node-exporter` [](https://github.com/prometheus/node_exporter) collects host metrics...
 
 ...and exports them for scraping
 
 Metrics [](https://github.com/prometheus/node_exporter#collectors) include CPU, memory, disk, network and a lot more!
 
-Some are disabled by default [](https://github.com/prometheus/node_exporter#disabled-by-default)
+Some are disabled but the defaults are reasonable [](https://github.com/prometheus/node_exporter#disabled-by-default)
 
 ### Demo
 
-Start Kubernetes API proxy:
+Start Kubernetes API proxy and read metrics endpoint:
 
 ```bash
 kubectl proxy
-```
-
-Read metrics endpoint:
 
-```bash
+H=localhost:8001
 NS=kube-system
 SVC=node-exporter-prometheus-node-exporter
-curl -s localhost:8001/api/v1/namespaces/${NS}/services/${SVC}:metrics/proxy/metrics \
+curl -s $H/api/v1/namespaces/$NS/services/$SVC:metrics/proxy/metrics \
 | grep node_cpu_seconds_total
 ```
@@ -1,9 +1,30 @@
-### `kube-state-metrics` [](https://github.com/kubernetes/kube-state-metrics)
+## `kube-state-metrics`
 
-New metrics about cluster
+Metrics derived from cluster and resources
 
-https://www.datadoghq.com/blog/monitoring-kubernetes-performance-metrics/
+Project page [](https://github.com/kubernetes/kube-state-metrics)
 
-`kubectl proxy`
+### Exposed Metrics (excerpt)
 
-`curl localhost:8001/api/v1/namespaces/kube-system/services/kube-state-metrics:http/proxy/metrics`
+For every resource:
+
+- *_info
+- *_labels
+- *_annotations
+
+Full list of metrics [](https://github.com/kubernetes/kube-state-metrics/tree/main/docs#exposed-metrics)
+
+Very useful for joins against other metrics [](https://github.com/kubernetes/kube-state-metrics/tree/main/docs#join-metrics)
+
+---
+
+## Demo: `kube-state-metrics`
+
+```bash
+kubectl proxy
+
+H=localhost:8001
+NS=kube-system
+S=kube-state-metrics
+curl -s $H/api/v1/namespaces/$NS/services/$S:http/proxy/metrics
+```
@@ -1,10 +1,10 @@
-## metrics-server [](https://github.com/kubernetes-sigs/metrics-server/)
+<!-- .slide: data-transition="fade" -->
 
-Provides an API for metrics collected by kubelet
+## metrics-server [](https://github.com/kubernetes-sigs/metrics-server/)
 
-Required for `kubectl top`
+Provides an API for metrics collected by `kubelet`/`cadvisor`
 
-Required for Horizontal/Vertical Pod AutoScaler
+Required for `kubectl top` and Horizontal/Vertical Pod AutoScaler
 
 ### Demo 1/2
 
@@ -22,15 +22,13 @@ kubectl top pod
 
 ---
 
-## metrics-server [](https://github.com/kubernetes-sigs/metrics-server/)
+<!-- .slide: data-transition="fade" -->
 
-Provides an API for metrics collected by kubelet
-
-Builds on cadvisor (XXX link?)
+## metrics-server [](https://github.com/kubernetes-sigs/metrics-server/)
 
-Required for `kubectl top`
+Provides an API for metrics collected by `kubelet`/`cadvisor`
 
-Required for Horizontal Pod AutoScaler
+Required for `kubectl top` and Horizontal/Vertical Pod AutoScaler
 
 ### Demo 2/2
 
 
@@ -12,15 +12,15 @@ How metrics can be collected...
 
 ![](100_monitoring/prometheus/push.drawio.svg) <!-- .element: style="width: 45%; float: right;" -->
 
-### Push <i class="fa-duotone fa-truck"></i>
+### Push <i class="fa-duotone fa-person-dolly"></i>
 
 Metrics are delivered to database
 
 Usually involves an agent
 
 Example: Telegraf agent and InfluxDB
 
-### Pull <i class="fa-duotone fa-hand-holding-heart"></i>
+### Pull <i class="fa-duotone fa-cart-shopping"></i>
 
 ![](100_monitoring/prometheus/pull.drawio.svg) <!-- .element: style="width:45%; float: right;" -->