Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .chloggen/host-extension-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Changelog entry used to generate the release notes.

# Allowed values: 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# Component name, or a single word naming the area of concern (e.g. filelogreceiver)
component: config_examples

# Short description of the change. Quote the text ("") if it has to start with a backtick (`).
note: "Add config example for a host metrics receiver config that fits all needs of the Dynatrace Host extension"

# Mandatory: tracking issue(s) for this change. The PR number is acceptable when no issue exists.
issues:
  - 809

# (Optional) Additional lines rendered below the primary note.
# Each line is padded with 2 spaces and inserted into the document as-is.
# Use pipe (|) for multiline entries.
subtext:
273 changes: 273 additions & 0 deletions config_examples/host-metrics-extension.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
# OpenTelemetry Collector Configuration for Host Extension
#
# This configuration is tailored for the Dynatrace OpenTelemetry Host Extension (custom:opentelemetry).
# Only metrics used by the extension are enabled to minimize data volume and cardinality.

extensions:
  # Health check extension, listening on all interfaces (0.0.0.0) at port 13133
  health_check:
    endpoint: "0.0.0.0:13133"

receivers:
  # Main set of host metrics, scraped every 5 minutes.
  # Utilization metrics and the 1-minute load average are switched off here
  # because hostmetrics/1m collects them.
  hostmetrics/5m:
    collection_interval: 5m
    scrapers:
      # CPU: only cumulative CPU time
      cpu:
        metrics:
          system.cpu.logical.count:
            enabled: false
          system.cpu.physical.count:
            enabled: false
          system.cpu.utilization:
            enabled: false
          system.cpu.time:
            enabled: true
          system.cpu.frequency:
            enabled: false

      # Memory: usage only
      memory:
        metrics:
          system.memory.limit:
            enabled: false
          system.memory.utilization:
            enabled: false
          system.memory.usage:
            enabled: true

      # Paging / swap
      paging:
        metrics:
          system.paging.usage:
            enabled: true
          system.paging.operations:
            enabled: true
          system.paging.faults:
            enabled: true

      # Network
      network:
        metrics:
          system.network.io:
            enabled: true
          system.network.packets:
            enabled: true
          system.network.errors:
            enabled: true
          system.network.connections:
            enabled: true
          system.network.dropped:
            enabled: true

      # Load average: 5m and 15m windows
      load:
        metrics:
          system.cpu.load_average.1m:
            enabled: false
          system.cpu.load_average.5m:
            enabled: true
          system.cpu.load_average.15m:
            enabled: true

      # Disk
      disk:
        metrics:
          system.disk.io:
            enabled: true
          system.disk.operations:
            enabled: true
          system.disk.io_time:
            enabled: true
          system.disk.operation_time:
            enabled: true
          system.disk.pending_operations:
            enabled: false
          system.disk.merged:
            enabled: false
          system.disk.weighted_io_time:
            enabled: false

      # Filesystem
      filesystem:
        metrics:
          system.filesystem.usage:
            enabled: true
          system.filesystem.utilization:
            enabled: true
          system.filesystem.inodes.usage:
            enabled: true

      # Process counts (host-wide)
      processes:
        metrics:
          system.processes.count:
            enabled: true
          system.processes.created:
            enabled: true

      # Per-process metrics, with process scrape errors muted
      process:
        mute_process_all_errors: true
        metrics:
          process.cpu.utilization:
            enabled: true
          process.cpu.time:
            enabled: true
          process.memory.usage:
            enabled: true
          process.memory.virtual:
            enabled: true
          process.disk.io:
            enabled: true

  # Utilization metrics and the 1-minute load average, scraped every minute.
  # The metrics disabled here are the ones hostmetrics/5m already collects.
  hostmetrics/1m:
    collection_interval: 1m
    scrapers:
      # CPU: utilization only
      cpu:
        metrics:
          system.cpu.utilization:
            enabled: true
          system.cpu.time:
            enabled: false

      # Memory: utilization only
      memory:
        metrics:
          system.memory.utilization:
            enabled: true
          system.memory.usage:
            enabled: false

      # Load average: 1m window only
      load:
        metrics:
          system.cpu.load_average.1m:
            enabled: true
          system.cpu.load_average.5m:
            enabled: false
          system.cpu.load_average.15m:
            enabled: false

  # Uptime, scraped once per hour
  hostmetrics/1h:
    collection_interval: 1h
    scrapers:
      system:
        metrics:
          system.uptime:
            enabled: true

  # NOTE(review): CPU utilization only, at a 10s interval; looks like a
  # receiver meant for local testing - confirm whether it belongs in the example.
  hostmetrics/test:
    collection_interval: 10s
    scrapers:
      cpu:
        metrics:
          system.cpu.utilization:
            enabled: true
          system.cpu.time:
            enabled: false

processors:
  # Drop the "idle" state datapoints of system.cpu.utilization
  filter:
    metrics:
      datapoint:
        - metric.name == "system.cpu.utilization" and attributes["state"] == "idle"

  transform:
    error_mode: ignore
    metric_statements:
      - context: datapoint
        statements:
          # process.* metrics: remove resource attributes the OpenTelemetry Host extension does not need.
          # Intentionally kept:
          #   - process.command_line (entity identification and display)
          #   - process.pid (entity attributes)
          #   - process.executable.name (entity display name)
          - delete_key(resource.attributes, "process.cgroup") where IsMatch(metric.name, "^process\\..*")
          - delete_key(resource.attributes, "process.command") where IsMatch(metric.name, "^process\\..*")
          - delete_key(resource.attributes, "process.executable.path") where IsMatch(metric.name, "^process\\..*")
          - delete_key(resource.attributes, "process.owner") where IsMatch(metric.name, "^process\\..*")
          - delete_key(resource.attributes, "process.parent_pid") where IsMatch(metric.name, "^process\\..*")

          # Remove "device" attributes whose value is the empty string
          - delete_key(attributes, "device") where attributes["device"] == ""

          # Remove datapoint attributes the OpenTelemetry Host extension does not use.
          # Network metrics: the dashboard aggregates over all interfaces and protocols
          - delete_key(attributes, "protocol") where IsMatch(metric.name, "^system\\.network\\..*")

          # Filesystem metrics: the extension uses mountpoint and state, but not device, mode, or type
          - delete_key(attributes, "device") where IsMatch(metric.name, "^system\\.filesystem\\..*")
          - delete_key(attributes, "mode") where IsMatch(metric.name, "^system\\.filesystem\\..*")
          - delete_key(attributes, "type") where IsMatch(metric.name, "^system\\.filesystem\\..*")

      - context: resource
        statements:
          # Replace spaces in the process command line with underscores for cleaner entity IDs
          - replace_pattern(attributes["process.command_line"], " ", "_")

      - context: metric
        statements:
          # Sum the remaining (non-idle) datapoints per "cpu" attribute, i.e. one value per core
          - aggregate_on_attributes("sum", ["cpu"]) where name == "system.cpu.utilization"
          # Then average the per-core datapoints into a single host-level value
          - aggregate_on_attributes("mean") where name == "system.cpu.utilization"

  # NOTE(review): defined, but not listed in the metrics pipeline's processors
  batch:
    send_batch_size: 100

  cumulativetodelta:
    max_staleness: 25h

  resourcedetection:
    detectors: ["system"]
    system:
      resource_attributes:
        # Host attributes: those used by the extension are enabled, the rest explicitly disabled
        host.arch:
          enabled: true
        host.id:
          enabled: true
        host.name:
          enabled: true
        host.ip:
          enabled: true
        host.interface:
          enabled: true
        host.mac:
          enabled: true
        host.cpu.model.name:
          enabled: true
        host.cpu.vendor.id:
          enabled: false
        host.cpu.family:
          enabled: false
        host.cpu.model.id:
          enabled: false
        host.cpu.stepping:
          enabled: false
        host.cpu.cache.l2.size:
          enabled: false
        # OS attributes used by the extension
        os.type:
          enabled: true
        os.description:
          enabled: true
        os.name:
          enabled: true
        os.version:
          enabled: true
        os.build.id:
          enabled: true

exporters:
  # Debug exporter at detailed verbosity (troubleshooting aid)
  debug:
    verbosity: detailed

  # OTLP over HTTP; endpoint and API token are taken from the
  # DT_ENDPOINT and DT_API_TOKEN environment variables
  otlphttp:
    endpoint: "${env:DT_ENDPOINT}"
    headers:
      Authorization: "Api-Token ${env:DT_API_TOKEN}"

# Wires the configured components together into a single metrics pipeline.
service:
  extensions: [health_check]
  pipelines:
    metrics:
      # Use the three interval-specific hostmetrics receivers that carry the
      # metric set for the Host extension. hostmetrics/test only scrapes CPU
      # utilization and is therefore not part of this pipeline.
      receivers:
        - hostmetrics/5m
        - hostmetrics/1m
        - hostmetrics/1h
      processors: [filter, resourcedetection, transform, cumulativetodelta]
      # debug runs at detailed verbosity; remove it from this list once the setup is verified
      exporters: [debug, otlphttp]
Loading
Loading