diff --git a/changelog/fragments/1765545606-prometheus-parser-panic-fix.yaml b/changelog/fragments/1765545606-prometheus-parser-panic-fix.yaml new file mode 100644 index 000000000000..3b193ff78918 --- /dev/null +++ b/changelog/fragments/1765545606-prometheus-parser-panic-fix.yaml @@ -0,0 +1,45 @@ +# REQUIRED +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# REQUIRED for all kinds +# Change summary; a 80ish characters long description of the change. +summary: Harden Prometheus metrics parser against panics caused by malformed input data + +# REQUIRED for breaking-change, deprecation, known-issue +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# description: + +# REQUIRED for breaking-change, deprecation, known-issue +# impact: + +# REQUIRED for breaking-change, deprecation, known-issue +# action: + +# REQUIRED for all kinds +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: metricbeat + +# AUTOMATED +# OPTIONAL to manually add other PR URLs +# PR URL: A link the PR that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +# pr: https://github.com/owner/repo/1234 + +# AUTOMATED +# OPTIONAL to manually add other issue URLs +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +# issue: https://github.com/owner/repo/1234 diff --git a/metricbeat/helper/prometheus/textparse.go b/metricbeat/helper/prometheus/textparse.go index c1e7d49b7a9f..846b9ae9475a 100644 --- a/metricbeat/helper/prometheus/textparse.go +++ b/metricbeat/helper/prometheus/textparse.go @@ -310,7 +310,7 @@ func (m *MetricFamily) GetName() string { return "" } func (m *MetricFamily) GetUnit() string { - if m != nil && *m.Unit != "" { + if m != nil && m.Unit != nil && *m.Unit != "" { return *m.Unit } return "" @@ -506,6 +506,39 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log metricTypes = make(map[string]model.MetricType) ) + // safeExemplar wraps parser.Exemplar with panic recovery. + // Returns false if parsing fails or if a panic occurs. + safeExemplar := func(e *exemplar.Exemplar) (ok bool) { + ok = false + defer func() { + if r := recover(); r != nil { + ok = false + if logger != nil { + logger.Debugf("Recovered from panic while parsing exemplar: %v", r) + } + } + }() + ok = parser.Exemplar(e) + return + } + + // safeLabels wraps parser.Labels with panic recovery. + // Returns false if a panic occurs, true otherwise. + safeLabels := func(lset *labels.Labels) (ok bool) { + ok = false + defer func() { + if r := recover(); r != nil { + ok = false + if logger != nil { + logger.Debugf("Recovered from panic while parsing labels: %v", r) + } + } + }() + parser.Labels(lset) + ok = true + return + } + for { var ( et textparse.Entry @@ -580,7 +613,9 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log _, tp, v := parser.Series() var lset labels.Labels - parser.Labels(&lset) + if !safeLabels(&lset) { + continue + } metadata := schema.NewMetadataFromLabels(lset) metricName := metadata.Name @@ -683,7 +718,7 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log continue } case model.MetricTypeHistogram: - if hasExemplar := parser.Exemplar(&e); hasExemplar { + if hasExemplar := safeExemplar(&e); hasExemplar { exm = &e } lookupMetricName, metric = histogramMetricName(metricName, v, qv, lbls.String(), &t, false, exm, histogramsByName) @@ -696,7 +731,7 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log continue } case model.MetricTypeGaugeHistogram: - if hasExemplar := parser.Exemplar(&e); hasExemplar { + if hasExemplar := safeExemplar(&e); hasExemplar { exm = &e } lookupMetricName, metric = histogramMetricName(metricName, v, qv, lbls.String(), &t, true, exm, histogramsByName) @@ -733,7 +768,7 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log } } - if hasExemplar := parser.Exemplar(&e); hasExemplar && mt != model.MetricTypeHistogram && metric != nil { + if hasExemplar := safeExemplar(&e); hasExemplar && mt != model.MetricTypeHistogram && metric != nil { if !e.HasTs { e.Ts = t } diff --git a/metricbeat/helper/prometheus/textparse_fuzz_test.go b/metricbeat/helper/prometheus/textparse_fuzz_test.go new file mode 100644 index 000000000000..200ccc66ae70 --- /dev/null +++ b/metricbeat/helper/prometheus/textparse_fuzz_test.go @@ -0,0 +1,127 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package prometheus + +import ( + "testing" + "time" + + "github.com/elastic/elastic-agent-libs/logp" +) + +func FuzzParseMetricFamilies(f *testing.F) { + seeds := [][]byte{ + // Valid metrics + []byte("# TYPE http_requests counter\nhttp_requests 100\n"), + []byte("# TYPE http_requests_total counter\nhttp_requests_total 100\n"), + []byte("# TYPE temperature gauge\ntemperature 23.5\n"), + []byte("# TYPE temperature gauge\ntemperature{location=\"room1\"} 23.5\n"), + []byte(`# TYPE http_duration histogram +http_duration_bucket{le="0.1"} 10 +http_duration_bucket{le="0.5"} 50 +http_duration_bucket{le="+Inf"} 100 +http_duration_sum 35.5 +http_duration_count 100 +`), + []byte(`# TYPE rpc_duration summary +rpc_duration{quantile="0.5"} 0.05 +rpc_duration{quantile="0.9"} 0.08 +rpc_duration{quantile="0.99"} 0.1 +rpc_duration_sum 17.5 +rpc_duration_count 200 +`), + []byte("# TYPE custom_metric unknown\ncustom_metric 42\n"), + []byte("metric_without_type 42\n"), + + // OpenMetrics + []byte("# TYPE http_requests counter\nhttp_requests_total 100\nhttp_requests_created 1234567890\n# EOF\n"), + []byte("# TYPE build info\nbuild_info{version=\"1.0\",commit=\"abc123\"} 1\n# EOF\n"), + []byte("# TYPE feature stateset\nfeature{feature=\"a\"} 1\nfeature{feature=\"b\"} 0\n# EOF\n"), + []byte(`# TYPE request_size gaugehistogram +request_size_gcount 100 +request_size_gsum 12345 +request_size_bucket{le="100"} 10 +request_size_bucket{le="+Inf"} 100 +# EOF +`), + + // Exemplars + []byte("# TYPE http_requests counter\nhttp_requests_total 100 # {trace_id=\"abc\"} 1.0\n# EOF\n"), + []byte("# TYPE http_requests counter\nhttp_requests_total 100 # {trace_id=\"abc\",span_id=\"def\"} 1.0 123456\n# EOF\n"), + []byte("# TYPE http_duration histogram\nhttp_duration_bucket{le=\"1\"} 10 # {trace_id=\"xyz\"} 0.9\n# EOF\n"), + + // Metadata + []byte("# HELP http_requests Total HTTP requests\n# TYPE http_requests counter\nhttp_requests 100\n"), + []byte("# TYPE temperature gauge\n# UNIT temperature celsius\ntemperature 23.5\n# EOF\n"), + + // Timestamps and labels + []byte("metric_with_ts 100 1234567890\n"), + []byte("metric_with_ts{label=\"value\"} 100 1234567890\n"), + []byte("metric{a=\"1\",b=\"2\",c=\"3\",d=\"4\"} 100\n"), + + // Special values + []byte("metric_nan NaN\n"), + []byte("metric_inf +Inf\n"), + []byte("metric_neginf -Inf\n"), + + // Edge cases + nil, + {}, + []byte("\n"), + + // Malformed labels + []byte("metric{"), + []byte("metric{label}"), + []byte("metric{label=}"), + []byte("metric{label=\""), + + // Known crash inputs + []byte("{A}0"), + []byte("{A}00"), + []byte("{A}000"), + []byte("{A}0000"), + []byte("{A} 1"), + []byte("{A} 1\n"), + []byte("{A}0\n"), + []byte("{A}0 1"), + []byte("{A}0 1\n"), + []byte("{A}0\n000"), + []byte("{A}00\n"), + []byte("{A}00000"), + []byte("{A}00 1"), + []byte("{A}00 1\n"), + []byte("{A}00\n000"), + + // Malformed exemplars + []byte("# TYPE c counter\nc_total 10 # {}\n# EOF\n"), + []byte("# TYPE c counter\nc_total 10 # {\n# EOF\n"), + []byte("# TYPE c counter\nc_total 10 # {a=}\n# EOF\n"), + } + + for _, seed := range seeds { + f.Add(seed) + } + + logger := logp.NewLogger("fuzz") + + f.Fuzz(func(t *testing.T, data []byte) { + _, _ = ParseMetricFamilies(data, ContentTypeTextFormat, time.Now(), logger) + _, _ = ParseMetricFamilies(data, OpenMetricsType, time.Now(), logger) + _, _ = ParseMetricFamilies(data, "", time.Now(), logger) + }) +} diff --git a/metricbeat/helper/prometheus/textparse_test.go b/metricbeat/helper/prometheus/textparse_test.go index fa3e42a46c51..b144eb27ca3f 100644 --- a/metricbeat/helper/prometheus/textparse_test.go +++ b/metricbeat/helper/prometheus/textparse_test.go @@ -22,8 +22,10 @@ import ( "time" "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/logp/logptest" ) @@ -39,6 +41,39 @@ func int64p(x int64) *int64 { return &x } +func TestParseMetricFamiliesMalformedInput(t *testing.T) { + logger := logp.NewLogger("test") + + malformedInputs := [][]byte{ + nil, + {}, + []byte("invalid"), + []byte("metric_name{"), + []byte("metric_name{label=}"), + []byte("{A}0"), + []byte("{A}00"), + []byte("{A}000"), + []byte("{A}0000"), + []byte("{A} 1"), + []byte("{A} 1\n"), + []byte("{A}0\n"), + []byte("{A}0 1"), + []byte("{A}0 1\n"), + []byte("{A}0\n000"), + []byte("{A}00\n"), + []byte("{A}00000"), + []byte("{A}00 1"), + []byte("{A}00 1\n"), + []byte("{A}00\n000"), + } + + for _, input := range malformedInputs { + assert.NotPanics(t, func() { + _, _ = ParseMetricFamilies(input, ContentTypeTextFormat, time.Now(), logger) + }, "ParseMetricFamilies should not panic on malformed input") + } +} + func TestCounterOpenMetrics(t *testing.T) { input := ` # TYPE process_cpu_total counter @@ -961,3 +996,151 @@ process_cpu_total 4200722.46 }) } } + +func TestInfoGetters(t *testing.T) { + // nil receiver + var nilInfo *Info + assert.Equal(t, int64(0), nilInfo.GetValue()) + assert.False(t, nilInfo.HasValidValue()) + + // valid Info with value 1 + val := int64(1) + info := &Info{Value: &val} + assert.Equal(t, int64(1), info.GetValue()) + assert.True(t, info.HasValidValue()) + + // Info with value 0 + val0 := int64(0) + info0 := &Info{Value: &val0} + assert.Equal(t, int64(0), info0.GetValue()) + assert.False(t, info0.HasValidValue()) +} + +func TestStatesetGetters(t *testing.T) { + // nil receiver + var nilStateset *Stateset + assert.Equal(t, int64(0), nilStateset.GetValue()) + assert.False(t, nilStateset.HasValidValue()) + + // Stateset with value 1 + val1 := int64(1) + ss1 := &Stateset{Value: &val1} + assert.Equal(t, int64(1), ss1.GetValue()) + assert.True(t, ss1.HasValidValue()) + + // Stateset with value 0 + val0 := int64(0) + ss0 := &Stateset{Value: &val0} + assert.Equal(t, int64(0), ss0.GetValue()) + assert.True(t, ss0.HasValidValue()) + + // Stateset with invalid value + val2 := int64(2) + ss2 := &Stateset{Value: &val2} + assert.False(t, ss2.HasValidValue()) +} + +func TestUnknownGetters(t *testing.T) { + // nil receiver + var nilUnknown *Unknown + assert.Equal(t, float64(0), nilUnknown.GetValue()) + + // valid Unknown + val := 42.5 + u := &Unknown{Value: &val} + assert.Equal(t, 42.5, u.GetValue()) +} + +func TestOpenMetricGetters(t *testing.T) { + // nil receiver + var nilMetric *OpenMetric + assert.Nil(t, nilMetric.GetName()) + assert.Nil(t, nilMetric.GetInfo()) + assert.Nil(t, nilMetric.GetStateset()) + assert.Nil(t, nilMetric.GetUnknown()) + assert.Nil(t, nilMetric.GetGaugeHistogram()) + assert.Equal(t, int64(0), nilMetric.GetTimestampMs()) + + // OpenMetric with Info + name := "test_info" + val := int64(1) + metric := &OpenMetric{ + Name: &name, + Info: &Info{Value: &val}, + } + assert.Equal(t, &name, metric.GetName()) + assert.NotNil(t, metric.GetInfo()) + + // OpenMetric with Stateset + ssVal := int64(1) + ssMetric := &OpenMetric{ + Stateset: &Stateset{Value: &ssVal}, + } + assert.NotNil(t, ssMetric.GetStateset()) + + // OpenMetric with Unknown + uVal := 42.0 + uMetric := &OpenMetric{ + Unknown: &Unknown{Value: &uVal}, + } + assert.NotNil(t, uMetric.GetUnknown()) + + // OpenMetric with GaugeHistogram + ghMetric := &OpenMetric{ + Histogram: &Histogram{IsGaugeHistogram: true}, + } + assert.NotNil(t, ghMetric.GetGaugeHistogram()) + assert.Nil(t, ghMetric.GetHistogram()) // regular GetHistogram should return nil for gauge histogram + + // OpenMetric with timestamp + ts := int64(1234567890) + tsMetric := &OpenMetric{ + TimestampMs: &ts, + } + assert.Equal(t, int64(1234567890), tsMetric.GetTimestampMs()) +} + +func TestMetricFamilyGetUnit(t *testing.T) { + // nil unit + mf := &MetricFamily{} + assert.Equal(t, "", mf.GetUnit()) + + // empty unit + empty := "" + mf2 := &MetricFamily{Unit: &empty} + assert.Equal(t, "", mf2.GetUnit()) + + // valid unit + unit := "bytes" + mf3 := &MetricFamily{Unit: &unit} + assert.Equal(t, "bytes", mf3.GetUnit()) +} + +func TestGetContentType(t *testing.T) { + tests := []struct { + name string + contentType string + expected string + }{ + {"empty", "", ""}, + {"text_plain", "text/plain", ContentTypeTextFormat}, + {"text_plain_version", "text/plain; version=0.0.4", ContentTypeTextFormat}, + {"text_plain_wrong_version", "text/plain; version=1.0.0", ""}, + {"openmetrics", "application/openmetrics-text", OpenMetricsType}, + {"openmetrics_delimited", "application/openmetrics-text; encoding=delimited", OpenMetricsType}, + {"openmetrics_wrong_encoding", "application/openmetrics-text; encoding=protobuf", ""}, + {"json", "application/json", ""}, + {"invalid", "not a valid; content type", ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + header := make(map[string][]string) + if tt.contentType != "" { + header["Content-Type"] = []string{tt.contentType} + } + result := GetContentType(header) + assert.Equal(t, tt.expected, result) + }) + } +}