diff --git a/_topic_maps/_topic_map.yml b/_topic_maps/_topic_map.yml index 045c4dc6bbe3..a2222a0a83d8 100644 --- a/_topic_maps/_topic_map.yml +++ b/_topic_maps/_topic_map.yml @@ -2993,6 +2993,8 @@ Topics: Topics: - Name: Release notes File: log6x-release-notes-6.2 + - Name: Troubleshooting log forwarding + File: log6x-log-forwarding-troubleshooting-6.2 - Name: Logging 6.1 Dir: logging-6.1 Topics: @@ -3010,6 +3012,8 @@ Topics: File: log6x-opentelemetry-data-model-6.1 - Name: Visualization for logging File: log6x-visual-6.1 + - Name: Troubleshooting log forwarding + File: log6x-log-forwarding-troubleshooting-6.1 - Name: Logging 6.0 Dir: logging-6.0 Topics: @@ -3025,13 +3029,17 @@ Topics: File: log6x-loki - Name: Visualization for logging File: log6x-visual + - Name: Troubleshooting log forwarding + File: log6x-log-forwarding-troubleshooting-6.0 # - Name: API reference 6.0 # File: log6x-api-reference - Name: Logging 5.8 - Dir: logging_release_notes + Dir: logging_5 Topics: - Name: Release notes File: logging-5-8-release-notes + - Name: Troubleshooting log forwarding + File: log-forwarding-troubleshooting # - Name: Support # File: cluster-logging-support # - Name: Troubleshooting logging diff --git a/modules/log6x-loki-rate-limit-errors.adoc b/modules/log6x-loki-rate-limit-errors.adoc new file mode 100644 index 000000000000..f4da13f1895f --- /dev/null +++ b/modules/log6x-loki-rate-limit-errors.adoc @@ -0,0 +1,76 @@ +// Module is included in the following assemblies: +// * observability/logging/logging-6.0/log6x-log-forwarding-troubleshooting-6.0.adoc +// * observability/logging/logging-6.1/log6x-log-forwarding-troubleshooting-6.1.adoc +// * observability/logging/logging-6.2/log6x-log-forwarding-troubleshooting-6.2.adoc + +:_mod-docs-content-type: PROCEDURE +[id="log6x-loki-rate-limit-errors_{context}"] += Troubleshooting Loki rate limit errors + +If the Log Forwarder API forwards a large block of messages that exceeds the rate limit to Loki, Loki generates rate limit (`429`) errors. 
+ +These errors can occur during normal operation. For example, when adding the {logging} to a cluster that already has some logs, rate limit errors might occur while the {logging} tries to ingest all of the existing log entries. In this case, if the rate of addition of new logs is less than the total rate limit, the historical data is eventually ingested, and the rate limit errors are resolved without requiring user intervention. + +In cases where the rate limit errors continue to occur, you can fix the issue by modifying the `LokiStack` custom resource (CR). + +[IMPORTANT] +==== +The `LokiStack` CR is not available on Grafana-hosted Loki. This topic does not apply to Grafana-hosted Loki servers. +==== + +.Conditions + +* The Log Forwarder API is configured to forward logs to Loki. + +* Your system sends a block of messages that is larger than 2 MB to Loki. For example: ++ +[source,text] +---- +"values":[["1630410392689800468","{\"kind\":\"Event\",\"apiVersion\":\ +....... +...... +...... +...... +\"received_at\":\"2021-08-31T11:46:32.800278+00:00\",\"version\":\"1.7.4 1.6.0\"}},\"@timestamp\":\"2021-08-31T11:46:32.799692+00:00\",\"viaq_index_name\":\"audit-write\",\"viaq_msg_id\":\"MzFjYjJkZjItNjY0MC00YWU4LWIwMTEtNGNmM2E5ZmViMGU4\",\"log_type\":\"audit\"}"]]}]} +---- + +* After you enter `oc logs -n openshift-logging -l component=collector`, the collector logs in your cluster show a line containing one of the following error messages: ++ +[source,text] +---- +429 Too Many Requests Ingestion rate limit exceeded +---- ++ +.Example Vector error message +[source,text] +---- +2023-08-25T16:08:49.301780Z WARN sink{component_kind="sink" component_id=default_loki_infra component_type=loki component_name=default_loki_infra}: vector::sinks::util::retries: Retrying after error. 
error=Server responded with an error: 429 Too Many Requests internal_log_rate_limit=true +---- ++ +.Example Loki ingester error message +[source,text] +---- +level=warn ts=2023-08-30T14:57:34.155592243Z caller=grpc_logging.go:43 duration=1.434942ms method=/logproto.Pusher/Push err="rpc error: code = Code(429) desc = entry with timestamp 2023-08-30 14:57:32.012778399 +0000 UTC ignored, reason: 'Per stream rate limit exceeded (limit: 3MB/sec) while attempting to ingest for stream +---- + +.Procedure + +* Update the `ingestionBurstSize` and `ingestionRate` fields in the `LokiStack` CR: ++ +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: + limits: + global: + ingestion: + ingestionBurstSize: 16 # <1> + ingestionRate: 8 # <2> +# ... +---- +<1> The `ingestionBurstSize` field defines the maximum local rate-limited sample size per distributor replica in MB. This value is a hard limit. Set this value to at least the maximum log size expected in a single push request. Single requests that are larger than the `ingestionBurstSize` value are not permitted. +<2> The `ingestionRate` field is a soft limit on the maximum amount of ingested samples per second in MB. Rate limit errors occur if the rate of logs exceeds the limit, but the collector retries sending the logs. As long as the total average is lower than the limit, the system recovers and errors are resolved without user intervention. 
diff --git a/observability/logging/logging-6.0/log6x-log-forwarding-troubleshooting-6.0.adoc b/observability/logging/logging-6.0/log6x-log-forwarding-troubleshooting-6.0.adoc new file mode 100644 index 000000000000..4d37732e1184 --- /dev/null +++ b/observability/logging/logging-6.0/log6x-log-forwarding-troubleshooting-6.0.adoc @@ -0,0 +1,10 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +include::_attributes/attributes-openshift-dedicated.adoc[] +[id="log6x-log-forwarding-troubleshooting-6.0"] += Troubleshooting log forwarding +:context: log6x-log-forwarding-troubleshooting-6.0 + +toc::[] + +include::modules/log6x-loki-rate-limit-errors.adoc[leveloffset=+1] diff --git a/observability/logging/logging-6.1/log6x-log-forwarding-troubleshooting-6.1.adoc b/observability/logging/logging-6.1/log6x-log-forwarding-troubleshooting-6.1.adoc new file mode 100644 index 000000000000..b159602487d9 --- /dev/null +++ b/observability/logging/logging-6.1/log6x-log-forwarding-troubleshooting-6.1.adoc @@ -0,0 +1,10 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +include::_attributes/attributes-openshift-dedicated.adoc[] +[id="log6x-log-forwarding-troubleshooting-6.1"] += Troubleshooting log forwarding +:context: log6x-log-forwarding-troubleshooting-6.1 + +toc::[] + +include::modules/log6x-loki-rate-limit-errors.adoc[leveloffset=+1] \ No newline at end of file diff --git a/observability/logging/logging-6.2/log6x-log-forwarding-troubleshooting-6.2.adoc b/observability/logging/logging-6.2/log6x-log-forwarding-troubleshooting-6.2.adoc new file mode 100644 index 000000000000..58b3bb6f7c76 --- /dev/null +++ b/observability/logging/logging-6.2/log6x-log-forwarding-troubleshooting-6.2.adoc @@ -0,0 +1,10 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +include::_attributes/attributes-openshift-dedicated.adoc[] +[id="log6x-log-forwarding-troubleshooting-6.2"] += Troubleshooting log 
forwarding +:context: log6x-log-forwarding-troubleshooting-6.2 + +toc::[] + +include::modules/log6x-loki-rate-limit-errors.adoc[leveloffset=+1] diff --git a/observability/logging/logging_release_notes/_attributes b/observability/logging/logging_5/_attributes similarity index 100% rename from observability/logging/logging_release_notes/_attributes rename to observability/logging/logging_5/_attributes diff --git a/observability/logging/logging_release_notes/images b/observability/logging/logging_5/images similarity index 100% rename from observability/logging/logging_release_notes/images rename to observability/logging/logging_5/images diff --git a/observability/logging/logging_5/log-forwarding-troubleshooting.adoc b/observability/logging/logging_5/log-forwarding-troubleshooting.adoc new file mode 100644 index 000000000000..2e345a01f197 --- /dev/null +++ b/observability/logging/logging_5/log-forwarding-troubleshooting.adoc @@ -0,0 +1,11 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +include::_attributes/attributes-openshift-dedicated.adoc[] +[id="log-forwarding-troubleshooting"] += Troubleshooting log forwarding +:context: log-forwarding-troubleshooting + +toc::[] + +include::modules/redeploying-fluentd-pods.adoc[leveloffset=+1] +include::modules/loki-rate-limit-errors.adoc[leveloffset=+1] diff --git a/observability/logging/logging_release_notes/logging-5-7-release-notes.adoc b/observability/logging/logging_5/logging-5-7-release-notes.adoc similarity index 100% rename from observability/logging/logging_release_notes/logging-5-7-release-notes.adoc rename to observability/logging/logging_5/logging-5-7-release-notes.adoc diff --git a/observability/logging/logging_release_notes/logging-5-8-release-notes.adoc b/observability/logging/logging_5/logging-5-8-release-notes.adoc similarity index 100% rename from observability/logging/logging_release_notes/logging-5-8-release-notes.adoc rename to 
observability/logging/logging_5/logging-5-8-release-notes.adoc diff --git a/observability/logging/logging_release_notes/logging-5-9-release-notes.adoc b/observability/logging/logging_5/logging-5-9-release-notes.adoc similarity index 100% rename from observability/logging/logging_release_notes/logging-5-9-release-notes.adoc rename to observability/logging/logging_5/logging-5-9-release-notes.adoc diff --git a/observability/logging/logging_release_notes/modules b/observability/logging/logging_5/modules similarity index 100% rename from observability/logging/logging_release_notes/modules rename to observability/logging/logging_5/modules diff --git a/observability/logging/logging_release_notes/snippets b/observability/logging/logging_5/snippets similarity index 100% rename from observability/logging/logging_release_notes/snippets rename to observability/logging/logging_5/snippets