-
Notifications
You must be signed in to change notification settings - Fork 45
Export Snapshot Profiling Stack Traces As OpenTelemetry Log Messages #2237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
72d1404
28772e7
2bec2fa
2eaf7e6
b78aa4b
cff68ec
4ec661d
62cbcfb
be77245
a9a98ad
7f70840
1190956
02ed77a
30551e4
aab9b3d
98d7943
7234dcf
fad3539
b50f54c
072faa0
826cfae
e0db0d6
9b10d29
b5bc51d
cc4c27d
475b5d9
a5bfcbc
bd562fd
c89be8a
f8ef40e
b6f6055
9358d52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,6 +74,12 @@ public class Configuration implements AutoConfigurationCustomizerProvider { | |
| public static final String CONFIG_KEY_SNAPSHOT_SELECTION_RATE = "splunk.snapshot.selection.rate"; | ||
| private static final double DEFAULT_SNAPSHOT_SELECTION_RATE = 0.01; | ||
| private static final double MAX_SNAPSHOT_SELECTION_RATE = 0.10; | ||
| private static final String CONFIG_KEY_SNAPSHOT_PROFILER_STACK_DEPTH = | ||
| "splunk.snapshot.profiler.max.stack.depth"; | ||
| private static final int DEFAULT_SNAPSHOT_PROFILER_STACK_DEPTH = 1024; | ||
| private static final String CONFIG_KEY_SNAPSHOT_PROFILER_SAMPLING_INTERVAL = | ||
| "splunk.snapshot.profiler.sampling.interval"; | ||
| public static final Duration DEFAULT_SNAPSHOT_PROFILER_SAMPLING_INTERVAL = Duration.ofMillis(20); | ||
|
|
||
| @Override | ||
| public void customize(AutoConfigurationCustomizer autoConfiguration) { | ||
|
|
@@ -181,6 +187,10 @@ private static int getJavaVersion() { | |
| return Integer.parseInt(javaSpecVersion); | ||
| } | ||
|
|
||
| public static boolean isSnapshotProfilingEnabled(ConfigProperties properties) { | ||
| return properties.getBoolean(CONFIG_KEY_ENABLE_SNAPSHOT_PROFILER, false); | ||
| } | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A 2nd use was added in |
||
|
|
||
| public static double getSnapshotSelectionRate(ConfigProperties properties) { | ||
| String selectionRatePropertyValue = | ||
| properties.getString( | ||
|
|
@@ -207,4 +217,15 @@ public static double getSnapshotSelectionRate(ConfigProperties properties) { | |
| return DEFAULT_SNAPSHOT_SELECTION_RATE; | ||
| } | ||
| } | ||
|
|
||
| public static int getSnapshotProfilerStackDepth(ConfigProperties properties) { | ||
| return properties.getInt( | ||
| CONFIG_KEY_SNAPSHOT_PROFILER_STACK_DEPTH, DEFAULT_SNAPSHOT_PROFILER_STACK_DEPTH); | ||
| } | ||
|
|
||
| public static Duration getSnapshotProfilerSamplingInterval(ConfigProperties properties) { | ||
| return properties.getDuration( | ||
| CONFIG_KEY_SNAPSHOT_PROFILER_SAMPLING_INTERVAL, | ||
| DEFAULT_SNAPSHOT_PROFILER_SAMPLING_INTERVAL); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| /* | ||
| * Copyright Splunk Inc. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.splunk.opentelemetry.profiler; | ||
|
|
||
| import com.google.common.annotations.VisibleForTesting; | ||
| import io.opentelemetry.api.logs.Logger; | ||
| import io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties; | ||
| import io.opentelemetry.sdk.logs.LogRecordProcessor; | ||
| import io.opentelemetry.sdk.logs.SdkLoggerProvider; | ||
| import io.opentelemetry.sdk.logs.export.LogRecordExporter; | ||
| import io.opentelemetry.sdk.logs.export.SimpleLogRecordProcessor; | ||
| import io.opentelemetry.sdk.resources.Resource; | ||
| import java.util.function.Function; | ||
|
|
||
| public class OtelLoggerFactory { | ||
| private final Function<ConfigProperties, LogRecordExporter> logRecordExporter; | ||
|
|
||
| public OtelLoggerFactory() { | ||
| this(LogExporterBuilder::fromConfig); | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| public OtelLoggerFactory(Function<ConfigProperties, LogRecordExporter> logRecordExporter) { | ||
| this.logRecordExporter = logRecordExporter; | ||
| } | ||
|
|
||
| public Logger build(ConfigProperties properties, Resource resource) { | ||
| LogRecordExporter exporter = createLogRecordExporter(properties); | ||
| LogRecordProcessor processor = SimpleLogRecordProcessor.create(exporter); | ||
| return buildOtelLogger(processor, resource); | ||
| } | ||
|
|
||
| private LogRecordExporter createLogRecordExporter(ConfigProperties properties) { | ||
| return logRecordExporter.apply(properties); | ||
| } | ||
|
|
||
| private Logger buildOtelLogger(LogRecordProcessor logProcessor, Resource resource) { | ||
| return SdkLoggerProvider.builder() | ||
| .addLogRecordProcessor(logProcessor) | ||
| .setResource(resource) | ||
| .build() | ||
| .loggerBuilder(ProfilingSemanticAttributes.OTEL_INSTRUMENTATION_NAME) | ||
| .setInstrumentationVersion(ProfilingSemanticAttributes.OTEL_INSTRUMENTATION_VERSION) | ||
| .build(); | ||
| } | ||
| } |
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* | ||
| * Copyright Splunk Inc. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.splunk.opentelemetry.profiler.snapshot; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.ConcurrentMap; | ||
| import java.util.function.Supplier; | ||
|
|
||
| class AccumulatingStagingArea implements StagingArea { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here you collect stack traces per trace id and when the trace is done you export them. To me the staging area abstraction doesn't seem ideal. When there is a long request you end up accumulating many stack traces, when the request is short you export a small batch with only one or two traces. When there are multiple concurrent short requests you'll export many small batches. With the non-snapshot profiler we already get the input data in batches and the interval between the batches is large so we don't need to handle this. Did you consider alternative solutions like for example what is used in https://github.com/open-telemetry/opentelemetry-java/blob/main/sdk/trace/src/main/java/io/opentelemetry/sdk/trace/export/BatchSpanProcessorBuilder.java Place data in the queue, if queue has batch size items (there it is 512) export them, otherwise wait for a bit (there 5s) and export whatever is in the queue. If queue grows too large drop data.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sounds like a very good idea. Would you be against doing this as a followup (this PR is a partial blocker to adding span correlation)? I don't think a time-based approach is necessarily the right approach however. What I'm envisioning is a This would control how many stack traces are bundled into a single log record. The frequency of the log record exporting is separately controlled by the
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
sure
Flushing when there are no new stack traces in some time isn't that hard. When you write stacks from all ongoing traces to the same pprof then flushing on ending the entry span doesn't make sense.
Using |
||
| private final ConcurrentMap<String, List<StackTrace>> stackTraces = new ConcurrentHashMap<>(); | ||
| private final Supplier<StackTraceExporter> exporter; | ||
|
|
||
| AccumulatingStagingArea(Supplier<StackTraceExporter> exporter) { | ||
| this.exporter = exporter; | ||
| } | ||
|
|
||
| @Override | ||
| public void stage(String traceId, StackTrace stackTrace) { | ||
| stackTraces.compute( | ||
| traceId, | ||
| (id, stackTraces) -> { | ||
| if (stackTraces == null) { | ||
| stackTraces = new ArrayList<>(); | ||
| } | ||
| stackTraces.add(stackTrace); | ||
| return stackTraces; | ||
| }); | ||
| } | ||
|
|
||
| @Override | ||
| public void empty(String traceId) { | ||
| List<StackTrace> stackTraces = this.stackTraces.remove(traceId); | ||
| if (stackTraces != null) { | ||
| exporter.get().export(stackTraces); | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I needed access to the
AutoConfigureUtilclass defined in this subproject. Used inStackTraceExporterActivatorto access theConfigPropertiesandResourcefields in theAutoConfiguredOpenTelemetrySdkwhen configuring the OtelLogger.The
compileOnlyaccess already defined did not allow me to test that the configuration was taking place.