-
Notifications
You must be signed in to change notification settings - Fork 45
Export Snapshot Profiling Stack Traces As OpenTelemetry Log Messages #2237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
72d1404
28772e7
2bec2fa
2eaf7e6
b78aa4b
cff68ec
4ec661d
62cbcfb
be77245
a9a98ad
7f70840
1190956
02ed77a
30551e4
aab9b3d
98d7943
7234dcf
fad3539
b50f54c
072faa0
826cfae
e0db0d6
9b10d29
b5bc51d
cc4c27d
475b5d9
a5bfcbc
bd562fd
c89be8a
f8ef40e
b6f6055
9358d52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -181,6 +181,10 @@ private static int getJavaVersion() { | |
| return Integer.parseInt(javaSpecVersion); | ||
| } | ||
|
|
||
| public static boolean isSnapshotProfilingEnabled(ConfigProperties properties) { | ||
| return properties.getBoolean(CONFIG_KEY_ENABLE_SNAPSHOT_PROFILER, false); | ||
| } | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A 2nd use was added in |
||
|
|
||
| public static double getSnapshotSelectionRate(ConfigProperties properties) { | ||
| String selectionRatePropertyValue = | ||
| properties.getString( | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adapted from code in |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| /* | ||
| * Copyright Splunk Inc. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.splunk.opentelemetry.profiler; | ||
|
|
||
| import com.google.common.annotations.VisibleForTesting; | ||
| import io.opentelemetry.api.logs.Logger; | ||
| import io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties; | ||
| import io.opentelemetry.sdk.logs.LogRecordProcessor; | ||
| import io.opentelemetry.sdk.logs.SdkLoggerProvider; | ||
| import io.opentelemetry.sdk.logs.export.LogRecordExporter; | ||
| import io.opentelemetry.sdk.logs.export.SimpleLogRecordProcessor; | ||
| import io.opentelemetry.sdk.resources.Resource; | ||
| import java.util.function.Function; | ||
|
|
||
| public class OtelLoggerFactory { | ||
| private final Function<ConfigProperties, LogRecordExporter> logRecordExporter; | ||
|
|
||
| public OtelLoggerFactory() { | ||
| this(LogExporterBuilder::fromConfig); | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| public OtelLoggerFactory(Function<ConfigProperties, LogRecordExporter> logRecordExporter) { | ||
| this.logRecordExporter = logRecordExporter; | ||
| } | ||
|
|
||
| public Logger build(ConfigProperties properties, Resource resource) { | ||
| LogRecordExporter exporter = createLogRecordExporter(properties); | ||
| LogRecordProcessor processor = SimpleLogRecordProcessor.create(exporter); | ||
| return buildOtelLogger(processor, resource); | ||
| } | ||
|
|
||
| private LogRecordExporter createLogRecordExporter(ConfigProperties properties) { | ||
| return logRecordExporter.apply(properties); | ||
| } | ||
|
|
||
| private Logger buildOtelLogger(LogRecordProcessor logProcessor, Resource resource) { | ||
| return SdkLoggerProvider.builder() | ||
| .addLogRecordProcessor(logProcessor) | ||
| .setResource(resource) | ||
| .build() | ||
| .loggerBuilder(ProfilingSemanticAttributes.OTEL_INSTRUMENTATION_NAME) | ||
| .setInstrumentationVersion(ProfilingSemanticAttributes.OTEL_INSTRUMENTATION_VERSION) | ||
| .build(); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* | ||
| * Copyright Splunk Inc. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.splunk.opentelemetry.profiler.snapshot; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.ConcurrentMap; | ||
| import java.util.function.Supplier; | ||
|
|
||
| class AccumulatingStagingArea implements StagingArea { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here you collect stack traces per trace id and when the trace is done you export them. To me the staging area abstraction doesn't seem ideal. When there is a long request you end up accumulating many stack traces, when the request is short you export a small batch with only one or two traces. When there are multiple concurrent short requests you'll export many small batches. With the non-snapshot profiler we already get the input data in batches and the interval between the batches is large so we don't need to handle this. Did you consider alternative solutions like for example what is used in https://github.com/open-telemetry/opentelemetry-java/blob/main/sdk/trace/src/main/java/io/opentelemetry/sdk/trace/export/BatchSpanProcessorBuilder.java Place data in the queue, if queue has batch size items (there it is 512) export them, otherwise wait for a bit (there 5s) and export whatever is in the queue. If queue grows too large drop data.
Contributor
Author
There was a problem hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This sounds like a very good idea. Would you be against doing this as a follow-up (this PR is a partial blocker to adding span correlation)? However, I don't think a time-based approach is necessarily the right one. What I'm envisioning is a batch-size setting; this would control how many stack traces are bundled into a single log record. The frequency of the log record exporting is separately controlled by the
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
sure
Flushing when no new stack traces have arrived for some time isn't that hard. When you write stacks from all ongoing traces into the same pprof, flushing when the entry span ends doesn't make sense.
Using |
||
| private final ConcurrentMap<String, List<StackTrace>> stackTraces = new ConcurrentHashMap<>(); | ||
| private final Supplier<StackTraceExporter> exporter; | ||
|
|
||
| AccumulatingStagingArea(Supplier<StackTraceExporter> exporter) { | ||
| this.exporter = exporter; | ||
| } | ||
|
|
||
| @Override | ||
| public void stage(String traceId, StackTrace stackTrace) { | ||
| stackTraces.compute( | ||
| traceId, | ||
| (id, stackTraces) -> { | ||
| if (stackTraces == null) { | ||
| stackTraces = new ArrayList<>(); | ||
| } | ||
| stackTraces.add(stackTrace); | ||
| return stackTraces; | ||
| }); | ||
| } | ||
|
|
||
| @Override | ||
| public void empty(String traceId) { | ||
| List<StackTrace> stackTraces = this.stackTraces.remove(traceId); | ||
| if (stackTraces != null) { | ||
| exporter.get().export(stackTraces); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| /* | ||
| * Copyright Splunk Inc. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.splunk.opentelemetry.profiler.snapshot; | ||
|
|
||
| import static com.splunk.opentelemetry.profiler.ProfilingSemanticAttributes.DATA_FORMAT; | ||
| import static com.splunk.opentelemetry.profiler.ProfilingSemanticAttributes.DATA_TYPE; | ||
| import static com.splunk.opentelemetry.profiler.ProfilingSemanticAttributes.FRAME_COUNT; | ||
| import static com.splunk.opentelemetry.profiler.ProfilingSemanticAttributes.INSTRUMENTATION_SOURCE; | ||
| import static com.splunk.opentelemetry.profiler.ProfilingSemanticAttributes.PPROF_GZIP_BASE64; | ||
| import static com.splunk.opentelemetry.profiler.ProfilingSemanticAttributes.PROFILING_SOURCE; | ||
| import static com.splunk.opentelemetry.profiler.ProfilingSemanticAttributes.SOURCE_TYPE; | ||
|
|
||
| import com.google.common.annotations.VisibleForTesting; | ||
| import com.google.perftools.profiles.ProfileProto.Profile; | ||
| import com.splunk.opentelemetry.profiler.InstrumentationSource; | ||
| import com.splunk.opentelemetry.profiler.ProfilingDataType; | ||
| import io.opentelemetry.api.common.Attributes; | ||
| import io.opentelemetry.api.internal.ImmutableSpanContext; | ||
| import io.opentelemetry.api.logs.Logger; | ||
| import io.opentelemetry.api.logs.Severity; | ||
| import io.opentelemetry.api.trace.Span; | ||
| import io.opentelemetry.api.trace.SpanContext; | ||
| import io.opentelemetry.api.trace.SpanId; | ||
| import io.opentelemetry.api.trace.TraceFlags; | ||
| import io.opentelemetry.api.trace.TraceState; | ||
| import io.opentelemetry.context.Context; | ||
| import java.io.ByteArrayOutputStream; | ||
| import java.io.IOException; | ||
| import java.io.OutputStream; | ||
| import java.nio.charset.StandardCharsets; | ||
| import java.time.Clock; | ||
| import java.time.Instant; | ||
| import java.util.Base64; | ||
| import java.util.List; | ||
| import java.util.concurrent.ExecutorService; | ||
| import java.util.concurrent.Executors; | ||
| import java.util.logging.Level; | ||
| import java.util.zip.GZIPOutputStream; | ||
|
|
||
| class AsyncStackTraceExporter implements StackTraceExporter { | ||
| private static final java.util.logging.Logger logger = | ||
| java.util.logging.Logger.getLogger(AsyncStackTraceExporter.class.getName()); | ||
|
|
||
| private static final Attributes COMMON_ATTRIBUTES = | ||
| Attributes.builder() | ||
| .put(SOURCE_TYPE, PROFILING_SOURCE) | ||
| .put(DATA_TYPE, ProfilingDataType.CPU.value()) | ||
| .put(DATA_FORMAT, PPROF_GZIP_BASE64) | ||
| .put(INSTRUMENTATION_SOURCE, InstrumentationSource.SNAPSHOT.value()) | ||
| .build(); | ||
|
|
||
| private final ExecutorService executor = Executors.newSingleThreadScheduledExecutor(); | ||
|
||
| private final PprofTranslator translator = new PprofTranslator(); | ||
| private final Logger otelLogger; | ||
| private final Clock clock; | ||
|
|
||
| AsyncStackTraceExporter(Logger logger) { | ||
| this(logger, Clock.systemUTC()); | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| AsyncStackTraceExporter(Logger logger, Clock clock) { | ||
| this.otelLogger = logger; | ||
| this.clock = clock; | ||
| } | ||
|
|
||
| @Override | ||
| public void export(List<StackTrace> stackTraces) { | ||
| executor.submit(pprofExporter(otelLogger, stackTraces)); | ||
| } | ||
|
|
||
| private Runnable pprofExporter(Logger otelLogger, List<StackTrace> stackTraces) { | ||
| return () -> { | ||
| try { | ||
| Context context = createProfilingContext(stackTraces); | ||
| Pprof pprof = translator.toPprof(stackTraces); | ||
| Profile profile = pprof.build(); | ||
| otelLogger | ||
| .logRecordBuilder() | ||
| .setContext(context) | ||
| .setTimestamp(Instant.now(clock)) | ||
| .setSeverity(Severity.INFO) | ||
| .setAllAttributes(profilingAttributes(pprof)) | ||
| .setBody(serialize(profile)) | ||
| .emit(); | ||
| } catch (Exception e) { | ||
| logger.log(Level.SEVERE, "an exception was thrown", e); | ||
|
||
| } | ||
| }; | ||
| } | ||
|
|
||
| private Context createProfilingContext(List<StackTrace> stackTraces) { | ||
| String traceId = extractTraceId(stackTraces); | ||
| SpanContext spanContext = | ||
| ImmutableSpanContext.create( | ||
| traceId, | ||
| SpanId.getInvalid(), | ||
| TraceFlags.getDefault(), | ||
| TraceState.getDefault(), | ||
| false, | ||
| true); | ||
laurit marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| Span span = Span.wrap(spanContext); | ||
| return span.storeInContext(Context.root()); | ||
| } | ||
|
|
||
| private String extractTraceId(List<StackTrace> stackTraces) { | ||
| return stackTraces.stream().findFirst().map(StackTrace::getTraceId).orElse(null); | ||
laurit marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| private Attributes profilingAttributes(Pprof pprof) { | ||
| return COMMON_ATTRIBUTES.toBuilder().put(FRAME_COUNT, pprof.frameCount()).build(); | ||
| } | ||
|
|
||
| private String serialize(Profile profile) throws IOException { | ||
| ByteArrayOutputStream byteStream = new ByteArrayOutputStream(); | ||
| try (OutputStream outputStream = new GZIPOutputStream(Base64.getEncoder().wrap(byteStream))) { | ||
| profile.writeTo(outputStream); | ||
| } | ||
| return byteStream.toString(StandardCharsets.ISO_8859_1.name()); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I needed access to the
`AutoConfigureUtil` class defined in this subproject. Used in `StackTraceExporterActivator` to access the `ConfigProperties` and `Resource` fields in the `AutoConfiguredOpenTelemetrySdk` when configuring the OtelLogger. The
`compileOnly` access already defined did not allow me to test that the configuration was taking place.