-
-
Notifications
You must be signed in to change notification settings - Fork 203
Add SpanProcessor for OpenTelemetry #875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
701310d
f50dd39
5846816
963fead
244eb76
e809fb5
1cc9ecb
33332bc
6902f52
2015985
d59e634
0dee050
c650545
cba0bb8
c576517
7bcf304
b8bd558
5ba8913
04b117c
880463e
378ad3c
10b4c38
1b8ed26
67b97d8
770fe80
c727e2b
c046097
9108901
564e8f7
320c62a
fbb8126
5778b3b
3a65258
e38c2f8
61b1934
b831a1f
84c6579
82a2214
d6f4373
ef2eac9
9a8ebf4
e6ad2bd
eff0e18
b55c844
1d62af3
3767f02
f1183b3
fcecbc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -117,7 +117,7 @@ defmodule Sentry.Client do | |
|
||
result_type = Keyword.get_lazy(opts, :result, &Config.send_result/0) | ||
client = Keyword.get_lazy(opts, :client, &Config.client/0) | ||
sample_rate = Keyword.get_lazy(opts, :sample_rate, &Config.sample_rate/0) | ||
sample_rate = Keyword.get_lazy(opts, :sample_rate, &Config.traces_sample_rate/0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the errors There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment is not correct. It's not replacing the original one, it's switching from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm saying we need both |
||
before_send = Keyword.get_lazy(opts, :before_send, &Config.before_send/0) | ||
after_send_event = Keyword.get_lazy(opts, :after_send_event, &Config.after_send_event/0) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -125,6 +125,20 @@ defmodule Sentry.Config do | |
be used as the value for this option. | ||
""" | ||
], | ||
traces_sample_rate: [ | ||
type: {:custom, __MODULE__, :__validate_traces_sample_rate__, []}, | ||
default: 0.0, | ||
doc: """ | ||
The sample rate for transaction events. A value between `0.0` and `1.0` (inclusive). | ||
A value of `0.0` means no transactions will be sampled, while `1.0` means all transactions | ||
will be sampled. This value is also used to determine if tracing is enabled: if it's | ||
greater than `0`, tracing is enabled. | ||
|
||
Tracing requires OpenTelemetry packages to work. See [the | ||
OpenTelemetry setup documentation](https://opentelemetry.io/docs/languages/erlang/getting-started/) | ||
for guides on how to set it up. | ||
""" | ||
], | ||
included_environments: [ | ||
type: {:or, [{:in, [:all]}, {:list, {:or, [:atom, :string]}}]}, | ||
deprecated: "Use :dsn to control whether to send events to Sentry.", | ||
|
@@ -590,6 +604,9 @@ defmodule Sentry.Config do | |
@spec sample_rate() :: float() | ||
def sample_rate, do: fetch!(:sample_rate) | ||
|
||
@spec traces_sample_rate() :: float() | ||
def traces_sample_rate, do: fetch!(:traces_sample_rate) | ||
|
||
@spec hackney_opts() :: keyword() | ||
def hackney_opts, do: fetch!(:hackney_opts) | ||
|
||
|
@@ -627,6 +644,9 @@ defmodule Sentry.Config do | |
@spec integrations() :: keyword() | ||
def integrations, do: fetch!(:integrations) | ||
|
||
@spec tracing?() :: boolean() | ||
def tracing?, do: fetch!(:traces_sample_rate) > 0.0 | ||
Comment on lines
+647
to
+648
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is the way to go, as it makes the API a little harder to work with IMO. I would rather have two separate configuration options if possible, unless other Sentry SDKs do it this way. If they do, can you point me to examples? If they don't, let's go with:
This way users can easily toggle tracing on and off without having to worry about the rate itself. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @whatyouhide actually, that's how this works across the SDKs, so we need to stay consistent over here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yuck 😞 |
||
|
||
@spec put_config(atom(), term()) :: :ok | ||
def put_config(key, value) when is_atom(key) do | ||
unless key in @valid_keys do | ||
|
@@ -726,6 +746,15 @@ defmodule Sentry.Config do | |
end | ||
end | ||
|
||
def __validate_traces_sample_rate__(float) do | ||
if is_float(float) and float >= 0.0 and float <= 1.0 do | ||
{:ok, float} | ||
else | ||
{:error, | ||
"expected :traces_sample_rate to be a float between 0.0 and 1.0 (included), got: #{inspect(float)}"} | ||
end | ||
end | ||
|
||
def __validate_json_library__(nil) do | ||
{:error, "nil is not a valid value for the :json_library option"} | ||
end | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the sampler still needs to use |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
if Code.ensure_loaded?(:otel_sampler) do | ||
defmodule Sentry.OpenTelemetry.Sampler do | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you elaborate on why we need a Sentry-specific sampler? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @whatyouhide in order to provide a standard interface for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you elaborate on this with links to docs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @whatyouhide Sentry has a standard way of handling trace sampling via Here are docs about this feature in the Ruby SDK as a reference https://docs.sentry.io/platforms/ruby/configuration/sampling/#sampling-transaction-events There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would expect the Sampler to be my application-specific concern; this module seems unnecessary to me There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Controlling sampling in an easy way is very central to our performance offering business wise since that is what we charge users. We have a unified way we expose sampling on our SDKs ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As an end-user, the reason why we went with Sentry vs AppSignal is that Sentry allows us to sample our traces and stuff to avoid getting on beefier plans for some projects. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sl0thentr0py but OTel has built-in sampling, and this integration is all based on OTel. Do we really need a separate way to do this, that is not how you'd do it with just OTel? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. its not just about functionality, its about a unified product and developer experience There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, I don't think this is the right move—the Elixir SDK is unique in it being (AFAIK) the only Sentry SDK that is solely based on OTel. Either way, your choice of course. @solnic let's split this up into yet its own separate PR please. |
||
@moduledoc false | ||
|
||
@behaviour :otel_sampler | ||
|
||
def setup(config) do | ||
config | ||
end | ||
|
||
def description(_) do | ||
"SentrySampler" | ||
end | ||
|
||
def should_sample( | ||
_ctx, | ||
_trace_id, | ||
_links, | ||
span_name, | ||
_span_kind, | ||
_attributes, | ||
config | ||
) do | ||
if span_name in config[:drop] do | ||
{:drop, [], []} | ||
else | ||
{:record_and_sample, [], []} | ||
end | ||
end | ||
end | ||
end |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,193 @@ | ||||||
if Code.ensure_loaded?(OpenTelemetry) do | ||||||
defmodule Sentry.OpenTelemetry.SpanProcessor do | ||||||
@moduledoc false | ||||||
|
||||||
@behaviour :otel_span_processor | ||||||
|
||||||
require OpenTelemetry.SemConv.ClientAttributes, as: ClientAttributes | ||||||
require OpenTelemetry.SemConv.Incubating.DBAttributes, as: DBAttributes | ||||||
require OpenTelemetry.SemConv.Incubating.HTTPAttributes, as: HTTPAttributes | ||||||
require OpenTelemetry.SemConv.Incubating.URLAttributes, as: URLAttributes | ||||||
require OpenTelemetry.SemConv.Incubating.MessagingAttributes, as: MessagingAttributes | ||||||
|
||||||
require Logger | ||||||
|
||||||
alias Sentry.{Transaction, OpenTelemetry.SpanStorage, OpenTelemetry.SpanRecord} | ||||||
alias Sentry.Interfaces.Span | ||||||
|
||||||
# This can be a no-op since we can postpone inserting the span into storage until on_end | ||||||
@impl :otel_span_processor | ||||||
def on_start(_ctx, otel_span, _config) do | ||||||
whatyouhide marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
otel_span | ||||||
end | ||||||
|
||||||
@impl :otel_span_processor | ||||||
def on_end(otel_span, _config) do | ||||||
span_record = SpanRecord.new(otel_span) | ||||||
|
||||||
SpanStorage.store_span(span_record) | ||||||
|
||||||
if span_record.parent_span_id == nil do | ||||||
child_span_records = SpanStorage.get_child_spans(span_record.span_id) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this somehow pop the child spans off of the storage? I’m worried about the concurrency of this all. I don't know how There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @whatyouhide OTel guarantees that |
||||||
transaction = build_transaction(span_record, child_span_records) | ||||||
|
||||||
result = | ||||||
case Sentry.send_transaction(transaction) do | ||||||
solnic marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
{:ok, _id} -> | ||||||
true | ||||||
|
||||||
:ignored -> | ||||||
true | ||||||
|
||||||
{:error, error} -> | ||||||
Logger.warning("Failed to send transaction to Sentry: #{inspect(error)}") | ||||||
{:error, :invalid_span} | ||||||
end | ||||||
|
||||||
:ok = SpanStorage.remove_root_span(span_record.span_id) | ||||||
|
||||||
result | ||||||
else | ||||||
true | ||||||
solnic marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
end | ||||||
end | ||||||
|
||||||
@impl :otel_span_processor | ||||||
def force_flush(_config) do | ||||||
:ok | ||||||
end | ||||||
Comment on lines
+56
to
+58
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the general purpose of |
||||||
|
||||||
defp build_transaction(root_span_record, child_span_records) do | ||||||
root_span = build_span(root_span_record) | ||||||
child_spans = Enum.map(child_span_records, &build_span(&1)) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Super nit:
Suggested change
|
||||||
|
||||||
Transaction.new(%{ | ||||||
span_id: root_span.span_id, | ||||||
transaction: transaction_name(root_span_record), | ||||||
transaction_info: %{source: :custom}, | ||||||
start_timestamp: root_span_record.start_time, | ||||||
timestamp: root_span_record.end_time, | ||||||
contexts: %{ | ||||||
trace: build_trace_context(root_span_record) | ||||||
}, | ||||||
spans: child_spans | ||||||
}) | ||||||
end | ||||||
|
||||||
defp transaction_name( | ||||||
%{attributes: %{unquote(to_string(MessagingAttributes.messaging_system())) => :oban}} = | ||||||
span_record | ||||||
) do | ||||||
span_record.attributes["oban.job.worker"] | ||||||
end | ||||||
|
||||||
defp transaction_name(span_record), do: span_record.name | ||||||
|
||||||
defp build_trace_context(span_record) do | ||||||
{op, description} = get_op_description(span_record) | ||||||
|
||||||
%{ | ||||||
trace_id: span_record.trace_id, | ||||||
span_id: span_record.span_id, | ||||||
parent_span_id: span_record.parent_span_id, | ||||||
op: op, | ||||||
description: description, | ||||||
origin: span_record.origin, | ||||||
data: span_record.attributes | ||||||
} | ||||||
end | ||||||
|
||||||
defp get_op_description( | ||||||
%{ | ||||||
attributes: %{ | ||||||
unquote(to_string(HTTPAttributes.http_request_method())) => http_request_method | ||||||
} | ||||||
} = span_record | ||||||
) do | ||||||
op = "http.#{span_record.kind}" | ||||||
|
||||||
client_address = | ||||||
Map.get(span_record.attributes, to_string(ClientAttributes.client_address())) | ||||||
|
||||||
url_path = Map.get(span_record.attributes, to_string(URLAttributes.url_path())) | ||||||
|
||||||
description = | ||||||
to_string(http_request_method) <> | ||||||
((client_address && " from #{client_address}") || "") <> | ||||||
((url_path && " #{url_path}") || "") | ||||||
|
||||||
{op, description} | ||||||
end | ||||||
|
||||||
defp get_op_description( | ||||||
%{attributes: %{unquote(to_string(DBAttributes.db_system())) => _db_system}} = | ||||||
span_record | ||||||
) do | ||||||
db_query_text = Map.get(span_record.attributes, "db.statement") | ||||||
|
||||||
{"db", db_query_text} | ||||||
end | ||||||
|
||||||
defp get_op_description(%{ | ||||||
attributes: | ||||||
%{unquote(to_string(MessagingAttributes.messaging_system())) => :oban} = attributes | ||||||
}) do | ||||||
{"queue.process", attributes["oban.job.worker"]} | ||||||
end | ||||||
Comment on lines
+131
to
+136
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't the Oban/OTel integration be the one responsible for getting this right here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @whatyouhide queue.process Based on the knowledge sources, queue.process is a span operation used in Sentry's tracing instrumentation for queue consumers. This operation is used to track when messages are delivered to or processed by a consumer in a messaging system. Consumer Instrumentation To implement queue consumer instrumentation with Sentry, you need to: |
||||||
|
||||||
defp get_op_description(span_record) do | ||||||
{span_record.name, span_record.name} | ||||||
end | ||||||
|
||||||
defp build_span(span_record) do | ||||||
{op, description} = get_op_description(span_record) | ||||||
|
||||||
%Span{ | ||||||
op: op, | ||||||
description: description, | ||||||
start_timestamp: span_record.start_time, | ||||||
timestamp: span_record.end_time, | ||||||
trace_id: span_record.trace_id, | ||||||
span_id: span_record.span_id, | ||||||
parent_span_id: span_record.parent_span_id, | ||||||
origin: span_record.origin, | ||||||
data: Map.put(span_record.attributes, "otel.kind", span_record.kind), | ||||||
status: span_status(span_record) | ||||||
} | ||||||
end | ||||||
|
||||||
defp span_status(%{ | ||||||
attributes: %{ | ||||||
unquote(to_string(HTTPAttributes.http_response_status_code())) => | ||||||
http_response_status_code | ||||||
} | ||||||
}) do | ||||||
to_status(http_response_status_code) | ||||||
end | ||||||
Comment on lines
+159
to
+166
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this an OTel spec thing? 🤔 |
||||||
|
||||||
defp span_status(_span_record), do: nil | ||||||
|
||||||
# WebSocket upgrade spans doesn't have a HTTP status | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit
Suggested change
|
||||||
defp to_status(nil), do: nil | ||||||
|
||||||
defp to_status(status) when status in 200..299, do: "ok" | ||||||
|
||||||
for {status, string} <- %{ | ||||||
400 => "invalid_argument", | ||||||
401 => "unauthenticated", | ||||||
403 => "permission_denied", | ||||||
404 => "not_found", | ||||||
409 => "already_exists", | ||||||
429 => "resource_exhausted", | ||||||
499 => "cancelled", | ||||||
500 => "internal_error", | ||||||
501 => "unimplemented", | ||||||
503 => "unavailable", | ||||||
504 => "deadline_exceeded" | ||||||
} do | ||||||
defp to_status(unquote(status)), do: unquote(string) | ||||||
end | ||||||
|
||||||
defp to_status(_any), do: "unknown_error" | ||||||
end | ||||||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should also rename the option that can be passed to this function to be
:traces_sample_rate
and document that.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@whatyouhide I'm gonna do a follow up PR with improved sampler which is meant to use this config option. @sl0thentr0py told me we need to sample sooner than in
send_transaction
and that the sampler is the correct place so we'd be dropping spans sooner rather than aggregating them in our storage to potentially drop later.