Skip to content

Commit ffecec5

Browse files
authored
feat(appengine): support xandra telemetry bounce (#1462)
Re-bounces xandra telemetry under `astarte.appengine.database` to gain visibility into query failures and exceptions. Signed-off-by: Luca Zaninotto <luca.zaninotto@secomind.com>
1 parent c75b76a commit ffecec5

4 files changed

Lines changed: 216 additions & 0 deletions

File tree

apps/astarte_appengine_api/lib/astarte_appengine_api/config.ex

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,16 @@ defmodule Astarte.AppEngine.API.Config do
160160
type: :binary,
161161
default: "astarte"
162162

163+
@envdoc """
164+
The handling method for database events. The default is `expose`, which means that the events are exposed through telemetry. The other possible value, `log`, means that the events are logged instead.
165+
"""
166+
app_env :database_events_handling_method,
167+
:astarte_appengine_api,
168+
:database_events_handling_method,
169+
os_env: "DATABASE_EVENTS_HANDLING_METHOD",
170+
type: Astarte.AppEngine.API.Config.TelemetryType,
171+
default: :expose
172+
163173
@doc """
164174
Returns the routing key used for Rooms AMQP events consumer. A constant for now.
165175
"""
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#
2+
# This file is part of Astarte.
3+
#
4+
# Copyright 2025 SECO Mind Srl
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
# SPDX-License-Identifier: Apache-2.0
19+
#
20+
21+
defmodule Astarte.AppEngine.API.Config.TelemetryType do
  @moduledoc """
  Skogsra type describing how database telemetry events are handled.

  The accepted values are `expose` and `log`. They can be given either as
  strings (e.g. read from an OS environment variable) or as the corresponding
  atoms (e.g. a value that is already typed). Every other value is rejected.
  """

  use Skogsra.Type

  @allowed_strategies [:expose, :log]

  @doc """
  Casts `value` to one of the allowed strategies.

  Returns `{:ok, :expose}` or `{:ok, :log}` on success, `:error` otherwise.
  """
  @impl Skogsra.Type
  def cast(value)

  # Already-typed values are accepted as they are, so that an atom is not
  # rejected just because it did not come in as a string.
  def cast(value) when value in @allowed_strategies, do: {:ok, value}

  def cast("expose"), do: {:ok, :expose}
  def cast("log"), do: {:ok, :log}

  def cast(_), do: :error
end

apps/astarte_appengine_api/lib/astarte_appengine_api_web/telemetry.ex

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ defmodule Astarte.AppEngine.APIWeb.Telemetry do
1919
use Supervisor
2020
import Telemetry.Metrics
2121

22+
alias Astarte.AppEngine.API.Config
23+
alias Astarte.AppEngine.APIWeb.Telemetry.DatabaseEvents
2224
alias Astarte.AppEngine.APIWeb.Telemetry.APIUsage
2325

2426
def start_link(arg) do
@@ -144,10 +146,57 @@ defmodule Astarte.AppEngine.APIWeb.Telemetry do
144146
counter("astarte.appengine.channels.unwatch_request.count",
145147
tags: [:realm],
146148
description: "Trigger deinstallation requests count"
149+
),
150+
151+
# Database exception metrics
152+
counter("astarte.appengine.database.execute_query.exception.count",
153+
tags: [:query, :reason, :kind, :stacktrace],
154+
tag_values: &to_valid_values/1,
155+
unit: {:native, :second}
156+
),
157+
counter("astarte.appengine.database.execute_query.stop.count",
158+
tags: [:query, :reason],
159+
tag_values: &to_valid_values/1,
160+
unit: {:native, :second}
161+
),
162+
163+
# Database preparation metrics
164+
counter("astarte.appengine.database.prepare_query.exception.count",
165+
tags: [:query, :reason, :kind, :stacktrace],
166+
tag_values: &to_valid_values/1,
167+
unit: {:native, :second}
168+
),
169+
counter("astarte.appengine.database.prepare_query.stop.count",
170+
tags: [:query, :reason],
171+
tag_values: &to_valid_values/1,
172+
unit: {:native, :second}
173+
),
174+
175+
# Database connection metrics
176+
counter(
177+
"astarte.appengine.database.cluster.control_connection.failed_to_connect.count",
178+
tag_values: &to_valid_values/1,
179+
tags: [:cluster_name, :host, :reason]
180+
),
181+
counter("astarte.appengine.database.failed_to_connect.conut",
182+
tag_values: &to_valid_values/1,
183+
tags: [:connection_name, :address, :port]
147184
)
148185
]
149186
end
150187

188+
# Maps the metadata of a bounced database event to tag values that can be
# used as metric labels. One clause per metadata shape:
#
#   * query events (`:query` and `:reason`, plus `:kind` and `:stacktrace`
#     when they are present in the metadata)
#   * cluster control connection failures
#   * single connection failures
defp to_valid_values(%{query: query, reason: reason} = metadata) do
  metadata
  |> Map.take([:kind, :stacktrace])
  |> Map.new(fn
    {:stacktrace, stacktrace} when is_list(stacktrace) ->
      {:stacktrace, Exception.format_stacktrace(stacktrace)}

    {tag, value} ->
      {tag, format_reason(value)}
  end)
  |> Map.merge(%{query: query_statement(query), reason: format_reason(reason)})
end

defp to_valid_values(%{cluster_name: cluster_name, host: host, reason: reason}) do
  %{cluster_name: cluster_name, host: inspect(host), reason: format_reason(reason)}
end

defp to_valid_values(%{connection_name: connection_name, address: address, port: port}) do
  %{connection_name: connection_name, address: inspect(address), port: inspect(port)}
end

# Not every query term is guaranteed to carry a `:statement` field, so fall
# back to the inspected term instead of raising a KeyError.
defp query_statement(%{statement: statement}), do: statement
defp query_statement(query), do: inspect(query)

# The reason can be any exception or an arbitrary term, so dispatch on what
# it actually is instead of assuming a single exception module.
defp format_reason(reason) when is_binary(reason), do: reason
defp format_reason(reason) when is_atom(reason), do: to_string(reason)
defp format_reason(reason) when is_exception(reason), do: Exception.message(reason)
defp format_reason(reason), do: inspect(reason)
199+
151200
defp periodic_measurements do
152201
[
153202
# A module, function and arguments to be invoked periodically.
@@ -158,6 +207,36 @@ defmodule Astarte.AppEngine.APIWeb.Telemetry do
158207

159208
# Attaches the telemetry handlers used by this module: the API usage handler
# on cowboy request completion and the database events handler on the Xandra
# events, configured with the handling method read from `Config`.
defp attach_handlers do
  :telemetry.attach(APIUsage, [:cowboy, :request, :stop], &APIUsage.handle_event/4, nil)

  events = xandra_events()
  handling_method = Config.database_events_handling_method!()

  :telemetry.attach_many(DatabaseEvents, events, &DatabaseEvents.handle_event/4, handling_method)
end
218+
219+
# Returns the Xandra telemetry event names the database events handler is
# attached to, each one prefixed with `:xandra`.
#
# NOTE(review): no `[..., :exception]` event is listed here, so exception
# events are presumably never delivered to the handler - confirm whether
# that is intended.
defp xandra_events do
  connection_events = [
    [:connected],
    [:disconnected],
    [:failed_to_connect],
    [:prepared_cache, :hit],
    [:prepared_cache, :miss],
    [:prepare_query, :stop],
    [:execute_query, :stop],
    [:client_timeout],
    [:timed_out_response],
    [:server_warnings]
  ]

  cluster_events = [
    [:cluster, :change_event],
    [:cluster, :control_connection, :connected],
    [:cluster, :control_connection, :disconnected],
    [:cluster, :control_connection, :failed_to_connect],
    [:cluster, :pool, :started],
    [:cluster, :pool, :restarted],
    [:cluster, :pool, :stopped],
    [:cluster, :discovered_peers]
  ]

  Enum.map(connection_events ++ cluster_events, fn suffix -> [:xandra | suffix] end)
end
162241

163242
defp extract_phoenix_buckets_metadata(%{
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#
2+
# This file is part of Astarte.
3+
#
4+
# Copyright 2025 SECO Mind Srl
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
# SPDX-License-Identifier: Apache-2.0
19+
#
20+
21+
defmodule Astarte.AppEngine.APIWeb.Telemetry.DatabaseEvents do
  @moduledoc """
  Telemetry handler for database events.

  Depending on the handler configuration, Xandra events are either re-emitted
  ("bounced") under the `[:astarte, :appengine, :database]` prefix or handed
  over to `Xandra.Telemetry.handle_event/4`.
  """

  require Logger
  alias Astarte.AppEngine.APIWeb.TelemetryTaskSupervisor

  # Event names, without the `:xandra` prefix, that are re-emitted. The query
  # events use the `:execute_query` name, the same one `reportable?/2`
  # matches on.
  @bounce_events [
    [:prepare_query, :stop],
    [:prepare_query, :exception],
    [:execute_query, :stop],
    [:execute_query, :exception],
    [:cluster, :control_connection, :failed_to_connect],
    [:failed_to_connect]
  ]

  @doc """
  Handles telemetry events related to database operations.

  See the documentation of Xandra for more details on the events:
  https://hexdocs.pm/xandra/telemetry-events.html

  With the `:expose` configuration the `:xandra` prefix is dropped from the
  event name and, when the event is one of the bounced events, it is
  re-emitted from a task under `[:astarte, :appengine, :database]` with the
  same measurements and metadata. Query events are re-emitted only when their
  metadata contains a `:reason`. Every other event is ignored.

  With the `:log` configuration the event is passed unchanged to
  `Xandra.Telemetry.handle_event/4`.
  """
  def handle_event(event, measurements, metadata, config)

  def handle_event([:xandra | event], measurements, metadata, :expose)
      when event in @bounce_events do
    # Check the metadata before starting the task, so that no task is
    # spawned for events that would be discarded anyway.
    if reportable?(event, metadata) do
      Task.Supervisor.start_child(TelemetryTaskSupervisor, fn ->
        :telemetry.execute([:astarte, :appengine, :database | event], measurements, metadata)
      end)
    else
      :ok
    end
  end

  def handle_event([:xandra | _event], _measurements, _metadata, :expose), do: :ok

  def handle_event(event, measurements, metadata, :log) do
    Xandra.Telemetry.handle_event(event, measurements, metadata, :no_config)
  end

  # Query events are worth re-emitting only when they carry a `:reason`;
  # all the other bounced events are always re-emitted.
  defp reportable?([operation, _phase], metadata)
       when operation in [:execute_query, :prepare_query] do
    Map.has_key?(metadata, :reason)
  end

  defp reportable?(_event, _metadata), do: true
end

0 commit comments

Comments
 (0)