Skip to content

Commit 790e451

Browse files
committed
feat: Add manual tracing. (#6)
At this stage we don't have a great solution for automatically tracing SQL queries, etc, so we have added API to allow users to manually add traces to their test analytics from within a test.
1 parent e6a5c97 commit 790e451

15 files changed

Lines changed: 691 additions & 188 deletions

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ Randomized with seed 12345
5858

5959
If all is well, you should see the test run in the test analytics section of the Buildkite dashboard.
6060

61+
## 🎢 Tracing
62+
63+
Buildkite Test Analytics has support for tracing potentially slow operations within your tests (SQL queries, HTTP requests, etc). Because ExUnit can run multiple tests simultaneously, it is difficult to achieve this without requiring code changes - we cannot simply use [telemetry](https://hex.pm/packages/telemetry) events because we cannot easily attribute the events to specific tests across process boundaries. Instead we have provided [a simple API](https://hexdocs.pm/buildkite_test_collector/BuildkiteTestCollector.Tracing.html) to manually instrument operations within your tests.
6164

6265
## 🔜 Roadmap
6366

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,59 @@
11
defmodule BuildkiteTestCollector.Duration do
2+
defstruct usec: 0
3+
alias BuildkiteTestCollector.{Duration, Instant}
4+
25
@moduledoc """
3-
The analyics API specifies start and end times in fractional seconds since the
4-
beginning of the test run.
6+
The difference between two instants with microsecond accuracy.
7+
8+
The Buildkite analytics API stores all times as decimal seconds since the
9+
start of the test run. Therefore we use `Duration` to calculate the span
10+
between two `Instant` values.
11+
"""
512

6-
It's convenient for us to store these as a duration by specifying their start
7-
time and end time and then calculating the diference when serialising into
8-
JSON.
13+
@type t :: %Duration{usec: integer()}
914

10-
Internally these times are stored as microseconds from the system's monotonic
11-
clock. See `System.monotonic_time/1` for more information.
15+
@doc """
16+
Create a new duration from the specified number of seconds.
1217
"""
18+
@spec from_seconds(number) :: t
19+
def from_seconds(seconds) when is_integer(seconds),
20+
do: %Duration{usec: seconds * 1_000_000}
1321

14-
defstruct [:offset, :epoch]
15-
alias __MODULE__
22+
def from_seconds(seconds) when is_float(seconds),
23+
do: %Duration{usec: round(seconds * 1_000_000)}
1624

17-
@type microseconds :: integer
18-
@type seconds :: float
19-
@type t :: %Duration{
20-
epoch: microseconds,
21-
offset: microseconds
22-
}
25+
@doc """
26+
Create a new duration directly from microseconds.
27+
"""
28+
@spec from_microseconds(number) :: t
29+
def from_microseconds(microseconds) when is_integer(microseconds),
30+
do: %Duration{usec: microseconds}
31+
32+
def from_microseconds(microseconds) when is_float(microseconds),
33+
do: %Duration{usec: round(microseconds)}
2334

2435
@doc """
25-
The current time based on a zero-microsecond epoch.
36+
Return the elapsed time between two instants.
37+
"""
38+
@spec between(Instant.t(), Instant.t()) :: t
39+
def between(%Instant{usec: i0}, %Instant{usec: i1}), do: %Duration{usec: i0 - i1}
2640

27-
This esspentially returns a duration since the beginning of time, and is not
28-
that useful until you use it to as the epoch for other durations.
41+
@doc """
42+
Return the absolute duration (ie make the duration unsigned).
2943
"""
30-
@spec now :: t
31-
def now, do: %Duration{epoch: 0, offset: now_us()}
44+
@spec abs(t) :: t
45+
def abs(%Duration{usec: i}) when i < 0, do: %Duration{usec: -i}
46+
def abs(%Duration{} = duration), do: duration
3247

3348
@doc """
34-
Return a new now time based on the provided epoch.
49+
Returns the elapsed duration between `instant` and now.
3550
"""
36-
@spec since(t) :: t
37-
def since(%Duration{} = time) do
38-
new_epoch = time.epoch + time.offset
39-
new_offset = now_us() - new_epoch
40-
%Duration{epoch: new_epoch, offset: new_offset}
41-
end
51+
@spec elapsed(Instant.t()) :: t
52+
def elapsed(%Instant{} = instant), do: Duration.between(Instant.now(), instant)
4253

4354
@doc """
4455
The duration of the timing, in (fractional) seconds.
4556
"""
46-
@spec as_seconds(t) :: seconds
47-
def as_seconds(%Duration{offset: offset}), do: offset / 1_000_000.0
48-
49-
defp now_us, do: System.monotonic_time(:microsecond)
50-
51-
defimpl Jason.Encoder do
52-
@spec encode(Duration.t(), Jason.Encode.opts()) :: iodata
53-
def encode(timing, _opts) do
54-
timing
55-
|> Duration.as_seconds()
56-
|> Jason.Encode.float()
57-
end
58-
end
57+
@spec as_seconds(t) :: number
58+
def as_seconds(%Duration{usec: i}), do: i / 1_000_000.0
5959
end

lib/buildkite_test_collector/formatter.ex

Lines changed: 120 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,27 +11,103 @@ defmodule BuildkiteTestCollector.Formatter do
1111

1212
require Logger
1313

14-
alias BuildkiteTestCollector.{CiEnv, Duration, HttpTransport, Payload, TestResult}
14+
alias BuildkiteTestCollector.{CiEnv, Duration, HttpTransport, Instant, Payload, TestResult}
1515

16+
@typedoc """
17+
Unique identifier for tests.
18+
19+
Contains the module that the test is defined in, and the name of the test as an atom.
20+
"""
21+
@type test_id :: {module, atom}
22+
23+
@typedoc false
1624
@type state :: %{
1725
payload: Payload.t(),
18-
timings: %{required(test_id :: {module, atom}) => Duration.t()}
26+
timings: %{required(test_id) => Duration.t()},
27+
spans: %{required(test_id) => [TestResult.span()]}
1928
}
2029

21-
@type test_id :: {module, atom}
30+
@typedoc """
31+
The ExUnit tags, (specifically `module` and `test` tags)
32+
"""
33+
@type tags :: %{
34+
required(:module) => module,
35+
required(:test) => atom,
36+
optional(atom) => any
37+
}
38+
39+
@typedoc """
40+
A trace with known start and end time.
41+
"""
42+
@type span_with_start_and_end_at :: %{
43+
required(:section) => :http | :sql | :sleep | :annotation,
44+
required(:start_at) => Instant.t(),
45+
required(:end_at) => Instant.t(),
46+
optional(:duration) => Duration.t(),
47+
optional(:detail) => String.t()
48+
}
49+
50+
@typedoc """
51+
A trace with a known duration.
52+
"""
53+
@type span_with_duration :: %{
54+
required(:section) => :http | :sql | :sleep | :annotation,
55+
required(:duration) => Duration.t(),
56+
optional(:detail) => String.t()
57+
}
58+
59+
@doc """
60+
Manually add a trace span to the currently running test.
61+
62+
You can add timing information about sql queries, http requests, etc to your
63+
test analytics.
64+
65+
It's probably better to use the helpers in the `Tracing` module.
66+
67+
## Example
68+
69+
```elixir
70+
alias BuildkiteTestCollector.{Formatter, Instant}
71+
72+
test "example of instrumenting a query", tags do
73+
start_at = Instant.now()
74+
75+
MyApp.Repo.all(my_complicated_query)
76+
77+
end_at = Instant.now()
78+
79+
Formatter.add_span(tags, %{
80+
start_at: start_at,
81+
end_at: end_at,
82+
section: :sql,
83+
detail: inspect(my_complicated_query)
84+
})
85+
end
86+
```
87+
"""
88+
@spec add_span(tags | test_id, span_with_start_and_end_at() | span_with_duration()) :: :ok
89+
def add_span(%{module: module, test: name} = _tags, span),
90+
do: GenServer.cast(__MODULE__, {:add_span, {module, name}, span})
91+
92+
def add_span({module, name} = _test_id, span),
93+
do: GenServer.cast(__MODULE__, {:add_span, {module, name}, span})
2294

2395
@impl true
2496
@spec init(keyword) :: {:ok, state}
25-
def init(_opts) do
97+
def init(opts) do
2698
Process.flag(:trap_exit, true)
2799

28100
case CiEnv.detect_env() do
29101
{:ok, env_module} ->
30102
state = %{
31103
payload: Payload.init(env_module),
32-
timings: %{}
104+
timings: %{},
105+
spans: %{}
33106
}
34107

108+
if Keyword.get(opts, :register, true),
109+
do: Process.register(self(), __MODULE__)
110+
35111
{:ok, state}
36112

37113
:error ->
@@ -45,7 +121,7 @@ defmodule BuildkiteTestCollector.Formatter do
45121
def handle_cast({:suite_started, _}, state) do
46122
payload =
47123
state.payload
48-
|> Payload.set_start_time(Duration.now())
124+
|> Payload.set_start_time(Instant.now())
49125

50126
{:noreply, %{state | payload: payload}}
51127
end
@@ -61,17 +137,20 @@ defmodule BuildkiteTestCollector.Formatter do
61137
def handle_cast({:test_started, %ExUnit.Test{module: module, name: name}}, state) do
62138
timings =
63139
state.timings
64-
|> Map.put({module, name}, Duration.since(state.payload.started_at))
140+
|> Map.put({module, name}, Instant.now())
65141

66142
{:noreply, %{state | timings: timings}}
67143
end
68144

69145
def handle_cast({:test_finished, %ExUnit.Test{module: module, name: name} = test}, state) do
70-
end_time = Duration.since(state.payload.started_at)
146+
test_id = {module, name}
147+
{test_spans, spans} = Map.pop(state.spans, test_id, [])
148+
state = %{state | spans: spans}
71149

72-
case Map.pop(state.timings, {module, name}) do
73-
{%Duration{} = start_time, timings} ->
74-
test_result = TestResult.new(test, start_time, end_time)
150+
case Map.pop(state.timings, test_id) do
151+
{%Instant{} = start_time, timings} ->
152+
test_result = TestResult.new(test, start_time)
153+
test_result = Enum.reduce(test_spans, test_result, &TestResult.add_span(&2, &1))
75154

76155
payload =
77156
state.payload
@@ -95,6 +174,20 @@ defmodule BuildkiteTestCollector.Formatter do
95174
end
96175
end
97176

177+
def handle_cast({:add_span, {module, name}, span}, state) do
178+
case refine_span(span) do
179+
{:ok, span} ->
180+
spans =
181+
state.spans
182+
|> Map.update({module, name}, [span], &[span | &1])
183+
184+
{:noreply, %{state | spans: spans}}
185+
186+
:error ->
187+
{:noreply, state}
188+
end
189+
end
190+
98191
def handle_cast(_, state), do: {:noreply, state}
99192

100193
@impl true
@@ -105,4 +198,20 @@ defmodule BuildkiteTestCollector.Formatter do
105198
Logger.error("Error sending test suite analytics: #{inspect(reason)}")
106199
end
107200
end
201+
202+
defguardp valid_section?(section) when section in [:http, :sql, :sleep, :annotation]
203+
204+
defp refine_span(%{duration: duration, section: section} = span)
205+
when is_struct(duration, Duration) and valid_section?(section),
206+
do: {:ok, span}
207+
208+
defp refine_span(%{start_at: start_at, end_at: end_at, section: section} = span)
209+
when is_struct(start_at, Instant) and is_struct(end_at, Instant) and
210+
valid_section?(section),
211+
do: {:ok, Map.put(span, :duration, Duration.between(end_at, start_at))}
212+
213+
defp refine_span(span) do
214+
Logger.warn("Invalid span: #{inspect(span)}")
215+
:error
216+
end
108217
end
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
defmodule BuildkiteTestCollector.Instant do
2+
defstruct usec: 0
3+
alias BuildkiteTestCollector.{Duration, Instant}
4+
5+
@moduledoc """
6+
Represents a single instant with microsecond accuracy.
7+
8+
Wraps monotonic time to ensure that there are no unit mistakes.
9+
10+
## Why not just use `DateTime` instead?
11+
12+
Sadly, the wall clock time can move backwards and forwards between samples
13+
(for example if [NTP](https://www.ntp.org) is updating the clock, or during a
14+
daylight savings transition). The system monotonic clock always moves
15+
forwards, regardless of the current wall clock time. This means it's no good
16+
for measuring the absolute time, but is perfect for measuring relative time.
17+
18+
See
19+
[erlang:monotonic_time/0](https://www.erlang.org/doc/man/erlang.html#monotonic_time-0)
20+
for more information.
21+
"""
22+
23+
@type t :: %Instant{usec: integer()}
24+
25+
@doc """
26+
Return an instant representing "now" as understood by the system's monotonic
27+
clock.
28+
"""
29+
@spec now :: t
30+
def now, do: %Instant{usec: System.monotonic_time(:microsecond)}
31+
32+
@doc """
33+
Add a duration to an instant and return a new instant.
34+
"""
35+
@spec add(t, Duration.t()) :: t
36+
def add(%Instant{usec: instant}, %Duration{usec: duration}),
37+
do: %Instant{usec: instant + duration}
38+
end

lib/buildkite_test_collector/payload.ex

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ defmodule BuildkiteTestCollector.Payload do
55

66
defstruct run_env: nil, data: [], started_at: nil, data_size: 0
77

8-
alias BuildkiteTestCollector.{CiEnv, Duration, Payload, TestResult}
8+
alias BuildkiteTestCollector.{CiEnv, Instant, Payload, TestResult}
99

1010
@type t :: %Payload{
1111
run_env: serialised_environment,
1212
data: [TestResult.t()],
13-
started_at: nil | Duration.t(),
13+
started_at: nil | Instant.t(),
1414
data_size: non_neg_integer()
1515
}
1616

@@ -48,7 +48,7 @@ defmodule BuildkiteTestCollector.Payload do
4848
@doc """
4949
Set the start time of the suite.
5050
"""
51-
@spec set_start_time(Payload.t(), Duration.t()) :: Payload.t()
51+
@spec set_start_time(Payload.t(), Instant.t()) :: Payload.t()
5252
def set_start_time(%Payload{} = payload, started_at), do: %{payload | started_at: started_at}
5353

5454
defp serialise_env(ci_env_mod) do
@@ -58,4 +58,32 @@ defmodule BuildkiteTestCollector.Payload do
5858
key, env -> Map.put(env, key, apply(ci_env_mod, key, []))
5959
end)
6060
end
61+
62+
@doc """
63+
Convert the payload into a map ready for serialisation to JSON.
64+
65+
This is done as a separate step because all timings are relative to the
66+
payload start time, so must be calculated.
67+
"""
68+
@spec as_json(t) :: map
69+
def as_json(%Payload{} = payload) do
70+
%{
71+
format: "json",
72+
run_env: payload.run_env,
73+
data:
74+
payload.data
75+
|> Enum.map(&TestResult.as_json(&1, payload.started_at))
76+
|> Enum.reverse()
77+
}
78+
end
79+
80+
defimpl Jason.Encoder do
81+
@doc false
82+
@spec encode(Payload.t(), Jason.Encode.opts()) :: iodata
83+
def encode(payload, opts) do
84+
payload
85+
|> Payload.as_json()
86+
|> Jason.Encode.map(opts)
87+
end
88+
end
6189
end

0 commit comments

Comments
 (0)