Skip to content

Commit 855f683

Browse files
committed
sdk: resolve memory leaks in exporter
Signed-off-by: inge4pres <[email protected]>
1 parent 7b0d0ef commit 855f683

File tree

2 files changed

+61
-45
lines changed

2 files changed

+61
-45
lines changed

src/sdk/metrics/exporter.zig

Lines changed: 46 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@ pub const MetricExporter = struct {
2626

2727
allocator: std.mem.Allocator,
2828
exporter: *ExporterIface,
29-
hasShutDown: std.atomic.Value(bool) = std.atomic.Value(bool).init(false),
3029

31-
var exportCompleted: std.atomic.Value(bool) = std.atomic.Value(bool).init(false);
30+
// Lock helper to signal shutdown and/or export is in progress
31+
hasShutDown: bool = false,
32+
exportCompleted: std.Thread.Mutex = std.Thread.Mutex{},
3233

3334
pub fn new(allocator: std.mem.Allocator, exporter: *ExporterIface) !*Self {
3435
const s = try allocator.create(Self);
@@ -42,14 +43,14 @@ pub const MetricExporter = struct {
4243
/// ExportBatch exports a batch of metrics data by calling the exporter implementation.
4344
/// The passed metrics data will be owned by the exporter implementation.
4445
pub fn exportBatch(self: *Self, metrics: []Measurements) ExportResult {
45-
if (self.hasShutDown.load(.acquire)) {
46+
if (@atomicLoad(bool, &self.hasShutDown, .acquire)) {
4647
// When shutdown has already been called, calling export should be a failure.
4748
// https://opentelemetry.io/docs/specs/otel/metrics/sdk/#shutdown-2
4849
return ExportResult.Failure;
4950
}
50-
// Acquire the lock to ensure that forceFlush is waiting for export to complete.
51-
_ = exportCompleted.load(.acquire);
52-
defer exportCompleted.store(true, .release);
51+
// Acquire the lock to signal to forceFlush to wait for export to complete.
52+
self.exportCompleted.lock();
53+
defer self.exportCompleted.unlock();
5354

5455
// Call the exporter function to process metrics data.
5556
self.exporter.exportBatch(metrics) catch |e| {
@@ -60,24 +61,31 @@ pub const MetricExporter = struct {
6061
}
6162

6263
// Ensure that all the data is flushed to the destination.
63-
pub fn forceFlush(_: Self, timeout_ms: u64) !void {
64+
pub fn forceFlush(self: *Self, timeout_ms: u64) !void {
6465
const start = std.time.milliTimestamp(); // Milliseconds
6566
const timeout: i64 = @intCast(timeout_ms);
6667
while (std.time.milliTimestamp() < start + timeout) {
67-
if (exportCompleted.load(.acquire)) {
68+
if (self.exportCompleted.tryLock()) {
69+
self.exportCompleted.unlock();
6870
return;
69-
} else std.time.sleep(std.time.ns_per_ms);
71+
} else {
72+
std.time.sleep(std.time.ns_per_ms);
73+
}
7074
}
7175
return MetricReadError.ForceFlushTimedOut;
7276
}
7377

7478
pub fn shutdown(self: *Self) void {
75-
if (self.hasShutDown.load(.acquire)) {
76-
// When shutdown has already been called, calling shutdown again is a no-op.
79+
if (@atomicRmw(bool, &self.hasShutDown, .Xchg, true, .acq_rel)) {
7780
return;
7881
}
79-
self.hasShutDown.store(true, .release);
82+
// if (@atomicLoad(bool, &self.hasShutDown, .acquire)) {
83+
// // When shutdown has already been called, calling shutdown again is a no-op.
84+
// return;
85+
// } else {
86+
// @atomicStore(bool, &self.hasShutDown, true, .release);
8087
self.allocator.destroy(self);
88+
// }
8189
}
8290
};
8391

@@ -115,7 +123,7 @@ test "metric exporter no-op" {
115123

116124
var measure = [1]DataPoint(i64){.{ .value = 42 }};
117125
const measurement: []DataPoint(i64) = measure[0..];
118-
var metrics = [1]Measurements{Measurements{
126+
var metrics = [1]Measurements{.{
119127
.meterName = "my-meter",
120128
.instrumentKind = .Counter,
121129
.instrumentOptions = .{ .name = "my-counter" },
@@ -189,9 +197,8 @@ test "metric exporter force flush fails" {
189197
backgroundRunner,
190198
.{ me, &metrics },
191199
);
192-
bg.detach();
200+
bg.join();
193201

194-
std.time.sleep(10 * std.time.ns_per_ms); // sleep for 10 ms to ensure the background thread completed
195202
const e = me.forceFlush(0);
196203
try std.testing.expectError(MetricReadError.ForceFlushTimedOut, e);
197204
}
@@ -210,14 +217,16 @@ pub const ExporterIface = struct {
210217
};
211218

212219
/// InMemoryExporter stores in memory the metrics data to be exported.
213-
/// The memory representation uses the types defined in the library.
220+
/// The metics' representation in memory uses the types defined in the library.
214221
pub const InMemoryExporter = struct {
215222
const Self = @This();
216223
allocator: std.mem.Allocator,
217224
data: std.ArrayList(Measurements) = undefined,
218225
// Implement the interface via @fieldParentPtr
219226
exporter: ExporterIface,
220227

228+
mx: std.Thread.Mutex = std.Thread.Mutex{},
229+
221230
pub fn init(allocator: std.mem.Allocator) !*Self {
222231
const s = try allocator.create(Self);
223232
s.* = Self{
@@ -230,30 +239,35 @@ pub const InMemoryExporter = struct {
230239
return s;
231240
}
232241
pub fn deinit(self: *Self) void {
233-
for (self.data.items) |d| {
234-
var data = d;
235-
data.deinit(self.allocator);
242+
self.mx.lock();
243+
for (self.data.items) |*d| {
244+
d.*.deinit(self.allocator);
236245
}
237246
self.data.deinit();
247+
self.mx.unlock();
248+
238249
self.allocator.destroy(self);
239250
}
240251

252+
// Implements the ExportIFace interface only method.
241253
fn exportBatch(iface: *ExporterIface, metrics: []Measurements) MetricReadError!void {
242254
// Get a pointer to the instance of the struct that implements the interface.
243255
const self: *Self = @fieldParentPtr("exporter", iface);
256+
self.mx.lock();
257+
defer self.mx.unlock();
244258

245-
for (self.data.items) |d| {
246-
var data = d;
247-
data.deinit(self.allocator);
259+
// Free up the allocated data points from the previous export.
260+
for (self.data.items) |*d| {
261+
d.*.deinit(self.allocator);
248262
}
249-
self.data.clearRetainingCapacity();
263+
self.data.clearAndFree();
250264
self.data = std.ArrayList(Measurements).fromOwnedSlice(self.allocator, metrics);
251265
}
252266

253267
/// Read the metrics from the in memory exporter.
254-
//FIXME might need a mutex in the exporter as the field might be accessed
255-
// from a thread while it's being cleared in another (via exportBatch).
256268
pub fn fetch(self: *Self) ![]Measurements {
269+
self.mx.lock();
270+
defer self.mx.unlock();
257271
return self.data.items;
258272
}
259273
};
@@ -321,7 +335,7 @@ pub const PeriodicExportingReader = struct {
321335
exportTimeoutMillis: u64,
322336

323337
// Lock helper to signal shutdown is in progress
324-
shuttingDown: std.atomic.Value(bool) = std.atomic.Value(bool).init(false),
338+
shuttingDown: bool = false,
325339

326340
// This reader will collect metrics data from the MeterProvider.
327341
reader: *MetricReader,
@@ -361,8 +375,10 @@ pub const PeriodicExportingReader = struct {
361375
}
362376

363377
pub fn shutdown(self: *Self) void {
364-
self.shuttingDown.store(true, .release);
378+
// First signal the background exporter to stop collecting, then close the reader.
379+
@atomicStore(bool, &self.shuttingDown, true, .release);
365380
self.reader.shutdown();
381+
// Only when the background collector has stopped we can destroy.
366382
self.allocator.destroy(self);
367383
}
368384
};
@@ -371,17 +387,17 @@ pub const PeriodicExportingReader = struct {
371387
// FIXME there is not a timeout for the collect operation.
372388
fn collectAndExport(
373389
reader: *MetricReader,
374-
shuttingDown: *std.atomic.Value(bool),
390+
shuttingDown: *bool,
375391
exportIntervalMillis: u64,
376392
// TODO: add a timeout for the export operation
377393
_: u64,
378394
) void {
379395
// The execution should continue until the reader is shutting down
380-
while (!shuttingDown.*.load(.acquire)) {
396+
while (!@atomicLoad(bool, shuttingDown, .acquire)) {
381397
if (reader.meterProvider) |_| {
382398
// This will also call exporter.exportBatch() every interval.
383399
reader.collect() catch |e| {
384-
std.debug.print("PeriodicExportingReader: reader collect failed: {?}\n", .{e});
400+
std.debug.print("PeriodicExportingReader: collecting failed on reader: {?}\n", .{e});
385401
};
386402
} else {
387403
std.debug.print("PeriodicExportingReader: no meter provider is registered with this MetricReader {any}\n", .{reader});

src/sdk/metrics/reader.zig

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pub const MetricReader = struct {
4949
temporality: TemporalitySelector = view.DefaultTemporalityFor,
5050
aggregation: AggregationSelector = view.DefaultAggregationFor,
5151
// Signal that shutdown has been called.
52-
hasShutDown: std.atomic.Value(bool) = std.atomic.Value(bool).init(false),
52+
hasShutDown: bool = false,
5353
mx: std.Thread.Mutex = std.Thread.Mutex{},
5454

5555
const Self = @This();
@@ -74,28 +74,29 @@ pub const MetricReader = struct {
7474
}
7575

7676
pub fn collect(self: *Self) !void {
77+
if (@atomicLoad(bool, &self.hasShutDown, .acquire)) {
78+
// When shutdown has already been called, collect is a no-op.
79+
return;
80+
}
7781
if (!self.mx.tryLock()) {
7882
return MetricReadError.ConcurrentCollectNotAllowed;
7983
}
8084
defer self.mx.unlock();
81-
82-
if (self.hasShutDown.load(.acquire)) {
83-
// When shutdown has already been called, collect is a no-op.
84-
return;
85-
}
8685
var toBeExported = std.ArrayList(Measurements).init(self.allocator);
8786
defer toBeExported.deinit();
8887

8988
if (self.meterProvider) |mp| {
9089
// Collect the data from each meter provider.
91-
// TODO: extract MeasurmentsData from all meters and accumulate them with Meter attributes.
92-
// MeasurementsData can be ported much more easilty to protobuf structs during export.
90+
// Measurements can be ported to protobuf structs during OTLP export.
9391
var meters = mp.meters.valueIterator();
9492
while (meters.next()) |meter| {
95-
const measurements: []Measurements = try AggregatedMetrics.fetch(self.allocator, meter, self.aggregation);
96-
defer self.allocator.free(measurements);
97-
93+
const measurements: []Measurements = AggregatedMetrics.fetch(self.allocator, meter, self.aggregation) catch |err| {
94+
std.debug.print("MetricReader: error aggregating data points from meter {s}: {?}", .{ meter.name, err });
95+
continue;
96+
};
97+
// this makes a copy of the measurements to the array list
9898
try toBeExported.appendSlice(measurements);
99+
self.allocator.free(measurements);
99100
}
100101

101102
//TODO: apply temporality before exporting, optionally keeping state in the reader.
@@ -105,7 +106,7 @@ pub const MetricReader = struct {
105106

106107
// Export the metrics data through the exporter.
107108
// The exporter will own the metrics and should free it
108-
// by calling deinit() on the MeterMeasurements once done.
109+
// by calling deinit() on the Measurements once done.
109110
// MetricExporter must be built with the same allocator as MetricReader
110111
// to ensure that the memory is managed correctly.
111112
const owned = try toBeExported.toOwnedSlice();
@@ -120,10 +121,10 @@ pub const MetricReader = struct {
120121
}
121122

122123
pub fn shutdown(self: *Self) void {
124+
@atomicStore(bool, &self.hasShutDown, true, .release);
123125
self.collect() catch |e| {
124126
std.debug.print("MetricReader shutdown: error while collecting metrics: {?}\n", .{e});
125127
};
126-
self.hasShutDown.store(true, .release);
127128
self.exporter.shutdown();
128129
self.allocator.destroy(self);
129130
}
@@ -164,8 +165,7 @@ test "metric reader collects data from meter provider" {
164165

165166
try reader.collect();
166167

167-
const data = try inMem.fetch();
168-
defer std.testing.allocator.free(data);
168+
_ = try inMem.fetch();
169169
}
170170

171171
fn deltaTemporality(_: Kind) view.Temporality {

0 commit comments

Comments
 (0)