Skip to content

Commit d946512

Browse files
Add container.cpu.time metric
1 parent 8b2e1bc commit d946512

File tree

8 files changed

+135
-11
lines changed

8 files changed

+135
-11
lines changed

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationParserCgroupV1.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ public long GetHostCpuUsageInNanoseconds()
145145
$"'{_procStat}' should contain whitespace separated values according to POSIX. We've failed trying to get {i}th value. File content: '{new string(stat)}'.");
146146
}
147147

148-
stat = stat.Slice(next, stat.Length - next);
148+
stat = stat.Slice(next);
149149
}
150150

151151
return (long)(total / (double)_userHz * NanosecondsInSecond);

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationParserCgroupV2.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ public long GetHostCpuUsageInNanoseconds()
163163
$"'{_procStat}' should contain whitespace separated values according to POSIX. We've failed trying to get {i}th value. File content: '{new string(stat)}'.");
164164
}
165165

166-
stat = stat.Slice(next, stat.Length - next);
166+
stat = stat.Slice(next);
167167
}
168168

169169
return (long)(total / (double)_userHz * NanosecondsInSecond);

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System;
5+
using System.Collections.Generic;
56
using System.Diagnostics.Metrics;
67
using Microsoft.Extensions.Logging;
78
using Microsoft.Extensions.Logging.Abstractions;
@@ -13,6 +14,7 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
1314
{
1415
private const double One = 1.0;
1516
private const long Hundred = 100L;
17+
private const double NanosecondsInSecond = 1_000_000_000;
1618

1719
private readonly object _cpuLocker = new();
1820
private readonly object _memoryLocker = new();
@@ -65,6 +67,7 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
6567
var meter = meterFactory.Create(ResourceUtilizationInstruments.MeterName);
6668
#pragma warning restore CA2000 // Dispose objects before losing scope
6769

70+
_ = meter.CreateObservableCounter(name: ResourceUtilizationInstruments.ContainerCpuTime, observeValues: GetCpuTime, unit: "s", description: "CPU time used by the container.");
6871
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1");
6972
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1");
7073
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
@@ -166,4 +169,13 @@ public Snapshot GetSnapshot()
166169
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleRelativeToCpuRequestForTrackerApi)),
167170
memoryUsageInBytes: memoryUsed);
168171
}
172+
173+
/// <summary>
/// Supplies the measurements for the <c>container.cpu.time</c> observable counter.
/// </summary>
/// <returns>
/// Two measurements expressed in seconds: the cgroup CPU usage tagged <c>cpu.mode=user</c>,
/// followed by the host CPU usage tagged <c>cpu.mode=system</c>.
/// </returns>
private IEnumerable<Measurement<double>> GetCpuTime()
{
    // Both counters are read up front (host first, then cgroup); the parser reports nanoseconds.
    long hostNanoseconds = _parser.GetHostCpuUsageInNanoseconds();
    long cgroupNanoseconds = _parser.GetCgroupCpuUsageInNanoseconds();

    double userSeconds = cgroupNanoseconds / NanosecondsInSecond;
    double systemSeconds = hostNanoseconds / NanosecondsInSecond;

    // NOTE(review): host-wide CPU time is published under cpu.mode=system here, which is not
    // obviously the container's kernel-mode time — confirm this mapping is intended.
    yield return new Measurement<double>(userSeconds, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
    yield return new Measurement<double>(systemSeconds, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
}
169181
}

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceUtilizationInstruments.cs

+8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@ internal static class ResourceUtilizationInstruments
1616
/// </summary>
1717
public const string MeterName = "Microsoft.Extensions.Diagnostics.ResourceMonitoring";
1818

19+
/// <summary>
20+
/// The name of an instrument to retrieve CPU time consumed by the specific container on all available CPU cores, measured in seconds.
21+
/// </summary>
22+
/// <remarks>
23+
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableCounter{T}"/>.
24+
/// </remarks>
25+
public const string ContainerCpuTime = "container.cpu.time";
26+
1927
/// <summary>
2028
/// The name of an instrument to retrieve CPU limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
2129
/// </summary>

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Windows/WindowsContainerSnapshotProvider.cs

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System;
5+
using System.Collections.Generic;
56
using System.Diagnostics.CodeAnalysis;
67
using System.Diagnostics.Metrics;
78
using System.Threading;
@@ -15,6 +16,7 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Windows;
1516
internal sealed class WindowsContainerSnapshotProvider : ISnapshotProvider
1617
{
1718
private const double Hundred = 100.0d;
19+
private const double TicksPerSecoundDouble = TimeSpan.TicksPerSecond;
1820

1921
private readonly Lazy<MEMORYSTATUSEX> _memoryStatus;
2022

@@ -109,6 +111,7 @@ internal WindowsContainerSnapshotProvider(
109111
#pragma warning restore CA2000 // Dispose objects before losing scope
110112

111113
// Container based metrics:
114+
_ = meter.CreateObservableCounter(name: ResourceUtilizationInstruments.ContainerCpuTime, observeValues: GetCpuTime, unit: "s", description: "CPU time used by the container.");
112115
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: CpuPercentage);
113116
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: () => MemoryPercentage(() => _processInfo.GetMemoryUsage()));
114117

@@ -205,6 +208,15 @@ private double MemoryPercentage(Func<ulong> getMemoryUsage)
205208
}
206209
}
207210

211+
/// <summary>
/// Supplies the measurements for the <c>container.cpu.time</c> observable counter.
/// </summary>
/// <returns>
/// Two measurements expressed in seconds: the job object's total user time tagged
/// <c>cpu.mode=user</c>, followed by its total kernel time tagged <c>cpu.mode=system</c>.
/// </returns>
private IEnumerable<Measurement<double>> GetCpuTime()
{
    // A fresh job handle is created per observation and released once enumeration finishes.
    using (var job = _createJobHandleObject())
    {
        var accounting = job.GetBasicAccountingInfo();

        // Accounting values are converted to seconds by dividing by TimeSpan.TicksPerSecond.
        double userSeconds = accounting.TotalUserTime / TicksPerSecoundDouble;
        double kernelSeconds = accounting.TotalKernelTime / TicksPerSecoundDouble;

        yield return new Measurement<double>(userSeconds, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
        yield return new Measurement<double>(kernelSeconds, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
    }
}
219+
208220
private double CpuPercentage()
209221
{
210222
var now = _timeProvider.GetUtcNow();

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/AcceptanceTest.cs

+33-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Diagnostics.CodeAnalysis;
77
using System.Diagnostics.Metrics;
88
using System.IO;
9+
using System.Linq;
910
using System.Threading;
1011
using System.Threading.Tasks;
1112
using Microsoft.Extensions.Configuration;
@@ -209,6 +210,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
209210

210211
using var listener = new MeterListener();
211212
var clock = new FakeTimeProvider(DateTimeOffset.UtcNow);
213+
var cpuUserTime = 0.0d;
214+
var cpuKernelTime = 0.0d;
212215
var cpuFromGauge = 0.0d;
213216
var cpuLimitFromGauge = 0.0d;
214217
var cpuRequestFromGauge = 0.0d;
@@ -219,8 +222,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
219222
object? meterScope = null;
220223
listener.InstrumentPublished = (Instrument instrument, MeterListener meterListener)
221224
=> OnInstrumentPublished(instrument, meterListener, meterScope);
222-
listener.SetMeasurementEventCallback<double>((m, f, _, _)
223-
=> OnMeasurementReceived(m, f, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
225+
listener.SetMeasurementEventCallback<double>((m, f, tags, _)
226+
=> OnMeasurementReceived(m, f, tags, ref cpuUserTime, ref cpuKernelTime, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
224227
listener.Start();
225228

226229
using var host = FakeHost.CreateBuilder()
@@ -246,6 +249,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
246249
Assert.Equal(0, utilization.CpuUsedPercentage);
247250
Assert.Equal(100, utilization.MemoryUsedPercentage);
248251
Assert.True(double.IsNaN(cpuFromGauge));
252+
Assert.Equal(0.000102312, cpuUserTime);
253+
Assert.Equal(0.8, cpuKernelTime);
249254

250255
// gauge multiplied by 100 because gauges are in range [0, 1], and utilization is in range [0, 100]
251256
Assert.Equal(utilization.MemoryUsedPercentage, memoryFromGauge * 100);
@@ -264,6 +269,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
264269
Assert.Equal(1, utilization.CpuUsedPercentage);
265270
Assert.Equal(50, utilization.MemoryUsedPercentage);
266271
Assert.Equal(0.5, cpuLimitFromGauge * 100);
272+
Assert.Equal(0.000112312, cpuUserTime);
273+
Assert.Equal(0.81, cpuKernelTime);
267274
Assert.Equal(utilization.CpuUsedPercentage, cpuRequestFromGauge * 100);
268275
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
269276
Assert.Equal(utilization.CpuUsedPercentage, cpuFromGauge * 100);
@@ -292,6 +299,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
292299

293300
using var listener = new MeterListener();
294301
var clock = new FakeTimeProvider(DateTimeOffset.UtcNow);
302+
var cpuUserTime = 0.0d;
303+
var cpuKernelTime = 0.0d;
295304
var cpuFromGauge = 0.0d;
296305
var cpuLimitFromGauge = 0.0d;
297306
var cpuRequestFromGauge = 0.0d;
@@ -302,8 +311,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
302311
object? meterScope = null;
303312
listener.InstrumentPublished = (Instrument instrument, MeterListener meterListener)
304313
=> OnInstrumentPublished(instrument, meterListener, meterScope);
305-
listener.SetMeasurementEventCallback<double>((m, f, _, _)
306-
=> OnMeasurementReceived(m, f, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
314+
listener.SetMeasurementEventCallback<double>((m, f, tags, _)
315+
=> OnMeasurementReceived(m, f, tags, ref cpuUserTime, ref cpuKernelTime, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
307316
listener.Start();
308317

309318
using var host = FakeHost.CreateBuilder()
@@ -351,6 +360,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
351360
Assert.Equal(1, roundedCpuUsedPercentage);
352361
Assert.Equal(50, utilization.MemoryUsedPercentage);
353362
Assert.Equal(0.5, cpuLimitFromGauge * 100);
363+
Assert.Equal(0.000112, cpuUserTime);
364+
Assert.Equal(0.81, cpuKernelTime);
354365
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuRequestFromGauge * 100));
355366
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
356367
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuFromGauge * 100));
@@ -369,6 +380,7 @@ private static void OnInstrumentPublished(Instrument instrument, MeterListener m
369380
#pragma warning disable S1067 // Expressions should not be too complex
370381
if (instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization ||
371382
instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization ||
383+
instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime ||
372384
instrument.Name == ResourceUtilizationInstruments.ContainerCpuRequestUtilization ||
373385
instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization ||
374386
instrument.Name == ResourceUtilizationInstruments.ContainerMemoryLimitUtilization)
@@ -378,10 +390,12 @@ private static void OnInstrumentPublished(Instrument instrument, MeterListener m
378390
#pragma warning restore S1067 // Expressions should not be too complex
379391
}
380392

393+
#pragma warning disable S107 // Methods should not have too many parameters
381394
private static void OnMeasurementReceived(
382-
Instrument instrument, double value,
383-
ref double cpuFromGauge, ref double cpuLimitFromGauge, ref double cpuRequestFromGauge,
384-
ref double memoryFromGauge, ref double memoryLimitFromGauge)
395+
Instrument instrument, double value, ReadOnlySpan<KeyValuePair<string, object?>> tags,
396+
ref double cpuUserTime, ref double cpuKernelTime, ref double cpuFromGauge, ref double cpuLimitFromGauge,
397+
ref double cpuRequestFromGauge, ref double memoryFromGauge, ref double memoryLimitFromGauge)
398+
#pragma warning restore S107 // Methods should not have too many parameters
385399
{
386400
if (instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization)
387401
{
@@ -391,6 +405,18 @@ private static void OnMeasurementReceived(
391405
{
392406
memoryFromGauge = value;
393407
}
408+
else if (instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime)
409+
{
410+
var tagsArray = tags.ToArray();
411+
if (tagsArray.Contains(new KeyValuePair<string, object?>("cpu.mode", "user")))
412+
{
413+
cpuUserTime = value;
414+
}
415+
else if (tagsArray.Contains(new KeyValuePair<string, object?>("cpu.mode", "system")))
416+
{
417+
cpuKernelTime = value;
418+
}
419+
}
394420
else if (instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization)
395421
{
396422
cpuLimitFromGauge = value;

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs

+8-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public void Provider_Registers_Instruments()
7474
listener.Start();
7575
listener.RecordObservableInstruments();
7676

77-
Assert.Equal(5, samples.Count);
77+
Assert.Equal(7, samples.Count);
7878

7979
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
8080
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
@@ -90,6 +90,9 @@ public void Provider_Registers_Instruments()
9090

9191
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
9292
Assert.Equal(0.5, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);
93+
94+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (50.0 / 1_000_000_000)) < 0.00001);
95+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
9396
}
9497

9598
[ConditionalFact]
@@ -143,7 +146,7 @@ public void Provider_Registers_Instruments_CgroupV2()
143146
listener.Start();
144147
listener.RecordObservableInstruments();
145148

146-
Assert.Equal(5, samples.Count);
149+
Assert.Equal(7, samples.Count);
147150

148151
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
149152
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
@@ -159,6 +162,9 @@ public void Provider_Registers_Instruments_CgroupV2()
159162

160163
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
161164
Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);
165+
166+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (102312.0 / 1_000_000)) < 0.00001);
167+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
162168
}
163169

164170
[Fact]

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Windows/WindowsContainerSnapshotProviderTests.cs

+60
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,66 @@ public void GetSnapshot_With_JobMemoryLimit_Set_To_Zero_ProducesCorrectSnapshot(
190190
Assert.True(data.MemoryUsageInBytes > 0);
191191
}
192192

193+
/// <summary>
/// Verifies that the <c>container.cpu.time</c> counter reports both the user and the kernel
/// time taken from the job object's basic accounting information, converted to seconds.
/// </summary>
[Fact]
public void SnapshotProvider_EmitsCpuTimeMetric()
{
    // Accounting info returned after the initial reads: 1500 ticks of user time and
    // 2500 ticks of kernel time. The values differ so the two modes can be told apart.
    JOBOBJECT_BASIC_ACCOUNTING_INFORMATION updatedAccountingInfo = default;
    updatedAccountingInfo.TotalKernelTime = 2500;
    updatedAccountingInfo.TotalUserTime = 1500;

    // The sequence allows exactly four reads; any extra read throws and fails the test.
    _jobHandleMock.SetupSequence(j => j.GetBasicAccountingInfo())
        .Returns(_accountingInfo)
        .Returns(_accountingInfo)
        .Returns(updatedAccountingInfo)
        .Returns(updatedAccountingInfo)
        .Throws(new InvalidOperationException("We shouldn't hit here..."));

    _sysInfo.NumberOfProcessors = 2;

    var fakeClock = new FakeTimeProvider();

    // Name the meter after this test rather than after a sibling test.
    using var meter = new Meter(nameof(SnapshotProvider_EmitsCpuTimeMetric));
    var meterFactoryMock = new Mock<IMeterFactory>();
    meterFactoryMock.Setup(x => x.Create(It.IsAny<MeterOptions>()))
        .Returns(meter);
    using var metricCollector = new MetricCollector<double>(meter, ResourceUtilizationInstruments.ContainerCpuTime, fakeClock);

    var options = new ResourceMonitoringOptions { CpuConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2) };

    var snapshotProvider = new WindowsContainerSnapshotProvider(
        _memoryInfoMock.Object,
        _systemInfoMock.Object,
        _processInfoMock.Object,
        _logger,
        meterFactoryMock.Object,
        () => _jobHandleMock.Object,
        fakeClock,
        options);

    // Step #0 - state in the beginning: one measurement per CPU mode.
    metricCollector.RecordObservableInstruments();
    var snapshot = metricCollector.GetMeasurementSnapshot();
    Assert.Equal(2, snapshot.Count);
    Assert.Contains(_accountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(_accountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));

    // Step #1 - simulate 1 millisecond passing and collect metrics again:
    fakeClock.Advance(TimeSpan.FromMilliseconds(1));
    metricCollector.RecordObservableInstruments();
    snapshot = metricCollector.GetMeasurementSnapshot();
    Assert.Contains(updatedAccountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(updatedAccountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));

    // Step #2 - simulate 1 millisecond passing and collect metrics again:
    fakeClock.Advance(TimeSpan.FromMilliseconds(1));
    metricCollector.RecordObservableInstruments();
    snapshot = metricCollector.GetMeasurementSnapshot();

    // CPU time should be the same as before, as we're not simulating any additional CPU usage:
    Assert.Contains(updatedAccountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(updatedAccountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
}
252+
193253
[Theory]
194254
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization)]
195255
[InlineData(ResourceUtilizationInstruments.ContainerCpuLimitUtilization)]

0 commit comments

Comments
 (0)