Skip to content

Commit dfdf228

Browse files
evgenyfedorov2evgenyfedorov2
authored and
evgenyfedorov2
committed
Add container.cpu.time metric
1 parent 3640389 commit dfdf228

File tree

8 files changed

+135
-11
lines changed

8 files changed

+135
-11
lines changed

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationParserCgroupV1.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ public long GetHostCpuUsageInNanoseconds()
145145
$"'{_procStat}' should contain whitespace separated values according to POSIX. We've failed trying to get {i}th value. File content: '{new string(stat)}'.");
146146
}
147147

148-
stat = stat.Slice(next, stat.Length - next);
148+
stat = stat.Slice(next);
149149
}
150150

151151
return (long)(total / (double)_userHz * NanosecondsInSecond);

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationParserCgroupV2.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ public long GetHostCpuUsageInNanoseconds()
163163
$"'{_procStat}' should contain whitespace separated values according to POSIX. We've failed trying to get {i}th value. File content: '{new string(stat)}'.");
164164
}
165165

166-
stat = stat.Slice(next, stat.Length - next);
166+
stat = stat.Slice(next);
167167
}
168168

169169
return (long)(total / (double)_userHz * NanosecondsInSecond);

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System;
5+
using System.Collections.Generic;
56
using System.Diagnostics.Metrics;
67
using Microsoft.Extensions.Logging;
78
using Microsoft.Extensions.Logging.Abstractions;
@@ -14,6 +15,7 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
1415
{
1516
private const double One = 1.0;
1617
private const long Hundred = 100L;
18+
private const double NanosecondsInSecond = 1_000_000_000;
1719

1820
private readonly object _cpuLocker = new();
1921
private readonly object _memoryLocker = new();
@@ -66,6 +68,7 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
6668
var meter = meterFactory.Create(ResourceUtilizationInstruments.MeterName);
6769
#pragma warning restore CA2000 // Dispose objects before losing scope
6870

71+
_ = meter.CreateObservableCounter(name: ResourceUtilizationInstruments.ContainerCpuTime, observeValues: GetCpuTime, unit: "s", description: "CPU time used by the container.");
6972
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1");
7073
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1");
7174
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
@@ -167,4 +170,13 @@ public Snapshot GetSnapshot()
167170
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleRelativeToCpuRequestForTrackerApi)),
168171
memoryUsageInBytes: memoryUsed);
169172
}
173+
174+
/// <summary>
/// Produces the measurements reported by the <c>container.cpu.time</c> observable counter.
/// </summary>
/// <returns>
/// Two measurements expressed in seconds: the cgroup CPU time tagged <c>cpu.mode=user</c>,
/// followed by the host CPU time tagged <c>cpu.mode=system</c>.
/// </returns>
/// <remarks>
/// NOTE(review): the host-wide CPU time is what gets published under <c>cpu.mode=system</c>;
/// confirm that this mapping is the intended meaning of the "system" mode.
/// </remarks>
private IEnumerable<Measurement<double>> GetCpuTime()
{
    // Both counters are read (host first) before the first measurement is handed out.
    long hostNanoseconds = _parser.GetHostCpuUsageInNanoseconds();
    long cgroupNanoseconds = _parser.GetCgroupCpuUsageInNanoseconds();

    // The parser reports nanoseconds, while the instrument is declared with unit "s".
    double cgroupSeconds = cgroupNanoseconds / NanosecondsInSecond;
    yield return new Measurement<double>(cgroupSeconds, [new KeyValuePair<string, object?>("cpu.mode", "user")]);

    double hostSeconds = hostNanoseconds / NanosecondsInSecond;
    yield return new Measurement<double>(hostSeconds, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
}
170182
}

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Windows/WindowsContainerSnapshotProvider.cs

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System;
5+
using System.Collections.Generic;
56
using System.Diagnostics.CodeAnalysis;
67
using System.Diagnostics.Metrics;
78
using System.Threading;
@@ -17,6 +18,7 @@ internal sealed class WindowsContainerSnapshotProvider : ISnapshotProvider
1718
{
1819
private const double One = 1.0d;
1920
private const double Hundred = 100.0d;
21+
private const double TicksPerSecoundDouble = TimeSpan.TicksPerSecond;
2022

2123
private readonly Lazy<MEMORYSTATUSEX> _memoryStatus;
2224

@@ -114,6 +116,7 @@ internal WindowsContainerSnapshotProvider(
114116
#pragma warning restore CA2000 // Dispose objects before losing scope
115117

116118
// Container based metrics:
119+
_ = meter.CreateObservableCounter(name: ResourceUtilizationInstruments.ContainerCpuTime, observeValues: GetCpuTime, unit: "s", description: "CPU time used by the container.");
117120
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: CpuPercentage);
118121
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: () => MemoryPercentage(() => _processInfo.GetMemoryUsage()));
119122

@@ -211,6 +214,15 @@ private double MemoryPercentage(Func<ulong> getMemoryUsage)
211214
}
212215
}
213216

217+
/// <summary>
/// Produces the measurements reported by the <c>container.cpu.time</c> observable counter.
/// </summary>
/// <returns>
/// Two measurements expressed in seconds: the job object's total user time tagged
/// <c>cpu.mode=user</c>, followed by its total kernel time tagged <c>cpu.mode=system</c>.
/// </returns>
private IEnumerable<Measurement<double>> GetCpuTime()
{
    // A fresh job handle is created for every observation and released once enumeration finishes.
    using var job = _createJobHandleObject();
    var accounting = job.GetBasicAccountingInfo();

    // Accounting values are divided by the ticks-per-second constant to obtain seconds.
    var userSeconds = accounting.TotalUserTime / TicksPerSecoundDouble;
    var kernelSeconds = accounting.TotalKernelTime / TicksPerSecoundDouble;

    yield return new Measurement<double>(userSeconds, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
    yield return new Measurement<double>(kernelSeconds, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
}
225+
214226
private double CpuPercentage()
215227
{
216228
var now = _timeProvider.GetUtcNow();

src/Shared/Instruments/ResourceUtilizationInstruments.cs

+8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ internal static class ResourceUtilizationInstruments
1818
/// </summary>
1919
public const string MeterName = "Microsoft.Extensions.Diagnostics.ResourceMonitoring";
2020

21+
/// <summary>
22+
/// The name of an instrument to retrieve CPU time consumed by the specific container on all available CPU cores, measured in seconds.
23+
/// </summary>
24+
/// <remarks>
25+
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableCounter{T}"/>.
26+
/// </remarks>
27+
public const string ContainerCpuTime = "container.cpu.time";
28+
2129
/// <summary>
2230
/// The name of an instrument to retrieve CPU limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
2331
/// </summary>

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/AcceptanceTest.cs

+33-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Diagnostics.CodeAnalysis;
77
using System.Diagnostics.Metrics;
88
using System.IO;
9+
using System.Linq;
910
using System.Threading;
1011
using System.Threading.Tasks;
1112
using Microsoft.Extensions.Configuration;
@@ -209,6 +210,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
209210

210211
using var listener = new MeterListener();
211212
var clock = new FakeTimeProvider(DateTimeOffset.UtcNow);
213+
var cpuUserTime = 0.0d;
214+
var cpuKernelTime = 0.0d;
212215
var cpuFromGauge = 0.0d;
213216
var cpuLimitFromGauge = 0.0d;
214217
var cpuRequestFromGauge = 0.0d;
@@ -219,8 +222,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
219222
object? meterScope = null;
220223
listener.InstrumentPublished = (Instrument instrument, MeterListener meterListener)
221224
=> OnInstrumentPublished(instrument, meterListener, meterScope);
222-
listener.SetMeasurementEventCallback<double>((m, f, _, _)
223-
=> OnMeasurementReceived(m, f, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
225+
listener.SetMeasurementEventCallback<double>((m, f, tags, _)
226+
=> OnMeasurementReceived(m, f, tags, ref cpuUserTime, ref cpuKernelTime, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
224227
listener.Start();
225228

226229
using var host = FakeHost.CreateBuilder()
@@ -246,6 +249,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
246249
Assert.Equal(0, utilization.CpuUsedPercentage);
247250
Assert.Equal(100, utilization.MemoryUsedPercentage);
248251
Assert.True(double.IsNaN(cpuFromGauge));
252+
Assert.Equal(0.000102312, cpuUserTime);
253+
Assert.Equal(0.8, cpuKernelTime);
249254

250255
// gauge multiplied by 100 because gauges are in range [0, 1], and utilization is in range [0, 100]
251256
Assert.Equal(utilization.MemoryUsedPercentage, memoryFromGauge * 100);
@@ -264,6 +269,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
264269
Assert.Equal(1, utilization.CpuUsedPercentage);
265270
Assert.Equal(50, utilization.MemoryUsedPercentage);
266271
Assert.Equal(0.5, cpuLimitFromGauge * 100);
272+
Assert.Equal(0.000112312, cpuUserTime);
273+
Assert.Equal(0.81, cpuKernelTime);
267274
Assert.Equal(utilization.CpuUsedPercentage, cpuRequestFromGauge * 100);
268275
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
269276
Assert.Equal(utilization.CpuUsedPercentage, cpuFromGauge * 100);
@@ -292,6 +299,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
292299

293300
using var listener = new MeterListener();
294301
var clock = new FakeTimeProvider(DateTimeOffset.UtcNow);
302+
var cpuUserTime = 0.0d;
303+
var cpuKernelTime = 0.0d;
295304
var cpuFromGauge = 0.0d;
296305
var cpuLimitFromGauge = 0.0d;
297306
var cpuRequestFromGauge = 0.0d;
@@ -302,8 +311,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
302311
object? meterScope = null;
303312
listener.InstrumentPublished = (Instrument instrument, MeterListener meterListener)
304313
=> OnInstrumentPublished(instrument, meterListener, meterScope);
305-
listener.SetMeasurementEventCallback<double>((m, f, _, _)
306-
=> OnMeasurementReceived(m, f, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
314+
listener.SetMeasurementEventCallback<double>((m, f, tags, _)
315+
=> OnMeasurementReceived(m, f, tags, ref cpuUserTime, ref cpuKernelTime, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
307316
listener.Start();
308317

309318
using var host = FakeHost.CreateBuilder()
@@ -351,6 +360,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
351360
Assert.Equal(1, roundedCpuUsedPercentage);
352361
Assert.Equal(50, utilization.MemoryUsedPercentage);
353362
Assert.Equal(0.5, cpuLimitFromGauge * 100);
363+
Assert.Equal(0.000112, cpuUserTime);
364+
Assert.Equal(0.81, cpuKernelTime);
354365
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuRequestFromGauge * 100));
355366
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
356367
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuFromGauge * 100));
@@ -369,6 +380,7 @@ private static void OnInstrumentPublished(Instrument instrument, MeterListener m
369380
#pragma warning disable S1067 // Expressions should not be too complex
370381
if (instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization ||
371382
instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization ||
383+
instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime ||
372384
instrument.Name == ResourceUtilizationInstruments.ContainerCpuRequestUtilization ||
373385
instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization ||
374386
instrument.Name == ResourceUtilizationInstruments.ContainerMemoryLimitUtilization)
@@ -378,10 +390,12 @@ private static void OnInstrumentPublished(Instrument instrument, MeterListener m
378390
#pragma warning restore S1067 // Expressions should not be too complex
379391
}
380392

393+
#pragma warning disable S107 // Methods should not have too many parameters
381394
private static void OnMeasurementReceived(
382-
Instrument instrument, double value,
383-
ref double cpuFromGauge, ref double cpuLimitFromGauge, ref double cpuRequestFromGauge,
384-
ref double memoryFromGauge, ref double memoryLimitFromGauge)
395+
Instrument instrument, double value, ReadOnlySpan<KeyValuePair<string, object?>> tags,
396+
ref double cpuUserTime, ref double cpuKernelTime, ref double cpuFromGauge, ref double cpuLimitFromGauge,
397+
ref double cpuRequestFromGauge, ref double memoryFromGauge, ref double memoryLimitFromGauge)
398+
#pragma warning restore S107 // Methods should not have too many parameters
385399
{
386400
if (instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization)
387401
{
@@ -391,6 +405,18 @@ private static void OnMeasurementReceived(
391405
{
392406
memoryFromGauge = value;
393407
}
408+
else if (instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime)
409+
{
410+
var tagsArray = tags.ToArray();
411+
if (tagsArray.Contains(new KeyValuePair<string, object?>("cpu.mode", "user")))
412+
{
413+
cpuUserTime = value;
414+
}
415+
else if (tagsArray.Contains(new KeyValuePair<string, object?>("cpu.mode", "system")))
416+
{
417+
cpuKernelTime = value;
418+
}
419+
}
394420
else if (instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization)
395421
{
396422
cpuLimitFromGauge = value;

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs

+8-2
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public void Provider_Registers_Instruments()
7575
listener.Start();
7676
listener.RecordObservableInstruments();
7777

78-
Assert.Equal(5, samples.Count);
78+
Assert.Equal(7, samples.Count);
7979

8080
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
8181
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
@@ -91,6 +91,9 @@ public void Provider_Registers_Instruments()
9191

9292
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
9393
Assert.Equal(0.5, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);
94+
95+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (50.0 / 1_000_000_000)) < 0.00001);
96+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
9497
}
9598

9699
[ConditionalFact]
@@ -144,7 +147,7 @@ public void Provider_Registers_Instruments_CgroupV2()
144147
listener.Start();
145148
listener.RecordObservableInstruments();
146149

147-
Assert.Equal(5, samples.Count);
150+
Assert.Equal(7, samples.Count);
148151

149152
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
150153
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
@@ -160,6 +163,9 @@ public void Provider_Registers_Instruments_CgroupV2()
160163

161164
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
162165
Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);
166+
167+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (102312.0 / 1_000_000)) < 0.00001);
168+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
163169
}
164170

165171
[Fact]

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Windows/WindowsContainerSnapshotProviderTests.cs

+60
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,66 @@ public void GetSnapshot_With_JobMemoryLimit_Set_To_Zero_ProducesCorrectSnapshot(
191191
Assert.True(data.MemoryUsageInBytes > 0);
192192
}
193193

194+
/// <summary>
/// Verifies that the <c>container.cpu.time</c> counter reports both the user and the kernel CPU time
/// of the job object, converted from ticks to seconds, and that later observations pick up
/// updated accounting information.
/// </summary>
[Fact]
public void SnapshotProvider_EmitsCpuTimeMetric()
{
    // Accounting data served after the initial reads: 1500 ticks of user time and 2500 ticks of kernel time.
    JOBOBJECT_BASIC_ACCOUNTING_INFORMATION updatedAccountingInfo = default;
    updatedAccountingInfo.TotalKernelTime = 2500;
    updatedAccountingInfo.TotalUserTime = 1500;

    // The first two reads see the initial accounting info, the next two see the updated one;
    // any further read fails the test.
    _jobHandleMock.SetupSequence(j => j.GetBasicAccountingInfo())
        .Returns(_accountingInfo)
        .Returns(_accountingInfo)
        .Returns(updatedAccountingInfo)
        .Returns(updatedAccountingInfo)
        .Throws(new InvalidOperationException("We shouldn't hit here..."));

    _sysInfo.NumberOfProcessors = 2;

    var fakeClock = new FakeTimeProvider();
    using var meter = new Meter(nameof(SnapshotProvider_EmitsCpuTimeMetric));
    var meterFactoryMock = new Mock<IMeterFactory>();
    meterFactoryMock.Setup(x => x.Create(It.IsAny<MeterOptions>()))
        .Returns(meter);
    using var metricCollector = new MetricCollector<double>(meter, ResourceUtilizationInstruments.ContainerCpuTime, fakeClock);

    var options = new ResourceMonitoringOptions { CpuConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2) };

    // Constructing the provider registers the instruments on the meter.
    var snapshotProvider = new WindowsContainerSnapshotProvider(
        _memoryInfoMock.Object,
        _systemInfoMock.Object,
        _processInfoMock.Object,
        _logger,
        meterFactoryMock.Object,
        () => _jobHandleMock.Object,
        fakeClock,
        options);

    // Step #0 - state in the beginning:
    metricCollector.RecordObservableInstruments();
    var snapshot = metricCollector.GetMeasurementSnapshot();
    Assert.Equal(2, snapshot.Count);
    Assert.Contains(_accountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(_accountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));

    // Step #1 - simulate 1 millisecond passing and collect metrics again:
    fakeClock.Advance(TimeSpan.FromMilliseconds(1));
    metricCollector.RecordObservableInstruments();
    snapshot = metricCollector.GetMeasurementSnapshot();
    Assert.Contains(updatedAccountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(updatedAccountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));

    // Step #2 - simulate 1 millisecond passing and collect metrics again:
    fakeClock.Advance(TimeSpan.FromMilliseconds(1));
    metricCollector.RecordObservableInstruments();
    snapshot = metricCollector.GetMeasurementSnapshot();

    // CPU time should be the same as before, as we're not simulating any CPU usage:
    Assert.Contains(updatedAccountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(updatedAccountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
}
253+
194254
[Theory]
195255
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization, true)]
196256
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization, false)]

0 commit comments

Comments
 (0)