Skip to content

Commit fa86c9a

Browse files
committed
Merged PR 41659: Add metrics with correct names for Resource Monitoring (#5341)
Add metrics with correct names for Resource Monitoring (#5341) Fixes #5113 Previous art: #5309 Add new metrics with correct names. Old metrics will continue to be enabled by default. ### Existing metric setup **Windows Snapshot provider class** `process.cpu.utilization` `dotnet.process.memory.virtual.utilization` **Windows Container Snapshot provider class** `process.cpu.utilization` `dotnet.process.memory.virtual.utilization` **Linux Utilization Provider class** `process.cpu.utilization` `dotnet.process.memory.virtual.utilization` ### New metric setup **Windows Snapshot provider class** `process.cpu.utilization` - no changes `dotnet.process.memory.virtual.utilization` - no changes **Windows Container Snapshot provider class** `process.cpu.utilization` - no changes `dotnet.process.memory.virtual.utilization` - calculates memory for the dotnet process only (instead of all processes) `container.cpu.limit.utilization` - new metric, same value as `process.cpu.utilization` `container.memory.limit.utilization` - new metric, calculates memory for all processes in the container **Linux Utilization Provider class** `process.cpu.utilization` - fixed incorrect scale calculation, instead of `host CPUs / CPU limit / CPU request`, it is now `host CPUs / CPU request` `dotnet.process.memory.virtual.utilization` - no changes `container.cpu.limit.utilization` - new metric, value is relative to CPU resource limit (aka maximum CPU units) `container.memory.limit.utilization` - new metric, calculates memory for all processes in the container `container.cpu.request.utilization` - new metric, same value as `process.cpu.utilization` ---- #### AI description (iteration 1) #### PR Classification New feature: Added metrics with correct names for resource monitoring. #### PR Summary This pull request introduces new metrics for resource monitoring with correct naming conventions and updates the related tests and implementation. 
- `LinuxUtilizationProvider.cs`: Added new metrics for container CPU and memory utilization, and updated existing metrics. - `AcceptanceTest.cs`: Added new tests for verifying the new metrics and updated existing tests for better coverage. - `ResourceUtilizationInstruments.cs`: Defined new constants for the new metrics. - Removed `WindowsCounters.cs` as it is no longer needed.
1 parent 62abfe3 commit fa86c9a

16 files changed

+363
-162
lines changed

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Calculator.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public static ResourceUtilization CalculateUtilization(in Snapshot first, in Sna
2525
long runtimeTickDelta = second.TotalTimeSinceStart.Ticks - first.TotalTimeSinceStart.Ticks;
2626

2727
// Compute the total number of ticks available on the machine during that interval
28-
double totalSystemTicks = runtimeTickDelta * systemResources.GuaranteedCpuUnits;
28+
double totalSystemTicks = runtimeTickDelta;
2929

3030
// fudge to avoid divide by zero
3131
if (totalSystemTicks <= 0)

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationParserCgroupV1.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ private static bool TryGetCpuUnitsFromCgroups(IFileSystem fileSystem, out float
444444
/// <summary>
445445
/// In cgroup v1 the CPU shares is used to determine the CPU allocation.
446446
/// in cgroup v2 the CPU weight is used to determine the CPU allocation.
447-
/// To calculete CPU request in cgroup v2 we need to read the CPU weight and convert it to CPU shares.
447+
/// To calculate CPU request in cgroup v2 we need to read the CPU weight and convert it to CPU shares.
448448
/// But for cgroup v1 we can read the CPU shares directly from the file.
449449
/// 1024 equals 1 CPU core.
450450
/// In cgroup v1 on some systems the location of the CPU shares file is different.

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs

+56-59
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
1515
private readonly object _cpuLocker = new();
1616
private readonly object _memoryLocker = new();
1717
private readonly ILinuxUtilizationParser _parser;
18-
private readonly ulong _totalMemoryInBytes;
18+
private readonly ulong _memoryLimit;
1919
private readonly TimeSpan _cpuRefreshInterval;
2020
private readonly TimeSpan _memoryRefreshInterval;
2121
private readonly TimeProvider _timeProvider;
22-
private readonly double _scale;
23-
private readonly double _scaleForTrackerApi;
22+
private readonly double _scaleRelativeToCpuLimit;
23+
private readonly double _scaleRelativeToCpuRequest;
24+
private readonly double _scaleRelativeToCpuRequestForTrackerApi;
2425

2526
private DateTimeOffset _refreshAfterCpu;
2627
private DateTimeOffset _refreshAfterMemory;
@@ -37,73 +38,73 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
3738
{
3839
_parser = parser;
3940
_timeProvider = timeProvider ?? TimeProvider.System;
40-
var now = _timeProvider.GetUtcNow();
41+
DateTimeOffset now = _timeProvider.GetUtcNow();
4142
_cpuRefreshInterval = options.Value.CpuConsumptionRefreshInterval;
4243
_memoryRefreshInterval = options.Value.MemoryConsumptionRefreshInterval;
4344
_refreshAfterCpu = now;
4445
_refreshAfterMemory = now;
45-
_totalMemoryInBytes = _parser.GetAvailableMemoryInBytes();
46+
_memoryLimit = _parser.GetAvailableMemoryInBytes();
4647
_previousHostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
4748
_previousCgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();
4849

49-
var hostMemory = _parser.GetHostAvailableMemory();
50-
var hostCpus = _parser.GetHostCpuCount();
51-
var availableCpus = _parser.GetCgroupLimitedCpus();
52-
var cpuGuaranteedRequest = _parser.GetCgroupRequestCpu();
53-
_scale = hostCpus / availableCpus;
54-
_scaleForTrackerApi = hostCpus / availableCpus;
50+
float hostCpus = _parser.GetHostCpuCount();
51+
float cpuLimit = _parser.GetCgroupLimitedCpus();
52+
float cpuRequest = _parser.GetCgroupRequestCpu();
53+
_scaleRelativeToCpuLimit = hostCpus / cpuLimit;
54+
_scaleRelativeToCpuRequest = hostCpus / cpuRequest;
55+
_scaleRelativeToCpuRequestForTrackerApi = hostCpus; // the division by cpuRequest is performed later on in the ResourceUtilization class
5556

5657
#pragma warning disable CA2000 // Dispose objects before losing scope
5758
// We don't dispose the meter because IMeterFactory handles that
5859
// An issue on analyzer side: https://github.com/dotnet/roslyn-analyzers/issues/6912
5960
// Related documentation: https://github.com/dotnet/docs/pull/37170
60-
var meter = meterFactory.Create("Microsoft.Extensions.Diagnostics.ResourceMonitoring");
61+
var meter = meterFactory.Create(nameof(Microsoft.Extensions.Diagnostics.ResourceMonitoring));
6162
#pragma warning restore CA2000 // Dispose objects before losing scope
6263

63-
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.CpuUtilization, observeValue: CpuUtilization, unit: "1");
64-
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.MemoryUtilization, observeValue: MemoryUtilization, unit: "1");
64+
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1");
65+
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1");
66+
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
6567

66-
// cpuGuaranteedRequest is a CPU request for pod, for host its 1 core
67-
// available CPUs is a CPU limit for a pod or for a host.
68-
// _totalMemoryInBytes - Resource Memory Limit (in k8s terms)
69-
// _totalMemoryInBytes - To keep the contract, this parameter will get the Host available memory
70-
Resources = new SystemResources(cpuGuaranteedRequest, availableCpus, _totalMemoryInBytes, _totalMemoryInBytes);
68+
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessCpuUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
69+
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessMemoryUtilization, observeValue: MemoryUtilization, unit: "1");
70+
71+
// cpuRequest is a CPU request (aka guaranteed number of CPU units) for a pod; for a host it's 1 core
72+
// cpuLimit is a CPU limit (aka max CPU units available) for a pod or for a host.
73+
// _memoryLimit - Resource Memory Limit (in k8s terms)
74+
// _memoryLimit - To keep the contract, this parameter will get the Host available memory
75+
Resources = new SystemResources(cpuRequest, cpuLimit, _memoryLimit, _memoryLimit);
7176
}
7277

7378
public double CpuUtilization()
7479
{
75-
var now = _timeProvider.GetUtcNow();
76-
bool needUpdate = false;
80+
DateTimeOffset now = _timeProvider.GetUtcNow();
7781

7882
lock (_cpuLocker)
7983
{
80-
if (now >= _refreshAfterCpu)
84+
if (now < _refreshAfterCpu)
8185
{
82-
needUpdate = true;
86+
return _cpuPercentage;
8387
}
8488
}
8589

86-
if (needUpdate)
87-
{
88-
var hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
89-
var cgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();
90+
long hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
91+
long cgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();
9092

91-
lock (_cpuLocker)
93+
lock (_cpuLocker)
94+
{
95+
if (now >= _refreshAfterCpu)
9296
{
93-
if (now >= _refreshAfterCpu)
97+
double deltaHost = hostCpuTime - _previousHostCpuTime;
98+
double deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;
99+
100+
if (deltaHost > 0 && deltaCgroup > 0)
94101
{
95-
var deltaHost = hostCpuTime - _previousHostCpuTime;
96-
var deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;
97-
98-
if (deltaHost > 0 && deltaCgroup > 0)
99-
{
100-
var percentage = Math.Min(One, deltaCgroup / deltaHost * _scale);
101-
102-
_cpuPercentage = percentage;
103-
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
104-
_previousCgroupCpuTime = cgroupCpuTime;
105-
_previousHostCpuTime = hostCpuTime;
106-
}
102+
double percentage = Math.Min(One, deltaCgroup / deltaHost);
103+
104+
_cpuPercentage = percentage;
105+
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
106+
_previousCgroupCpuTime = cgroupCpuTime;
107+
_previousHostCpuTime = hostCpuTime;
107108
}
108109
}
109110
}
@@ -113,30 +114,26 @@ public double CpuUtilization()
113114

114115
public double MemoryUtilization()
115116
{
116-
var now = _timeProvider.GetUtcNow();
117-
bool needUpdate = false;
117+
DateTimeOffset now = _timeProvider.GetUtcNow();
118118

119119
lock (_memoryLocker)
120120
{
121-
if (now >= _refreshAfterMemory)
121+
if (now < _refreshAfterMemory)
122122
{
123-
needUpdate = true;
123+
return _memoryPercentage;
124124
}
125125
}
126126

127-
if (needUpdate)
128-
{
129-
var memoryUsed = _parser.GetMemoryUsageInBytes();
127+
ulong memoryUsed = _parser.GetMemoryUsageInBytes();
130128

131-
lock (_memoryLocker)
129+
lock (_memoryLocker)
130+
{
131+
if (now >= _refreshAfterMemory)
132132
{
133-
if (now >= _refreshAfterMemory)
134-
{
135-
var memoryPercentage = Math.Min(One, (double)memoryUsed / _totalMemoryInBytes);
133+
double memoryPercentage = Math.Min(One, (double)memoryUsed / _memoryLimit);
136134

137-
_memoryPercentage = memoryPercentage;
138-
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
139-
}
135+
_memoryPercentage = memoryPercentage;
136+
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
140137
}
141138
}
142139

@@ -150,14 +147,14 @@ public double MemoryUtilization()
150147
/// </remarks>
151148
public Snapshot GetSnapshot()
152149
{
153-
var hostTime = _parser.GetHostCpuUsageInNanoseconds();
154-
var cgroupTime = _parser.GetCgroupCpuUsageInNanoseconds();
155-
var memoryUsed = _parser.GetMemoryUsageInBytes();
150+
long hostTime = _parser.GetHostCpuUsageInNanoseconds();
151+
long cgroupTime = _parser.GetCgroupCpuUsageInNanoseconds();
152+
ulong memoryUsed = _parser.GetMemoryUsageInBytes();
156153

157154
return new Snapshot(
158155
totalTimeSinceStart: TimeSpan.FromTicks(hostTime / Hundred),
159156
kernelTimeSinceStart: TimeSpan.Zero,
160-
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleForTrackerApi)),
157+
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleRelativeToCpuRequestForTrackerApi)),
161158
memoryUsageInBytes: memoryUsed);
162159
}
163160
}

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ private static ResourceMonitorBuilder AddWindowsProvider(this ResourceMonitorBui
8888
builder.PickWindowsSnapshotProvider();
8989

9090
_ = builder.Services
91-
.AddActivatedSingleton<WindowsCounters>();
91+
.AddActivatedSingleton<WindowsNetworkMetrics>();
9292

9393
_ = builder.Services
9494
.AddActivatedSingleton<TcpTableInfo>();

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceUtilization.cs

+8-2
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,16 @@ public readonly struct ResourceUtilization
5151
/// <param name="systemResources">CPU and memory limits.</param>
5252
public ResourceUtilization(double cpuUsedPercentage, ulong memoryUsedInBytes, SystemResources systemResources)
5353
{
54-
CpuUsedPercentage = Throw.IfLessThan(cpuUsedPercentage, 0.0);
54+
double guaranteedCpuUnits = systemResources.GuaranteedCpuUnits;
55+
if (guaranteedCpuUnits <= 0)
56+
{
57+
guaranteedCpuUnits = 1;
58+
}
59+
60+
CpuUsedPercentage = Throw.IfLessThan(cpuUsedPercentage / guaranteedCpuUnits, 0.0);
5561
MemoryUsedInBytes = Throw.IfLessThan(memoryUsedInBytes, 0);
5662
SystemResources = systemResources;
57-
MemoryUsedPercentage = Math.Min(Hundred, (double)MemoryUsedInBytes / SystemResources.GuaranteedMemoryInBytes * Hundred);
63+
MemoryUsedPercentage = Math.Min(Hundred, (double)MemoryUsedInBytes / systemResources.GuaranteedMemoryInBytes * Hundred);
5864
}
5965

6066
/// <summary>

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceUtilizationInstruments.cs

+30-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System;
5+
46
namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring;
57

68
/// <summary>
@@ -13,18 +15,42 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring;
1315
internal static class ResourceUtilizationInstruments
1416
{
1517
/// <summary>
16-
/// Gets the CPU consumption of the running application in range <c>[0, 1]</c>.
18+
/// The name of an instrument to retrieve CPU limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
19+
/// </summary>
20+
/// <remarks>
21+
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
22+
/// </remarks>
23+
public const string ContainerCpuLimitUtilization = "container.cpu.limit.utilization";
24+
25+
/// <summary>
26+
/// The name of an instrument to retrieve CPU request consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
27+
/// </summary>
28+
/// <remarks>
29+
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
30+
/// </remarks>
31+
public const string ContainerCpuRequestUtilization = "container.cpu.request.utilization";
32+
33+
/// <summary>
34+
/// The name of an instrument to retrieve memory limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
35+
/// </summary>
36+
/// <remarks>
37+
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
38+
/// </remarks>
39+
public const string ContainerMemoryLimitUtilization = "container.memory.limit.utilization";
40+
41+
/// <summary>
42+
/// The name of an instrument to retrieve CPU consumption share of the running process in range <c>[0, 1]</c>.
1743
/// </summary>
1844
/// <remarks>
1945
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
2046
/// </remarks>
21-
public const string CpuUtilization = "process.cpu.utilization";
47+
public const string ProcessCpuUtilization = "process.cpu.utilization";
2248

2349
/// <summary>
24-
/// Gets the memory consumption of the running application in range <c>[0, 1]</c>.
50+
/// The name of an instrument to retrieve memory consumption share of the running process in range <c>[0, 1]</c>.
2551
/// </summary>
2652
/// <remarks>
2753
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
2854
/// </remarks>
29-
public const string MemoryUtilization = "dotnet.process.memory.virtual.utilization";
55+
public const string ProcessMemoryUtilization = "dotnet.process.memory.virtual.utilization";
3056
}

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Windows/Interop/IProcessInfo.cs

+8-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Windows.Interop;
99
internal interface IProcessInfo
1010
{
1111
/// <summary>
12-
/// Retrieve the memory usage of a system.
12+
/// Retrieves the amount of memory, in bytes, used by the current process.
1313
/// </summary>
14-
/// <returns>Memory usage amount in bytes.</returns>
14+
/// <returns>The number of bytes allocated by the current process.</returns>
15+
ulong GetCurrentProcessMemoryUsage();
16+
17+
/// <summary>
18+
/// Retrieves the amount of memory, in bytes, used by the system.
19+
/// </summary>
20+
/// <returns>The number of bytes allocated by the system.</returns>
1521
ulong GetMemoryUsage();
1622
}

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Windows/Interop/ProcessInfo.cs

+6
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,10 @@ public ulong GetMemoryUsage()
3838

3939
return memoryUsage;
4040
}
41+
42+
public ulong GetCurrentProcessMemoryUsage()
43+
{
44+
using Process process = Process.GetCurrentProcess();
45+
return (ulong)process.WorkingSet64;
46+
}
4147
}

0 commit comments

Comments
 (0)