17
17
using System . Linq ;
18
18
using System . Net ;
19
19
using System . Threading ;
20
+ using System . IO ;
21
+ using System . Text ;
20
22
21
23
namespace NewRelic . Agent . Core . AgentHealth
22
24
{
@@ -26,6 +28,8 @@ public class AgentHealthReporter : ConfigurationBasedService, IAgentHealthReport
26
28
27
29
private readonly IMetricBuilder _metricBuilder ;
28
30
private readonly IScheduler _scheduler ;
31
+ private readonly IFileWrapper _fileWrapper ;
32
+ private readonly IDirectoryWrapper _directoryWrapper ;
29
33
private readonly IList < string > _recurringLogData = new ConcurrentList < string > ( ) ;
30
34
private readonly IDictionary < AgentHealthEvent , InterlockedCounter > _agentHealthEventCounters = new Dictionary < AgentHealthEvent , InterlockedCounter > ( ) ;
31
35
private readonly ConcurrentDictionary < string , InterlockedCounter > _logLinesCountByLevel = new ConcurrentDictionary < string , InterlockedCounter > ( ) ;
@@ -38,10 +42,30 @@ public class AgentHealthReporter : ConfigurationBasedService, IAgentHealthReport
38
42
private InterlockedCounter _traceContextCreateSuccessCounter ;
39
43
private InterlockedCounter _traceContextAcceptSuccessCounter ;
40
44
41
- public AgentHealthReporter ( IMetricBuilder metricBuilder , IScheduler scheduler )
45
+ private HealthCheck _healthCheck ;
46
+ private bool _healthChecksInitialized ;
47
+ private bool _healthChecksFailed ;
48
+ private string _healthCheckPath ;
49
+
50
+ public AgentHealthReporter ( IMetricBuilder metricBuilder , IScheduler scheduler , IFileWrapper fileWrapper , IDirectoryWrapper directoryWrapper )
42
51
{
43
52
_metricBuilder = metricBuilder ;
44
53
_scheduler = scheduler ;
54
+ _fileWrapper = fileWrapper ;
55
+ _directoryWrapper = directoryWrapper ;
56
+
57
+ if ( ! _configuration . AgentControlEnabled )
58
+ Log . Debug ( "Agent Control is disabled. Health checks will not be reported." ) ;
59
+ else
60
+ {
61
+ Log . Debug ( "Agent Control health checks will be published every {HealthCheckInterval} seconds" , _configuration . HealthFrequency ) ;
62
+
63
+ _healthCheck = new ( ) { IsHealthy = true , Status = "Agent starting" , LastError = string . Empty } ;
64
+
65
+ // schedule the health check and issue the first one immediately
66
+ _scheduler . ExecuteEvery ( PublishAgentControlHealthCheck , TimeSpan . FromSeconds ( _configuration . HealthFrequency ) , TimeSpan . Zero ) ;
67
+ }
68
+
45
69
_scheduler . ExecuteEvery ( LogPeriodicReport , _timeBetweenExecutions ) ;
46
70
var agentHealthEvents = Enum . GetValues ( typeof ( AgentHealthEvent ) ) as AgentHealthEvent [ ] ;
47
71
foreach ( var agentHealthEvent in agentHealthEvents )
@@ -258,9 +282,9 @@ public void ReportIfHostIsLinuxOs()
258
282
{
259
283
#if NETSTANDARD2_0
260
284
261
- bool isLinux = System . Runtime . InteropServices . RuntimeInformation . IsOSPlatform ( System . Runtime . InteropServices . OSPlatform . Linux ) ;
262
- var metric = _metricBuilder . TryBuildLinuxOsMetric ( isLinux ) ;
263
- TrySend ( metric ) ;
285
+ bool isLinux = System . Runtime . InteropServices . RuntimeInformation . IsOSPlatform ( System . Runtime . InteropServices . OSPlatform . Linux ) ;
286
+ var metric = _metricBuilder . TryBuildLinuxOsMetric ( isLinux ) ;
287
+ TrySend ( metric ) ;
264
288
#endif
265
289
}
266
290
@@ -667,6 +691,107 @@ public void ReportLogForwardingConfiguredValues()
667
691
668
692
#endregion
669
693
694
+ #region Agent Control
695
+
696
/// <summary>
/// Reports the Agent Control health supportability count metric when Agent Control is enabled
/// in the current configuration; does nothing otherwise.
/// </summary>
private void ReportIfAgentControlHealthEnabled()
{
    var agentControlEnabled = _configuration.AgentControlEnabled;
    if (!agentControlEnabled)
    {
        return;
    }

    ReportSupportabilityCountMetric(MetricNames.SupportabilityAgentControlHealthEnabled);
}
703
+
704
/// <summary>
/// Updates the tracked Agent Control health status.
/// </summary>
/// <param name="healthStatus">The health code tuple (healthy flag, code, status text) to apply.</param>
/// <param name="statusParams">Optional values forwarded along with the status; not forwarded for the healthy-shutdown code.</param>
public void SetAgentControlStatus((bool IsHealthy, string Code, string Status) healthStatus, params string[] statusParams)
{
    // Nothing to track when Agent Control is turned off.
    if (!_configuration.AgentControlEnabled)
    {
        return;
    }

    var isHealthyShutdown = healthStatus.Equals(HealthCodes.AgentShutdownHealthy);
    if (!isHealthyShutdown)
    {
        _healthCheck.TrySetHealth(healthStatus, statusParams);
        return;
    }

    // A healthy-shutdown status is only applied while the agent is currently healthy,
    // so it never replaces a previously recorded unhealthy status.
    if (_healthCheck.IsHealthy)
    {
        _healthCheck.TrySetHealth(healthStatus);
    }
}
722
+
723
/// <summary>
/// Serializes the current health status to YAML and writes it to the health check file in the
/// configured delivery directory. Runs one-time initialization on the first call, and stops the
/// scheduled task when Agent Control is disabled or health checks have been marked as failed.
/// </summary>
public void PublishAgentControlHealthCheck()
{
    if (!_healthChecksInitialized) // initialize on first invocation
    {
        InitializeHealthChecks();
        _healthChecksInitialized = true;
    }

    // stop the scheduled task if agent control isn't enabled or health checks fail for any reason
    if (!_configuration.AgentControlEnabled || _healthChecksFailed)
    {
        _scheduler.StopExecuting(PublishAgentControlHealthCheck);
        return;
    }

    var healthCheckYaml = _healthCheck.ToYaml();

    Log.Finest("Publishing Agent Control health check report: {HealthCheckYaml}", healthCheckYaml);

    try
    {
        using var fs = _fileWrapper.OpenWrite(Path.Combine(_healthCheckPath, _healthCheck.FileName));
        var payloadBytes = Encoding.UTF8.GetBytes(healthCheckYaml);
        fs.Write(payloadBytes, 0, payloadBytes.Length);

        // The same file is rewritten on every run. If the stream was opened without truncation,
        // a report shorter than the previous one would leave stale trailing bytes behind and
        // produce invalid YAML, so cut the file off at the end of what was just written.
        // NOTE(review): assumes IFileWrapper.OpenWrite returns a Stream with File.OpenWrite
        // semantics (existing content is not truncated) -- confirm against the wrapper.
        if (fs.CanSeek)
        {
            fs.SetLength(fs.Position);
        }

        fs.Flush();
    }
    catch (Exception ex)
    {
        // The failure flag is picked up on the next invocation, which stops the scheduled task.
        Log.Warn(ex, "Failed to write Agent Control health check report. Health checks will be disabled.");
        _healthChecksFailed = true;
    }
}
755
+
756
/// <summary>
/// Validates the configured Agent Control health delivery location and stores the local
/// directory path that health check files are written to. Any failed validation step logs a
/// warning and marks health checks as failed.
/// </summary>
private void InitializeHealthChecks()
{
    if (!_configuration.AgentControlEnabled)
    {
        Log.Debug("Agent Control is disabled. Health checks will not be reported.");
        return;
    }

    Log.Debug("Initializing Agent Control health checks");

    // make sure the delivery location is a file URI.
    // TryCreate is used so that a missing or malformed setting is reported as a configuration
    // problem instead of throwing out of the scheduled callback.
    if (!Uri.TryCreate(_configuration.HealthDeliveryLocation, UriKind.Absolute, out var fileUri)
        || fileUri.Scheme != Uri.UriSchemeFile)
    {
        Log.Warn(
            "Agent Control is enabled but the provided agent_control.health.delivery_location is not a file URL. Health checks will be disabled.");
        _healthChecksFailed = true;
        return;
    }

    _healthCheckPath = fileUri.LocalPath;

    // verify the directory exists
    if (!_directoryWrapper.Exists(_healthCheckPath))
    {
        Log.Warn("Agent Control is enabled but the path specified in agent_control.health.delivery_location does not exist. Health checks will be disabled.");
        _healthChecksFailed = true;

        // no point probing for write access in a directory that is not there
        return;
    }

    // verify we can write a file to the directory
    // NOTE(review): whether TryCreateFile removes the probe file afterwards is not visible here -- confirm.
    var testFile = Path.Combine(_healthCheckPath, Path.GetRandomFileName());
    if (!_fileWrapper.TryCreateFile(testFile))
    {
        Log.Warn("Agent Control is enabled but the agent is unable to create files in the directory specified in agent_control.health.delivery_location. Health checks will be disabled.");
        _healthChecksFailed = true;
    }
}
793
+ #endregion
794
+
670
795
public void ReportSupportabilityPayloadsDroppeDueToMaxPayloadSizeLimit ( string endpoint )
671
796
{
672
797
TrySend ( _metricBuilder . TryBuildSupportabilityPayloadsDroppedDueToMaxPayloadLimit ( endpoint ) ) ;
@@ -686,6 +811,7 @@ private void CollectOneTimeMetrics()
686
811
ReportIfInstrumentationIsDisabled ( ) ;
687
812
ReportIfGCSamplerV2IsEnabled ( ) ;
688
813
ReportIfAwsAccountIdProvided ( ) ;
814
+ ReportIfAgentControlHealthEnabled ( ) ;
689
815
}
690
816
691
817
public void CollectMetrics ( )
@@ -857,5 +983,10 @@ private void ReportIfAwsAccountIdProvided()
857
983
ReportSupportabilityCountMetric ( MetricNames . SupportabilityAwsAccountIdProvided ) ;
858
984
}
859
985
}
986
+
987
/// <summary>
/// FOR UNIT TESTING ONLY.
/// Exposes the current value of the internal health-check failure flag.
/// </summary>
public bool HealthCheckFailed
{
    get { return _healthChecksFailed; }
}
860
991
}
861
992
}
0 commit comments