@@ -2,16 +2,21 @@ use crate::agent_control::config::{helmrelease_v2_type_meta, helmrepository_type
22use crate :: cli:: errors:: CliError ;
33use crate :: cli:: utils:: parse_key_value_pairs;
44use crate :: k8s:: annotations:: Annotations ;
5+ #[ cfg_attr( test, mockall_double:: double) ]
56use crate :: k8s:: client:: SyncK8sClient ;
67use crate :: k8s:: labels:: Labels ;
8+ use crate :: sub_agent:: health:: health_checker:: HealthChecker ;
9+ use crate :: sub_agent:: health:: k8s:: health_checker:: SubAgentHealthChecker ;
10+ use crate :: sub_agent:: health:: with_start_time:: StartTime ;
711use crate :: sub_agent:: identity:: AgentIdentity ;
812use clap:: Parser ;
913use kube:: {
1014 Resource ,
1115 api:: { DynamicObject , ObjectMeta } ,
12- core:: Duration ,
1316} ;
1417use std:: sync:: Arc ;
18+ use std:: thread:: sleep;
19+ use std:: time:: Duration ;
1520use std:: { collections:: BTreeMap , str:: FromStr } ;
1621use tracing:: { debug, info} ;
1722
@@ -20,6 +25,10 @@ const REPOSITORY_URL: &str = "https://helm-charts.newrelic.com";
2025const FIVE_MINUTES : & str = "5m" ;
2126const AC_DEPLOYMENT_CHART_NAME : & str = "agent-control-deployment" ;
2227
/// Time slept by `check_installation` before the installation health check is started.
28+ const INSTALLATION_CHECK_DEFAULT_INITIAL_DELAY : Duration = Duration :: from_secs ( 10 ) ;
/// Retry count passed to `check_health_with_retry` by `check_installation`; it is also the
/// number of attempts reported in the installation-check failure message.
29+ const INSTALLATION_CHECK_DEFAULT_MAX_RETRIES : i32 = 10 ;
/// Retry interval passed to `check_health_with_retry` by `check_installation`.
30+ const INSTALLATION_CHECK_DEFAULT_RETRY_INTERVAL : Duration = Duration :: from_secs ( 3 ) ;
31+
2332#[ derive( Debug , Parser ) ]
2433pub struct AgentControlInstallData {
2534 /// Release name
@@ -50,6 +59,10 @@ pub struct AgentControlInstallData {
5059 /// [k8s labels]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
5160 #[ arg( long) ]
5261 pub extra_labels : Option < String > ,
62+
63+ /// Skip the installation check if set
64+ #[ arg( long) ]
65+ pub skip_installation_check : bool ,
5366}
5467
5568pub fn install_agent_control (
@@ -58,6 +71,7 @@ pub fn install_agent_control(
5871) -> Result < ( ) , CliError > {
5972 info ! ( "Installing agent control" ) ;
6073
74+ let skip_check = data. skip_installation_check ;
6175 let dynamic_objects = Vec :: < DynamicObject > :: from ( data) ;
6276
6377 let k8s_client = k8s_client ( namespace. clone ( ) ) ?;
@@ -66,12 +80,16 @@ pub fn install_agent_control(
6680 // For example, what happens if the user applies a remote configuration with a lower version
6781 // that includes a breaking change?
6882 info ! ( "Applying agent control resources" ) ;
69- for object in dynamic_objects {
70- apply_resource ( & k8s_client, & object) ?;
83+ for object in dynamic_objects. iter ( ) {
84+ apply_resource ( & k8s_client, object) ?;
7185 }
7286 info ! ( "Agent control resources applied successfully" ) ;
7387
74- info ! ( "Agent control installed successfully" ) ;
88+ if !skip_check {
89+ info ! ( "Checking Agent control installation" ) ;
90+ check_installation ( k8s_client, dynamic_objects) ?;
91+ info ! ( "Agent control installed successfully" ) ;
92+ }
7593
7694 Ok ( ( ) )
7795}
@@ -143,7 +161,7 @@ fn helm_repository(
143161 data : serde_json:: json!( {
144162 "spec" : {
145163 "url" : REPOSITORY_URL ,
146- "interval" : Duration :: from_str( FIVE_MINUTES ) . expect( "Hardcoded value should be correct" ) ,
164+ "interval" : kube :: core :: Duration :: from_str( FIVE_MINUTES ) . expect( "Hardcoded value should be correct" ) ,
147165 }
148166 } ) ,
149167 }
@@ -155,8 +173,10 @@ fn helm_release(
155173 labels : BTreeMap < String , String > ,
156174 annotations : BTreeMap < String , String > ,
157175) -> DynamicObject {
158- let interval = Duration :: from_str ( FIVE_MINUTES ) . expect ( "Hardcoded value should be correct" ) ;
159- let timeout = Duration :: from_str ( FIVE_MINUTES ) . expect ( "Hardcoded value should be correct" ) ;
176+ let interval =
177+ kube:: core:: Duration :: from_str ( FIVE_MINUTES ) . expect ( "Hardcoded value should be correct" ) ;
178+ let timeout =
179+ kube:: core:: Duration :: from_str ( FIVE_MINUTES ) . expect ( "Hardcoded value should be correct" ) ;
160180 let mut data = serde_json:: json!( {
161181 "spec" : {
162182 "interval" : interval,
@@ -206,6 +226,39 @@ fn secrets_to_json(secrets: BTreeMap<String, String>) -> serde_json::Value {
206226 serde_json:: json!( items)
207227}
208228
229+ fn check_installation (
230+ k8s_client : SyncK8sClient ,
231+ objects : Vec < DynamicObject > ,
232+ ) -> Result < ( ) , CliError > {
233+ let health_checker =
234+ SubAgentHealthChecker :: try_new ( Arc :: new ( k8s_client) , Arc :: new ( objects) , StartTime :: now ( ) )
235+ . map_err ( |err| {
236+ CliError :: InstallationCheck ( format ! ( "could not build health-checker: {err}" ) )
237+ } ) ?
238+ . ok_or_else ( || {
239+ CliError :: InstallationCheck ( "no resources to check health were found" . to_string ( ) )
240+ } ) ?;
241+
242+ // An initial delay is needed because the api-server can take a while to actually apply the changes and we could
243+ // perform the health check to previous objects which could lead to false positives.
244+ sleep ( INSTALLATION_CHECK_DEFAULT_INITIAL_DELAY ) ;
245+ let format_err = |err| {
246+ format ! (
247+ "installation check failed after {INSTALLATION_CHECK_DEFAULT_MAX_RETRIES} attempts: {err}"
248+ )
249+ } ;
250+ let health = health_checker
251+ . check_health_with_retry (
252+ INSTALLATION_CHECK_DEFAULT_MAX_RETRIES ,
253+ INSTALLATION_CHECK_DEFAULT_RETRY_INTERVAL ,
254+ )
255+ . map_err ( |err| CliError :: InstallationCheck ( format_err ( err. to_string ( ) ) ) ) ?;
256+ if let Some ( err) = health. last_error ( ) {
257+ return Err ( CliError :: InstallationCheck ( format_err ( err) ) ) ;
258+ }
259+ Ok ( ( ) )
260+ }
261+
209262#[ cfg( test) ]
210263mod tests {
211264 use super :: * ;
@@ -219,6 +272,7 @@ mod tests {
219272 chart_version : VERSION . to_string ( ) ,
220273 secrets : None ,
221274 extra_labels : None ,
275+ skip_installation_check : false ,
222276 }
223277 }
224278
0 commit comments