@@ -12,7 +12,7 @@ use crate::sub_agent::health::health_checker::{
1212use crate :: sub_agent:: health:: health_checker:: { Healthy , Unhealthy } ;
1313use crate :: sub_agent:: health:: on_host:: health_checker:: OnHostHealthChecker ;
1414use crate :: sub_agent:: health:: with_start_time:: { HealthWithStartTime , StartTime } ;
15- use crate :: sub_agent:: identity:: AgentIdentity ;
15+ use crate :: sub_agent:: identity:: { AgentIdentity , ID_ATTRIBUTE_NAME } ;
1616use crate :: sub_agent:: on_host:: command:: command:: CommandError ;
1717use crate :: sub_agent:: on_host:: command:: command_os:: CommandOSNotStarted ;
1818use crate :: sub_agent:: on_host:: command:: executable_data:: ExecutableData ;
@@ -140,6 +140,7 @@ impl NotStartedSupervisorOnHost {
140140 let health_checker =
141141 OnHostHealthChecker :: try_new ( http_client, health_config. clone ( ) , start_time) ?;
142142 let started_thread_context = spawn_health_checker (
143+ self . agent_identity . id . clone ( ) ,
143144 health_checker,
144145 sub_agent_internal_publisher,
145146 health_config. interval ,
@@ -159,6 +160,7 @@ impl NotStartedSupervisorOnHost {
159160 OnHostAgentVersionChecker :: checked_new ( self . agent_identity . agent_type_id . clone ( ) ) ?;
160161
161162 Some ( spawn_version_checker (
163+ self . agent_identity . id . clone ( ) ,
162164 onhost_version_checker,
163165 sub_agent_internal_publisher,
164166 VersionCheckerInterval :: default ( ) ,
@@ -176,10 +178,16 @@ impl NotStartedSupervisorOnHost {
176178 _ = wait_for_termination ( current_pid. clone ( ) , self . ctx . clone ( ) , shutdown_ctx. clone ( ) ) ;
177179
178180 let executable_data_clone = executable_data. clone ( ) ;
181+ let agent_id = self . agent_identity . id . clone ( ) ;
179182 // NotStartedThreadContext takes as input a callback that requires an EventConsumer<CancellationMessage>
180183 // as input. In that specific case it's not used, but we need to pass it to comply with the signature.
181184 // This should be refactored to work as the other threads used by the supervisor.
182185 let callback = move |_| loop {
186+ let span = info_span ! (
187+ "start_executable" ,
188+ { ID_ATTRIBUTE_NAME } = %agent_id
189+ ) ;
190+ let span_guard = span. enter ( ) ;
183191 // locks the current_pid to prevent `wait_for_termination` finishing before the process
184192 // is started and the pid is set.
185193 // In case starting the process fails the guard will be dropped and `wait_for_termination`
@@ -189,17 +197,11 @@ impl NotStartedSupervisorOnHost {
189197 // A context cancelled means that the supervisor has been gracefully stopped
190198 // before the process was started.
191199 if * Context :: get_lock_cvar ( & self . ctx ) . 0 . lock ( ) . unwrap ( ) {
192- debug ! (
193- supervisor = executable_data_clone. bin,
194- msg = "supervisor stopped before starting the process"
195- ) ;
200+ debug ! ( "supervisor stopped before starting the process" ) ;
196201 break ;
197202 }
198203
199- info ! (
200- supervisor = executable_data_clone. bin,
201- msg = "starting supervisor process"
202- ) ;
204+ info ! ( "starting supervisor process" ) ;
203205
204206 shutdown_ctx. reset ( ) . unwrap ( ) ;
205207 // Signals return exit_code 0, if in the future we need to act on them we can import
@@ -215,7 +217,14 @@ impl NotStartedSupervisorOnHost {
215217 HealthWithStartTime :: new ( init_health. into ( ) , supervisor_start_time) . into ( ) ,
216218 ) ;
217219
218- let exit_code = start_command ( not_started_command, pid_guard)
220+ let command_result = start_command ( not_started_command, pid_guard, span_guard) ;
221+ let span = info_span ! (
222+ "stop_executable" ,
223+ { ID_ATTRIBUTE_NAME } = %agent_id
224+ ) ;
225+ let _span_guard = span. enter ( ) ;
226+
227+ let exit_code = command_result
219228 . inspect_err ( |err| {
220229 error ! (
221230 supervisor = executable_data_clone. bin,
@@ -340,6 +349,7 @@ fn handle_termination(
340349fn start_command (
341350 not_started_command : CommandOSNotStarted ,
342351 mut pid : std:: sync:: MutexGuard < Option < u32 > > ,
352+ span_guard : tracing:: span:: Entered < ' _ > ,
343353) -> Result < ExitStatus , CommandError > {
344354 // run and stream the process
345355 let started = not_started_command. start ( ) ?;
@@ -351,6 +361,8 @@ fn start_command(
351361 // free the lock so the wait_for_termination can lock it on graceful shutdown
352362 drop ( pid) ;
353363
364+ drop ( span_guard) ;
365+
354366 streaming. wait ( )
355367}
356368
@@ -361,11 +373,11 @@ fn wait_for_termination(
361373 ctx : Context < bool > ,
362374 shutdown_ctx : Context < bool > ,
363375) -> JoinHandle < ( ) > {
364- let s = info_span ! ( "termination_signal_listener " ) ;
376+ let span = info_span ! ( "termination_signal " ) ;
365377 spawn_named_thread ( "OnHost Termination signal listener" , move || {
366- let _guards = s. enter ( ) ;
367378 let ( lck, cvar) = Context :: get_lock_cvar ( & ctx) ;
368379 drop ( cvar. wait_while ( lck. lock ( ) . unwrap ( ) , |finish| !* finish) ) ;
380+ let _span_guard = span. enter ( ) ;
369381
370382 // context is unlocked here so locking it again in another thread that is blocking current_pid is safe.
371383 if let Some ( pid) = * current_pid. lock ( ) . unwrap ( ) {
0 commit comments