Skip to content

Commit 7205785

Browse files
thedavekwonfacebook-github-bot
authored andcommitted
Derive actor telemetry IDs from ActorId (#4121)
Summary: Actor telemetry IDs should use runtime actor identity, not routable actor addresses (changed in D102822047) This updates actor rows, sender IDs, receiver IDs, and actor status IDs to hash `ActorId` while keeping `ActorAddr` strings only for display/debug fields. This prevents location changes from changing the telemetry actor join key. Reviewed By: mariusae Differential Revision: D107156872
1 parent 32a470f commit 7205785

10 files changed

Lines changed: 39 additions & 27 deletions

File tree

hyperactor/src/mailbox.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2163,15 +2163,15 @@ impl MailboxSender for Mailbox {
21632163
return_undeliverable,
21642164
} = metadata;
21652165

2166-
let to_actor_id = hash_to_u64(&dest);
2166+
let to_actor_id = hash_to_u64(dest.actor_addr().id());
21672167
let message_id = hyperactor_telemetry::generate_message_id(to_actor_id);
21682168
headers.set(crate::mailbox::headers::TELEMETRY_MESSAGE_ID, message_id);
21692169
// Only set sender hash if not already present (cast path
21702170
// pre-sets it with the originating actor).
21712171
if !headers.contains_key(crate::mailbox::headers::SENDER_ACTOR_ID_HASH) {
21722172
headers.set(
21732173
crate::mailbox::headers::SENDER_ACTOR_ID_HASH,
2174-
hash_to_u64(&sender),
2174+
hash_to_u64(sender.id()),
21752175
);
21762176
}
21772177
headers.set(crate::mailbox::headers::TELEMETRY_PORT_ID, dest.index());

hyperactor/src/mailbox/headers.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ declare_attrs! {
3232
/// The rust type of the message.
3333
pub attr RUST_MESSAGE_TYPE: String;
3434

35-
/// Hashed ActorAddr of the message sender, injected in post_unchecked().
35+
/// Hashed ActorId of the message sender, injected in post_unchecked().
3636
pub attr SENDER_ACTOR_ID_HASH: u64;
3737

3838
/// Full ActorAddr of the session owner — the actor whose Sequencer

hyperactor/src/proc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2345,7 +2345,7 @@ impl<A: Actor> Instance<A> {
23452345
caller = %PanicLocation::caller(),
23462346
change_reason,
23472347
);
2348-
let actor_id = hash_to_u64(self.self_addr());
2348+
let actor_id = hash_to_u64(self.self_addr().id());
23492349
notify_actor_status_changed(ActorStatusEvent {
23502350
id: generate_actor_status_event_id(actor_id),
23512351
timestamp: std::time::SystemTime::now(),
@@ -3207,7 +3207,7 @@ impl<A: Actor> Instance<A> {
32073207
let from_actor_id = headers
32083208
.get(crate::mailbox::headers::SENDER_ACTOR_ID_HASH)
32093209
.unwrap_or(0);
3210-
let to_actor_id = hash_to_u64(self.self_addr());
3210+
let to_actor_id = hash_to_u64(self.self_addr().id());
32113211
let port_id = headers.get(crate::mailbox::headers::TELEMETRY_PORT_ID);
32123212

32133213
notify_message(hyperactor_telemetry::MessageEvent {

hyperactor_mesh/src/actor_mesh.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,7 @@ impl<A: Referable> ActorMeshRef<A> {
537537

538538
hyperactor_telemetry::notify_sent_message(hyperactor_telemetry::SentMessageEvent {
539539
timestamp: std::time::SystemTime::now(),
540-
sender_actor_id: hyperactor_telemetry::hash_to_u64(cx.mailbox().actor_addr()),
540+
sender_actor_id: hyperactor_telemetry::hash_to_u64(cx.mailbox().actor_addr().id()),
541541
actor_mesh_id: hyperactor_telemetry::hash_to_u64(&self.id.to_string()),
542542
view_json: serde_json::to_string(view::Ranked::region(self)).unwrap_or_default(),
543543
shape_json: {

hyperactor_mesh/src/comm/multicast.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ pub fn set_cast_info_on_headers(headers: &mut Flattrs, cast_point: Point, sender
371371
// CAST_ORIGINATING_SENDER -- they carry overlapping sender identity.
372372
headers.set(
373373
hyperactor::mailbox::headers::SENDER_ACTOR_ID_HASH,
374-
hyperactor_telemetry::hash_to_u64(&sender),
374+
hyperactor_telemetry::hash_to_u64(sender.id()),
375375
);
376376
headers.set(CAST_POINT, cast_point);
377377
headers.set(CAST_ORIGINATING_SENDER, sender);

hyperactor_mesh/src/host_mesh.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ impl HostMesh {
405405
for (rank, host) in self.current_ref.hosts().iter().enumerate() {
406406
let actor = host.mesh_agent();
407407
hyperactor_telemetry::notify_actor_created(hyperactor_telemetry::ActorEvent {
408-
id: hyperactor_telemetry::hash_to_u64(&actor.actor_addr()),
408+
id: hyperactor_telemetry::hash_to_u64(actor.actor_addr().id()),
409409
timestamp: now,
410410
mesh_id: mesh_id_hash,
411411
rank: rank as u64,

hyperactor_mesh/src/proc_mesh.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,14 +205,14 @@ impl ProcMesh {
205205
// These are skipped in Proc::spawn_inner. mesh_id directly points to proc mesh.
206206
let now = std::time::SystemTime::now();
207207
for rank in current_ref.ranks.iter() {
208-
let actor_id = rank.agent.actor_addr();
208+
let actor_addr = rank.agent.actor_addr();
209209

210210
hyperactor_telemetry::notify_actor_created(hyperactor_telemetry::ActorEvent {
211-
id: hyperactor_telemetry::hash_to_u64(&actor_id),
211+
id: hyperactor_telemetry::hash_to_u64(actor_addr.id()),
212212
timestamp: now,
213213
mesh_id: mesh_id_hash,
214214
rank: rank.create_rank as u64,
215-
full_name: actor_id.to_string(),
215+
full_name: actor_addr.to_string(),
216216
display_name: None,
217217
});
218218
}
@@ -883,13 +883,13 @@ impl ProcMeshRef {
883883
let point = self.region().extent().point_of_rank(rank).unwrap();
884884
crate::actor_display_name(sdn, &point)
885885
});
886-
let actor_id = proc_ref.actor_addr(&actor_mesh_id);
886+
let actor_addr = proc_ref.actor_addr(&actor_mesh_id);
887887
hyperactor_telemetry::notify_actor_created(hyperactor_telemetry::ActorEvent {
888-
id: hyperactor_telemetry::hash_to_u64(&actor_id),
888+
id: hyperactor_telemetry::hash_to_u64(actor_addr.id()),
889889
timestamp: now,
890890
mesh_id: mesh_id_hash,
891891
rank: rank as u64,
892-
full_name: actor_id.to_string(),
892+
full_name: actor_addr.to_string(),
893893
display_name,
894894
});
895895
}

hyperactor_telemetry/src/lib.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ use crate::config::ENABLE_RECORDER_TRACING;
111111
use crate::config::ENABLE_SQLITE_TRACING;
112112
use crate::config::MONARCH_LOG_SUFFIX;
113113
use crate::recorder::Recorder;
114+
114115
/// Hash any hashable value to a u64 using DefaultHasher.
115116
pub fn hash_to_u64(value: &impl Hash) -> u64 {
116117
let mut hasher = DefaultHasher::new();
@@ -335,11 +336,11 @@ pub fn end_user_span(id: u64) {
335336
/// Event data for actor creation.
336337
#[derive(Debug, Clone)]
337338
pub struct ActorEvent {
338-
/// Unique identifier for this actor (hashed from ActorAddr)
339+
/// Unique identifier for this actor, hashed from ActorId.
339340
pub id: u64,
340341
/// Timestamp when the actor was created
341342
pub timestamp: SystemTime,
342-
/// ID of the mesh this actor belongs to (hashed from actor_name)
343+
/// ID of the mesh this actor belongs to, matching `MeshEvent.id`.
343344
pub mesh_id: u64,
344345
/// Rank index into the mesh shape
345346
pub rank: u64,
@@ -407,7 +408,7 @@ pub fn notify_actor_status_changed(event: ActorStatusEvent) {
407408
#[derive(Debug, Clone)]
408409
pub struct SentMessageEvent {
409410
pub timestamp: SystemTime,
410-
/// Hash of the sending actor's [`ActorAddr`].
411+
/// Hash of the sending actor's ActorId.
411412
pub sender_actor_id: u64,
412413
/// Hash of the target actor mesh's name.
413414
pub actor_mesh_id: u64,
@@ -432,9 +433,9 @@ pub struct MessageEvent {
432433
pub timestamp: SystemTime,
433434
/// Unique identifier for this received message.
434435
pub id: u64,
435-
/// Hash of sender's ActorAddr.
436+
/// Hash of sender's ActorId.
436437
pub from_actor_id: u64,
437-
/// Hash of receiver's ActorAddr.
438+
/// Hash of receiver's ActorId.
438439
pub to_actor_id: u64,
439440
/// Endpoint name if this message targets a specific actor endpoint
440441
pub endpoint: Option<String>,

monarch_hyperactor/src/host_mesh.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -425,13 +425,13 @@ fn bootstrap_host(bootstrap_cmd: Option<PyBootstrapCommand>) -> PyResult<PyPytho
425425
parent_view_json: None,
426426
});
427427

428-
let host_agent_id = host_mesh_agent.actor_addr();
428+
let host_agent_addr = host_mesh_agent.actor_addr();
429429
hyperactor_telemetry::notify_actor_created(hyperactor_telemetry::ActorEvent {
430-
id: hyperactor_telemetry::hash_to_u64(host_agent_id),
430+
id: hyperactor_telemetry::hash_to_u64(host_agent_addr.id()),
431431
timestamp: now,
432432
mesh_id: host_mesh_id,
433433
rank: 0,
434-
full_name: host_agent_id.to_string(),
434+
full_name: host_agent_addr.to_string(),
435435
display_name: None,
436436
});
437437

@@ -453,13 +453,13 @@ fn bootstrap_host(bootstrap_cmd: Option<PyBootstrapCommand>) -> PyResult<PyPytho
453453
parent_view_json: None,
454454
});
455455

456-
let proc_agent_id = local_proc_agent.actor_addr();
456+
let proc_agent_addr = local_proc_agent.actor_addr();
457457
hyperactor_telemetry::notify_actor_created(hyperactor_telemetry::ActorEvent {
458-
id: hyperactor_telemetry::hash_to_u64(proc_agent_id),
458+
id: hyperactor_telemetry::hash_to_u64(proc_agent_addr.id()),
459459
timestamp: now,
460460
mesh_id: proc_mesh_id,
461461
rank: 0,
462-
full_name: proc_agent_id.to_string(),
462+
full_name: proc_agent_addr.to_string(),
463463
display_name: None,
464464
});
465465

@@ -477,7 +477,7 @@ fn bootstrap_host(bootstrap_cmd: Option<PyBootstrapCommand>) -> PyResult<PyPytho
477477
});
478478

479479
hyperactor_telemetry::notify_actor_created(hyperactor_telemetry::ActorEvent {
480-
id: hyperactor_telemetry::hash_to_u64(instance.self_addr()),
480+
id: hyperactor_telemetry::hash_to_u64(instance.self_addr().id()),
481481
timestamp: now,
482482
mesh_id: client_mesh_id,
483483
rank: 0,

python/tests/test_distributed_telemetry.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,17 @@ def test_actor_status_events_table() -> None:
668668
f"Expected at least one actor status event, got {event_count}"
669669
)
670670

671+
joined = engine.query(
672+
"SELECT ase.id FROM actor_status_events ase "
673+
"INNER JOIN actors a ON ase.actor_id = a.id "
674+
"WHERE a.display_name LIKE '%status_test_worker%'"
675+
)
676+
joined_count = len(joined.to_pydict().get("id", []))
677+
assert joined_count > 0, (
678+
"Expected actor_status_events.actor_id to join actors.id for "
679+
f"status_test_worker, got {joined_count} joined rows"
680+
)
681+
671682
# Verify new_status values are valid ActorStatus arm names
672683
valid_statuses = {
673684
"Unknown",
@@ -795,7 +806,7 @@ def test_sent_messages_table(
795806
All send paths (call, call_one, broadcast, choose) go through
796807
cast_with_selection in actor_mesh.rs, which calls notify_sent_message
797808
with a SentMessageEvent containing:
798-
- sender_actor_id: hash of the sending actor's ActorAddr
809+
- sender_actor_id: hash of the sending actor's ActorId
799810
- actor_mesh_id: hash of the target actor mesh name
800811
- view_json: serialized ndslice::Region of the current view
801812
- shape_json: serialized ndslice::Shape (converted from the Region)

0 commit comments

Comments
 (0)