Skip to content

Commit 00beb50

Browse files
committed
Revert "avoid copying ActorTableData when NodeMananger updates an actor to GCS (#5244)"
This reverts commit 6f682db.
1 parent d724f1f commit 00beb50

File tree

2 files changed

+34
-33
lines changed

2 files changed

+34
-33
lines changed

src/ray/raylet/node_manager.cc

+29-27
Original file line numberDiff line numberDiff line change
@@ -795,14 +795,14 @@ void NodeManager::HandleDisconnectedActor(const ActorID &actor_id, bool was_loca
795795
}
796796
}
797797
// Update the actor's state.
798-
ActorTableData new_actor_info = actor_entry->second.GetTableData();
799-
new_actor_info.set_state(new_state);
798+
ActorTableData new_actor_data = actor_entry->second.GetTableData();
799+
new_actor_data.set_state(new_state);
800800
if (was_local) {
801801
// If the actor was local, immediately update the state in actor registry.
802802
// So if we receive any actor tasks before we receive GCS notification,
803803
// these tasks can be correctly routed to the `MethodsWaitingForActorCreation`
804804
// queue, instead of being assigned to the dead actor.
805-
HandleActorStateTransition(actor_id, ActorRegistration(new_actor_info));
805+
HandleActorStateTransition(actor_id, ActorRegistration(new_actor_data));
806806
}
807807

808808
auto done = [was_local, actor_id](Status status) {
@@ -812,7 +812,7 @@ void NodeManager::HandleDisconnectedActor(const ActorID &actor_id, bool was_loca
812812
RAY_LOG(FATAL) << "Failed to update state for actor " << actor_id;
813813
}
814814
};
815-
auto actor_notification = std::make_shared<ActorTableData>(new_actor_info);
815+
auto actor_notification = std::make_shared<ActorTableData>(new_actor_data);
816816
RAY_CHECK_OK(gcs_client_->Actors().AsyncUpdate(actor_id, actor_notification, done));
817817
}
818818

@@ -1826,46 +1826,45 @@ void NodeManager::FinishAssignedTask(Worker &worker) {
18261826
}
18271827
}
18281828

1829-
std::shared_ptr<ActorTableData> NodeManager::CreateActorTableDataFromCreationTask(
1829+
ActorTableData NodeManager::CreateActorTableDataFromCreationTask(
18301830
const TaskSpecification &task_spec) {
18311831
RAY_CHECK(task_spec.IsActorCreationTask());
18321832
auto actor_id = task_spec.ActorCreationId();
18331833
auto actor_entry = actor_registry_.find(actor_id);
1834-
std::shared_ptr<ActorTableData> actor_info_ptr;
1834+
ActorTableData new_actor_data;
18351835
// TODO(swang): If this is an actor that was reconstructed, and previous
18361836
// actor notifications were delayed, then this node may not have an entry for
18371837
// the actor in actor_regisry_. Then, the fields for the number of
18381838
// reconstructions will be wrong.
18391839
if (actor_entry == actor_registry_.end()) {
1840-
actor_info_ptr.reset(new ActorTableData());
18411840
// Set all of the static fields for the actor. These fields will not
18421841
// change even if the actor fails or is reconstructed.
1843-
actor_info_ptr->set_actor_id(actor_id.Binary());
1844-
actor_info_ptr->set_actor_creation_dummy_object_id(
1842+
new_actor_data.set_actor_id(actor_id.Binary());
1843+
new_actor_data.set_actor_creation_dummy_object_id(
18451844
task_spec.ActorDummyObject().Binary());
1846-
actor_info_ptr->set_job_id(task_spec.JobId().Binary());
1847-
actor_info_ptr->set_max_reconstructions(task_spec.MaxActorReconstructions());
1845+
new_actor_data.set_job_id(task_spec.JobId().Binary());
1846+
new_actor_data.set_max_reconstructions(task_spec.MaxActorReconstructions());
18481847
// This is the first time that the actor has been created, so the number
18491848
// of remaining reconstructions is the max.
1850-
actor_info_ptr->set_remaining_reconstructions(task_spec.MaxActorReconstructions());
1849+
new_actor_data.set_remaining_reconstructions(task_spec.MaxActorReconstructions());
18511850
} else {
18521851
// If we've already seen this actor, it means that this actor was reconstructed.
18531852
// Thus, its previous state must be RECONSTRUCTING.
18541853
RAY_CHECK(actor_entry->second.GetState() == ActorTableData::RECONSTRUCTING);
18551854
// Copy the static fields from the current actor entry.
1856-
actor_info_ptr.reset(new ActorTableData(actor_entry->second.GetTableData()));
1855+
new_actor_data = actor_entry->second.GetTableData();
18571856
// We are reconstructing the actor, so subtract its
18581857
// remaining_reconstructions by 1.
1859-
actor_info_ptr->set_remaining_reconstructions(
1860-
actor_info_ptr->remaining_reconstructions() - 1);
1858+
new_actor_data.set_remaining_reconstructions(
1859+
new_actor_data.remaining_reconstructions() - 1);
18611860
}
18621861

18631862
// Set the new fields for the actor's state to indicate that the actor is
18641863
// now alive on this node manager.
1865-
actor_info_ptr->set_node_manager_id(
1864+
new_actor_data.set_node_manager_id(
18661865
gcs_client_->client_table().GetLocalClientId().Binary());
1867-
actor_info_ptr->set_state(ActorTableData::ALIVE);
1868-
return actor_info_ptr;
1866+
new_actor_data.set_state(ActorTableData::ALIVE);
1867+
return new_actor_data;
18691868
}
18701869

18711870
void NodeManager::FinishAssignedActorTask(Worker &worker, const Task &task) {
@@ -1971,8 +1970,8 @@ void NodeManager::FinishAssignedActorCreationTask(const ActorID &parent_actor_id
19711970
bool resumed_from_checkpoint) {
19721971
// Notify the other node managers that the actor has been created.
19731972
const ActorID actor_id = task_spec.ActorCreationId();
1974-
auto new_actor_info = CreateActorTableDataFromCreationTask(task_spec);
1975-
new_actor_info->set_parent_actor_id(parent_actor_id.Binary());
1973+
auto new_actor_data = CreateActorTableDataFromCreationTask(task_spec);
1974+
new_actor_data.set_parent_actor_id(parent_actor_id.Binary());
19761975
auto update_callback = [actor_id](Status status) {
19771976
if (!status.ok()) {
19781977
// Only one node at a time should succeed at creating or updating the actor.
@@ -1990,20 +1989,21 @@ void NodeManager::FinishAssignedActorCreationTask(const ActorID &parent_actor_id
19901989
<< actor_id;
19911990
RAY_CHECK_OK(gcs_client_->actor_checkpoint_table().Lookup(
19921991
JobID::Nil(), checkpoint_id,
1993-
[this, actor_id, new_actor_info, update_callback](
1992+
[this, actor_id, new_actor_data, update_callback](
19941993
ray::gcs::RedisGcsClient *client, const UniqueID &checkpoint_id,
19951994
const ActorCheckpointData &checkpoint_data) {
19961995
RAY_LOG(INFO) << "Restoring registration for actor " << actor_id
19971996
<< " from checkpoint " << checkpoint_id;
19981997
ActorRegistration actor_registration =
1999-
ActorRegistration(*new_actor_info, checkpoint_data);
1998+
ActorRegistration(new_actor_data, checkpoint_data);
20001999
// Mark the unreleased dummy objects in the checkpoint frontier as local.
20012000
for (const auto &entry : actor_registration.GetDummyObjects()) {
20022001
HandleObjectLocal(entry.first);
20032002
}
20042003
HandleActorStateTransition(actor_id, std::move(actor_registration));
2004+
auto actor_notification = std::make_shared<ActorTableData>(new_actor_data);
20052005
// The actor was created before.
2006-
RAY_CHECK_OK(gcs_client_->Actors().AsyncUpdate(actor_id, new_actor_info,
2006+
RAY_CHECK_OK(gcs_client_->Actors().AsyncUpdate(actor_id, actor_notification,
20072007
update_callback));
20082008
},
20092009
[actor_id](ray::gcs::RedisGcsClient *client, const UniqueID &checkpoint_id) {
@@ -2013,14 +2013,16 @@ void NodeManager::FinishAssignedActorCreationTask(const ActorID &parent_actor_id
20132013
} else {
20142014
// The actor did not resume from a checkpoint. Immediately notify the
20152015
// other node managers that the actor has been created.
2016-
HandleActorStateTransition(actor_id, ActorRegistration(*new_actor_info));
2016+
HandleActorStateTransition(actor_id, ActorRegistration(new_actor_data));
2017+
auto actor_notification = std::make_shared<ActorTableData>(new_actor_data);
20172018
if (actor_registry_.find(actor_id) != actor_registry_.end()) {
20182019
// The actor was created before.
2019-
RAY_CHECK_OK(
2020-
gcs_client_->Actors().AsyncUpdate(actor_id, new_actor_info, update_callback));
2020+
RAY_CHECK_OK(gcs_client_->Actors().AsyncUpdate(actor_id, actor_notification,
2021+
update_callback));
20212022
} else {
20222023
// The actor was never created before.
2023-
RAY_CHECK_OK(gcs_client_->Actors().AsyncRegister(new_actor_info, update_callback));
2024+
RAY_CHECK_OK(
2025+
gcs_client_->Actors().AsyncRegister(actor_notification, update_callback));
20242026
}
20252027
}
20262028
if (!resumed_from_checkpoint) {

src/ray/raylet/node_manager.h

+5-6
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler,
287287
///
288288
/// \param task_spec Task specification of the actor creation task that created the
289289
/// actor.
290-
std::shared_ptr<ActorTableData> CreateActorTableDataFromCreationTask(
291-
const TaskSpecification &task_spec);
290+
ActorTableData CreateActorTableDataFromCreationTask(const TaskSpecification &task_spec);
292291
/// Handle a worker finishing an assigned actor task or actor creation task.
293292
/// \param worker The worker that finished the task.
294293
/// \param task The actor task or actor creation task.
@@ -297,11 +296,11 @@ class NodeManager : public rpc::NodeManagerServiceHandler,
297296
/// Helper function for handling worker to finish its assigned actor task
298297
/// or actor creation task. Gets invoked when tasks's parent actor is known.
299298
///
300-
/// \param parent_actor_id The actor id corresponding to the actor which creates
301-
/// the new actor.
302-
/// \param task_spec Task specification of the actor creation task that created the
303-
/// actor.
299+
/// \param actor_id The actor id corresponding to the actor (creation) task.
300+
/// \param actor_handle_id The actor id corresponding to the actor (creation) task.
301+
/// \param new_actor_data The struct which will be used to register the task.
304302
/// \param resumed_from_checkpoint If the actor was resumed from a checkpoint.
303+
/// \param dummy_object Dummy object corresponding to the actor creation task.
305304
/// \return Void.
306305
void FinishAssignedActorCreationTask(const ActorID &parent_actor_id,
307306
const TaskSpecification &task_spec,

0 commit comments

Comments
 (0)