-
Notifications
You must be signed in to change notification settings - Fork 2.2k
[Server::Shutdown] Fix the matter server shutdown sequence, introduced few APIs to do *ClusterServerShutdown so that we can reinitialise the matter server without reboot on embedded platforms. #38515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
5cb5e1a
68af966
05101aa
db4a0b2
a3195a3
d2a0e3f
35fc765
1d4cf19
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -340,6 +340,39 @@ class InteractionModelEngine : public Messaging::UnsolicitedMessageHandler, | |
#endif // CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION | ||
#endif // CHIP_CONFIG_PERSIST_SUBSCRIPTIONS | ||
|
||
/** Previously, ShutdowActiveReads() was only available in unit test builds via `#if CONFIG_BUILD_FOR_HOST_UNIT_TEST`. | ||
* However, we now require proper cleanup of active read handlers during the application's shutdown sequence, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand why we are making this API public. We already shut down the read handlers in As far as ReadClients: actual reads get shut down by shutting down sessions/exchanges, and subscriptions are shut down in |
||
* especially before calling SetDataModelProvider(nullptr). This ensures that there are no ongoing IM reads, | ||
* which could otherwise result in crashes or undefined behavior. | ||
|
||
* Therefore, this method has been moved out of the CONFIG_BUILD_FOR_HOST_UNIT_TEST block so that it can be | ||
* called as part of the normal shutdown flow. | ||
|
||
* Note: This was originally introduced for unit tests to clean up subscriptions created via the high-level | ||
* APIs in src/controller/ReadInteraction.h, which did not expose a way to shut down active subscriptions. | ||
**/ | ||
|
||
void ShutdownActiveReads() | ||
{ | ||
#if CHIP_CONFIG_ENABLE_READ_CLIENT | ||
for (auto * readClient = mpActiveReadClientList; readClient != nullptr;) | ||
{ | ||
readClient->mpImEngine = nullptr; | ||
auto * tmpClient = readClient->GetNextClient(); | ||
readClient->SetNextClient(nullptr); | ||
readClient->Close(CHIP_NO_ERROR); | ||
readClient = tmpClient; | ||
} | ||
|
||
// | ||
// After that, we just null out our tracker. | ||
// | ||
mpActiveReadClientList = nullptr; | ||
#endif // CHIP_CONFIG_ENABLE_READ_CLIENT | ||
|
||
mReadHandlers.ReleaseAll(); | ||
} | ||
|
||
#if CONFIG_BUILD_FOR_HOST_UNIT_TEST | ||
// | ||
// Get direct access to the underlying read handler pool | ||
|
@@ -391,33 +424,6 @@ class InteractionModelEngine : public Messaging::UnsolicitedMessageHandler, | |
mSubscriptionResumptionRetrySecondsOverride = seconds; | ||
} | ||
#endif | ||
|
||
// | ||
// When testing subscriptions using the high-level APIs in src/controller/ReadInteraction.h, | ||
// they don't provide for the ability to shut down those subscriptions after they've been established. | ||
// | ||
// So for the purposes of unit tests, add a helper here to shut down and clean-up all active handlers. | ||
// | ||
void ShutdownActiveReads() | ||
{ | ||
#if CHIP_CONFIG_ENABLE_READ_CLIENT | ||
for (auto * readClient = mpActiveReadClientList; readClient != nullptr;) | ||
{ | ||
readClient->mpImEngine = nullptr; | ||
auto * tmpClient = readClient->GetNextClient(); | ||
readClient->SetNextClient(nullptr); | ||
readClient->Close(CHIP_NO_ERROR); | ||
readClient = tmpClient; | ||
} | ||
|
||
// | ||
// After that, we just null out our tracker. | ||
// | ||
mpActiveReadClientList = nullptr; | ||
#endif // CHIP_CONFIG_ENABLE_READ_CLIENT | ||
|
||
mReadHandlers.ReleaseAll(); | ||
} | ||
#endif | ||
|
||
DataModel::Provider * GetDataModelProvider() const; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1153,12 +1153,15 @@ void emberAfScenesManagementClusterServerInitCallback(EndpointId endpoint) | |
|
||
void MatterScenesManagementClusterServerShutdownCallback(EndpointId endpoint) | ||
{ | ||
uint16_t endpointTableSize = 0; | ||
VerifyOrReturn(Status::Success == Attributes::SceneTableSize::Get(endpoint, &endpointTableSize)); | ||
|
||
// Get Scene Table Instance | ||
SceneTable * sceneTable = scenes::GetSceneTableImpl(endpoint, endpointTableSize); | ||
sceneTable->RemoveEndpoint(); | ||
// TODO: Remove the commented code. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this code leads to incorrect behavior on shutdown, but not having this code leads to incorrect behavior on endpoint removal (storage leaks). So just commenting it out is breaking critical things, and chances are we need to fix that before we can land this PR. Have you talked to the authors of this code about this situation, like we discussed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I have talked with the owner @lpbeliveau-silabs. He dropped the comment #38515 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, but who is doing that "moved to a different handler" bit and when? Because we can't ship things in the state that we will have after this PR.... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll try to catch up with the changes and address this shortly. So to confirm, this specific behavior is for embedded systems that shutdown endpoints during power cycling, but don't necessarily remove them? Just want to make sure I got this right before implementing it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking into the SDK, it seems like the previous intended behavior of Shutdown WAS to handle endpoints being removed. This PR is modifying this behavior and I do not think that this is a "Scenes is holding back the PR" situation. The distinction between Endpoint Shutdown vs Removal should either be added to the handler or we need to have some code responsible for cleanup on endpoint removal implemented along this change. |
||
// MatterScenesManagementClusterServerShutdownCallback was not being called before and in the sceneTable->RemoveEndpoint(); | ||
// it removes the SceneTableEntry from the persistent storage. | ||
// uint16_t endpointTableSize = 0; | ||
// VerifyOrReturn(Status::Success == Attributes::SceneTableSize::Get(endpoint, &endpointTableSize)); | ||
|
||
// // Get Scene Table Instance | ||
// SceneTable * sceneTable = scenes::GetSceneTableImpl(endpoint, endpointTableSize); | ||
// sceneTable->RemoveEndpoint(); | ||
Comment on lines
+1156
to
+1164
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The existing shutdown logic for the Scenes cluster has been commented out, with a TODO indicating it needs to be removed. However, this logic ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TLDR: why are we checking in commented out code? |
||
} | ||
|
||
void MatterScenesManagementPluginServerInitCallback() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,7 +106,12 @@ class ScenesServer : public CommandHandlerInterface, public AttributeAccessInter | |
|
||
private: | ||
ScenesServer() : CommandHandlerInterface(Optional<EndpointId>(), Id), AttributeAccessInterface(Optional<EndpointId>(), Id) {} | ||
~ScenesServer() { Shutdown(); } | ||
~ScenesServer() | ||
{ | ||
// TODO: Remove the commented code. | ||
// ScenesServer::Shutdown is being called from MatterScenesManagementPluginServerShutdownCallback. | ||
// Shutdown(); | ||
Comment on lines
+111
to
+113
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to the .cpp file, the call to |
||
} | ||
|
||
bool mIsInitialized = false; | ||
|
||
|
jadhavrohit924 marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -651,6 +651,14 @@ void Server::Shutdown() | |
#if CHIP_CONFIG_ENABLE_ICD_SERVER | ||
app::DnssdServer::Instance().SetICDManager(nullptr); | ||
#endif // CHIP_CONFIG_ENABLE_ICD_SERVER | ||
|
||
/** Shutdown all active read handlers to ensure a clean shutdown of the Interaction Model. | ||
* This is required before resetting the DataModelProvider, as ongoing reads may access it | ||
* and lead to crashes or undefined behavior. | ||
**/ | ||
app::InteractionModelEngine::GetInstance()->ShutdownActiveReads(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again: during startup we first set the DM provider (line 320 in this diff), then do InteractionModelEngine::Init (line 393), right? So during shutdown we should be doing InteractionModelEngine::Shutdown before clearing the DM provider, I would think. That's what #38515 was about. But for some reason, instead of moving It's not just reads that touch the DM provider, anyway (commands and writes do too), so you really have to InteractionModelEngine::Shutdown before clearing the provider. If we did that, InteractionModelEngine::Shutdown would clear any active read handlers, as it already does. And clear subscription clients. Which would leave read clients.... Those would get shut down when we expire their sessions, but if we want to more proactively expire them, we could do that in the IM engine shutdown. Either way, read clients don't touch DM provider, right? |
||
|
||
app::InteractionModelEngine::GetInstance()->SetDataModelProvider(nullptr); | ||
app::DnssdServer::Instance().SetCommissioningModeProvider(nullptr); | ||
Dnssd::ServiceAdvertiser::Instance().Shutdown(); | ||
|
||
|
@@ -681,11 +689,13 @@ void Server::Shutdown() | |
mCommissioningWindowManager.Shutdown(); | ||
mMessageCounterManager.Shutdown(); | ||
mExchangeMgr.Shutdown(); | ||
mFabrics.RemoveFabricDelegate(&mFabricDelegate); | ||
mSessions.Shutdown(); | ||
mTransports.Close(); | ||
mAccessControl.Finish(); | ||
Access::ResetAccessControlToDefault(); | ||
Credentials::SetGroupDataProvider(nullptr); | ||
app::EventManagement::GetInstance().DestroyEventManagement(); | ||
#if CHIP_CONFIG_ENABLE_ICD_SERVER | ||
// Remove Test Event Trigger Handler | ||
if (mTestEventTriggerDelegate != nullptr) | ||
|
@@ -695,6 +705,7 @@ void Server::Shutdown() | |
mICDManager.Shutdown(); | ||
#endif // CHIP_CONFIG_ENABLE_ICD_SERVER | ||
|
||
mFabrics.Shutdown(); | ||
// TODO(16969): Remove chip::Platform::MemoryInit() call from Server class, it belongs to outer code | ||
chip::Platform::MemoryShutdown(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,3 +25,4 @@ | |
#include <app/util/DataModelHandler.h> | ||
|
||
void InitDataModelHandler() {} | ||
void ShutdownDataModelHandler() {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is ShutdowActiveReads?