Skip to content

Commit 30273dc

Browse files
authored
[Diagnostics] Add diagnostics of execution time and periodicity of the hardware components (#2086)
1 parent b2d9bf7 commit 30273dc

File tree

17 files changed

+891
-40
lines changed

17 files changed

+891
-40
lines changed

controller_manager/doc/parameters_context.yaml

+9-3
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,13 @@ hardware_components_initial_state: |
1515
- "base3"
1616
1717
diagnostics.threshold.controllers.periodicity: |
18-
The ``periodicity`` diagnostics will be published only for the asynchronous controllers, because any affect to the synchronous controllers will be reflected directly in the controller manager's periodicity.
18+
The ``periodicity`` diagnostics will be published for the asynchronous controllers, because any affect to the synchronous controllers will be reflected directly in the controller manager's periodicity. It is also published for the synchronous controllers that have a different update rate than the controller manager update rate.
1919
20-
diagnostics.threshold.controllers.execution_time: |
21-
The ``execution_time`` diagnostics will be published for all controllers. The ``mean_error`` for a synchronous controller will be computed against zero, as it should be as low as possible. However, the ``mean_error`` for an asynchronous controller will be computed against the controller's desired update period, as the controller can take a maximum of the desired period cycle to execute it's update cycle.
20+
diagnostics.threshold.controllers.execution_time.mean_error: |
21+
The ``execution_time`` diagnostics will be published for all controllers. The ``mean_error`` for a synchronous controller will be computed against zero, as it should be as low as possible. However, the ``mean_error`` for an asynchronous controller will be computed against the controller's desired update period, as the controller can take a maximum of the desired period to execute its update cycle.
22+
23+
diagnostics.threshold.hardware_components.periodicity: |
24+
The ``periodicity`` diagnostics will be published for the asynchronous hardware components, because any affect to the synchronous hardware components will be reflected directly in the controller manager's periodicity. It is also published for the synchronous hardware components that have a different read/write rate than the controller manager update rate.
25+
26+
diagnostics.threshold.hardware_components.execution_time.mean_error: |
27+
The ``execution_time`` diagnostics will be published for all hardware components. The ``mean_error`` for a synchronous hardware component will be computed against zero, as it should be as low as possible. However, the ``mean_error`` for an asynchronous hardware component will be computed against its desired read/write period, as the hardware component can take a maximum of the desired period to execute the read/write cycle.

controller_manager/src/controller_manager.cpp

+161-15
Original file line numberDiff line numberDiff line change
@@ -3553,7 +3553,9 @@ void ControllerManager::controller_activity_diagnostic_callback(
35533553
const auto exec_time_stats = controllers[i].execution_time_statistics->GetStatistics();
35543554
stat.add(
35553555
controllers[i].info.name + exec_time_suffix, make_stats_string(exec_time_stats, "us"));
3556-
if (is_async)
3556+
const bool publish_periodicity_stats =
3557+
is_async || (controllers[i].c->get_update_rate() != this->get_update_rate());
3558+
if (publish_periodicity_stats)
35573559
{
35583560
stat.add(
35593561
controllers[i].info.name + periodicity_suffix,
@@ -3653,12 +3655,21 @@ void ControllerManager::controller_activity_diagnostic_callback(
36533655
void ControllerManager::hardware_components_diagnostic_callback(
36543656
diagnostic_updater::DiagnosticStatusWrapper & stat)
36553657
{
3658+
if (!is_resource_manager_initialized())
3659+
{
3660+
stat.summary(
3661+
diagnostic_msgs::msg::DiagnosticStatus::ERROR, "Resource manager is not yet initialized!");
3662+
return;
3663+
}
3664+
36563665
bool all_active = true;
36573666
bool atleast_one_hw_active = false;
3667+
const std::string read_cycle_suffix = ".read_cycle";
3668+
const std::string write_cycle_suffix = ".write_cycle";
3669+
const std::string state_suffix = ".state";
36583670
const auto & hw_components_info = resource_manager_->get_components_status();
36593671
for (const auto & [component_name, component_info] : hw_components_info)
36603672
{
3661-
stat.add(component_name, component_info.state.label());
36623673
if (component_info.state.id() != lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE)
36633674
{
36643675
all_active = false;
@@ -3668,31 +3679,166 @@ void ControllerManager::hardware_components_diagnostic_callback(
36683679
atleast_one_hw_active = true;
36693680
}
36703681
}
3671-
if (!is_resource_manager_initialized())
3682+
if (hw_components_info.empty())
36723683
{
36733684
stat.summary(
3674-
diagnostic_msgs::msg::DiagnosticStatus::ERROR, "Resource manager is not yet initialized!");
3685+
diagnostic_msgs::msg::DiagnosticStatus::ERROR, "No hardware components are loaded!");
3686+
return;
36753687
}
3676-
else if (hw_components_info.empty())
3688+
else if (!atleast_one_hw_active)
36773689
{
36783690
stat.summary(
3679-
diagnostic_msgs::msg::DiagnosticStatus::ERROR, "No hardware components are loaded!");
3691+
diagnostic_msgs::msg::DiagnosticStatus::WARN, "No hardware components are currently active");
3692+
return;
36803693
}
3681-
else
3694+
3695+
stat.summary(
3696+
diagnostic_msgs::msg::DiagnosticStatus::OK,
3697+
all_active ? "All hardware components are active" : "Not all hardware components are active");
3698+
3699+
if (cm_param_listener_->is_old(*params_))
36823700
{
3683-
if (!atleast_one_hw_active)
3701+
*params_ = cm_param_listener_->get_params();
3702+
}
3703+
3704+
auto make_stats_string =
3705+
[](const auto & statistics_data, const std::string & measurement_unit) -> std::string
3706+
{
3707+
std::ostringstream oss;
3708+
oss << std::fixed << std::setprecision(2);
3709+
oss << "Avg: " << statistics_data.average << " [" << statistics_data.min << " - "
3710+
<< statistics_data.max << "] " << measurement_unit
3711+
<< ", StdDev: " << statistics_data.standard_deviation;
3712+
return oss.str();
3713+
};
3714+
3715+
// Variable to define the overall status of the controller diagnostics
3716+
auto level = diagnostic_msgs::msg::DiagnosticStatus::OK;
3717+
3718+
std::vector<std::string> high_exec_time_hw;
3719+
std::vector<std::string> bad_periodicity_async_hw;
3720+
3721+
for (const auto & [component_name, component_info] : hw_components_info)
3722+
{
3723+
stat.add(component_name + state_suffix, component_info.state.label());
3724+
if (component_info.state.id() != lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE)
36843725
{
3685-
stat.summary(
3686-
diagnostic_msgs::msg::DiagnosticStatus::WARN,
3687-
"No hardware components are currently active");
3726+
all_active = false;
36883727
}
36893728
else
36903729
{
3691-
stat.summary(
3692-
diagnostic_msgs::msg::DiagnosticStatus::OK, all_active
3693-
? "All hardware components are active"
3694-
: "Not all hardware components are active");
3730+
atleast_one_hw_active = true;
36953731
}
3732+
if (component_info.state.id() == lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE)
3733+
{
3734+
auto update_stats =
3735+
[&bad_periodicity_async_hw, &high_exec_time_hw, &stat, &make_stats_string, this](
3736+
const std::string & comp_name, const auto & statistics,
3737+
const std::string & statistics_type_suffix, auto & diag_level, const auto & comp_info,
3738+
const auto & params)
3739+
{
3740+
if (!statistics)
3741+
{
3742+
return;
3743+
}
3744+
const bool is_async = comp_info.is_async;
3745+
const std::string periodicity_suffix = ".periodicity";
3746+
const std::string exec_time_suffix = ".execution_time";
3747+
const auto periodicity_stats = statistics->periodicity.get_statistics();
3748+
const auto exec_time_stats = statistics->execution_time.get_statistics();
3749+
stat.add(
3750+
comp_name + statistics_type_suffix + exec_time_suffix,
3751+
make_stats_string(exec_time_stats, "us"));
3752+
const bool publish_periodicity_stats =
3753+
is_async || (comp_info.rw_rate != this->get_update_rate());
3754+
if (publish_periodicity_stats)
3755+
{
3756+
stat.add(
3757+
comp_name + statistics_type_suffix + periodicity_suffix,
3758+
make_stats_string(periodicity_stats, "Hz") +
3759+
" -> Desired : " + std::to_string(comp_info.rw_rate) + " Hz");
3760+
const double periodicity_error =
3761+
std::abs(periodicity_stats.average - static_cast<double>(comp_info.rw_rate));
3762+
if (
3763+
periodicity_error >
3764+
params->diagnostics.threshold.hardware_components.periodicity.mean_error.error ||
3765+
periodicity_stats.standard_deviation > params->diagnostics.threshold.hardware_components
3766+
.periodicity.standard_deviation.error)
3767+
{
3768+
diag_level = diagnostic_msgs::msg::DiagnosticStatus::ERROR;
3769+
add_element_to_list(bad_periodicity_async_hw, comp_name);
3770+
}
3771+
else if (
3772+
periodicity_error >
3773+
params->diagnostics.threshold.hardware_components.periodicity.mean_error.warn ||
3774+
periodicity_stats.standard_deviation >
3775+
params->diagnostics.threshold.hardware_components.periodicity.standard_deviation.warn)
3776+
{
3777+
if (diag_level != diagnostic_msgs::msg::DiagnosticStatus::ERROR)
3778+
{
3779+
diag_level = diagnostic_msgs::msg::DiagnosticStatus::WARN;
3780+
}
3781+
add_element_to_list(bad_periodicity_async_hw, comp_name);
3782+
}
3783+
}
3784+
const double max_exp_exec_time =
3785+
is_async ? 1.e6 / static_cast<double>(comp_info.rw_rate) : 0.0;
3786+
if (
3787+
(exec_time_stats.average - max_exp_exec_time) >
3788+
params->diagnostics.threshold.hardware_components.execution_time.mean_error.error ||
3789+
exec_time_stats.standard_deviation > params->diagnostics.threshold.hardware_components
3790+
.execution_time.standard_deviation.error)
3791+
{
3792+
diag_level = diagnostic_msgs::msg::DiagnosticStatus::ERROR;
3793+
high_exec_time_hw.push_back(comp_name);
3794+
}
3795+
else if (
3796+
(exec_time_stats.average - max_exp_exec_time) >
3797+
params->diagnostics.threshold.hardware_components.execution_time.mean_error.warn ||
3798+
exec_time_stats.standard_deviation > params->diagnostics.threshold.hardware_components
3799+
.execution_time.standard_deviation.warn)
3800+
{
3801+
if (diag_level != diagnostic_msgs::msg::DiagnosticStatus::ERROR)
3802+
{
3803+
diag_level = diagnostic_msgs::msg::DiagnosticStatus::WARN;
3804+
}
3805+
high_exec_time_hw.push_back(comp_name);
3806+
}
3807+
};
3808+
3809+
// For components : {actuator, sensor and system}
3810+
update_stats(
3811+
component_name, component_info.read_statistics, read_cycle_suffix, level, component_info,
3812+
params_);
3813+
// For components : {actuator and system}
3814+
update_stats(
3815+
component_name, component_info.write_statistics, write_cycle_suffix, level, component_info,
3816+
params_);
3817+
}
3818+
}
3819+
3820+
if (!high_exec_time_hw.empty())
3821+
{
3822+
std::string high_exec_time_hw_string;
3823+
for (const auto & hw_comp : high_exec_time_hw)
3824+
{
3825+
high_exec_time_hw_string.append(hw_comp);
3826+
high_exec_time_hw_string.append(" ");
3827+
}
3828+
stat.mergeSummary(
3829+
level, "\nHigh execution jitter or mean error : [ " + high_exec_time_hw_string + "]");
3830+
}
3831+
if (!bad_periodicity_async_hw.empty())
3832+
{
3833+
std::string bad_periodicity_async_hw_string;
3834+
for (const auto & hw_comp : bad_periodicity_async_hw)
3835+
{
3836+
bad_periodicity_async_hw_string.append(hw_comp);
3837+
bad_periodicity_async_hw_string.append(" ");
3838+
}
3839+
stat.mergeSummary(
3840+
level,
3841+
"\nHigh periodicity jitter or mean error : [ " + bad_periodicity_async_hw_string + "]");
36963842
}
36973843
}
36983844

controller_manager/src/controller_manager_parameters.yaml

+71
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,74 @@ controller_manager:
134134
gt<>: 0.0,
135135
}
136136
}
137+
hardware_components:
138+
periodicity:
139+
mean_error:
140+
warn: {
141+
type: double,
142+
default_value: 5.0,
143+
description: "The warning threshold for the mean error of the hardware component's read/write loop in Hz. If the mean error exceeds this threshold, a warning diagnostic will be published.",
144+
validation: {
145+
gt<>: 0.0,
146+
}
147+
}
148+
error: {
149+
type: double,
150+
default_value: 10.0,
151+
description: "The error threshold for the mean error of the hardware component's read/write loop in Hz. If the mean error exceeds this threshold, an error diagnostic will be published.",
152+
validation: {
153+
gt<>: 0.0,
154+
}
155+
}
156+
standard_deviation:
157+
warn: {
158+
type: double,
159+
default_value: 5.0,
160+
description: "The warning threshold for the standard deviation of the hardware component's read/write loop in Hz. If the standard deviation exceeds this threshold, a warning diagnostic will be published.",
161+
validation: {
162+
gt<>: 0.0,
163+
}
164+
}
165+
error: {
166+
type: double,
167+
default_value: 10.0,
168+
description: "The error threshold for the standard deviation of the hardware component's read/write loop in Hz. If the standard deviation exceeds this threshold, an error diagnostic will be published.",
169+
validation: {
170+
gt<>: 0.0,
171+
}
172+
}
173+
execution_time:
174+
mean_error:
175+
warn: {
176+
type: double,
177+
default_value: 1000.0,
178+
description: "The warning threshold for the mean error of the hardware component's read/write cycle execution time in microseconds. If the mean error exceeds this threshold, a warning diagnostic will be published.",
179+
validation: {
180+
gt<>: 0.0,
181+
}
182+
}
183+
error: {
184+
type: double,
185+
default_value: 2000.0,
186+
description: "The error threshold for the mean error of the hardware component's read/write cycle execution time in microseconds. If the mean error exceeds this threshold, a error diagnostic will be published.",
187+
validation: {
188+
gt<>: 0.0,
189+
}
190+
}
191+
standard_deviation:
192+
warn: {
193+
type: double,
194+
default_value: 100.0,
195+
description: "The warning threshold for the standard deviation of the hardware component's read/write cycle execution time in microseconds. If the standard deviation exceeds this threshold, a warning diagnostic will be published.",
196+
validation: {
197+
gt<>: 0.0,
198+
}
199+
}
200+
error: {
201+
type: double,
202+
default_value: 200.0,
203+
description: "The error threshold for the standard deviation of the hardware component's update cycle execution time in microseconds. If the standard deviation exceeds this threshold, an error diagnostic will be published.",
204+
validation: {
205+
gt<>: 0.0,
206+
}
207+
}

doc/release_notes.rst

+1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ hardware_interface
168168
* With (`#1567 <https://github.com/ros-controls/ros2_control/pull/1567>`_) all the Hardware components now have a fully functional asynchronous functionality, by simply adding ``is_async`` tag to the ros2_control tag in the URDF. This will allow the hardware components to run in a separate thread, and the controller manager will be able to run the controllers in parallel with the hardware components.
169169
* The hardware components can be easily introspect the internal member variables using the macro ``REGISTER_ROS2_CONTROL_INTROSPECTION`` (`#1918 <https://github.com/ros-controls/ros2_control/pull/1918>`_)
170170
* Added new ``get_optional`` method that returns ``std::optional`` of the templated type, and this can be used to check if the value is available or not (`#1976 <https://github.com/ros-controls/ros2_control/pull/1976>`_ and `#2061 <https://github.com/ros-controls/ros2_control/pull/2061>`_)
171+
* Added hardware components execution time and periodicity statistics diagnostics (`#2086 <https://github.com/ros-controls/ros2_control/pull/2086>`_)
171172

172173
joint_limits
173174
************

hardware_interface/include/hardware_interface/actuator.hpp

+8
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "hardware_interface/handle.hpp"
2424
#include "hardware_interface/hardware_info.hpp"
2525
#include "hardware_interface/types/hardware_interface_return_values.hpp"
26+
#include "hardware_interface/types/statistics_types.hpp"
2627
#include "rclcpp/duration.hpp"
2728
#include "rclcpp/logger.hpp"
2829
#include "rclcpp/node_interfaces/node_clock_interface.hpp"
@@ -85,6 +86,10 @@ class Actuator final
8586

8687
const rclcpp::Time & get_last_write_time() const;
8788

89+
const HardwareComponentStatisticsCollector & get_read_statistics() const;
90+
91+
const HardwareComponentStatisticsCollector & get_write_statistics() const;
92+
8893
return_type read(const rclcpp::Time & time, const rclcpp::Duration & period);
8994

9095
return_type write(const rclcpp::Time & time, const rclcpp::Duration & period);
@@ -98,6 +103,9 @@ class Actuator final
98103
rclcpp::Time last_read_cycle_time_;
99104
// Last write cycle time
100105
rclcpp::Time last_write_cycle_time_;
106+
// Component statistics
107+
HardwareComponentStatisticsCollector read_statistics_;
108+
HardwareComponentStatisticsCollector write_statistics_;
101109
};
102110

103111
} // namespace hardware_interface

hardware_interface/include/hardware_interface/hardware_component_info.hpp

+14-1
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,21 @@
1919
#ifndef HARDWARE_INTERFACE__HARDWARE_COMPONENT_INFO_HPP_
2020
#define HARDWARE_INTERFACE__HARDWARE_COMPONENT_INFO_HPP_
2121

22+
#include <memory>
2223
#include <string>
2324
#include <vector>
2425

25-
#include <rclcpp/time.hpp>
26+
#include "rclcpp/time.hpp"
2627
#include "rclcpp_lifecycle/state.hpp"
2728

29+
#include "hardware_interface/types/statistics_types.hpp"
2830
namespace hardware_interface
2931
{
32+
struct HardwareComponentStatisticsData
33+
{
34+
ros2_control::MovingAverageStatisticsData execution_time;
35+
ros2_control::MovingAverageStatisticsData periodicity;
36+
};
3037
/// Hardware Component Information
3138
/**
3239
* This struct contains information about a given hardware component.
@@ -59,6 +66,12 @@ struct HardwareComponentInfo
5966

6067
/// List of provided command interfaces by the component.
6168
std::vector<std::string> command_interfaces;
69+
70+
/// Read cycle statistics of the component.
71+
std::shared_ptr<HardwareComponentStatisticsData> read_statistics = nullptr;
72+
73+
/// Write cycle statistics of the component.
74+
std::shared_ptr<HardwareComponentStatisticsData> write_statistics = nullptr;
6275
};
6376

6477
} // namespace hardware_interface

0 commit comments

Comments
 (0)