Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
diagnostic_common_diagnostics
diagnostic_remote_logging
diagnostic_updater
diagnostic_topic_monitor
self_test

check_licenses:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ jobs:
diagnostic_aggregator,
diagnostic_common_diagnostics,
diagnostic_remote_logging,
diagnostic_topic_monitor,
diagnostic_updater,
self_test,
]
Expand Down
2 changes: 2 additions & 0 deletions diagnostic_topic_monitor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.pytest_cache
**/__pycache__
100 changes: 100 additions & 0 deletions diagnostic_topic_monitor/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
cmake_minimum_required(VERSION 3.8)
project(diagnostic_topic_monitor)

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
endif()

# find dependencies
find_package(ament_cmake REQUIRED)
# uncomment the following section in order to fill in
# further dependencies manually.
# find_package(<dependency> REQUIRED)

find_package(ament_cmake_python REQUIRED)

find_package(rclpy REQUIRED)
find_package(diagnostic_msgs REQUIRED)
find_package(diagnostic_updater REQUIRED)
find_package(rclcpp_lifecycle REQUIRED)
find_package(rclcpp_components REQUIRED)
find_package(lifecycle_msgs REQUIRED)
find_package(rclcpp REQUIRED)
find_package(diagnostic_updater REQUIRED)
find_package(std_msgs REQUIRED)

add_library(topic_frequency_monitor_component SHARED src/topic_frequency_monitor_component.cpp)
rclcpp_components_register_node(topic_frequency_monitor_component
PLUGIN "diagnostic_topic_monitor::TopicFrequencyMonitor"
EXECUTABLE topic_frequency_monitor
)
target_include_directories(topic_frequency_monitor_component PRIVATE include)
ament_target_dependencies(topic_frequency_monitor_component
"lifecycle_msgs"
"rclcpp"
"rclcpp_lifecycle"
"rclcpp_components"
"diagnostic_msgs"
"diagnostic_updater"
)
ament_export_targets(export_topic_frequency_monitor_component)
install(TARGETS topic_frequency_monitor_component
EXPORT export_topic_frequency_monitor_component
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)

add_library(topic_age_monitor_component SHARED src/topic_age_monitor_component.cpp)
rclcpp_components_register_node(topic_age_monitor_component
PLUGIN "diagnostic_topic_monitor::TopicAgeMonitor"
EXECUTABLE topic_age_monitor
)
target_include_directories(topic_age_monitor_component PRIVATE include)
ament_target_dependencies(topic_age_monitor_component
"lifecycle_msgs"
"rclcpp"
"rclcpp_lifecycle"
"rclcpp_components"
"diagnostic_msgs"
"diagnostic_updater"
)
ament_export_targets(export_topic_age_monitor_component)
install(TARGETS topic_age_monitor_component
EXPORT export_topic_age_monitor_component
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)

install(
DIRECTORY include/
DESTINATION include
)

if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
find_package(launch_testing_ament_cmake REQUIRED)

# Install the test configs and dummy publisher used for testing
install(
DIRECTORY test
DESTINATION share/${PROJECT_NAME}/
)
install(
PROGRAMS test/dummy_publishers.py
DESTINATION lib/${PROJECT_NAME}
)

add_launch_test(
test/test_topic_frequency_monitor.py
TARGET test_topic_frequency_monitor
TIMEOUT 20)

add_launch_test(
test/test_topic_age_monitor.py
TARGET test_topic_age_monitor
TIMEOUT 20)
endif()

ament_package()
107 changes: 107 additions & 0 deletions diagnostic_topic_monitor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Diagnostic Topic Monitor

Offers diagnostic tools which monitor topics for health, such as frequency and age of messages. The output can be further processed with the [`diagnostic_aggregator`](https://github.com/ros/diagnostics/tree/ros2/diagnostic_aggregator) for cleaner observation.

## Monitors

There are two monitors currently available, which can be used as components or standalone nodes:
* topic_frequency_monitor -- this supports _any_ kind of topic and checks for frequency based on receive time
* topic_age_monitor -- this supports only topics with a Header, and it checks that message age is within a given range

It is totally possible to run both of these at the same time, for different or the same topics, to check for both frequency
and max message age at the same time.

## Usage

You typically add the monitor(s) you want to a launch file in your package and configure the parameters to your liking. However, the nodes and components are ROS 2 life-cycle based, which means you need to configure and activate them manually.

See the launch tests in the `test` folder for examples of how start the standalone monitor nodes, and example configs in `test/config`.

> TIP: If you like the brevity and don't mind the added dependency, you can use the [nav2_lifecycle_manager](https://docs.nav2.org/configuration/packages/configuring-lifecycle.html) with parameter `autostart` set to `true` and target your monitor nodes to autostart them.

> Example launch snippet:
```python
# Include this node in your LaunchConfiguration
autostart_monitors_node = LifecycleNode(
package="nav2_lifecycle_manager",
executable="lifecycle_manager",
name="autostart_monitors",
parameters=[
{
"autostart": True,
"node_names": [<your monitor nodes>],
"bond_timeout": 0.0,
}
],
)
```

> For this, you need to have the `nav2_lifecycle_manager` package available (binary or source code installation), and ideally add it as an `exec_depend` in your package.

### Limiting which topics to monitor

By default, the following internal topic names will be ignored by the monitors, as they are usually irrelevant:

* ^/rosout$
* .*/parameter_events
* ^/diagnostics$
* .*/transition_event

ROS2 parameter `monitor_configured_only` for both monitors:

If false (the default), the topic monitor will monitor everything but the internal topics mentioned above.
To instead only monitor those topics which have been explicitly configured, set this to "true"

### Influencing the output

ROS2 parameter `diag_prefix` for both the monitors:

This string will be prefixed to the name of the diagnostic for use within the aggregator.

## `topic_frequency_monitor`

Without any parameters, this node (component also available) will subscribe to all normal topics in a system and minimally monitor that there is activity on them (defined as "at least one message per second"). For more specific configuration, the following parameters are available.

### Frequency Monitoring

Usually, we want to make sure that the frequency of a topic is within certain bounds.

For example, to monitor that the frequency of the "talker" topic is about 25Hz and the frequency of the "cmd_vel" topic is about 10Hz, use the following three parameters and pass them lists in the same order. This means, the second element in "min_values" applies to the topic given by the second element of the "topics" parameter.

topics: [ "talker", "cmd_vel" ]
min_values: [ 23.0, 8.0 ]
max_values: [ 26.5, 11.0 ]

Please note that the frequencies will be computed purely based on _arrival_ timing, the header is currently ignored.

## `topic_age_monitor`

Without any parameters, this node (component also available) will subscribe to all normal topics in a system and minimally monitor that there is activity on them (defined as "at least one message per second"). For more specific configuration, the following parameters are available.

### Message Age Monitoring

Usually, we want to make sure that message is not much older than expected (allowing for some jitter due to network transmission),
but also not newer than expected (which would indicate issues with timestamp or system time between machines).

Since this relies on a sender-side timestamp, this monitor only works with message that include a Header.

NOTE: You can configure it for any topic and it _will_ interpret the data in there as a timestamp, but unless it
really is a header, the results will be rather random ;-)

Configuration is similar to the frequency monitor, but giving a range for delays instead:

topics: [ "talker", "cmd_vel" ]
min_values: [ 80, 8.0 ]
max_values: [ 120, 11.0 ]
monitor_configured_only: True

> It is highly prudent to only monitor configured topics in the age monitor. If you set it to monitor all,
and you have topics without header fields, you will get a lot of warning messages!!

## Testing

To run all tests in this package:

```bash
colcon test --event-handlers console_direct+ --packages-select diagnostic_topic_monitor && colcon test-result
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) 2024, 2025 Robert Bosch GmbH
//
// See the top-level LICENSE file for licensing terms.

#ifndef ACTIVITY_DIAGNOSTIC_TASK_HPP
#define ACTIVITY_DIAGNOSTIC_TASK_HPP

#include <diagnostic_updater/diagnostic_updater.hpp> // NOLINT: upstream

using namespace std::chrono_literals;

namespace diagnostic_topic_monitor
{

/**
* @brief Very simply DiagnosticTask that just checks whether there has been _any_
* kind of activity during the last period
*/
class ActivityDiagnosticTask : public diagnostic_updater::DiagnosticTask
{
public:
ActivityDiagnosticTask(
const std::string & name, rclcpp::Clock::SharedPtr clock,
std::chrono::duration<double> warning_threshold = 1.1s)
: diagnostic_updater::DiagnosticTask(name),
clock_(clock),
last_tick_(0),
warning_threshold_(warning_threshold)
{
}
virtual void tick() {last_tick_ = clock_->now();}
void run(diagnostic_updater::DiagnosticStatusWrapper & stat) override
{
stat.name = getName();
if (last_tick_.seconds() == 0) {
stat.level = diagnostic_msgs::msg::DiagnosticStatus::WARN;
stat.message = "No data received, yet";
stat.addf("period", "%f", std::numeric_limits<double>::max());
} else {
const auto currentTime = clock_->now();
// more than 1s without tick is considered warning
const auto elapsed = currentTime - last_tick_;
stat.addf("period", "%f", elapsed.seconds());
if (elapsed <= rclcpp::Duration(warning_threshold_)) {
stat.level = diagnostic_msgs::msg::DiagnosticStatus::OK;
stat.message = "OK";
} else {
stat.level = diagnostic_msgs::msg::DiagnosticStatus::WARN;
stat.message = "Last data received more than 1s ago";
}
}
}

private:
rclcpp::Clock::SharedPtr clock_;
rclcpp::Time last_tick_;
std::chrono::duration<double> warning_threshold_;
};

} // namespace diagnostic_topic_monitor

#endif // ACTIVITY_DIAGNOSTIC_TASK_HPP
Loading
Loading