
Commit 9584771

Merge pull request #75 from ROBOTIS-GIT/feature-jazzy-update
Update machine learning package for Jazzy
2 parents: 86f49b4 + 6931a62

File tree: 10 files changed (+340, -250 lines)

.github/workflows/ros-ci.yml (4 additions, 0 deletions)

@@ -45,6 +45,10 @@ jobs:
         with:
           required-ros-distributions: ${{ matrix.ros_distribution }}

+      - name: Add pip break-system-packages for rosdep
+        run: |
+          printf "[install]\nbreak-system-packages = true\n" | sudo tee /etc/pip.conf
+
       - name: Build and Test
         uses: ros-tooling/[email protected]
         env:

turtlebot3_dqn/CHANGELOG.rst (7 additions, 1 deletion)

@@ -2,9 +2,15 @@
 Changelog for package turtlebot3_dqn
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

+1.0.1 (2025-05-02)
+------------------
+* Support for ROS 2 Jazzy version
+* Gazebo simulation support for the package
+* Contributors: ChanHyeong Lee
+
 1.0.0 (2025-04-17)
 ------------------
 * Support for ROS 2 Humble version
 * Renewal of package structure
 * Improved behavioral rewards for agents
-Contributors: ChanHyeong Lee
+* Contributors: ChanHyeong Lee

turtlebot3_dqn/package.xml (13 additions, 8 deletions)

@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <package format="2">
   <name>turtlebot3_dqn</name>
-  <version>1.0.0</version>
+  <version>1.0.1</version>
   <description>
     The turtlebot3_dqn package using reinforcement learning with DQN (Deep Q-Learning).
   </description>
@@ -12,13 +12,18 @@
   <url type="bugtracker">https://github.com/ROBOTIS-GIT/turtlebot3_machine_learning/issues</url>
   <author email="[email protected]">Gilbert</author>
   <author email="[email protected]">ChanHyeong Lee</author>
-  <depend>geometry_msgs</depend>
-  <depend>rclpy</depend>
-  <depend>sensor_msgs</depend>
-  <depend>std_srvs</depend>
-  <depend>turtlebot3_msgs</depend>
-  <depend>python-keras-pip</depend>
-  <depend>python-tensorflow-pip</depend>
+  <exec_depend>python3-pip</exec_depend>
+  <exec_depend>ament_index_python</exec_depend>
+  <exec_depend>geometry_msgs</exec_depend>
+  <exec_depend>python-tensorflow-pip</exec_depend>
+  <exec_depend>python3-numpy</exec_depend>
+  <exec_depend>python3-pyqt5</exec_depend>
+  <exec_depend>python3-pyqtgraph</exec_depend>
+  <exec_depend>rclpy</exec_depend>
+  <exec_depend>sensor_msgs</exec_depend>
+  <exec_depend>std_msgs</exec_depend>
+  <exec_depend>std_srvs</exec_depend>
+  <exec_depend>turtlebot3_msgs</exec_depend>
   <export>
     <build_type>ament_python</build_type>
   </export>

turtlebot3_dqn/setup.py (2 additions, 2 deletions)

@@ -14,14 +14,14 @@

 setup(
     name=package_name,
-    version='1.0.0',
+    version='1.0.1',
     packages=find_packages(),
     data_files=[
         ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
         ('share/' + package_name, ['package.xml']),
         ('share/' + package_name + '/launch', glob.glob('launch/*.py')),
     ],
-    install_requires=['setuptools', 'launch',],
+    install_requires=['setuptools', 'launch'],
     zip_safe=True,
     author=authors,
     author_email=author_emails,

turtlebot3_dqn/turtlebot3_dqn/dqn_agent.py (14 additions, 8 deletions)

@@ -26,16 +26,18 @@
 import sys
 import time

-from keras.api.layers import Dense
-from keras.api.models import load_model
-from keras.api.models import Sequential
-from keras.api.optimizers import Adam
 import numpy
 import rclpy
 from rclpy.node import Node
 from std_msgs.msg import Float32MultiArray
 from std_srvs.srv import Empty
 import tensorflow
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.layers import Input
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.models import load_model
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import Adam

 from turtlebot3_msgs.srv import Dqn

@@ -84,7 +86,7 @@ def __init__(self, stage_num, max_training_episodes):
         self.learning_rate = 0.0007
         self.epsilon = 1.0
         self.step_counter = 0
-        self.epsilon_decay = 20000
+        self.epsilon_decay = 6000 * self.stage
         self.epsilon_min = 0.05
         self.batch_size = 128

@@ -120,7 +122,10 @@ def __init__(self, stage_num, max_training_episodes):

         if LOGGING:
             tensorboard_file_name = current_time + ' dqn_stage' + str(self.stage) + '_reward'
-            dqn_reward_log_dir = 'logs/gradient_tape/' + tensorboard_file_name
+            home_dir = os.path.expanduser('~')
+            dqn_reward_log_dir = os.path.join(
+                home_dir, 'turtlebot3_dqn_logs', 'gradient_tape', tensorboard_file_name
+            )
             self.dqn_reward_writer = tensorflow.summary.create_file_writer(dqn_reward_log_dir)
             self.dqn_reward_metric = DQNMetric()

@@ -276,11 +281,12 @@ def step(self, action):

     def create_qnetwork(self):
         model = Sequential()
-        model.add(Dense(512, input_shape=(self.state_size,), activation='relu'))
+        model.add(Input(shape=(self.state_size,)))
+        model.add(Dense(512, activation='relu'))
         model.add(Dense(256, activation='relu'))
         model.add(Dense(128, activation='relu'))
         model.add(Dense(self.action_size, activation='linear'))
-        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
+        model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=self.learning_rate))
         model.summary()

         return model
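In this file the imports move from the keras.api.* paths to tensorflow.keras.*, the input shape is declared with an explicit Input layer instead of input_shape= on the first Dense, and the string loss 'mse' becomes a MeanSquaredError() object. A minimal standalone sketch of the resulting network construction, with placeholder sizes (state_size and action_size below are illustrative assumptions, not values taken from this diff):

# Sketch only: mirrors the updated create_qnetwork() pattern with placeholder sizes.
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

state_size = 26    # placeholder: goal distance, goal angle, plus front lidar rays
action_size = 5    # placeholder: one Q-value per discrete angular velocity

model = Sequential()
model.add(Input(shape=(state_size,)))    # explicit Input layer replaces input_shape=...
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(action_size, activation='linear'))
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0007))
model.summary()

The exploration schedule also changes: epsilon_decay now scales with the stage number (6000 * self.stage) instead of a fixed 20000, so the decay horizon grows with the stage; how epsilon_decay is applied is defined elsewhere in dqn_agent.py and is not shown in these hunks.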

turtlebot3_dqn/turtlebot3_dqn/dqn_environment.py (101 additions, 42 deletions)

@@ -18,8 +18,10 @@
 # Authors: Ryan Shim, Gilbert, ChanHyeong Lee

 import math
+import os

 from geometry_msgs.msg import Twist
+from geometry_msgs.msg import TwistStamped
 from nav_msgs.msg import Odometry
 import numpy
 import rclpy
@@ -34,11 +36,13 @@
 from turtlebot3_msgs.srv import Goal


+ROS_DISTRO = os.environ.get('ROS_DISTRO')
+
+
 class RLEnvironment(Node):

     def __init__(self):
         super().__init__('rl_environment')
-        self.train_mode = True
         self.goal_pose_x = 0.0
         self.goal_pose_y = 0.0
         self.robot_pose_x = 0.0
@@ -55,15 +59,20 @@ def __init__(self):
         self.goal_distance = 1.0
         self.init_goal_distance = 0.5
         self.scan_ranges = []
+        self.front_ranges = []
         self.min_obstacle_distance = 10.0
+        self.is_front_min_actual_front = False

         self.local_step = 0
         self.stop_cmd_vel_timer = None
         self.angular_vel = [1.5, 0.75, 0.0, -0.75, -1.5]

         qos = QoSProfile(depth=10)

-        self.cmd_vel_pub = self.create_publisher(Twist, 'cmd_vel', qos)
+        if ROS_DISTRO == 'humble':
+            self.cmd_vel_pub = self.create_publisher(Twist, 'cmd_vel', qos)
+        else:
+            self.cmd_vel_pub = self.create_publisher(TwistStamped, 'cmd_vel', qos)

         self.odom_sub = self.create_subscription(
             Odometry,
@@ -112,6 +121,7 @@ def __init__(self):
         )

     def make_environment_callback(self, request, response):
+        self.get_logger().info('Make environment called')
         while not self.initialize_environment_client.wait_for_service(timeout_sec=1.0):
             self.get_logger().warn(
                 'service for initialize the environment is not available, waiting ...'
@@ -166,17 +176,32 @@ def call_task_failed(self):

     def scan_sub_callback(self, scan):
         self.scan_ranges = []
+        self.front_ranges = []
+        self.front_angles = []
+
         num_of_lidar_rays = len(scan.ranges)
+        angle_min = scan.angle_min
+        angle_increment = scan.angle_increment
+
+        self.front_distance = scan.ranges[0]

         for i in range(num_of_lidar_rays):
-            if scan.ranges[i] == float('Inf'):
-                self.scan_ranges.append(3.5)
-            elif numpy.isnan(scan.ranges[i]):
-                self.scan_ranges.append(0)
-            else:
-                self.scan_ranges.append(scan.ranges[i])
+            angle = angle_min + i * angle_increment
+            distance = scan.ranges[i]
+
+            if distance == float('Inf'):
+                distance = 3.5
+            elif numpy.isnan(distance):
+                distance = 0.0
+
+            self.scan_ranges.append(distance)
+
+            if (0 <= angle <= math.pi/2) or (3*math.pi/2 <= angle <= 2*math.pi):
+                self.front_ranges.append(distance)
+                self.front_angles.append(angle)

         self.min_obstacle_distance = min(self.scan_ranges)
+        self.front_min_obstacle_distance = min(self.front_ranges) if self.front_ranges else 10.0

     def odom_sub_callback(self, msg):
         self.robot_pose_x = msg.pose.pose.position.x
@@ -204,81 +229,112 @@ def calculate_state(self):
         state = []
         state.append(float(self.goal_distance))
         state.append(float(self.goal_angle))
-
-        for var in self.scan_ranges:
+        for var in self.front_ranges:
             state.append(float(var))
         self.local_step += 1

         if self.goal_distance < 0.20:
             self.get_logger().info('Goal Reached')
             self.succeed = True
             self.done = True
-            self.cmd_vel_pub.publish(Twist())
+            if ROS_DISTRO == 'humble':
+                self.cmd_vel_pub.publish(Twist())
+            else:
+                self.cmd_vel_pub.publish(TwistStamped())
             self.local_step = 0
             self.call_task_succeed()

         if self.min_obstacle_distance < 0.15:
             self.get_logger().info('Collision happened')
             self.fail = True
             self.done = True
-            self.cmd_vel_pub.publish(Twist())
+            if ROS_DISTRO == 'humble':
+                self.cmd_vel_pub.publish(Twist())
+            else:
+                self.cmd_vel_pub.publish(TwistStamped())
             self.local_step = 0
             self.call_task_failed()

         if self.local_step == self.max_step:
             self.get_logger().info('Time out!')
             self.fail = True
             self.done = True
-            self.cmd_vel_pub.publish(Twist())
+            if ROS_DISTRO == 'humble':
+                self.cmd_vel_pub.publish(Twist())
+            else:
+                self.cmd_vel_pub.publish(TwistStamped())
             self.local_step = 0
             self.call_task_failed()

         return state

-    def calculate_reward(self):
-        if self.train_mode:
+    def compute_directional_weights(self, relative_angles, max_weight=10.0):
+        power = 6
+        raw_weights = (numpy.cos(relative_angles))**power + 0.1
+        scaled_weights = raw_weights * (max_weight / numpy.max(raw_weights))
+        normalized_weights = scaled_weights / numpy.sum(scaled_weights)
+        return normalized_weights

-            if not hasattr(self, 'prev_goal_distance'):
-                self.prev_goal_distance = self.init_goal_distance
+    def compute_weighted_obstacle_reward(self):
+        if not self.front_ranges or not self.front_angles:
+            return 0.0

-            distance_reward = self.prev_goal_distance - self.goal_distance
-            self.prev_goal_distance = self.goal_distance
+        front_ranges = numpy.array(self.front_ranges)
+        front_angles = numpy.array(self.front_angles)

-            yaw_reward = (1 - 2 * math.sqrt(math.fabs(self.goal_angle / math.pi)))
+        valid_mask = front_ranges <= 0.5
+        if not numpy.any(valid_mask):
+            return 0.0

-            obstacle_reward = 0.0
-            if self.min_obstacle_distance < 0.50:
-                obstacle_reward = -1.0
+        front_ranges = front_ranges[valid_mask]
+        front_angles = front_angles[valid_mask]

-            reward = (distance_reward * 10) + (yaw_reward / 5) + obstacle_reward
+        relative_angles = numpy.unwrap(front_angles)
+        relative_angles[relative_angles > numpy.pi] -= 2 * numpy.pi

-            if self.succeed:
-                reward = 30.0
-            elif self.fail:
-                reward = -10.0
+        weights = self.compute_directional_weights(relative_angles, max_weight=10.0)

-        else:
-            if self.succeed:
-                reward = 5.0
-            elif self.fail:
-                reward = -5.0
-            else:
-                reward = 0.0
+        safe_dists = numpy.clip(front_ranges - 0.25, 1e-2, 3.5)
+        decay = numpy.exp(-3.0 * safe_dists)
+
+        weighted_decay = numpy.dot(weights, decay)
+
+        reward = - (1.0 + 4.0 * weighted_decay)
+
+        return reward
+
+    def calculate_reward(self):
+        yaw_reward = 1 - (2 * abs(self.goal_angle) / math.pi)
+        obstacle_reward = self.compute_weighted_obstacle_reward()
+
+        print('directional_reward: %f, obstacle_reward: %f' % (yaw_reward, obstacle_reward))
+        reward = yaw_reward + obstacle_reward
+
+        if self.succeed:
+            reward = 100.0
+        elif self.fail:
+            reward = -50.0

         return reward

     def rl_agent_interface_callback(self, request, response):
         action = request.action
-        twist = Twist()
-        twist.linear.x = 0.15
-        twist.angular.z = self.angular_vel[action]
-        self.cmd_vel_pub.publish(twist)
+        if ROS_DISTRO == 'humble':
+            msg = Twist()
+            msg.linear.x = 0.2
+            msg.angular.z = self.angular_vel[action]
+        else:
+            msg = TwistStamped()
+            msg.twist.linear.x = 0.2
+            msg.twist.angular.z = self.angular_vel[action]
+
+        self.cmd_vel_pub.publish(msg)
         if self.stop_cmd_vel_timer is None:
             self.prev_goal_distance = self.init_goal_distance
-            self.stop_cmd_vel_timer = self.create_timer(1.8, self.timer_callback)
+            self.stop_cmd_vel_timer = self.create_timer(0.8, self.timer_callback)
         else:
             self.destroy_timer(self.stop_cmd_vel_timer)
-            self.stop_cmd_vel_timer = self.create_timer(1.8, self.timer_callback)
+            self.stop_cmd_vel_timer = self.create_timer(0.8, self.timer_callback)

         response.state = self.calculate_state()
         response.reward = self.calculate_reward()
@@ -293,7 +349,10 @@ def rl_agent_interface_callback(self, request, response):

     def timer_callback(self):
         self.get_logger().info('Stop called')
-        self.cmd_vel_pub.publish(Twist())
+        if ROS_DISTRO == 'humble':
+            self.cmd_vel_pub.publish(Twist())
+        else:
+            self.cmd_vel_pub.publish(TwistStamped())
         self.destroy_timer(self.stop_cmd_vel_timer)

     def euler_from_quaternion(self, quat):
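The reward also changes shape: the old distance-progress term and flat -1.0 obstacle penalty are replaced by a heading-alignment term plus a directionally weighted obstacle penalty, in which every front ray closer than 0.5 m contributes an exponential decay of its clearance, weighted by roughly cos^6 of its bearing so that rays near the robot's heading dominate. Below is a standalone sketch of that arithmetic using hypothetical helpers that mirror compute_directional_weights and compute_weighted_obstacle_reward; the inputs are illustrative values, not real scans, and the angle wrapping is simplified relative to the numpy.unwrap call in the diff.

# Sketch only: reproduces the new directional obstacle penalty outside the node.
import numpy

def directional_weights(relative_angles, max_weight=10.0):
    # cos^6 concentrates weight on rays near the robot's heading; +0.1 keeps
    # side rays from vanishing entirely before normalization.
    raw = numpy.cos(relative_angles) ** 6 + 0.1
    scaled = raw * (max_weight / numpy.max(raw))
    return scaled / numpy.sum(scaled)

def weighted_obstacle_reward(front_ranges, front_angles):
    ranges = numpy.asarray(front_ranges, dtype=float)
    angles = numpy.asarray(front_angles, dtype=float)

    mask = ranges <= 0.5                     # only rays that see a nearby obstacle count
    if not numpy.any(mask):
        return 0.0
    ranges, angles = ranges[mask], angles[mask]

    angles = numpy.where(angles > numpy.pi, angles - 2 * numpy.pi, angles)  # map to [-pi, pi]
    weights = directional_weights(angles)

    safe_dists = numpy.clip(ranges - 0.25, 1e-2, 3.5)   # clearance beyond the robot body
    decay = numpy.exp(-3.0 * safe_dists)                 # smaller clearance -> larger contribution
    return -(1.0 + 4.0 * numpy.dot(weights, decay))

# Closer obstacle straight ahead vs. the same pair with the closer one off to the side:
print(weighted_obstacle_reward([0.28, 0.45], [0.0, 1.4]))  # ~ -4.5
print(weighted_obstacle_reward([0.45, 0.28], [0.0, 1.4]))  # ~ -3.3

With these illustrative inputs the penalty is noticeably harsher (about -4.5 versus -3.3) when the closer obstacle sits straight ahead rather than off to the side, which is the effect the cos^6 weighting is aiming for.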
