1818# Authors: Ryan Shim, Gilbert, ChanHyeong Lee
1919
2020import math
21+ import os
2122
2223from geometry_msgs .msg import Twist
24+ from geometry_msgs .msg import TwistStamped
2325from nav_msgs .msg import Odometry
2426import numpy
2527import rclpy
3436from turtlebot3_msgs .srv import Goal
3537
3638
39+ ROS_DISTRO = os .environ .get ('ROS_DISTRO' )
40+
41+
3742class RLEnvironment (Node ):
3843
3944 def __init__ (self ):
4045 super ().__init__ ('rl_environment' )
41- self .train_mode = True
4246 self .goal_pose_x = 0.0
4347 self .goal_pose_y = 0.0
4448 self .robot_pose_x = 0.0
@@ -55,15 +59,20 @@ def __init__(self):
5559 self .goal_distance = 1.0
5660 self .init_goal_distance = 0.5
5761 self .scan_ranges = []
62+ self .front_ranges = []
5863 self .min_obstacle_distance = 10.0
64+ self .is_front_min_actual_front = False
5965
6066 self .local_step = 0
6167 self .stop_cmd_vel_timer = None
6268 self .angular_vel = [1.5 , 0.75 , 0.0 , - 0.75 , - 1.5 ]
6369
6470 qos = QoSProfile (depth = 10 )
6571
66- self .cmd_vel_pub = self .create_publisher (Twist , 'cmd_vel' , qos )
72+ if ROS_DISTRO == 'humble' :
73+ self .cmd_vel_pub = self .create_publisher (Twist , 'cmd_vel' , qos )
74+ else :
75+ self .cmd_vel_pub = self .create_publisher (TwistStamped , 'cmd_vel' , qos )
6776
6877 self .odom_sub = self .create_subscription (
6978 Odometry ,
@@ -112,6 +121,7 @@ def __init__(self):
112121 )
113122
114123 def make_environment_callback (self , request , response ):
124+ self .get_logger ().info ('Make environment called' )
115125 while not self .initialize_environment_client .wait_for_service (timeout_sec = 1.0 ):
116126 self .get_logger ().warn (
117127 'service for initialize the environment is not available, waiting ...'
@@ -166,17 +176,32 @@ def call_task_failed(self):
166176
167177 def scan_sub_callback (self , scan ):
168178 self .scan_ranges = []
179+ self .front_ranges = []
180+ self .front_angles = []
181+
169182 num_of_lidar_rays = len (scan .ranges )
183+ angle_min = scan .angle_min
184+ angle_increment = scan .angle_increment
185+
186+ self .front_distance = scan .ranges [0 ]
170187
171188 for i in range (num_of_lidar_rays ):
172- if scan .ranges [i ] == float ('Inf' ):
173- self .scan_ranges .append (3.5 )
174- elif numpy .isnan (scan .ranges [i ]):
175- self .scan_ranges .append (0 )
176- else :
177- self .scan_ranges .append (scan .ranges [i ])
189+ angle = angle_min + i * angle_increment
190+ distance = scan .ranges [i ]
191+
192+ if distance == float ('Inf' ):
193+ distance = 3.5
194+ elif numpy .isnan (distance ):
195+ distance = 0.0
196+
197+ self .scan_ranges .append (distance )
198+
199+ if (0 <= angle <= math .pi / 2 ) or (3 * math .pi / 2 <= angle <= 2 * math .pi ):
200+ self .front_ranges .append (distance )
201+ self .front_angles .append (angle )
178202
179203 self .min_obstacle_distance = min (self .scan_ranges )
204+ self .front_min_obstacle_distance = min (self .front_ranges ) if self .front_ranges else 10.0
180205
181206 def odom_sub_callback (self , msg ):
182207 self .robot_pose_x = msg .pose .pose .position .x
@@ -204,81 +229,112 @@ def calculate_state(self):
204229 state = []
205230 state .append (float (self .goal_distance ))
206231 state .append (float (self .goal_angle ))
207-
208- for var in self .scan_ranges :
232+ for var in self .front_ranges :
209233 state .append (float (var ))
210234 self .local_step += 1
211235
212236 if self .goal_distance < 0.20 :
213237 self .get_logger ().info ('Goal Reached' )
214238 self .succeed = True
215239 self .done = True
216- self .cmd_vel_pub .publish (Twist ())
240+ if ROS_DISTRO == 'humble' :
241+ self .cmd_vel_pub .publish (Twist ())
242+ else :
243+ self .cmd_vel_pub .publish (TwistStamped ())
217244 self .local_step = 0
218245 self .call_task_succeed ()
219246
220247 if self .min_obstacle_distance < 0.15 :
221248 self .get_logger ().info ('Collision happened' )
222249 self .fail = True
223250 self .done = True
224- self .cmd_vel_pub .publish (Twist ())
251+ if ROS_DISTRO == 'humble' :
252+ self .cmd_vel_pub .publish (Twist ())
253+ else :
254+ self .cmd_vel_pub .publish (TwistStamped ())
225255 self .local_step = 0
226256 self .call_task_failed ()
227257
228258 if self .local_step == self .max_step :
229259 self .get_logger ().info ('Time out!' )
230260 self .fail = True
231261 self .done = True
232- self .cmd_vel_pub .publish (Twist ())
262+ if ROS_DISTRO == 'humble' :
263+ self .cmd_vel_pub .publish (Twist ())
264+ else :
265+ self .cmd_vel_pub .publish (TwistStamped ())
233266 self .local_step = 0
234267 self .call_task_failed ()
235268
236269 return state
237270
238- def calculate_reward (self ):
239- if self .train_mode :
271+ def compute_directional_weights (self , relative_angles , max_weight = 10.0 ):
272+ power = 6
273+ raw_weights = (numpy .cos (relative_angles ))** power + 0.1
274+ scaled_weights = raw_weights * (max_weight / numpy .max (raw_weights ))
275+ normalized_weights = scaled_weights / numpy .sum (scaled_weights )
276+ return normalized_weights
240277
241- if not hasattr (self , 'prev_goal_distance' ):
242- self .prev_goal_distance = self .init_goal_distance
278+ def compute_weighted_obstacle_reward (self ):
279+ if not self .front_ranges or not self .front_angles :
280+ return 0.0
243281
244- distance_reward = self . prev_goal_distance - self .goal_distance
245- self . prev_goal_distance = self .goal_distance
282+ front_ranges = numpy . array ( self .front_ranges )
283+ front_angles = numpy . array ( self .front_angles )
246284
247- yaw_reward = (1 - 2 * math .sqrt (math .fabs (self .goal_angle / math .pi )))
285+ valid_mask = front_ranges <= 0.5
286+ if not numpy .any (valid_mask ):
287+ return 0.0
248288
249- obstacle_reward = 0.0
250- if self .min_obstacle_distance < 0.50 :
251- obstacle_reward = - 1.0
289+ front_ranges = front_ranges [valid_mask ]
290+ front_angles = front_angles [valid_mask ]
252291
253- reward = (distance_reward * 10 ) + (yaw_reward / 5 ) + obstacle_reward
292+ relative_angles = numpy .unwrap (front_angles )
293+ relative_angles [relative_angles > numpy .pi ] -= 2 * numpy .pi
254294
255- if self .succeed :
256- reward = 30.0
257- elif self .fail :
258- reward = - 10.0
295+ weights = self .compute_directional_weights (relative_angles , max_weight = 10.0 )
259296
260- else :
261- if self .succeed :
262- reward = 5.0
263- elif self .fail :
264- reward = - 5.0
265- else :
266- reward = 0.0
297+ safe_dists = numpy .clip (front_ranges - 0.25 , 1e-2 , 3.5 )
298+ decay = numpy .exp (- 3.0 * safe_dists )
299+
300+ weighted_decay = numpy .dot (weights , decay )
301+
302+ reward = - (1.0 + 4.0 * weighted_decay )
303+
304+ return reward
305+
306+ def calculate_reward (self ):
307+ yaw_reward = 1 - (2 * abs (self .goal_angle ) / math .pi )
308+ obstacle_reward = self .compute_weighted_obstacle_reward ()
309+
310+ print ('directional_reward: %f, obstacle_reward: %f' % (yaw_reward , obstacle_reward ))
311+ reward = yaw_reward + obstacle_reward
312+
313+ if self .succeed :
314+ reward = 100.0
315+ elif self .fail :
316+ reward = - 50.0
267317
268318 return reward
269319
270320 def rl_agent_interface_callback (self , request , response ):
271321 action = request .action
272- twist = Twist ()
273- twist .linear .x = 0.15
274- twist .angular .z = self .angular_vel [action ]
275- self .cmd_vel_pub .publish (twist )
322+ if ROS_DISTRO == 'humble' :
323+ msg = Twist ()
324+ msg .linear .x = 0.2
325+ msg .angular .z = self .angular_vel [action ]
326+ else :
327+ msg = TwistStamped ()
328+ msg .twist .linear .x = 0.2
329+ msg .twist .angular .z = self .angular_vel [action ]
330+
331+ self .cmd_vel_pub .publish (msg )
276332 if self .stop_cmd_vel_timer is None :
277333 self .prev_goal_distance = self .init_goal_distance
278- self .stop_cmd_vel_timer = self .create_timer (1 .8 , self .timer_callback )
334+ self .stop_cmd_vel_timer = self .create_timer (0 .8 , self .timer_callback )
279335 else :
280336 self .destroy_timer (self .stop_cmd_vel_timer )
281- self .stop_cmd_vel_timer = self .create_timer (1 .8 , self .timer_callback )
337+ self .stop_cmd_vel_timer = self .create_timer (0 .8 , self .timer_callback )
282338
283339 response .state = self .calculate_state ()
284340 response .reward = self .calculate_reward ()
@@ -293,7 +349,10 @@ def rl_agent_interface_callback(self, request, response):
293349
294350 def timer_callback (self ):
295351 self .get_logger ().info ('Stop called' )
296- self .cmd_vel_pub .publish (Twist ())
352+ if ROS_DISTRO == 'humble' :
353+ self .cmd_vel_pub .publish (Twist ())
354+ else :
355+ self .cmd_vel_pub .publish (TwistStamped ())
297356 self .destroy_timer (self .stop_cmd_vel_timer )
298357
299358 def euler_from_quaternion (self , quat ):
0 commit comments