DeepRacer 找到的最好的奖励函数 reward function

Posted 2021-10-24 架构师易筋
tags:
篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了DeepRacer 找到的最好的奖励函数 reward function相关的知识，希望对你有一定的参考价值。
使用不同的地图（赛道）时，需要把下面代码中的 racing_track 数组替换为对应赛道的数据。
import math


class Reward:
    def __init__(self, verbose=False):
        self.first_racingpoint_index = None
        self.verbose = verbose

    def reward_function(self, params):


        ################## HELPER FUNCTIONS ###################

        def dist_2_points(x1, x2, y1, y2):
            """Return the Euclidean distance between (x1, y1) and (x2, y2).

            Note the parameter order: both x values come first, then both y
            values.

            Args:
                x1: x coordinate of the first point.
                x2: x coordinate of the second point.
                y1: y coordinate of the first point.
                y2: y coordinate of the second point.

            Returns:
                The non-negative straight-line distance as a float.
            """
            # math.hypot replaces the hand-rolled sqrt-of-squares (whose abs()
            # calls were redundant) and does not overflow when the
            # intermediate squares would exceed the float range.
            return math.hypot(x1 - x2, y1 - y2)

        def closest_2_racing_points_index(racing_coords, car_coords):
            """Return the indexes of the two racing points nearest to the car.

            Args:
                racing_coords: Sequence of points; element 0 of each point is
                    used as x and element 1 as y.
                car_coords: Car position, indexed the same way
                    ([0] = x, [1] = y).

            Returns:
                ``[closest_index, second_closest_index]``. On ties the lowest
                index wins, because ``list.index`` returns the first match.

            Raises:
                ValueError: If ``racing_coords`` is empty (``min`` of an
                    empty list).
            """
            # Distance from the car to every racing point, in list order
            distances = [dist_2_points(x1=point[0], x2=car_coords[0],
                                       y1=point[1], y2=car_coords[1])
                         for point in racing_coords]

            # Index of the closest racing point
            closest_index = distances.index(min(distances))

            # Mask the winner with +inf and search again. A finite sentinel
            # (previously 999) would be picked a second time whenever every
            # other point is farther away than the sentinel value.
            distances_no_closest = distances.copy()
            distances_no_closest[closest_index] = float("inf")
            second_closest_index = distances_no_closest.index(
                min(distances_no_closest))

            return [closest_index, second_closest_index]

        def dist_to_racing_line(closest_coords, second_closest_coords, car_coords):
            """Return the car's distance to the line through two racing points.

            The two racing points and the car form a triangle with side
            lengths ``a`` (between the racing points), ``b`` (car to closest)
            and ``c`` (car to second closest). The polynomial below equals
            16 * area**2 (Heron's formula), so its square root divided by
            ``2 * a`` is the triangle's height over side ``a``.

            Args:
                closest_coords: Closest racing point ([0] = x, [1] = y).
                second_closest_coords: Second closest racing point.
                car_coords: Car position ([0] = x, [1] = y).

            Returns:
                The distance to the line, or ``b`` (distance to the closest
                point) when both racing points coincide and ``a`` is zero.
            """
            # Side between the two racing points
            a = dist_2_points(x1=closest_coords[0],
                              x2=second_closest_coords[0],
                              y1=closest_coords[1],
                              y2=second_closest_coords[1])

            # Sides from the car to the closest and second closest racing point
            b = dist_2_points(x1=car_coords[0],
                              x2=closest_coords[0],
                              y1=car_coords[1],
                              y2=closest_coords[1])
            c = dist_2_points(x1=car_coords[0],
                              x2=second_closest_coords[0],
                              y1=car_coords[1],
                              y2=second_closest_coords[1])

            # abs() guards against a slightly negative value caused by float
            # rounding when the three points are (almost) collinear.
            sixteen_area_squared = abs(-(a ** 4) + 2 * (a ** 2) * (b ** 2) + 2 * (a ** 2) * (c ** 2) -
                                       (b ** 4) + 2 * (b ** 2) * (c ** 2) - (c ** 4))

            # Only the a == 0 case is expected here; catching just
            # ZeroDivisionError keeps unrelated errors (and Ctrl-C) visible
            # instead of silently turning them into a distance.
            try:
                distance = sixteen_area_squared ** 0.5 / (2 * a)
            except ZeroDivisionError:
                distance = b

            return distance

        # Calculate which one of the closest racing points is the next one and which one the previous one
        def next_prev_racing_point(closest_coords, second_closest_coords, car_coords, heading):
            """Decide which of the two nearest racing points lies ahead of the car.

            The car position is shifted by one unit along ``heading``
            (interpreted in degrees). Whichever racing point is nearer to
            that shifted position is treated as the next point, the other as
            the previous one. A tie goes to ``closest_coords``.

            Returns:
                ``[next_point_coords, prev_point_coords]``.
            """
            # Probe position: the car moved one unit in its heading direction
            heading_rad = math.radians(heading)
            probe_x = car_coords[0] + math.cos(heading_rad)
            probe_y = car_coords[1] + math.sin(heading_rad)

            # Distances from the probe position to both candidate points
            probe_to_closest = dist_2_points(x1=probe_x, x2=closest_coords[0],
                                             y1=probe_y, y2=closest_coords[1])
            probe_to_second = dist_2_points(x1=probe_x, x2=second_closest_coords[0],
                                            y1=probe_y, y2=second_closest_coords[1])

            if probe_to_closest <= probe_to_second:
                return [closest_coords, second_closest_coords]
            return [second_closest_coords, closest_coords]

        def racing_direction_diff(closest_coords, second_closest_coords, car_coords, heading):
            """Return the absolute angle in degrees between racing line and heading.

            The racing-line direction is taken from the previous to the next
            racing point, as ordered by ``next_prev_racing_point``. A raw
            difference above 180 is replaced by ``360 - difference``.
            """
            next_point, prev_point = next_prev_racing_point(
                closest_coords, second_closest_coords, car_coords, heading)

            # atan2(dy, dx) yields radians in [-pi, pi]; convert to degrees
            delta_y = next_point[1] - prev_point[1]
            delta_x = next_point[0] - prev_point[0]
            track_direction = math.degrees(math.atan2(delta_y, delta_x))

            # Take the smaller of the two angles between track and heading
            direction_diff = abs(track_direction - heading)
            return 360 - direction_diff if direction_diff > 180 else direction_diff

        # Gives back indexes that lie between start and end index of a cyclical list 
        # (start index is included, end index is not)
        def indexes_cyclical(start, end, array_len):
            """List the indexes from ``start`` up to (not including) ``end``.

            The list is treated as cyclical: when ``end`` is smaller than
            ``start`` the walk wraps around past ``array_len - 1`` back to 0.

            Returns:
                The visited indexes in order, or ``[]`` when either bound is
                ``None`` or when ``start == end``.
            """
            if start is None or end is None:
                return []

            # Unroll the wrap-around so a plain range can be used
            stop = end + array_len if end < start else end

            return [position % array_len for position in range(start, stop)]

        # Calculate how long car would take for entire lap, if it continued like it did until now
        def projected_time(first_index, closest_index, step_count, times_list):
            """Extrapolate the full-lap time from the car's pace so far.

            Args:
                first_index: Index of the racing point where the car started
                    (may be ``None``).
                closest_index: Index of the racing point closest to the car now.
                step_count: Number of steps taken so far.
                times_list: Optimal time for each racing-point segment.

            Returns:
                Actual elapsed time divided by the optimal time for the
                segments already covered, scaled to the optimal full-lap time.
                Returns 9999 when no segment has been covered yet (the
                expected time is zero).
            """
            # Elapsed time; the divisor 15 is presumably the simulator's
            # steps per second -- confirm against the training configuration
            current_actual_time = (step_count - 1) / 15

            # Racing-point indexes already passed since the start
            indexes_traveled = indexes_cyclical(first_index, closest_index, len(times_list))

            # Time the optimal line would have needed for the same stretch
            current_expected_time = sum(times_list[i] for i in indexes_traveled)

            # Time the optimal line needs for one entire lap
            total_expected_time = sum(times_list)

            # Only a zero expected time is anticipated here, so catch exactly
            # that instead of using a bare except that hides every error.
            # The result uses its own name rather than shadowing the function.
            try:
                lap_estimate = (current_actual_time / current_expected_time) * total_expected_time
            except ZeroDivisionError:
                lap_estimate = 9999

            return lap_estimate

        #################### RACING LINE ######################

        # Optimal racing line for the 2018 track
        # Each row: [x,y,speed,timeFromPreviousPoint]
        racing_track = [[3.07857, 0.7234, 3.2, 0.04483],
                        [3.22295, 0.71246, 3.2, 0.04525],
                        [3.36865, 0.70402, 3.2, 0.04561],
                        [3.51539, 0.69762, 3.2, 0.0459],
                        [3.66294, 0.69287, 3.2, 0.04613],
                        [3.81112, 0.68942, 3.2, 0.04632],
                        [3.95978, 0.68698, 3.2, 0.04646],
                        [4.10881, 0.68536, 3.2, 0.04658],
                        [4.25813, 0.68454, 3.2, 0.04666],
                        [4.4074, 0.68487, 3.2, 0.04665],
                        [4.55614, 0.68678, 3.2, 0.04648],
                        [4.704, 0.69061, 3.2, 0.04622],
                        [4.85072, 0.69669, 3.2, 0.04589],
                        [4.99598, 0.70537, 3.2, 0.04548],
                        [5.13949, 0.71702, 3.1441, 0.04579],
                        [5.28093, 0.73198, 2.99692, 0.04746],
                        [5.41997, 0.75056, 2.81078, 0.0499],
                        [5.55628, 0.77305, 2.57432, 0.05367],
                        [5.68961, 0.79959, 2.38821, 0.05693],
                        [5.81987, 0.83014, 2.13351, 0.06271],
                        [5.94681, 0.86481, 1.91826, 0.0686],
                        [6.07015, 0.90377, 1.70891, 0.07569],
                        [6.18943, 0.94729, 1.5267, 0.08317],
                        [6.30396, 0.99586, 1.33303, 0.09332],
                        [6.41305, 1.04984, 1.33303, 0.09131],
                        [6.51548, 1.10999, 1.33303, 0.08911],
                        [6.60983, 1.17694, 1.2, 0.09641],
                        [6.69419, 1.25136, 1.2, 0.09375],
                        [6.76624, 1.33366, 1.2, 0.09115],
                        [6.82221, 1.42397, 1.2, 0.08854],
                        [6.86523, 1.51907, 1.2, 0.08698],
                        [6.89274, 1.61832, 1.2, 0.08582],
                        [6.90063, 1.72008, 1.25561, 0.08129],
                        [6.89071, 1.82141, 1.31099, 0.07766],
                        [6.86585, 1.92062, 1.31099, 0.07801],
                        [6.82793, 2.01677, 1.31099, 0.07884],
                        [6.77364, 2.10731, 1.47028, 0.0718],
                        [6.70615, 2.19179, 1.61668, 0.06688],
                        [6.62745, 2.27016, 1.7578, 0.06318],
                        [6.53892, 2.34243, 1.98083, 0.0577],
                        [6.4423, 2.4092, 2.18742, 0.05369],
                        [6.33878, 2.4709, 2.428, 0.04964],
                        [6.2294, 2.52805, 2.72594, 0.04527],
                        [6.11518, 2.58128, 3.12658, 0.0403],
                        [5.99717, 2.63131, 3.2, 0.04006],
                        [5.87631, 2.67889, 3.2, 0.04059],
                        [5.75359, 2.72482, 3.2, 0.04095],
                        [5.62981, 2.76984, 3.2, 0.04116],
                        [5.49795, 2.81748, 3.2, 0.04381],
                        [5.36653, 2.86607, 3.2, 0.04379],
                        [5.23582, 2.91617, 3.2, 0.04375],
                        [5.10609, 2.96836, 3.2, 0.0437],
                        [4.97753, 3.02305, 3.2, 0.04366],
                        [4.8503, 3.08056, 3.2, 0.04363],
                        [4.72449, 3.14103, 3.2, 0.04362],
                        [4.6001, 3.20451, 3.2, 0.04364],
                        [4.47711, 3.27091, 3.2, 0.04368],
                        [4.3554, 3.33998, 3.2, 0.04373],
                        [4.23479, 3.41134, 3.2, 0.04379],
                        [4.11504, 3.48448, 3.2, 0.04385],
                        [3.99593, 3.55896, 3.2, 0.0439],
                        [3.87735, 3.63453, 3.18039, 0.04421],
                        [3.76113, 3.7075, 2.93388, 0.04678],
                        [3.64439, 3.77888, 2.75264, 0.04971],
                        [3.52693, 3.84793, 2.61896, 0.05202],
                        [3.40855, 3.91383, 2.52022, 0.05376],
                        [3.28907, 3.97573, 2.45157, 0.05489],
                        [3.16841, 4.03287, 2.4139, 0.05531],
                        [3.04654, 4.08445, 2.4139, 0.05482],
                        [2.92352, 4.12983, 2.4139, 0.05432],
                        [2.79954, 4.16845, 2.4139, 0.0538],
                        [2.67484, 4.19988, 2.4139, 0.05328],
                        [2.54972, 4.22385, 2.33725, 0.0545],
                        [2.42455, 4.24027, 2.2162, 0.05696],
                        [2.29967, 4.24929, 2.05076, 0.06105],
                        [2.17537, 4.25144, 1.91759, 0.06483],
                        [2.05199, 4.24673, 1.70962, 0.07222],
                        [1.92988, 4.23511, 1.50117, 0.08171],
                        [1.80954, 4.21626, 1.33113, 0.09151],
                        [1.69159, 4.1896, 1.33113, 0.09084],
                        [1.57699, 4.15419, 1.33113, 0.09011],
                        [1.46688, 4.10924, 1.33113, 0.08935],
                        [1.36327, 4.053, 1.33113, 0.08856],
                        [1.26935, 3.98333, 1.33113, 0.08785],
                        [1.18961, 3.89844, 1.42458, 0.08176],
                        [1.12366, 3.8017, 1.59614, 0.07335],
                        [1.06963, 3.69616, 1.74427, 0.06797],
                        [1.02636, 3.58353, 1.88984, 0.06385],
                        [0.99299, 3.46502, 2.03412, 0.06053]以上是关于DeepRacer 找到的最好的奖励函数 reward function的主要内容，如果未能解决你的问题，请参考以下文章