基於影象處理和tensorflow實現GTA5的車輛自動駕駛——第八節偽AI駕駛

阿新 • • 發佈：2020-12-16

綜述

上節實現了在理想環境下給道路畫線的功能
不理想的環境1：如果不在道路中間，而靠近道路右邊，會出現下面的情況(直線的斜率同號，且直線的斜率為負)
不理想的環境2：如果不在道路中間，而靠近道路左邊，會出現下面的情況(直線的斜率同號，且直線的斜率為正)

方法

正確行駛

即保持在兩條直線的中間
如果太偏左(兩條直線的斜率都為正)，那麼讓人物向右開
如果太偏右(兩條直線的斜率都為負)，那麼讓人物向左開

獲取斜率

斜率其實已經在函式compare_lines()裡計算出來了，只是沒有返回；找到斜率（我用紅色框框起來的地方），讓函式把它一併返回即可

接收斜率

在函式draw_lines()裡，呼叫了compare_lines()，增加變數接收資料即可

在函式convert_To_gray()裡，呼叫了draw_lines()

先讓draw_lines()返回斜率
convert_To_gray()接收斜率
convert_To_gray()返回斜率給主函式
聽起來比較麻煩，我畫圖解釋一下吧

主檔案

import numpy as np
from PIL import ImageGrab
import cv2
import time


def compare_lines(lines, color=[0, 255, 255], thickness=3):
    """Reduce line segments to the two most frequently seen lane lines.

    Every segment is fitted with ``y = m * x + b`` and stretched so that it
    spans from the smallest y found among all segment endpoints down to
    ``y = 600``.  Stretched segments are then grouped by similar slope and
    intercept (within 20%), and the two groups with the most members are
    averaged.

    Args:
        lines: Iterable of rows, each row an iterable of ``(x1, y1, x2, y2)``
            segments (presumably the output of ``cv2.HoughLinesP``).
        color: Unused; kept so existing callers keep working.
        thickness: Unused; kept so existing callers keep working.

    Returns:
        ``([x1, y1, x2, y2], [x1, y1, x2, y2], slope1, slope2)`` describing the
        two chosen lanes and the slopes their groups are keyed by, or ``None``
        when anything fails (for example fewer than two groups were found).
        The error message is printed in that case.
    """
    # Best effort: any failure below is reported and turned into ``None``.
    try:
        # Smallest y among all endpoints, i.e. the top of the detected lines
        # (we cannot assume the horizon will always be at the same point).
        ys = []
        for row in lines:
            for segment in row:
                ys += [segment[1], segment[3]]
        min_y = min(ys)
        # Hard-coded lower bound; presumably the bottom of the 600 px capture.
        max_y = 600
        line_dict = {}

        for idx, row in enumerate(lines):
            for xyxy in row:
                # Fit y = m*x + b through the two endpoints.  Modified from
                # http://stackoverflow.com/questions/21565994/method-to-return-the-equation-of-a-straight-line-given-two-points
                x_coords = (xyxy[0], xyxy[2])
                y_coords = (xyxy[1], xyxy[3])
                A = np.vstack([x_coords, np.ones(len(x_coords))]).T
                # rcond=None selects the current default explicitly and
                # silences the FutureWarning older NumPy versions emit.
                m, b = np.linalg.lstsq(A, y_coords, rcond=None)[0]

                # x positions of the fitted line at the top and bottom y.
                x1 = (min_y - b) / m
                x2 = (max_y - b) / m

                line_dict[idx] = [m, b, [int(x1), min_y, int(x2), max_y]]

        # Group stretched segments into lanes, keyed by a representative slope.
        # NOTE(review): a segment whose slope matches a group but whose
        # intercept does not is neither merged nor given a new group, and a
        # slope mismatch creates a new group even if a later group matches.
        # This mirrors the original behaviour exactly; confirm it is intended.
        final_lanes = {}

        for idx in line_dict:
            final_lanes_copy = final_lanes.copy()
            m, b, line = line_dict[idx]

            if len(final_lanes) == 0:
                final_lanes[m] = [[m, b, line]]
                continue

            for other_ms in final_lanes_copy:
                if abs(other_ms * 1.2) > abs(m) > abs(other_ms * 0.8):
                    other_b = final_lanes_copy[other_ms][0][1]
                    if abs(other_b * 1.2) > abs(b) > abs(other_b * 0.8):
                        final_lanes[other_ms].append([m, b, line])
                        break
                else:
                    final_lanes[m] = [[m, b, line]]

        line_counter = {}
        for lane_slope in final_lanes:
            line_counter[lane_slope] = len(final_lanes[lane_slope])

        # Ascending sort followed by a reversal (not reverse=True) so that the
        # ordering of groups with equal counts stays as it always was.
        top_lanes = sorted(line_counter.items(), key=lambda item: item[1])[::-1][:2]

        lane1_id = top_lanes[0][0]
        lane2_id = top_lanes[1][0]

        def average_lane(lane_data):
            """Return the truncated mean (x1, y1, x2, y2) of one lane group."""
            segments = [data[2] for data in lane_data]
            return tuple(int(np.mean([seg[k] for seg in segments])) for k in range(4))

        l1_x1, l1_y1, l1_x2, l1_y2 = average_lane(final_lanes[lane1_id])
        l2_x1, l2_y1, l2_x2, l2_y2 = average_lane(final_lanes[lane2_id])

        return [l1_x1, l1_y1, l1_x2, l1_y2], [l2_x1, l2_y1, l2_x2, l2_y2], lane1_id, lane2_id
    except Exception as e:
        print(str(e))
        return None


def draw_lines(image, gray_img, lines):
    """Draw the two detected lanes on ``image`` and return their slopes.

    When ``compare_lines`` yields two lanes, both are drawn in green on
    ``image`` and their slopes are returned.  Otherwise the raw segments are
    drawn on ``gray_img`` as a fallback and neutral slopes are returned.

    Args:
        image: Image the two lane lines are drawn onto (modified in place).
        gray_img: Image the raw segments are drawn onto when lane detection
            fails (modified in place).
        lines: Line segments, forwarded unchanged to ``compare_lines``.

    Returns:
        ``(slope1, slope2)`` of the two lanes, or ``(0, 0)`` when no lane pair
        could be determined.  A 2-tuple is always returned so callers can
        unpack the result unconditionally.
    """
    try:
        l1, l2, slope1, slope2 = compare_lines(lines)
        cv2.line(image, (l1[0], l1[1]), (l1[2], l1[3]), [0, 255, 0], 30)
        cv2.line(image, (l2[0], l2[1]), (l2[2], l2[3]), [0, 255, 0], 30)
        return slope1, slope2
    except Exception as e:
        # compare_lines returns None on failure, which makes the unpacking
        # above raise; report it and fall back to drawing the raw segments.
        print(str(e))

    try:
        for coords in lines:
            coords = coords[0]
            try:
                cv2.line(gray_img, (coords[0], coords[1]), (coords[2], coords[3]), [255, 0, 0], 3)
            except Exception as e:
                print(str(e))
    except Exception:
        # ``lines`` may be None (nothing detected); there is nothing to draw.
        pass

    # Neutral slopes: the caller treats (0, 0) as "no steering correction".
    return 0, 0


def roi(img, vertices):
    """Return ``img`` with everything outside the given polygon(s) zeroed.

    A zero mask shaped like ``img`` is filled with 255 inside ``vertices`` and
    then AND-ed bitwise with ``img``.
    """
    region_mask = np.zeros_like(img)
    cv2.fillPoly(region_mask, vertices, 255)
    return cv2.bitwise_and(img, region_mask)


def convert_To_gray(image):
    """Detect lane lines in ``image`` and return the slopes of the two lanes.

    Pipeline: grayscale -> Canny edges -> Gaussian blur -> polygon mask ->
    probabilistic Hough transform -> ``draw_lines`` (which draws onto
    ``image`` in place).

    Args:
        image: Colour frame to analyse.
            NOTE(review): converted with COLOR_BGR2GRAY; confirm the caller
            really passes BGR data.

    Returns:
        ``(slope1, slope2)``; ``(0, 0)`` when no lane pair was found.
    """
    # to gray
    gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # canny
    gray_img = cv2.Canny(gray_img, threshold1=100, threshold2=200)
    # Gaussian blur
    gray_img = cv2.GaussianBlur(gray_img, ksize=(5, 5), sigmaX=0)
    # Mask the image: keep only the data inside the polygon of interest.
    vertices = np.array([[10, 500], [10, 300], [300, 200], [500, 200], [800, 300], [800, 500],
                         ], np.int32)
    gray_img = roi(gray_img, [vertices])
    # Find line segments and draw the lanes.
    lines = cv2.HoughLinesP(gray_img, rho=1, theta=np.pi / 180, threshold=180, lines=np.array([]), minLineLength=150,
                            maxLineGap=5)
    try:
        slopes = draw_lines(image=image, gray_img=gray_img, lines=lines)
    except Exception as e:
        # Stay best-effort, but no longer swallow KeyboardInterrupt/SystemExit.
        print(str(e))
        slopes = None

    # draw_lines may return None when no lane pair was found.
    if slopes is None:
        return 0, 0

    slope1, slope2 = slopes
    return slope1, slope2


from Keys import PressKey, ReleaseKey, Key_Mapping


def go_forward():
    """Press the W key; no other key is touched here."""
    forward_key = Key_Mapping.w
    PressKey(forward_key)


def go_left():
    """Tap the A key: press A, then release W and A, in that order."""
    PressKey(Key_Mapping.a)
    for key in (Key_Mapping.w, Key_Mapping.a):
        ReleaseKey(key)
def go_right():
    """Tap the D key: press D, then release W and D, in that order."""
    PressKey(Key_Mapping.d)
    for key in (Key_Mapping.w, Key_Mapping.d):
        ReleaseKey(key)

def depressAll():
    """Release the W, A, S and D keys, in that order."""
    for key in (Key_Mapping.w, Key_Mapping.a, Key_Mapping.s, Key_Mapping.d):
        ReleaseKey(key)


def screen_record():
    """Capture the game window in a loop and steer from the lane slopes.

    After a short countdown, every iteration grabs the screen region
    (0, 40)-(800, 640), runs ``convert_To_gray`` on it, sends key presses
    according to the signs of the two returned slopes and shows the frame in
    an OpenCV window.  Pressing ``q`` in that window ends the loop.

    On exit, including via an exception or Ctrl-C, W/A/S/D are released and
    the OpenCV windows are closed so no virtual key stays held down.
    """
    # Countdown: three ticks of 0.5 s before capturing starts.
    for remaining in range(3, 0, -1):
        print("time:", remaining)
        time.sleep(0.5)

    last_time = time.time()
    try:
        while True:
            # 800x600 windowed mode for GTA 5, at the top left position of your main screen.
            # 40 px accounts for title bar.
            printscreen = np.array(ImageGrab.grab(bbox=(0, 40, 800, 640)))
            print('loop took {} seconds'.format(time.time() - last_time))
            last_time = time.time()
            slope1, slope2 = convert_To_gray(printscreen)
            # Decide whether the car has drifted to one side.
            # NOTE(review): which slope sign corresponds to which side depends
            # on image coordinates (y grows downward); confirm the steering
            # directions in-game.
            if slope1 < 0 and slope2 < 0:
                go_right()
                print("right"*10)
            elif slope1 > 0 and slope2 > 0:
                go_left()
                print("left"*10)
            else:
                go_forward()
                print('go'*10)
            cv2.imshow('window', cv2.cvtColor(printscreen, cv2.COLOR_BGR2RGB))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Never leave simulated keys pressed once the loop is over.
        depressAll()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    # Start capturing and steering only when run as a script, not on import.
    screen_record()

按鍵對映檔案 Keys.py (不要改名，因為主檔案是用 from Keys import PressKey, ReleaseKey, Key_Mapping 匯入它的)

import ctypes
import time

# Module-level handle to user32.SendInput (ctypes.windll exists only on
# Windows). PressKey/ReleaseKey below look the function up again through
# ctypes.windll rather than using this name.
SendInput = ctypes.windll.user32.SendInput

# C struct redefinitions
# Pointer-to-unsigned-long type used for the "dwExtraInfo" fields below.
PUL = ctypes.POINTER(ctypes.c_ulong)


class KeyBdInput(ctypes.Structure):
    """Keyboard event record stored in the ``ki`` member of ``Input_I``.

    PressKey/ReleaseKey build it positionally as
    ``(wVk, wScan, dwFlags, time, dwExtraInfo)``.
    NOTE(review): field names and types follow the Win32 KEYBDINPUT struct;
    confirm against the Windows documentation before changing the layout.
    """
    # Field order and types define the binary layout; do not reorder.
    _fields_ = [("wVk", ctypes.c_ushort),
                ("wScan", ctypes.c_ushort),
                ("dwFlags", ctypes.c_ulong),
                ("time", ctypes.c_ulong),
                ("dwExtraInfo", PUL)]


class HardwareInput(ctypes.Structure):
    """Hardware event record stored in the ``hi`` member of ``Input_I``.

    Follows the Win32 HARDWAREINPUT layout: one DWORD followed by two WORDs.
    Both WORDs are unsigned, so ``wParamL`` is declared as ``c_ushort`` (it
    was ``c_short``, which has the same size but reads values above 0x7FFF
    back as negative numbers).
    """
    # Field order and types define the binary layout; do not reorder.
    _fields_ = [("uMsg", ctypes.c_ulong),
                ("wParamL", ctypes.c_ushort),
                ("wParamH", ctypes.c_ushort)]


class MouseInput(ctypes.Structure):
    """Mouse event record stored in the ``mi`` member of ``Input_I``.

    Not instantiated anywhere in the visible code; it is only declared as a
    member of the union.
    NOTE(review): field names and types follow the Win32 MOUSEINPUT struct;
    confirm against the Windows documentation before changing the layout.
    """
    # Field order and types define the binary layout; do not reorder.
    _fields_ = [("dx", ctypes.c_long),
                ("dy", ctypes.c_long),
                ("mouseData", ctypes.c_ulong),
                ("dwFlags", ctypes.c_ulong),
                ("time", ctypes.c_ulong),
                ("dwExtraInfo", PUL)]


class Input_I(ctypes.Union):
    """Union of the keyboard, mouse and hardware event records.

    Only the ``ki`` member is assigned by PressKey/ReleaseKey.
    """
    # All members share the same storage because this is a ctypes.Union.
    _fields_ = [("ki", KeyBdInput),
                ("mi", MouseInput),
                ("hi", HardwareInput)]


class Input(ctypes.Structure):
    """Top-level record handed to SendInput: a type tag plus the event union.

    PressKey/ReleaseKey create it with ``type`` set to 1 and a union whose
    ``ki`` member is filled in.
    NOTE(review): 1 presumably means "keyboard input" (Win32 INPUT_KEYBOARD);
    confirm against the Windows documentation.
    """
    # Field order and types define the binary layout; do not reorder.
    _fields_ = [("type", ctypes.c_ulong),
                ("ii", Input_I)]


# Actuals Functions

def PressKey(hexKeyCode):
    """Send one keyboard event for ``hexKeyCode`` with flags 0x0008.

    ``hexKeyCode`` is placed in the ``wScan`` field (``wVk`` stays 0) and the
    event is delivered through user32.SendInput.
    NOTE(review): 0x0008 presumably is KEYEVENTF_SCANCODE, i.e. a key-down
    identified by scan code; confirm against the Windows documentation.
    """
    flags = 0x0008
    extra_info = ctypes.c_ulong(0)
    payload = Input_I()
    payload.ki = KeyBdInput(0, hexKeyCode, flags, 0, ctypes.pointer(extra_info))
    event = Input(ctypes.c_ulong(1), payload)
    ctypes.windll.user32.SendInput(1, ctypes.pointer(event), ctypes.sizeof(event))


def ReleaseKey(hexKeyCode):
    """Send one keyboard event for ``hexKeyCode`` with flags 0x0008 | 0x0002.

    ``hexKeyCode`` is placed in the ``wScan`` field (``wVk`` stays 0) and the
    event is delivered through user32.SendInput.
    NOTE(review): 0x0002 presumably is KEYEVENTF_KEYUP, making this the
    key-up counterpart of PressKey; confirm against the Windows documentation.
    """
    flags = 0x0008 | 0x0002
    extra_info = ctypes.c_ulong(0)
    payload = Input_I()
    payload.ki = KeyBdInput(0, hexKeyCode, flags, 0, ctypes.pointer(extra_info))
    event = Input(ctypes.c_ulong(1), payload)
    ctypes.windll.user32.SendInput(1, ctypes.pointer(event), ctypes.sizeof(event))


class Key_Mapping:
    """Named key codes to pass to ``PressKey`` / ``ReleaseKey``.

    The values are used as keyboard scan codes: PressKey/ReleaseKey put them
    into the ``wScan`` field of the keyboard event.
    NOTE(review): the codes from 0xb5 upward look like DirectInput DIK_*
    extended-key codes; confirm they work with the scan-code flag as sent.

    The attribute ``next`` shadows the builtin only inside this class body and
    is kept because callers may refer to ``Key_Mapping.next``.
    """

    # Number row and neighbouring keys.
    num1 = 0x02
    num2 = 0x03
    num3 = 0x04
    num4 = 0x05
    num5 = 0x06
    num6 = 0x07
    num7 = 0x08
    num8 = 0x09
    num9 = 0x0a
    num0 = 0x0b
    escape = 0x01
    equal = 0x0d
    backspace = 0x0e
    tab = 0x0f

    # Letter rows, with enter and left control in between.
    q = 0x10
    w = 0x11
    e = 0x12
    r = 0x13
    t = 0x14
    y = 0x15
    u = 0x16
    i = 0x17
    o = 0x18
    p = 0x19
    enter = 0x1c
    lcontrol = 0x1d
    a = 0x1e
    s = 0x1f
    d = 0x20
    f = 0x21
    g = 0x22
    h = 0x23
    j = 0x24
    k = 0x25
    l = 0x26
    z = 0x2c
    x = 0x2d
    c = 0x2e
    v = 0x2f
    b = 0x30
    n = 0x31
    m = 0x32

    # Modifiers and miscellaneous keys.
    shift = 0x36
    multiply = 0x37
    space = 0x39
    capital = 0x3a

    # Function keys and num lock.
    f1 = 0x3b
    f2 = 0x3c
    f3 = 0x3d
    f4 = 0x3e
    f5 = 0x3f
    f6 = 0x40
    f7 = 0x41
    f8 = 0x42
    f9 = 0x43
    f10 = 0x44
    numlock = 0x45
    f11 = 0x57
    f12 = 0x58

    # Navigation block (these were listed twice with identical values; the
    # duplicate definitions have been removed).
    divide = 0xb5
    home = 0xc7
    up = 0xc8
    prior = 0xc9
    left = 0xcb
    right = 0xcd
    end = 0xcf
    down = 0xd0
    next = 0xd1
    insert = 0xd2
    delete = 0xd3