基於影象處理和tensorflow實現GTA5的車輛自動駕駛——第八節偽AI駕駛
阿新 • • 發佈:2020-12-16
綜述
- 上節實現了在理想環境下給道路畫線的功能
- 不理想的環境1:如果不在道路中間,而靠近道路右邊,會出現下面的情況(直線的斜率同號,且直線的斜率為負)
- 不理想的環境2:如果不在道路中間,而靠近道路左邊,會出現下面的情況(直線的斜率同號,且直線的斜率為正)
方法
正確行駛
- 即保持在兩條直線的中間
- 如果太偏左(兩條直線的斜率都為正),那麼讓人物向右開
- 如果太偏右(兩條直線的斜率都為負),那麼讓人物向左開
獲取斜率
斜率其實已經在函式compare_lines()裡算出來了(就是lane1_id和lane2_id,也就是我用紅色框框起來的地方),只是沒有返回,把它們加進返回值即可
接收斜率
在函式draw_lines()裡,呼叫了compare_lines(),增加變數接收資料即可
在函式convert_To_gray()裡,呼叫了draw_lines()
- 先讓draw_lines()返回斜率
- convert_To_gray()接收斜率
- convert_To_gray()返回斜率給主函式
聽起來比較麻煩,我畫圖解釋一下吧
主檔案
import numpy as np
from PIL import ImageGrab
import cv2
import time

from Keys import PressKey, ReleaseKey, Key_Mapping

# Segments flatter than this cannot be extrapolated to a given y (x = (y - b) / m
# blows up), so they are ignored when building lane candidates.
_MIN_ABS_SLOPE = 1e-6


def _fit_line(x1, y1, x2, y2):
    """Return ``(m, b)`` of the least-squares line ``y = m * x + b`` through two points."""
    x_coords = (x1, x2)
    y_coords = (y1, y2)
    A = np.vstack([x_coords, np.ones(len(x_coords))]).T
    # rcond=None selects NumPy's current default and avoids the FutureWarning.
    m, b = np.linalg.lstsq(A, y_coords, rcond=None)[0]
    return m, b


def _within_20_percent(value, reference):
    """Return True when ``|value|`` lies strictly between 80% and 120% of ``|reference|``."""
    return abs(reference * 1.2) > abs(value) > abs(reference * 0.8)


def _average_lane(lane_data):
    """Average the extended end points of every ``[m, b, [x1, y1, x2, y2]]`` entry in a lane."""
    x1s = [entry[2][0] for entry in lane_data]
    y1s = [entry[2][1] for entry in lane_data]
    x2s = [entry[2][2] for entry in lane_data]
    y2s = [entry[2][3] for entry in lane_data]
    return int(np.mean(x1s)), int(np.mean(y1s)), int(np.mean(x2s)), int(np.mean(y2s))


def compare_lines(lines, color=(0, 255, 255), thickness=3, max_y=600):
    """Reduce a set of line segments to the two dominant lane lines.

    Every segment is fitted as ``y = m * x + b`` and stretched so that it runs
    from the smallest y found in any segment down to ``max_y``.  Segments whose
    slope and intercept are both within 20% of a lane's first member are grouped
    into that lane; a segment that matches no lane starts a new one.  The two
    lanes with the most members are averaged and returned.

    Args:
        lines: Iterable of groups of ``[x1, y1, x2, y2]`` segments, or ``None``.
            (convert_To_gray passes the result of ``cv2.HoughLinesP`` here.)
        color: Unused; kept so existing callers keep working.
        thickness: Unused; kept so existing callers keep working.
        max_y: y coordinate every lane is extended down to.  Defaults to 600,
            the height of the screen region grabbed in ``screen_record``.

    Returns:
        ``([x1, y1, x2, y2], [x1, y1, x2, y2], slope1, slope2)`` for the two
        dominant lanes, or ``None`` when fewer than two lanes could be found.
    """
    if lines is None or len(lines) == 0:
        return None

    # Per-frame best effort: any numerical failure is reported and treated as
    # "no lanes found" so the driving loop keeps running.
    try:
        # The horizon is not at a fixed height, so use the top-most y seen.
        min_y = min(y for group in lines for seg in group for y in (seg[1], seg[3]))

        candidates = []
        for group in lines:
            for seg in group:
                m, b = _fit_line(seg[0], seg[1], seg[2], seg[3])
                if not np.isfinite(m) or abs(m) < _MIN_ABS_SLOPE:
                    # A (near-)horizontal segment has no finite x at min_y/max_y;
                    # skip it instead of aborting the whole frame.
                    continue
                x_top = (min_y - b) / m
                x_bottom = (max_y - b) / m
                candidates.append([m, b, [int(x_top), min_y, int(x_bottom), max_y]])

        # Each lane is a list of candidates; its first member defines the
        # reference slope/intercept (and the slope reported to the caller).
        lanes = []
        for candidate in candidates:
            m, b = candidate[0], candidate[1]
            for lane in lanes:
                if _within_20_percent(m, lane[0][0]) and _within_20_percent(b, lane[0][1]):
                    lane.append(candidate)
                    break
            else:
                lanes.append([candidate])

        # Most populated lanes first (ties: the lane created later wins).
        top_lanes = sorted(lanes, key=len)[::-1][:2]
        if len(top_lanes) < 2:
            return None

        lane1, lane2 = top_lanes
        l1_x1, l1_y1, l1_x2, l1_y2 = _average_lane(lane1)
        l2_x1, l2_y1, l2_x2, l2_y2 = _average_lane(lane2)
        return ([l1_x1, l1_y1, l1_x2, l1_y2],
                [l2_x1, l2_y1, l2_x2, l2_y2],
                lane1[0][0],
                lane2[0][0])
    except Exception as e:
        print(str(e))
        return None


def draw_lines(image, gray_img, lines):
    """Draw the two dominant lanes on ``image`` and return their slopes.

    When no lane pair is available (or drawing it fails), every raw segment is
    drawn on ``gray_img`` instead and ``(0, 0)`` is returned.

    Args:
        image: Image the two lane lines are drawn on (modified in place).
        gray_img: Image the raw segments are drawn on in the fallback case.
        lines: Segments as accepted by ``compare_lines``; may be ``None``.

    Returns:
        ``(slope1, slope2)`` of the two lanes, or ``(0, 0)`` in the fallback case.
    """
    lanes = compare_lines(lines)
    if lanes is not None:
        l1, l2, slope1, slope2 = lanes
        try:
            cv2.line(image, (l1[0], l1[1]), (l1[2], l1[3]), [0, 255, 0], 30)
            cv2.line(image, (l2[0], l2[1]), (l2[2], l2[3]), [0, 255, 0], 30)
            return slope1, slope2
        except Exception as e:
            # A drawing failure must not stop the loop; report it and fall back.
            print(str(e))

    if lines is not None:
        for coords in lines:
            coords = coords[0]
            try:
                cv2.line(gray_img, (coords[0], coords[1]), (coords[2], coords[3]), [255, 0, 0], 3)
            except Exception as e:
                print(str(e))
    return 0, 0


def roi(img, vertices):
    """Return ``img`` with everything outside the polygon(s) ``vertices`` set to zero."""
    mask = np.zeros_like(img)
    cv2.fillPoly(mask, vertices, 255)
    masked = cv2.bitwise_and(img, mask)
    return masked


def convert_To_gray(image):
    """Detect lane lines in ``image`` and return the slopes of the two dominant lanes.

    Pipeline: grayscale -> Canny edges -> Gaussian blur -> region-of-interest
    mask -> probabilistic Hough transform -> ``draw_lines``.  The detected lanes
    are drawn onto ``image`` in place.

    Returns:
        ``(slope1, slope2)``, or ``(0, 0)`` when no lane pair was found.
    """
    # to gray
    gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # canny edge detection
    gray_img = cv2.Canny(gray_img, threshold1=100, threshold2=200)
    # Gaussian blur
    gray_img = cv2.GaussianBlur(gray_img, ksize=(5, 5), sigmaX=0)
    # mask: keep only the polygon in front of the car (coordinates are
    # hard-coded for the 800x600 capture)
    vertices = np.array([[10, 500], [10, 300], [300, 200], [500, 200], [800, 300], [800, 500],
                         ], np.int32)
    gray_img = roi(gray_img, [vertices])

    # find line segments and draw the lanes
    lines = cv2.HoughLinesP(gray_img, rho=1, theta=np.pi / 180, threshold=180, lines=np.array([]),
                            minLineLength=150, maxLineGap=5)
    return draw_lines(image=image, gray_img=gray_img, lines=lines)


def go_forward():
    """Press W and leave it held down."""
    PressKey(Key_Mapping.w)


def go_left():
    """Tap A (press, then release) and release W."""
    PressKey(Key_Mapping.a)
    ReleaseKey(Key_Mapping.w)
    ReleaseKey(Key_Mapping.a)


def go_right():
    """Tap D (press, then release) and release W."""
    PressKey(Key_Mapping.d)
    ReleaseKey(Key_Mapping.w)
    ReleaseKey(Key_Mapping.d)


def depressAll():
    """Release W, A, S and D."""
    ReleaseKey(Key_Mapping.w)
    ReleaseKey(Key_Mapping.a)
    ReleaseKey(Key_Mapping.s)
    ReleaseKey(Key_Mapping.d)


def screen_record():
    """Grab the game window in a loop, detect lanes and steer until 'q' is pressed.

    After a short countdown, each iteration captures the screen region, runs
    ``convert_To_gray`` on it, sends a steering key based on the sign of the two
    lane slopes and shows the annotated frame.  All driving keys are released
    when the loop ends, whether normally or through an exception.
    """
    # Countdown so the user can switch focus to the game window.
    for remaining in (3, 2, 1):
        print("time:", remaining)
        time.sleep(0.5)

    last_time = time.time()
    try:
        while True:
            # 800x600 windowed mode for GTA 5, at the top left position of your main screen.
            # 40 px accounts for title bar.
            printscreen = np.array(ImageGrab.grab(bbox=(0, 40, 800, 640)))
            print('loop took {} seconds'.format(time.time() - last_time))
            last_time = time.time()

            slope1, slope2 = convert_To_gray(printscreen)

            # Both lane slopes share a sign -> steer; otherwise drive straight on.
            # NOTE(review): the write-up accompanying this script describes the
            # opposite mapping (both slopes positive -> steer right); confirm
            # which direction is intended.
            if slope1 < 0 and slope2 < 0:
                go_right()
                print("right"*10)
            elif slope1 > 0 and slope2 > 0:
                go_left()
                print("left"*10)
            else:
                go_forward()
                print('go'*10)

            # cv2.imshow('window', gray_img)
            cv2.imshow('window', cv2.cvtColor(printscreen, cv2.COLOR_BGR2RGB))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
    finally:
        # go_forward() leaves W held down; never exit with keys still pressed.
        depressAll()


screen_record()
按鍵對映檔案 Keys.py (不要改名,因為主檔案是用 from Keys import PressKey, ReleaseKey, Key_Mapping 匯入它的)
import ctypes
import time

SendInput = ctypes.windll.user32.SendInput

# Win32 SendInput constants (see the INPUT / KEYBDINPUT documentation).
INPUT_KEYBOARD = 1           # INPUT.type value for a keyboard event
KEYEVENTF_KEYUP = 0x0002     # the key is being released
KEYEVENTF_SCANCODE = 0x0008  # wScan identifies the key; wVk is ignored

# C struct redefinitions
PUL = ctypes.POINTER(ctypes.c_ulong)


class KeyBdInput(ctypes.Structure):
    """ctypes mirror of the Win32 KEYBDINPUT structure."""
    _fields_ = [("wVk", ctypes.c_ushort),
                ("wScan", ctypes.c_ushort),
                ("dwFlags", ctypes.c_ulong),
                ("time", ctypes.c_ulong),
                ("dwExtraInfo", PUL)]


class HardwareInput(ctypes.Structure):
    """ctypes mirror of the Win32 HARDWAREINPUT structure."""
    # NOTE(review): Win32 declares wParamL as WORD (unsigned); c_short has the
    # same size and is kept as originally written.
    _fields_ = [("uMsg", ctypes.c_ulong),
                ("wParamL", ctypes.c_short),
                ("wParamH", ctypes.c_ushort)]


class MouseInput(ctypes.Structure):
    """ctypes mirror of the Win32 MOUSEINPUT structure."""
    _fields_ = [("dx", ctypes.c_long),
                ("dy", ctypes.c_long),
                ("mouseData", ctypes.c_ulong),
                ("dwFlags", ctypes.c_ulong),
                ("time", ctypes.c_ulong),
                ("dwExtraInfo", PUL)]


class Input_I(ctypes.Union):
    """Union of the three event payloads carried by an INPUT structure."""
    _fields_ = [("ki", KeyBdInput),
                ("mi", MouseInput),
                ("hi", HardwareInput)]


class Input(ctypes.Structure):
    """ctypes mirror of the Win32 INPUT structure."""
    _fields_ = [("type", ctypes.c_ulong),
                ("ii", Input_I)]


# Actual functions

def _send_key_event(hexKeyCode, flags):
    """Send one keyboard event for scan code ``hexKeyCode`` with the given ``dwFlags``."""
    extra = ctypes.c_ulong(0)
    ii_ = Input_I()
    # wVk is 0 because the key is identified by its scan code (wScan).
    ii_.ki = KeyBdInput(0, hexKeyCode, flags, 0, ctypes.pointer(extra))
    x = Input(ctypes.c_ulong(INPUT_KEYBOARD), ii_)
    SendInput(1, ctypes.pointer(x), ctypes.sizeof(x))


def PressKey(hexKeyCode):
    """Send a key-down event for the given scan code (e.g. ``Key_Mapping.w``)."""
    _send_key_event(hexKeyCode, KEYEVENTF_SCANCODE)


def ReleaseKey(hexKeyCode):
    """Send a key-up event for the given scan code (e.g. ``Key_Mapping.w``)."""
    _send_key_event(hexKeyCode, KEYEVENTF_SCANCODE | KEYEVENTF_KEYUP)


class Key_Mapping:
    """Key codes to pass to ``PressKey`` / ``ReleaseKey``."""
    num1 = 0x02
    num2 = 0x03
    num3 = 0x04
    num4 = 0x05
    num5 = 0x06
    num6 = 0x07
    num7 = 0x08
    num8 = 0x09
    num9 = 0x0a
    num0 = 0x0b
    escape = 0x01
    equal = 0x0d
    backspace = 0x0e
    tab = 0x0f
    q = 0x10
    w = 0x11
    e = 0x12
    r = 0x13
    t = 0x14
    y = 0x15
    u = 0x16
    i = 0x17
    o = 0x18
    p = 0x19
    enter = 0x1c
    lcontrol = 0x1d
    a = 0x1e
    s = 0x1f
    d = 0x20
    f = 0x21
    g = 0x22
    h = 0x23
    j = 0x24
    k = 0x25
    l = 0x26
    z = 0x2c
    x = 0x2d
    c = 0x2e
    v = 0x2f
    b = 0x30
    n = 0x31
    m = 0x32
    shift = 0x36
    multiply = 0x37
    space = 0x39
    capital = 0x3a
    f1 = 0x3b
    f2 = 0x3c
    f3 = 0x3d
    f4 = 0x3e
    f5 = 0x3f
    f6 = 0x40
    f7 = 0x41
    f8 = 0x42
    f9 = 0x43
    f10 = 0x44
    numlock = 0x45
    f11 = 0x57
    f12 = 0x58
    divide = 0xb5
    home = 0xc7
    up = 0xc8
    prior = 0xc9
    left = 0xcb
    right = 0xcd
    end = 0xcf
    down = 0xd0
    next = 0xd1
    insert = 0xd2
    delete = 0xd3