1. 程式人生 > >ROS Python中深度學習前向傳播速度變慢

ROS Python中深度學習前向傳播速度變慢

問題描述:

yolo檢測演算法,在非ROS環境下前向傳播時間0.019s左右,放在ros環境下就1.3s了。慢的難以忍受

分析原因:

因為前向傳播在訂閱的回撥函式(callback)所在的執行緒中執行,受到回撥執行緒排程的影響,所以變慢了。需要把前向傳播移出回撥,另開執行緒處理 ROS 訊息

解決:

訂閱回撥只負責儲存全域性影象,另開執行緒執行 rospy.spin(),前向傳播在主執行緒中進行

關鍵語句:

(1)

import thread


def call_rosspin():
  # Service ROS callbacks from a dedicated thread so the main thread
  # stays free for the (slow) forward pass.
  print ('running rosspin...')
  rospy.spin()

# Spin ROS in a background thread (Python 2 `thread` module).
thread.start_new_thread(call_rosspin, ())

(2)訂閱函式寫為儲存全域性影象

def img_L_sub(imgmsg):
  # Subscriber callback: do only the cheap work here -- convert the ROS
  # image to an OpenCV BGR array and store it globally. The heavy
  # forward pass runs outside the callback.
  global img_L
  global flag_img_L
  bridge = CvBridge()
  img_L = bridge.imgmsg_to_cv2(imgmsg, "bgr8")
  flag_img_L = True  # signals the main loop that a fresh frame is ready
  print 'img_L recieved'

(3)例項程式

#!/usr/bin/env python
import numpy as np
import cv2
import os, sys

import rospy
from sensor_msgs.msg import Image
from cv_bridge import CvBridge
import thread
from datetime import datetime
from yolo_ros.msg import TargetsInfo

# Make the caffe python bindings importable before `import caffe`.
caffe_root = '/home/alvin/Desktop/caffe-yolo/'
sys.path.insert(0, caffe_root + 'python')
import caffe
caffe.set_device(0)
caffe.set_mode_gpu()

# Per-channel mean, shaped (3, 1, 1) so it broadcasts over a CHW image.
# NOTE(review): 104/117/123 look like the usual ImageNet BGR means, matching
# the "bgr8" frames from cv_bridge -- confirm against the training prototxt.
mean = np.require([104, 117, 123], dtype=np.float32)[:, np.newaxis, np.newaxis]

# Latest frame shared between the ROS callback and the main loop.
# BUG FIX: the original used cv2.imread('Null'), which normally returns None
# anyway but would silently load a file named 'Null' if one existed in the
# working directory -- use an explicit None instead.
img_L = None
flag_img_L = False

pb = 0.3  # minimum confidence for publishing/drawing a detection

def img_L_sub(imgmsg):
  global img_L
  global flag_img_L
  bridge = CvBridge()
  img_L = bridge.imgmsg_to_cv2(imgmsg, "bgr8")
  flag_img_L = True
  print 'img_L recieved'


def call_rosspin():
  # Runs rospy.spin() so subscriber callbacks keep being serviced from a
  # background thread while the main thread does inference.
  print ('running rosspin...')
  rospy.spin()

def nms(dets, thresh):
  """Pure-Python non-maximum suppression (after Ross Girshick's baseline).

  Each row of `dets` is [cx, cy, w, h, score, class] with center/size box
  encoding, so the four corners are derived first. Greedily keeps the
  highest-scoring box and suppresses any remaining box whose IoU with it
  exceeds `thresh`. Returns the surviving rows of `dets`.
  """
  x1 = dets[:, 0] - dets[:, 2] / 2.
  y1 = dets[:, 1] - dets[:, 3] / 2.
  x2 = dets[:, 0] + dets[:, 2] / 2.
  y2 = dets[:, 1] + dets[:, 3] / 2.
  scores = dets[:, 4]
  areas = (x2 - x1 + 1) * (y2 - y1 + 1)
  order = scores.argsort()[::-1]  # candidate indices, best score first
  keep = []
  while order.size > 0:
    i = order[0]
    keep.append(i)
    # IoU of the current best box against everything still in play.
    xx1 = np.maximum(x1[i], x1[order[1:]])
    yy1 = np.maximum(y1[i], y1[order[1:]])
    xx2 = np.minimum(x2[i], x2[order[1:]])
    yy2 = np.minimum(y2[i], y2[order[1:]])
    w = np.maximum(0.0, xx2 - xx1 + 1)
    h = np.maximum(0.0, yy2 - yy1 + 1)
    inter = w * h
    ovr = inter / (areas[i] + areas[order[1:]] - inter)
    inds = np.where(ovr <= thresh)[0]
    order = order[inds + 1]  # +1: inds index into order[1:]
  # BUG FIX: np.require(keep) yields a float64 array when keep is empty,
  # and float arrays are invalid fancy indices in modern NumPy -- build an
  # explicit integer index array instead.
  return dets[np.asarray(keep, dtype=np.intp), :]

def parse_result(reg_out):
  """Decode the flat 1470-dim YOLO regression vector into detections.

  Layout for side=7, 20 classes, 2 boxes per cell:
    [class scores: 20*49] [objectness: 2*49] [box coords: 2*4*49]
  where each quantity is strided across the 49 grid cells. Each decoded
  row is [cx, cy, w, h, confidence, class id] with coordinates normalized
  to [0, 1]. Overlapping boxes are pruned with nms(thresh=0.7).
  """
  num_classes = 20
  num_objects = 2
  side = 7
  locations = side ** 2
  boxes = np.zeros((num_objects * locations, 6), dtype=np.float32)
  for i in range(locations):
    # Class scores for grid cell i (one per class, strided by `locations`).
    tmp_scores = reg_out[i:num_classes*locations:locations]
    max_class_ind = np.argsort(tmp_scores)[-1]
    max_prob = np.max(tmp_scores)
    obj_index = num_classes * locations + i
    # Confidence = best class probability * per-box objectness.
    obj_scores = max_prob * reg_out[obj_index:(obj_index+num_objects*locations):locations]
    coor_index = (num_classes + num_objects) * locations + i
    for j in range(num_objects):
      boxes[i*num_objects+j][5] = max_class_ind
      boxes[i*num_objects+j][4] = obj_scores[j]
      box_index = coor_index + j * 4 * locations
      # Box center is an offset from the cell origin. BUG FIX: use floor
      # division for the row index -- `i / side` only worked because of
      # Python 2 integer division; `i // side` gives the identical value
      # there and stays correct under Python 3.
      boxes[i*num_objects+j][0] = (i % side + reg_out[box_index + 0 * locations]) / float(side)
      boxes[i*num_objects+j][1] = (i // side + reg_out[box_index + 1 * locations]) / float(side)
      boxes[i*num_objects+j][2] = reg_out[box_index + 2 * locations] ** 2
      boxes[i*num_objects+j][3] = reg_out[box_index + 3 * locations] ** 2
  return nms(boxes, 0.7)

def show_boxes(im, boxes, thresh=0.7, show=0):
  print boxes.shape
  #im = cv2.imread(im_path)
  ori_w = im.shape[1]
  ori_h = im.shape[0]
  for box in boxes:
    if box[4] < thresh:  #box[4] is the possibality of some class 
      continue
    print box
    print box[5]
    box = box[:4]  # [:4]means what??
    x1 = max(0, int((box[0] - box[2] / 2.) * ori_w))
    y1 = max(0, int((box[1] - box[3] / 2.) * ori_h))
    x2 = min(ori_w - 1, int((box[0] + box[2] / 2.) * ori_w))
    y2 = min(ori_h - 1, int((box[1] + box[3] / 2.) * ori_h))
    cv2.rectangle(im, (x1, y1), (x2, y2), (0, 0, 255), 2)
  return im


def det(model, im_path, show=0):
  """Run one YOLO forward pass on the image at `im_path`.

  Loads the image, preprocesses it to the network's expected input
  (448x448, CHW float32, mean-subtracted), forwards it through `model`,
  decodes the regression output into boxes, and draws boxes with
  confidence >= 0.2 on the original image.

  Returns the annotated BGR image.
  """
  orig = cv2.imread(im_path)
  im = cv2.resize(orig, (448, 448))
  # HWC -> CHW float32, the layout caffe's data blob expects.
  im = np.require(im.transpose((2, 0, 1)), dtype=np.float32)
  im -= mean  # subtract the per-channel mean
  model.blobs['data'].data[...] = im  # '...' assigns into the whole blob in place
  out_blobs = model.forward()
  reg_out = out_blobs["regression"]  # flat YOLO regression output
  boxes = parse_result(reg_out[0])
  # BUG FIX: the original passed the path string to show_boxes, which
  # fails immediately on im.shape -- pass the loaded image instead.
  return show_boxes(orig, boxes, 0.2)

if __name__=="__main__":
  rospy.init_node('yolo_uav', anonymous=True)

  sub_img_L = rospy.Subscriber("/usb_cam/image_raw", Image, img_L_sub)
  targets_Pub = rospy.Publisher("/yolo/targets", TargetsInfo, queue_size=80)
  
  # Spin ROS in a background thread so image callbacks keep arriving while
  # the main thread below runs the forward pass (the whole point of the
  # blog post: inference inside the callback was ~70x slower).
  thread.start_new_thread(call_rosspin, ())


  net_proto = "./net/gnet_deploy.prototxt"
  model_path = "./models/gnet_yolo_iter_12000.caffemodel"  
  model = caffe.Net(net_proto, model_path, caffe.TEST)
  TargetsInfo_t = TargetsInfo()
  
  
  key = 100
  thresh = 0.2

  # Main inference loop. NOTE(review): this busy-waits at 100% CPU while no
  # frame is pending; a short rospy.sleep() in the no-frame branch would be
  # kinder. The flag handshake with the callback is not atomic -- a frame
  # arriving between the check and the reset can be skipped; TODO confirm
  # that dropping frames is acceptable here.
  while not rospy.is_shutdown():
    if flag_img_L == 0:
      continue
    
    flag_img_L = 0  # consume the "new frame" flag set by img_L_sub

    start = datetime.now()
    TargetsInfo_t.targetNum = 0
    
    # Preprocess: resize to the network input, HWC -> CHW float32,
    # subtract the per-channel mean.
    img = img_L
    im = cv2.resize(img,(448,448))
    im = np.require(im.transpose((2, 0, 1)), dtype=np.float32)
    im -= mean
    model.blobs['data'].data[...] = im  # '...' assigns into the whole data blob in place
    out_blobs = model.forward() 
    reg_out = out_blobs['regression']
    boxes = parse_result(reg_out[0])

    # Draw and publish the detections on the ORIGINAL frame (pixel coords).
    im = img
    print boxes.shape
    ori_w = im.shape[1]
    ori_h = im.shape[0]
    for box in boxes:   
      if box[4] < pb:   # box[4] is the detection confidence
        continue
      i = int(TargetsInfo_t.targetNum)  
      # box layout: [cx, cy, w, h, confidence, class id], coords in [0, 1]
      x1 = max(0, int((box[0] - box[2] / 2.) * ori_w))
      y1 = max(0, int((box[1] - box[3] / 2.) * ori_h))
      x2 = min(ori_w - 1, int((box[0] + box[2] / 2.) * ori_w))
      y2 = min(ori_h - 1, int((box[1] + box[3] / 2.) * ori_h))
      cv2.rectangle(im, (x1, y1), (x2, y2), (0, 0, 255), 2)
      TargetsInfo_t.targetId[i] = int(box[5])
      TargetsInfo_t.probability[i] = float(box[4])
      TargetsInfo_t.x1[i] = x1
      TargetsInfo_t.y1[i] = y1
      TargetsInfo_t.x2[i] = x2
      TargetsInfo_t.y2[i] = y2
      TargetsInfo_t.targetNum = TargetsInfo_t.targetNum +1
    targets_Pub.publish(TargetsInfo_t)
    end = datetime.now()
    elapsedTime = end-start
    elapsedTime = elapsedTime.total_seconds()*1000  # milliseconds for this frame
    elapsedTime = 1000 / elapsedTime  # invert: frames per second
    print 'TargetsInfo_t.targetNum',TargetsInfo_t.targetNum
    print 'FPS: ', elapsedTime
    cv2.imshow('out_2', im)
    key = cv2.waitKey(3)