C4: Real-time pedestrian detection——C4實時行人檢測演算法

阿新 • • 發佈：2018-12-05
http://cs.nju.edu.cn/wujx/projects/C4/C4.htm
Jianxin Wu實現的快速行人檢測方法。
Real-Time Human Detection Using Contour Cues：
http://c2inet.sce.ntu.edu.sg/Jianxin/paper/ICRA_final.pdf
C4能夠達到比現有人體檢測演算法更高的速度。在640*480的20fps視訊上，使用單核2.8GHz處理器，現有的最快系統(保證有較低的虛警率和較高的檢測率)能達到大約10fps，但它是使用了GPU的並行處理。C4在小解析度的圖片上速度更快，在480*360解析度的視訊上速度達到36.3fps，在320*240解析度大小的視訊上能達到109fps。
不需要顯式地構造Hlin的特徵向量並不是使得C4如此快的唯一原因。在INRIA資料集上進行的測試表明，第一級線性分類器Hlin是一個強大的分類器，可以過濾大約99.43%的圖片區域，只有不到0.6%的圖片區域需要HIK核分類器Hhik的處理。C4處理INRIA測試集中的所有圖片用了27.1秒，而HOG檢測器[1]需要2167.5秒(所以C4相對於HOG大約有80倍的加速)。
下面是僅僅使用CPU執行的程式碼：
#include <iostream>
#include <fstream>
#include <algorithm>
#include <numeric>
#include <cassert>
#include <string>
#include <cmath>
#include <vector>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

#define USE_DOUBLE

#ifdef USE_DOUBLE
typedef double REAL;
#else
typedef float REAL;
#endif

template<class T> class Array2dC;

template<class T>
class Array2d
{
public:
    int nrow;
    int ncol;
    T** p;
public:
    Array2d():nrow(0),ncol(0),p(NULL) { }
    Array2d(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL)
    {
        Create(nrow,ncol);
    }
    Array2d(const Array2d<T>& source);
    virtual ~Array2d()
    {
        Clear();
    }

    Array2d<T>& operator=(const Array2d<T>& source);
    void Create(const int _nrow,const int _ncol);
    void Swap(Array2d<T>& array2);
    void Clear();
    void Zero(const T t = 0);
};

template<class T>
class Array2dC
{
public:
    int nrow;
    int ncol;
    T** p;
    T* buf;
public:
    Array2dC():nrow(0),ncol(0),p(NULL),buf(NULL) {}
    Array2dC(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL),buf(NULL)
    {
        Create(nrow,ncol);
    }
    Array2dC(const Array2dC<T>& source);
    virtual ~Array2dC()
    {
        Clear();
    }

    Array2dC<T>& operator=(const Array2dC<T>& source);
    void Create(const int _nrow,const int _ncol);
    void Swap(Array2dC<T>& array2);
    void Zero(const T t = 0);
    void Clear();
};

template<class T>
Array2d<T>::Array2d(const Array2d<T>& source):nrow(0),ncol(0),p(NULL)
{
    if(source.p!=NULL)
    {
        Create(source.nrow,source.ncol);
        for(int i=0; i<nrow; i++) std::copy(source.p[i],source.p[i]+ncol,p[i]);
    }
}

template<class T>
Array2d<T>& Array2d<T>::operator=(const Array2d<T>& source)
{
    if(source.p!=NULL)
    {
        Create(source.nrow,source.ncol);
        for(int i=0; i<nrow; i++) std::copy(source.p[i],source.p[i]+ncol,p[i]);
    }
    else
        Clear();
    return *this;
}

template<class T>
void Array2d<T>::Create(const int _nrow,const int _ncol)
{
    assert(_nrow>0 && _ncol>0);
    Clear();
    nrow = _nrow;
    ncol = _ncol;
    p = new T*[nrow];
    assert(p!=NULL);
    for(int i=0; i<nrow; i++)
    {
        p[i] = new T[ncol];
        assert(p[i]!=NULL);
    }
}

template<class T>
void Array2d<T>::Swap(Array2d<T>& array2)
{
    std::swap(nrow,array2.nrow);
    std::swap(ncol,array2.ncol);
    std::swap(p,array2.p);
}

template<class T>
void Array2d<T>::Zero(const T t)
{
    if(nrow>0)
    {
        for(int i=0; i<nrow; i++) std::fill(p[i],p[i]+ncol,t);
    }
}

template<class T>
void Array2d<T>::Clear()
{
    for(int i=0; i<nrow; i++)
    {
        delete[] p[i];
        p[i] = NULL;
    }
    delete[] p;
    p = NULL;
    nrow = ncol = 0;
}

template<class T>
Array2dC<T>::Array2dC(const Array2dC<T>& source):nrow(0),ncol(0),p(NULL),buf(NULL)
{
    if(source.buf!=NULL)
    {
        Create(source.nrow,source.ncol);
        std::copy(source.buf,source.buf+nrow*ncol,buf);
    }
}

template<class T>
Array2dC<T>& Array2dC<T>::operator=(const Array2dC<T>& source)
{
    if(source.buf!=NULL)
    {
        Create(source.nrow,source.ncol);
        std::copy(source.buf,source.buf+nrow*ncol,buf);
    }
    else
        Clear();
    return *this;
}

template<class T>
void Array2dC<T>::Create(const int _nrow,const int _ncol)
{
    assert(_nrow>0 && _ncol>0);
    if(nrow==_nrow && ncol==_ncol) return;
    Clear();
    nrow = _nrow;
    ncol = _ncol;
    buf = new T[nrow*ncol];
    assert(buf!=NULL);
    p = new T*[nrow];
    assert(p!=NULL);
    for(int i=0; i<nrow; i++) p[i] = buf + i * ncol;
}

template<class T>
void Array2dC<T>::Swap(Array2dC<T>& array2)
{
    std::swap(nrow,array2.nrow);
    std::swap(ncol,array2.ncol);
    std::swap(p,array2.p);
    std::swap(buf,array2.buf);
}

template<class T>
void Array2dC<T>::Zero(const T t)
{
    if(nrow>0) std::fill(buf,buf+nrow*ncol,t);
}

template<class T>
void Array2dC<T>::Clear()
{
    delete[] buf;
    buf = NULL;
    delete[] p;
    p = NULL;
    nrow = ncol = 0;
}


/*****************************************/
// IntImage.h
/*****************************************/

template<class T>
class IntImage:public Array2dC<T>
{
private:
    IntImage(const IntImage<T> &source) { } // prohibit copy constructor

public:
    IntImage():variance(0.0),label(-1) { }
    virtual ~IntImage()
    {
        Clear();
    }

    virtual void Clear(void);
    inline void SetSize(const int h, const int w);
    bool Load(cv::Mat img, const char channel = 'I');
    void Save(const std::string& filename) const;
    void Swap(IntImage<T>& image2);

    void CalcIntegralImageInPlace(void);
    void Resize(IntImage<T> &result,const REAL ratio) const;
    void Resize(IntImage<T>& result,const int height,const int width) const;

    IntImage<T>& operator=(const IntImage<T>& source);

    void Sobel(IntImage<REAL>& result,const bool useSqrt,const bool normalize);
public:
    using Array2dC<T>::nrow;
    using Array2dC<T>::ncol;
    using Array2dC<T>::buf;
    using Array2dC<T>::p;
    REAL variance;
    int label;
};

template<class T>
void IntImage<T>::Clear(void)
{
    Array2dC<T>::Clear();
    variance = 0.0;
    label = -1;
}

template<class T>
bool IntImage<T>::Load(cv::Mat img, const char channel)
{
    if (img.empty()) return false;

    if (channel == 'R' || channel == 'G' || channel == 'B')
    {
        int c;
        if (channel == 'B') c = 0;
        else if (channel == 'G') c = 1;
        else c = 2; // OpenCV is 'BGR' ordering
        cv::Mat planes[3];
        split(img, planes);
        img = planes[c];
    }
    else // use gray scale for all others
    {
        cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
    }

    SetSize(img.rows, img.cols);
    for(int i=0,ih=img.rows,iw=img.cols; i<ih; i++)
    {
        T* pdata = p[i];
        unsigned char* pimg = reinterpret_cast<unsigned char*>(img.data+img.step*i);
        for(int j=0; j<iw; j++) pdata[j] = pimg[j];
    }

    return true;
}

template<class T>
void IntImage<T>::Save(const std::string& filename) const
{
    IplImage* img;

    img = cvCreateImage(cvSize(ncol,nrow),IPL_DEPTH_8U,1);
    for(int i=0,ih=img->height,iw=img->width; i<ih; i++)
    {
        T* pdata = p[i];
        unsigned char* pimg = reinterpret_cast<unsigned char*>(img->imageData+img->widthStep*i);
        for(int j=0; j<iw; j++) pimg[j] = (unsigned char)pdata[j];
    }
    cvSaveImage(filename.c_str(),img);
    cvReleaseImage(&img);
}

template<class T>
void IntImage<T>::SetSize(const int h,const int w)
{
    if((h == nrow) && (w == ncol)) return;
    Clear();
    Array2dC<T>::Create(h,w);
}

template<class T>
IntImage<T>& IntImage<T>::operator=(const IntImage<T>& source)
{
    if(&source==this) return *this;
    SetSize(source.nrow,source.ncol);
    std::copy(source.buf,source.buf+nrow*ncol,buf);
    label = source.label;
    variance = source.variance;
    return *this;
}

template<class T>
void IntImage<T>::Resize(IntImage<T> &result,const REAL ratio) const
{
    Resize(result,int(nrow*ratio),int(ncol*ratio));
}

template<class T>
void IntImage<T>::Resize(IntImage<T>& result,const int height,const int width) const
{
    assert(height>0 && width>0);
    result.SetSize(height,width);
    REAL ixratio = nrow*1.0/height, iyratio = ncol*1.0/width;

    REAL* p_y = new REAL[result.ncol];
    assert(p_y!=NULL);
    int* p_y0 = new int[result.ncol];
    assert(p_y0!=NULL);
    for(int i=0; i<width; i++)
    {
        p_y[i] = i*iyratio;
        p_y0[i] = (int)p_y[i];
        if(p_y0[i]==ncol-1) p_y0[i]--;
        p_y[i] -= p_y0[i];
    }

    for(int i=0; i<height; i++)
    {
        int x0;
        REAL x;
        x = i*ixratio;
        x0 = (int)x;
        if(x0==nrow-1) x0--;
        x -= x0;
        T* rp = result.p[i];
        const T* px0 = p[x0];
        const T* px1 = p[x0+1];
        for(int j=0; j<width; j++)
        {
            int y0=p_y0[j];
            REAL y=p_y[j],fx0,fx1;

            fx0 = REAL(px0[y0] + y*(px0[y0+1]-px0[y0]));
            fx1 = REAL(px1[y0] + y*(px1[y0+1]-px1[y0]));

            rp[j] = T(fx0 + x*(fx1-fx0));
        }
    }

    delete[] p_y;
    p_y=NULL;
    delete[] p_y0;
    p_y0=NULL;
}

template<class T>
void IntImage<T>::CalcIntegralImageInPlace(void)
// We pad a zero column and a zero row, so 24*24 image will be 25*25 in size
// if the input image is not padded, the results on 1st row will be problematic
{
    for(int i=1; i<ncol; i++)   // process the first line
    {
        buf[i] += buf[i-1];
    }
    for(int i=1; i<nrow; i++)
    {
        REAL partialsum = 0;
        T* curp = p[i];
        T* prep = p[i-1];
        for(int j=0; j<ncol; j++)
        {
            partialsum += REAL(curp[j]);
            curp[j] = prep[j] + partialsum;
        }
    }
}

template<class T>
void IntImage<T>::Swap(IntImage<T>& image2)
{
    Array2dC<T>::Swap(image2);
    std::swap(variance,image2.variance);
    std::swap(label,image2.label);
}

template<class T>
void IntImage<T>::Sobel(IntImage<REAL>& result,const bool useSqrt,const bool normalize)
{
    // compute the Sobel gradient. For now, we just use the very inefficient way. Optimization can be done later
// if useSqrt = true, we compute the real Sobel gradient; otherwise, the square of it
// if normalize = true, the numbers are normalized to be in 0..255
    result.Create(nrow,ncol);
    for(int i=0; i<nrow; i++) result.p[i][0] = result.p[i][ncol-1] = 0;
    std::fill(result.p[0],result.p[0]+ncol,0.0);
    std::fill(result.p[nrow-1],result.p[nrow-1]+ncol,0.0);
    for(int i=1; i<nrow-1; i++)
    {
        T* p1 = p[i-1];
        T* p2 = p[i];
        T* p3 = p[i+1];
        REAL* pr = result.p[i];
        for(int j=1; j<ncol-1; j++)
        {
            REAL gx =     p1[j-1] - p1[j+1]
                          + 2*(p2[j-1]   - p2[j+1])
                          +    p3[j-1] - p3[j+1];
            REAL gy =     p1[j-1] - p3[j-1]
                          + 2*(p1[j]   - p3[j])
                          +    p1[j+1] - p3[j+1];
            pr[j] = gx*gx+gy*gy;
        }
    }
    if(useSqrt || normalize ) // if we want to normalize the result image, we'd better use the true Sobel gradient
        for(int i=1; i<nrow-1; i++)
            for(int j=1; j<ncol-1; j++)
                result.p[i][j] = sqrt(result.p[i][j]);

    if(normalize)
    {
        REAL minv = 1e20, maxv = -minv;
        for(int i=1; i<nrow-1; i++)
        {
            for(int j=1; j<ncol-1; j++)
            {
                if(result.p[i][j]<minv)
                    minv = result.p[i][j];
                else if(result.p[i][j]>maxv)
                    maxv = result.p[i][j];
            }
        }
        for(int i=0; i<nrow; i++) result.p[i][0] = result.p[i][ncol-1] = minv;
        for(int i=0; i<ncol; i++) result.p[0][i] = result.p[nrow-1][i] = minv;
        REAL s = 255.0/(maxv-minv);
        for(int i=0; i<nrow*ncol; i++) result.buf[i] = (result.buf[i]-minv)*s;
    }
}


/*****************************************/
// Pedestrian.h
/*****************************************/

// my replacement for the CRect class in MS MFC -- only provides a limited number of functions
class CRect
{
public:
    double left;
    double top;
    double right;
    double bottom;
public:
    CRect()
    {
        Clear();
    }
    ~CRect()
    {
        Clear();
    }
public:
    bool Empty() const
    {
        return (left >= right) || (top >= bottom);
    }
    void Clear()
    {
        left = right = top = bottom = 0;
    }
    double Size() const
    {
        if(Empty())
            return 0;
        else
            return (bottom-top)*(right-left);
    }
    // Intersect and Union of two rectangles, both function should be able to run when &result==this
    bool Intersect(CRect& result,const CRect& rect2) const;
    bool Union(CRect& result,const CRect& rect2) const;
};

class NodeDetector
{
public:
    enum NodeType { CD_LIN, CD_HIK, LINEAR, HISTOGRAM };
public:
    int type; // linear or histogram?
    Array2dC<double> classifier;
    double thresh;
    int featurelength;
    int upper_bound;
    int index;
    std::string filename;
public:
    NodeDetector(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename)
    {
        Load(_type,_featurelength,_upper_bound,_index,_filename);
        minvalue = DBL_MAX;
        maxvalue = -minvalue;
    }
    ~NodeDetector()
    {
    }

    void Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename);
    bool Classify(int* f);
private:
    double minvalue;
    double maxvalue;
public:
    void SetValues(const double v)
    {
        if(v>maxvalue) maxvalue = v;
        if(v<minvalue) minvalue = v;
    }
};

class CascadeDetector
{
public:
    int size;
    int length;
    NodeDetector** nodes;
public:

public:
    CascadeDetector()
        : size(20), length(0)
    {
        nodes = new NodeDetector*[size];
    }
    ~CascadeDetector()
    {
        for(int i=0; i<length; i++) delete nodes[i];
        delete[] nodes;
    }

    void AddNode(const NodeDetector::NodeType _type,const int _featurelength,const int _upper_bound,const char* _filename);
};

class DetectionScanner // who does the dirty jobs
{
public:
    int height,width;
    int xdiv,ydiv;
    int baseflength;
    double ratio;

    CascadeDetector* cascade;
public:
    DetectionScanner()
        : height(0), width(0), xdiv(0), ydiv(0), baseflength(0), ratio(0.0), cascade(NULL), integrals(NULL)
    {
    }
    DetectionScanner(const int _height,const int _width,const int _xdiv,const int _ydiv,
                     const int _baseflength,const double _ratio)
        :height(_height),width(_width),xdiv(_xdiv),ydiv(_ydiv),
         baseflength(_baseflength),ratio(_ratio),cascade(NULL),integrals(NULL)
    {
    }
    ~DetectionScanner()
    {
        delete cascade;
        delete[] integrals;
    }
public:
    void LoadDetector(std::vector<NodeDetector::NodeType>& types,std::vector<int>& upper_bounds,std::vector<std::string>& filenames);
private:
    IntImage<double>* integrals;
    IntImage<double> image,sobel;
    IntImage<int> ct;
    Array2dC<int> hist;
    IntImage<double> scores;

    void InitImage(IntImage<double>& original);
    void InitIntegralImages(const int stepsize);
    void ResizeImage();
public:
    int Scan(IntImage<double>& original,std::vector<CRect>& results,const int stepsize,const int round,std::ofstream* out,const int upper_bound);
    int FastScan(IntImage<double>& original,std::vector<CRect>& results,const int stepsize);
    int FeatureLength() const
    {
        return (xdiv-1)*(ydiv-1)*baseflength;
    }
};

void RunFiles();

/*****************************************/
// Pedestrian_ICRA.cpp
/*****************************************/

const int HUMAN_height = 108;
const int HUMAN_width = 36;
const int HUMAN_xdiv = 9;
const int HUMAN_ydiv = 4;
static const int EXT = 1;

// The detector
DetectionScanner scanner(HUMAN_height,HUMAN_width,HUMAN_xdiv,HUMAN_ydiv,256,0.8);

// ---------------------------------------------------------------------
// Helper functions


// compute the Sobel image "ct" from "original"
void ComputeCT(IntImage<double>& original,IntImage<int>& ct)
{
    ct.Create(original.nrow,original.ncol);
    for(int i=2; i<original.nrow-2; i++)
    {
        double* p1 = original.p[i-1];
        double* p2 = original.p[i];
        double* p3 = original.p[i+1];
        int* ctp = ct.p[i];
        for(int j=2; j<original.ncol-2; j++)
        {
            int index = 0;
            if(p2[j]<=p1[j-1]) index += 0x80;
            if(p2[j]<=p1[j]) index += 0x40;
            if(p2[j]<=p1[j+1]) index += 0x20;
            if(p2[j]<=p2[j-1]) index += 0x10;
            if(p2[j]<=p2[j+1]) index += 0x08;
            if(p2[j]<=p3[j-1]) index += 0x04;
            if(p2[j]<=p3[j]) index += 0x02;
            if(p2[j]<=p3[j+1]) index ++;
            ctp[j] = index;
        }
    }
}

// Load SVM models -- linear SVM trained using LIBLINEAR
double UseSVM_CD_FastEvaluationStructure(const char* modelfile,const int m,Array2dC<double>& result)
{
    std::ifstream in(modelfile);
    if(in.good()==false)
    {
        std::cout<<"SVM model "<<modelfile<<" can not be loaded."<<std::endl;
        exit(-1);
    }
    std::string buffer;
    std::getline(in,buffer); // first line
    std::getline(in,buffer); // second line
    std::getline(in,buffer); // third line
    in>>buffer;
    assert(buffer=="nr_feature");
    int num_dim;
    in>>num_dim;
    assert(num_dim>0 && num_dim==m);
    std::getline(in,buffer); // end of line 4
    in>>buffer;
    assert(buffer=="bias");
    int bias;
    in>>bias;
    std::getline(in,buffer); //end of line 5;
    in>>buffer;
    assert(buffer=="w");
    std::getline(in,buffer); //end of line 6
    result.Create(1,num_dim);
    for(int i=0; i<num_dim; i++) in>>result.buf[i];
    double rho = 0;
    if(bias>=0) in>>rho;
    in.close();
    return rho;
}

// Load SVM models -- Histogram Intersectin Kernel SVM trained by libHIK
double UseSVM_CD_FastEvaluationStructure(const char* modelfile, const int m, const int upper_bound, Array2dC<double>& result)
{

    std::ifstream fs(modelfile, std::fstream::binary);
	if( !fs.is_open() )
	{
		std::cout << "SVM model " << modelfile << " can not be loaded." << std::endl;
		exit(-1);
	}
    // Header
    int rows, cols, type, channels;
    fs.read((char*)&rows, sizeof(int));         // rows
    fs.read((char*)&cols, sizeof(int));         // cols
    fs.read((char*)&type, sizeof(int));         // type
    fs.read((char*)&channels, sizeof(int));     // channels

    // Data
    cv::Mat mat(rows, cols, type);
    fs.read((char*)mat.data, CV_ELEM_SIZE(type) * rows * cols);

    int num_dim = m;

    result.Create(num_dim, upper_bound);
    for(int i=0; i<num_dim; i++)
        for (int j = 0; j < upper_bound; j++)
        {
            result.p[i][j]= mat.at<double>(i, j);
        }

    return -0.00455891;
}

// find the intersection of "this" and "rect2", and put into "result"
bool CRect::Intersect(CRect& result,const CRect& rect2) const
{
    if( Empty() || rect2.Empty() ||
            left >= rect2.right || rect2.left >= right ||
            top >= rect2.bottom || rect2.top >= bottom )
    {
        result.Clear();
        return false;
    }
    result.left   = std::max( left, rect2.left );
    result.right  = std::min( right, rect2.right );
    result.top    = std::max( top, rect2.top );
    result.bottom = std::min( bottom, rect2.bottom );
    return true;
}

// find the union of "this" and "rect2", and put into "result"
bool CRect::Union(CRect& result,const CRect& rect2) const
{
    if(Empty())
    {
        if(rect2.Empty())
        {
            result.Clear();
            return false;
        }
        else
            result = rect2;
    }
    else
    {
        if(rect2.Empty())
            result = *this;
        else
        {
            result.left   = std::min( left, rect2.left );
            result.right  = std::max( right, rect2.right );
            result.top    = std::min( top, rect2.top );
            result.bottom = std::max( bottom, rect2.bottom );
        }
    }
    return true;
}

// A simple post-process (NMS, non-maximal suppression)
// "result" -- rectangles before merging
//          -- after this function it contains rectangles after NMS
// "combine_min" -- threshold of how many detection are needed to survive
void PostProcess(std::vector<CRect>& result,const int combine_min)
{
    std::vector<CRect> res1;
    std::vector<CRect> resmax;
    std::vector<int> res2;
    bool yet;
    CRect rectInter;

    for(unsigned int i=0,size_i=result.size(); i<size_i; i++)
    {
        yet = false;
        CRect& result_i = result[i];
        for(unsigned int j=0,size_r=res1.size(); j<size_r; j++)
        {
            CRect& resmax_j = resmax[j];
            if(result_i.Intersect(rectInter,resmax_j))
            {
                if(  rectInter.Size()>0.6*result_i.Size()
                        && rectInter.Size()>0.6*resmax_j.Size()
                  )
                {
                    CRect& res1_j = res1[j];
                    resmax_j.Union(resmax_j,result_i);
                    res1_j.bottom += result_i.bottom;
                    res1_j.top += result_i.top;
                    res1_j.left += result_i.left;
                    res1_j.right += result_i.right;
                    res2[j]++;
                    yet = true;
                    break;
                }
            }
        }
        if(yet==false)
        {
            res1.push_back(result_i);
            resmax.push_back(result_i);
            res2.push_back(1);
        }
    }

    for(unsigned int i=0,size=res1.size(); i<size; i++)
    {
        const int count = res2[i];
        CRect& res1_i = res1[i];
        res1_i.top /= count;
        res1_i.bottom /= count;
        res1_i.left /= count;
        res1_i.right /= count;
    }

    result.clear();
    for(unsigned int i=0,size=res1.size(); i<size; i++)
        if(res2[i]>combine_min)
            result.push_back(res1[i]);
}

// If one detection (after NMS) is inside another, remove the inside one
void RemoveCoveredRectangles(std::vector<CRect>& result)
{
    std::vector<bool> covered;
    covered.resize(result.size());
    std::fill(covered.begin(),covered.end(),false);
    CRect inter;
    for(unsigned int i=0; i<result.size(); i++)
    {
        for(unsigned int j=i+1; j<result.size(); j++)
        {
            result[i].Intersect(inter,result[j]);
            double isize = inter.Size();
            if(isize>result[i].Size()*0.65)
                covered[i] = true;
            if(isize>result[j].Size()*0.65)
                covered[j] = true;
        }
    }
    std::vector<CRect> newresult;
    for(unsigned int i=0; i<result.size(); i++)
        if(covered[i]==false)
            newresult.push_back(result[i]);
    result.clear();
    result.insert(result.begin(),newresult.begin(),newresult.end());
    newresult.clear();
}

// End of Helper functions
// ---------------------------------------------------------------------

// ---------------------------------------------------------------------
// Functions that load the two classifiers
void LoadCascade(DetectionScanner& ds)
{
    std::vector<NodeDetector::NodeType> types;
    std::vector<int> upper_bounds;
    std::vector<std::string> filenames;

    types.push_back(NodeDetector::CD_LIN); // first node
    upper_bounds.push_back(100);
    filenames.push_back("combined.txt.model");
    types.push_back(NodeDetector::CD_HIK); // second node
    upper_bounds.push_back(353);
    filenames.push_back("combined.txt.model_");

    ds.LoadDetector(types,upper_bounds,filenames);
    // You can adjust these parameters for different speed, accuracy etc
    ds.cascade->nodes[0]->thresh += 0.8;
    ds.cascade->nodes[1]->thresh -= 0.095;
}

void DetectionScanner::LoadDetector(std::vector<NodeDetector::NodeType>& types,std::vector<int>& upper_bounds,std::vector<std::string>& filenames)
{
    unsigned int depth = types.size();
    assert(depth>0 && depth==upper_bounds.size() && depth==filenames.size());
    if(cascade)
        delete cascade;
    cascade = new CascadeDetector;
    assert(xdiv>0 && ydiv>0);
    for(unsigned int i=0; i<depth; i++)
        cascade->AddNode(types[i],(xdiv-EXT)*(ydiv-EXT)*baseflength,upper_bounds[i],filenames[i].c_str());

    hist.Create(1,baseflength*(xdiv-EXT)*(ydiv-EXT));
}

void NodeDetector::Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename)
{
    type = _type;
    index = _index;
    filename = _filename;
    featurelength = _featurelength;
    upper_bound = _upper_bound;
    if(type==CD_LIN)
        thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,classifier);
    else if(type==CD_HIK)
        thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,upper_bound,classifier);

    if(type==CD_LIN) type = LINEAR;
    if(type==CD_HIK) type = HISTOGRAM;
}

void CascadeDetector::AddNode(const NodeDetector::NodeType _type,const int _featurelength,const int _upper_bound,const char* _filename)
{
    if(length==size)
    {
        int newsize = size * 2;
        NodeDetector** p = new NodeDetector*[newsize];
        assert(p!=NULL);
        std::copy(nodes,nodes+size,p);
        size = newsize;
        delete[] nodes;
        nodes = p;
    }
    nodes[length] = new NodeDetector(_type,_featurelength,_upper_bound,length,_filename);
    length++;
}

// End of functions that load the two classifiers
// ---------------------------------------------------------------------

// ---------------------------------------------------------------------
// Detection functions

// initialization -- compute the Census Tranform image for CENTRIST
void DetectionScanner::InitImage(IntImage<double>& original)
{
    image = original;
    image.Sobel(sobel,false,false);
    ComputeCT(sobel,ct);
}

// combine the (xdiv-1)*(ydiv-1) integral images into a single one
void DetectionScanner::InitIntegralImages(const int stepsize)
{
    if(cascade->nodes[0]->type!=NodeDetector::LINEAR)
        return; // No need to prepare integral images

    const int hd = height/xdiv*2-2;
    const int wd = width/ydiv*2-2;
    scores.Create(ct.nrow,ct.ncol);
    scores.Zero(cascade->nodes[0]->thresh/hd/wd);
    double* linearweights = cascade->nodes[0]->classifier.buf;
    for(int i=0; i<xdiv-EXT; i++)
    {
        const int xoffset = height/xdiv*i;
        for(int j=0; j<ydiv-EXT; j++)
        {
            const int yoffset = width/ydiv*j;
            for(int x=2; x<ct.nrow-2-xoffset; x++)
            {
                int* ctp = ct.p[x+xoffset]+yoffset;
                double* tempp = scores.p[x];
                for(int y=2; y<ct.ncol-2-yoffset; y++)
                    tempp[y] += linearweights[ctp[y]];
            }
            linearweights += baseflength;
        }
    }
    scores.CalcIntegralImageInPlace();
    for(int i=2; i<ct.nrow-2-height; i+=stepsize)
    {
        double* p1 = scores.p[i];
        double* p2 = scores.p[i+hd];
        for(int j=2; j<ct.ncol-2-width; j+=stepsize)
            p1[j] += (p2[j+wd] - p2[j] - p1[j+wd]);
    }
}

// Resize the input image and then re-compute Sobel image etc
void DetectionScanner::ResizeImage()
{
    image.Resize(sobel,ratio);
    image.Swap(sobel);
    image.Sobel(sobel,false,false);
    ComputeCT(sobel,ct);
}

// The function that does the real detection
int DetectionScanner::FastScan(IntImage<double>& original,std::vector<CRect>& results,const int stepsize)
{
    if(original.nrow<height+5 || original.ncol<width+5) return 0;
    const int hd = height/xdiv;
    const int wd = width/ydiv;
    InitImage(original);
    results.clear();

    hist.Create(1,baseflength*(xdiv-EXT)*(ydiv-EXT));

    NodeDetector* node = cascade->nodes[1];
    double** pc = node->classifier.p;
    int oheight = original.nrow, owidth = original.ncol;
    CRect rect;
    while(image.nrow>=height && image.ncol>=width)
    {
        InitIntegralImages(stepsize);
        for(int i=2; i+height<image.nrow-2; i+=stepsize)
        {
            const double* sp = scores.p[i];
            for(int j=2; j+width<image.ncol-2; j+=stepsize)
            {
                if(sp[j]<=0) continue;
                int* p = hist.buf;
                hist.Zero();
                for(int k=0; k<xdiv-EXT; k++)
                {
                    for(int t=0; t<ydiv-EXT; t++)
                    {
                        for(int x=i+k*hd+1; x<i+(k+1+EXT)*hd-1; x++)
                        {
                            int* ctp = ct.p[x];
                            for(int y=j+t*wd+1; y<j+(t+1+EXT)*wd-1; y++)
                                p[ctp[y]]++;
                        }
                        p += baseflength;
                    }
                }
                double score = node->thresh;
                for(int k=0; k<node->classifier.nrow; k++) score += pc[k][hist.buf[k]];
                if(score>0)
                {
                    rect.top = i*oheight/image.nrow;
                    rect.bottom = (i+height)*oheight/image.nrow;
                    rect.left = j*owidth/image.ncol;
                    rect.right = (j+width)*owidth/image.ncol;
                    results.push_back(rect);
                }
            }
        }
        ResizeImage();
    }
    return 0;
}

// The interface function that detects pedestrians
// "filename" -- an input image
// detect pedestrians in image "filename" and save results to "outname"
// "ds" -- the detector cascade -- pass "scanner" as this parameter
// "out" -- file stream that saves the output rectangle information
int totaltime = 0;

int DetectHuman(const char* filename,DetectionScanner& ds)
{
    std::vector<CRect> results;
    IntImage<double> original;
//    original.Load(filename);

    ds.FastScan(original,results,2);
    PostProcess(results,2);
    PostProcess(results,0);
    RemoveCoveredRectangles(results);

    cvNamedWindow("show");
    {
        IplImage* iplImage = NULL;
        iplImage = cvLoadImage(filename);
        for(unsigned int i=0; i<results.size(); i++)
            cvRectangle(iplImage,cvPoint(results[i].left,results[i].top),cvPoint(results[i].right,results[i].bottom),CV_RGB(255,0,0),2);
        cvShowImage("show",iplImage);
        cvWaitKey();
        cvReleaseImage(&iplImage);
    }
    return (int)results.size();
}

// End of Detection functions
// ---------------------------------------------------------------------

int main(int argc,char* argv[])
{
	std::cout << "usage:" << std::endl;
	std::cout << argv[0] << " <video_file>" << std::endl << std::endl;
	std::cout << "keys:" << std::endl;
	std::cout << "space : toggle using simple post-process (NMS, non-maximal suppression)" << std::endl;
	std::cout << "0     : waits to process next frame until a key pressed" << std::endl;
	std::cout << "1     : doesn't wait to process next frame" << std::endl;
	std::cout << "2     : resize frames 1/2" << std::endl;
	std::cout << "3     : don't resize frames" << std::endl;
	std::cout << "4     : resize frames 1/4" << std::endl;
	if (argc < 2)
		return 0;

	cv::Mat src;
    cv::VideoCapture capture( argv[1] );

    LoadCascade(scanner);
    std::cout<<"Detectors loaded."<<std::endl;
    int key = 0;
	int wait_time = 1;
	float fx = 1;
    bool rect_organization = true;
	IntImage<double> original;

    while( key != 27 )
    {
        capture >> src;
        if( src.empty() ) break;  
		
		if (fx < 1)
		{
			cv::resize(src, src, cv::Size(), fx, fx);
		}
		
        original.Load( src );
        std::vector<CRect> results;
        scanner.FastScan(original, results, 2);

        if(rect_organization)
        {
            PostProcess(results,2);
            PostProcess(results,0);
            RemoveCoveredRectangles(results);
        }

        for(size_t i = 0; i < results.size(); i++)
        {
            cv::rectangle(src, cvPoint(results[i].left,results[i].top),cvPoint(results[i].right,results[i].bottom),cv::Scalar(0,255,0),2 );
        }

        cv::imshow("result",src);
        key = cv::waitKey( wait_time );

		if (key == 32)
			rect_organization = !rect_organization;

		if (key == 48)
			wait_time = 0;

		if (key == 49)
			wait_time = 1;

		if (key == 50)
			fx = 0.5;

		if (key == 51)
			fx = 1;

		if (key == 52)
			fx = 0.25;
	}
    cv::waitKey();
    return 0;
}
C4: Real-time pedestrian detection——C4實時行人檢測演算法

C4: Real-time pedestrian detection——C4實時行人檢測演算法

Train C4: Real-time pedestrian detection models——C4行人檢測演算法訓練過程

行人檢測論文筆記：Robust Real-Time Face Detection

魯棒的實時人臉檢測：Robust Real-Time Face Detection

YOLO(You Only Look Once):Real-Time Object Detection

論文閱讀筆記（六）Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

【Faster RCNN】《Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks》

《You Only Look Once: Unified, Real-Time Object Detection》論文筆記

YOLO前篇---Real-Time Grasp Detection Using Convolutional Neural Networks

論文閱讀筆記二十六：Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks（CVPR 2016）

You Only Look Once: Unified, Real-Time Object Detection 論文閱讀

Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

faced: CPU Real Time face detection using Deep Learning

論文閱讀：You Only Look Once: Unified, Real-Time Object Detection

【論文筆記】Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

【筆記】Faster-R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

經典計算機視覺論文筆記——《Robust Real-Time Face Detection》

[論文學習]《Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks 》

【翻譯】Faster-R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

CPU Real-time Face Detection and Alignment-68 using MTCNN

C4: Real-time pedestrian detection——C4實時行人檢測演算法

相關推薦