OpenCV基於傅立葉變換以及霍夫直線檢測的旋轉文字校正

阿新 • • 發佈：2019-01-25

最近剛好結束了霍夫三部曲以及離散傅立葉變換的總結，剛好了解到它們兩個的結合可以實現一個很有意思的功能——旋轉文字影象的校正，於是參考了幾篇部落格，記錄下來。

主要參考部落格：

標準霍夫直線檢測以及影象的傅立葉變換

關於傅立葉變換的原理請看我的上一篇部落格，也是為這篇文章做了個鋪墊

一幅文字影象

對影象進行旋轉矯正，關鍵是要獲取旋轉角度是多少！獲取了旋轉角度就可以用仿射變換對影象進行矯正，影象旋轉的程式碼下面會貼出。

旋轉角度怎麼獲取？可以對影象作傅立葉變換獲取這個角度，有了上篇文章的理論基礎，相信不難理解

文字影象的明顯特徵就是存在分行間隔，行與行之間的灰度值變化不如文字本身及文字之間的變化劇烈，因此這些地方對應的頻譜值也低，即頻譜的低頻部分，因為傅立葉變換表徵的就是影象各點的變化頻率。當文字影象旋轉時，其頻域中的頻譜也會隨之旋轉，所以可以根據這一特點來計算旋轉角度。

Samples：

/*
 *  Author: John Hany
 *  Website: http://johnhany.net
 *  Source code updates: https://github.com/johnhany/textRotCorrect
 *  If you have any advice, you could contact me at: [email protected]
 *  Need OpenCV environment!
 *
 */

#include <cmath>
#include <iostream>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>

using namespace cv;
using namespace std;

#define GRAY_THRESH 150
#define HOUGH_VOTE 100

//#define DEGREE 27

int main(int argc, char **argv)
{
    //Read a single-channel image
    const char* filename = "imageText_02_R.jpg";
    Mat srcImg = imread(filename, CV_LOAD_IMAGE_GRAYSCALE);
    if(srcImg.empty())
        return -1;
    imshow("source", srcImg);

    Point center(srcImg.cols/2, srcImg.rows/2);

#ifdef DEGREE
    //Rotate source image
    Mat rotMatS = getRotationMatrix2D(center, DEGREE, 1.0);
    warpAffine(srcImg, srcImg, rotMatS, srcImg.size(), 1, 0, Scalar(255,255,255));
    imshow("RotatedSrc", srcImg);
    //imwrite("imageText_R.jpg",srcImg);
#endif

    //Expand image to an optimal size, for faster processing speed
    //Set widths of borders in four directions
    //If borderType==BORDER_CONSTANT, fill the borders with (0,0,0)
    Mat padded;
    int opWidth = getOptimalDFTSize(srcImg.rows);
    int opHeight = getOptimalDFTSize(srcImg.cols);
    copyMakeBorder(srcImg, padded, 0, opWidth-srcImg.rows, 0, opHeight-srcImg.cols, BORDER_CONSTANT, Scalar::all(0));

    Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
    Mat comImg;
    //Merge into a double-channel image
    merge(planes,2,comImg);

    //Use the same image as input and output,
    //so that the results can fit in Mat well
    dft(comImg, comImg);

    //Compute the magnitude
    //planes[0]=Re(DFT(I)), planes[1]=Im(DFT(I))
    //magnitude=sqrt(Re^2+Im^2)
    split(comImg, planes);
    magnitude(planes[0], planes[1], planes[0]);

    //Switch to logarithmic scale, for better visual results
    //M2=log(1+M1)
    Mat magMat = planes[0];
    magMat += Scalar::all(1);
    log(magMat, magMat);

    //Crop the spectrum
    //Width and height of magMat should be even, so that they can be divided by 2
    //-2 is 11111110 in binary system, operator & make sure width and height are always even
    magMat = magMat(Rect(0, 0, magMat.cols & -2, magMat.rows & -2));

    //Rearrange the quadrants of Fourier image,
    //so that the origin is at the center of image,
    //and move the high frequency to the corners
    int cx = magMat.cols/2;
    int cy = magMat.rows/2;

    Mat q0(magMat, Rect(0, 0, cx, cy));
    Mat q1(magMat, Rect(0, cy, cx, cy));
    Mat q2(magMat, Rect(cx, cy, cx, cy));
    Mat q3(magMat, Rect(cx, 0, cx, cy));

    Mat tmp;
    q0.copyTo(tmp);
    q2.copyTo(q0);
    tmp.copyTo(q2);

    q1.copyTo(tmp);
    q3.copyTo(q1);
    tmp.copyTo(q3);

    //Normalize the magnitude to [0,1], then to[0,255]
    normalize(magMat, magMat, 0, 1, CV_MINMAX);
    Mat magImg(magMat.size(), CV_8UC1);
    magMat.convertTo(magImg,CV_8UC1,255,0);
    imshow("magnitude", magImg);
    //imwrite("imageText_mag.jpg",magImg);

    //Turn into binary image
    threshold(magImg,magImg,GRAY_THRESH,255,CV_THRESH_BINARY);
    imshow("mag_binary", magImg);
    //imwrite("imageText_bin.jpg",magImg);

    //Find lines with Hough Transformation
    vector<Vec2f> lines;
    float pi180 = (float)CV_PI/180;
    Mat linImg(magImg.size(),CV_8UC3);
    HoughLines(magImg,lines,1,pi180,HOUGH_VOTE,0,0);
    int numLines = lines.size();
    for(int l=0; l<numLines; l++)
    {
        float rho = lines[l][0], theta = lines[l][1];
        Point pt1, pt2;
        double a = cos(theta), b = sin(theta);
        double x0 = a*rho, y0 = b*rho;
        pt1.x = cvRound(x0 + 1000*(-b));
        pt1.y = cvRound(y0 + 1000*(a));
        pt2.x = cvRound(x0 - 1000*(-b));
        pt2.y = cvRound(y0 - 1000*(a));
        line(linImg,pt1,pt2,Scalar(255,0,0),3,8,0);
    }
    imshow("lines",linImg);
    //imwrite("imageText_line.jpg",linImg);
    if(lines.size() == 3){
        cout << "found three angels:" << endl;
        cout << lines[0][1]*180/CV_PI << endl << lines[1][1]*180/CV_PI << endl << lines[2][1]*180/CV_PI << endl << endl;
    }

    //Find the proper angel from the three found angels
    float angel=0;
    float piThresh = (float)CV_PI/90;
    float pi2 = CV_PI/2;
    for(int l=0; l<numLines; l++)
    {
        float theta = lines[l][1];
        if(abs(theta) < piThresh || abs(theta-pi2) < piThresh)
            continue;
        else{
            angel = theta;
            break;
        }
    }

    //Calculate the rotation angel
    //The image has to be square,
    //so that the rotation angel can be calculate right
    angel = angel<pi2 ? angel : angel-CV_PI;
    if(angel != pi2){
        float angelT = srcImg.rows*tan(angel)/srcImg.cols;
        angel = atan(angelT);
    }
    float angelD = angel*180/(float)CV_PI;
    cout << "the rotation angel to be applied:" << endl << angelD << endl << endl;

    //Rotate the image to recover
    Mat rotMat = getRotationMatrix2D(center,angelD,1.0);
    Mat dstImg = Mat::ones(srcImg.size(),CV_8UC3);
    warpAffine(srcImg,dstImg,rotMat,srcImg.size(),1,0,Scalar(255,255,255));
    imshow("result",dstImg);
    //imwrite("imageText_D.jpg",dstImg);
    
    waitKey(0);

    return 0;
}

過程詳解：

讀取圖片

//Load the image as single-channel grayscale; stop with -1 if nothing was read
Mat srcImg = imread(filename, CV_LOAD_IMAGE_GRAYSCALE);
    if(srcImg.empty())
        return -1;

srcImg.empty()用來判斷是否成功讀進影象，如果srcImg中沒有資料，在後面的步驟會產生記憶體錯誤。
由於處理的是文字，彩色資訊不會提供額外幫助，所以要用CV_LOAD_IMAGE_GRAYSCALE表明以灰度形式讀進影象。
假定讀取的影象如下：

旋轉影象(可選)

#ifdef DEGREE
    //Compiled only when DEGREE is defined: rotate the source image in place
    //by DEGREE around center (scale 1.0) to create a skewed test input;
    //Scalar(255,255,255) is passed as the border value
    Mat rotMatS = getRotationMatrix2D(center, DEGREE, 1.0);
    warpAffine(srcImg, srcImg, rotMatS, srcImg.size(), 1, 0, Scalar(255,255,255));
    imshow("RotatedSrc", srcImg);
    //imwrite("imageText_R.jpg",srcImg);
#endif

如果手頭沒有這樣的傾斜影象，可以選擇一張正放的文字影象，再把程式碼開頭 //#define DEGREE 27 那一行前的註釋符號去掉。然後這部分程式碼就會把所給的影象旋轉你規定的角度，再交給後面處理。

然後直接就是DFT部分以及霍夫直線檢測部分的結合，這兩部分就不多說了，你懂得，我前面部落格有詳細介紹

提及一下涉及到的兩個引數

兩個引數GRAY_THRESH和HOUGH_VOTE需要手動指定，不同的影象需要設定不同的引數，同一段文字旋轉不同的角度也需要不同的引數。GRAY_THRESH越大，二值化的閾值就越高；HOUGH_VOTE越大，霍夫檢測的投票數就越高（需要更多的共線點來確定一條直線）。說白了，如果發現二值化影象中直線附近有很多散點，就要適當提高GRAY_THRESH；如果發現從二值影象的一條直線上檢測到了幾條角度相差很小的直線，就需要適當提高HOUGH_VOTE。

檢測到的結果：

dft

二值化

霍夫

計算傾斜角
上面得到了三個角度，一個是0度，一個是90度，另一個就是我們所需要的傾斜角。要把這個角找出來，而且要考慮誤差。

//Find the proper angle among the detected ones:
//take the first theta that is at least piThresh (pi/90, i.e. 2 degrees)
//away from both 0 and pi/2; angel stays 0 when no such line exists
    float angel=0;
    float piThresh = (float)CV_PI/90;
    float pi2 = CV_PI/2;
    for(int l=0; l<numLines; l++)
    {
        float theta = lines[l][1];
        if(abs(theta) < piThresh || abs(theta-pi2) < piThresh)
            continue;
        else{
            angel = theta;
            break;
        }
    }

    //Calculate the rotation angle
    //Thetas of pi/2 and above are mapped to (theta - pi)
    angel = angel<pi2 ? angel : angel-CV_PI;
    //For a non-square image tan(angle) is rescaled by rows/cols
    //before converting back to an angle
    if(angel != pi2){
        float angelT = srcImg.rows*tan(angel)/srcImg.cols;
        angel = atan(angelT);
    }
    //Radians to degrees
    float angelD = angel*180/(float)CV_PI;
    cout << "the rotation angel to be applied:" << endl << angelD << endl << endl;

由於DFT的特點，只有輸入影象是正方形時，檢測到的角才是文字真正旋轉的角度。但我們的輸入影象不一定是正方形的，所以要根據影象的長寬比改變這個角度。（這個地方望高手給出解答）
還有一個需要注意的細節，雖然HoughLines()輸出的傾斜角在[0,180)之間，但在[0,90]和(90,180)之間這個角的含義是不同的。請看圖示：

當傾斜角大於90度時，(180-傾斜角)才是直線相對豎直方向的偏離角度。在OpenCV中，逆時針旋轉，角度為正。要把影象轉回去，這個角度就變成了(傾斜角-180)。

傾斜角：

校正影象

//Build the rotation matrix around center for angelD degrees, scale 1.0
Mat rotMat = getRotationMatrix2D(center,angelD,1.0);
    //Destination image handed to warpAffine()
    Mat dstImg = Mat::ones(srcImg.size(),CV_8UC3);
    //Apply the rotation; Scalar(255,255,255) is the border value
    warpAffine(srcImg,dstImg,rotMat,srcImg.size(),1,0,Scalar(255,255,255));

先用getRotationMatrix2D()獲得一個2*3的仿射變換矩陣，再把這個矩陣輸入warpAffine()，做一個單純旋轉的仿射變換。warpAffine()的最後一個引數Scalar(255,255,255)是把由於旋轉產生的空白用白色填充。

最後結果：

可以很明顯的看出來清晰度下降了不少，感覺還是有能量損失（不知道啥原因）

至於其他一些文字的情況，大家可以自行檢驗

OpenCV基於傅立葉變換以及霍夫直線檢測的旋轉文字校正

OpenCV基於傅立葉變換以及霍夫直線檢測的旋轉文字校正

OpenCV—python 影象矯正（基於傅立葉變換—基於透視變換）

opencv 中傅立葉變換 FFT

二維離散傅立葉變換以及濾波應用

影象處理中傅立葉變換以及頻率域影象增強詳解

基於傅立葉變換和PyQt4開發一個簡單的頻率計數器

opencv實現傅立葉變換

OpenCv-C++-小案例實戰-直線檢測（以及霍夫直線檢測程式碼）

0022-在OpenCV環境下做影象或矩陣的傅立葉變換

基於python的快速傅立葉變換FFT（二）

【GCN】圖卷積網路初探——基於圖（Graph）的傅立葉變換和卷積

傅立葉變換及其在opencv中影象去噪的實現

opencv 傅立葉變換及其逆變換例項及其理解

分別用OpenCV-Python和Numpy實現傅立葉變換和逆傅立葉變換

OpenCV中對影象進行二維離散傅立葉變換

OpenCV學習筆記（六）離散傅立葉變換

基於二維傅立葉變換法的MRI成像原理的Matlab模擬（3）

opencv 中快速傅立葉變換 FFT

Python下opencv使用筆記（十）（影象頻域濾波與傅立葉變換）

Python中二維快速傅立葉變換----基於numpy庫

OpenCV基於傅立葉變換以及霍夫直線檢測的旋轉文字校正

相關推薦