OpenCV 字元切割與識別（訓練分類器）

阿新 • • 發佈：2021-01-24

訓練（分類器）

//opencv2.4.9 + vs2012 + 64位
#include <windows.h>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;

/**
 * Converts a wide-character string to a newly allocated ANSI (CP_ACP) string.
 *
 * @param wp  NUL-terminated wide string to convert.
 * @return    Buffer allocated with new[] holding the converted text followed
 *            by a terminating '\0'. Nothing in this function releases it, so
 *            the caller is responsible for delete[].
 */
char* WcharToChar(const wchar_t* wp)
{
	const int wideLength = static_cast<int>(wcslen(wp));

	// First call only measures: with a NULL output buffer the API reports
	// how many bytes the converted text needs.
	const int byteCount = WideCharToMultiByte(CP_ACP, 0, wp, wideLength, NULL, 0, NULL, NULL);

	// Second call performs the conversion; one extra slot holds the terminator,
	// which the API does not write because an explicit length was passed.
	char* converted = new char[byteCount + 1];
	WideCharToMultiByte(CP_ACP, 0, wp, wideLength, converted, byteCount, NULL, NULL);
	converted[byteCount] = '\0';

	return converted;
}

/**
 * Converts an ANSI (CP_ACP) string to a newly allocated wide-character string.
 *
 * @param c  NUL-terminated narrow string to convert.
 * @return   Buffer allocated with new[] holding the converted text followed
 *           by a terminating L'\0'. Nothing in this function releases it, so
 *           the caller is responsible for delete[].
 */
wchar_t* CharToWchar(const char* c)
{
	const int narrowLength = static_cast<int>(strlen(c));

	// Measuring pass: a NULL output buffer makes the API return the number of
	// wide characters required.
	const int wideCount = MultiByteToWideChar(CP_ACP, 0, c, narrowLength, NULL, 0);

	// Converting pass, with room left for the terminator added below.
	wchar_t* converted = new wchar_t[wideCount + 1];
	MultiByteToWideChar(CP_ACP, 0, c, narrowLength, converted, wideCount);
	converted[wideCount] = '\0';

	return converted;
}

/**
 * Converts a std::string to a wide-character string via CharToWchar().
 *
 * @param s  Narrow string to convert.
 * @return   Whatever CharToWchar() returns for the string's character data.
 */
wchar_t* StringToWchar(const string& s)
{
	return CharToWchar(s.c_str());
}

void train(const string perfileReadPath, const string fileform) 

{
	const int sample_mun_perclass = 40;//訓練字元每類數量
	const int class_mun = 10 + 26;//訓練字元類數 0-9 A-Z 除了I、O

	const int image_cols = 8;
	const int image_rows = 16;
	string  fileReadName, fileReadPath;
	char temp[256];

	float trainingData[class_mun*sample_mun_perclass][image_rows*image_cols] = { { 0 } };//每一行一個訓練樣本
	float labels[class_mun*sample_mun_perclass][class_mun] = { { 0 } };//訓練樣本標籤

	for (int i = 0; i <= class_mun - 1; i++)//不同類
	{
		//讀取每個類資料夾下所有影象
		int j = 0;//每一類讀取影象個數計數

		if (i <= 9)//0-9
		{
			sprintf(temp, "%d", i);
			//printf("%d\n", i);
		}
		else//A-Z
		{
			sprintf(temp, "%c", i + 55);
			//printf("%c\n", i+55);
		}

		fileReadPath = perfileReadPath + "/" + temp + "/" + fileform;
		cout << "資料夾" << temp << endl;

		HANDLE hFile;
		LPCTSTR lpFileName = StringToWchar(fileReadPath);//指定搜尋目錄和檔案型別，如搜尋d盤的音訊檔案可以是"D:\\*.mp3"
		WIN32_FIND_DATA pNextInfo;  //搜尋得到的檔案資訊將儲存在pNextInfo中;
		hFile = FindFirstFile(lpFileName, &pNextInfo);//請注意是 &pNextInfo , 不是 pNextInfo;
		if (hFile == INVALID_HANDLE_VALUE)
		{
			continue;//搜尋失敗
		}
		//do-while迴圈讀取
		do
		{
			if (pNextInfo.cFileName[0] == '.')//過濾.和..
				continue;
			j++;//讀取一張圖
			//wcout<<pNextInfo.cFileName<<endl;
			printf("%s\n", WcharToChar(pNextInfo.cFileName));
			//對讀入的圖片進行處理
			Mat srcImage = imread(perfileReadPath + "/" + temp + "/" + WcharToChar(pNextInfo.cFileName), CV_LOAD_IMAGE_GRAYSCALE);
			Mat resizeImage;
			Mat trainImage;
			Mat result;

			resize(srcImage, resizeImage, Size(image_cols, image_rows), (0, 0), (0, 0), CV_INTER_AREA);//使用象素關係重取樣。當影象縮小時候，該方法可以避免波紋出現
			threshold(resizeImage, trainImage, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

			for (int k = 0; k<image_rows*image_cols; ++k)
			{
				trainingData[i*sample_mun_perclass + (j - 1)][k] = (float)trainImage.data[k];
				//trainingData[i*sample_mun_perclass+(j-1)][k] = (float)trainImage.at<unsigned char>((int)k/8,(int)k%8);//(float)train_image.data[k];
				//cout<<trainingData[i*sample_mun_perclass+(j-1)][k] <<" "<< (float)trainImage.at<unsigned char>(k/8,k%8)<<endl;
			}

		} while (FindNextFile(hFile, &pNextInfo) && j<sample_mun_perclass);//如果設定讀入的圖片數量，則以設定的為準，如果圖片不夠，則讀取資料夾下所有圖片

	}

	// Set up training data Mat
	Mat trainingDataMat(class_mun*sample_mun_perclass, image_rows*image_cols, CV_32FC1, trainingData);
	cout << "trainingDataMat——OK！" << endl;

	// Set up label data 
	for (int i = 0; i <= class_mun - 1; ++i)
	{
		for (int j = 0; j <= sample_mun_perclass - 1; ++j)
		{
			for (int k = 0; k < class_mun; ++k)
			{
				if (k == i)
					labels[i*sample_mun_perclass + j][k] = 1;
				else
					labels[i*sample_mun_perclass + j][k] = 0;
			}
		}
	}
	Mat labelsMat(class_mun*sample_mun_perclass, class_mun, CV_32FC1, labels);
	cout << "labelsMat:" << endl;
	ofstream outfile("out.txt");
	outfile << labelsMat;
	//cout<<labelsMat<<endl;
	cout << "labelsMat——OK！" << endl;

	//訓練程式碼

	cout << "training start...." << endl;
	CvANN_MLP bp;
	// Set up BPNetwork's parameters
	CvANN_MLP_TrainParams params;
	params.train_method = CvANN_MLP_TrainParams::BACKPROP;
	params.bp_dw_scale = 0.001;
	params.bp_moment_scale = 0.1;
	params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001);  //設定結束條件

	//Setup the BPNetwork
	Mat layerSizes = (Mat_<int>(1, 5) << image_rows*image_cols, 128, 128, 128, class_mun);
	bp.create(layerSizes, CvANN_MLP::SIGMOID_SYM, 1.0, 1.0);//CvANN_MLP::SIGMOID_SYM
	//CvANN_MLP::GAUSSIAN
	//CvANN_MLP::IDENTITY
	cout << "正在訓練..." << endl;
	bp.train(trainingDataMat, labelsMat, Mat(), Mat(), params);
	bp.save("bpcharModel.xml"); //save classifier
	cout << "訓練結束,生成bpModel1.xml檔案" << endl;
}

/**
 * Loads a trained network and classifies a single character image.
 *
 * The image is read as grayscale, resized to 8x16, binarised with Otsu's
 * threshold and flattened row by row before being fed to the network. The
 * index of the strongest output is printed as a character ('0'-'9' for
 * indices 0-9, 'A'.. for the following ones) together with the output value
 * as a percentage, then the input image is shown until a key is pressed.
 *
 * @param xmlfile  Path of the model file to load.
 * @param imgfile  Path of the image to classify.
 */
void test(string xmlfile,string imgfile)
{
	const int image_cols = 8;
	const int image_rows = 16;

	CvANN_MLP bp;
	// The path is passed directly; copying it into a fixed-size buffer first
	// could overflow on long paths.
	bp.load(xmlfile.c_str());
	if (bp.get_layer_count() == 0)
	{
		// A network without layers means nothing was read from the file.
		cerr << "test: failed to load model from " << xmlfile << endl;
		return;
	}

	cout << "測試：" << endl;
	Mat test_image = imread(imgfile, CV_LOAD_IMAGE_GRAYSCALE);
	if (test_image.empty())
	{
		cerr << "test: cannot read image " << imgfile << endl;
		return;
	}

	Mat resized;
	Mat binary;
	// INTER_AREA resamples by pixel area, which avoids moire when shrinking.
	resize(test_image, resized, Size(image_cols, image_rows), 0, 0, CV_INTER_AREA);
	threshold(resized, binary, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

	Mat_<float> sampleMat(1, image_rows*image_cols);
	for (int i = 0; i < image_rows*image_cols; ++i)
	{
		sampleMat(0, i) = static_cast<float>(binary.at<uchar>(i / image_cols, i % image_cols));
	}

	Mat responseMat;
	bp.predict(sampleMat, responseMat);
	Point maxLoc;
	double maxVal = 0;
	minMaxLoc(responseMat, NULL, &maxVal, NULL, &maxLoc);

	// Output index -> character: 0-9 are digits, 10 and above are letters from 'A'.
	const char label = static_cast<char>(maxLoc.x <= 9 ? '0' + maxLoc.x : 'A' + (maxLoc.x - 10));

	cout << "識別結果：" << label << "\t相似度:" << maxVal * 100 << "%" << endl;
	imshow("test_image", test_image);
	waitKey(0);
}

int main()
{
	train("C:\\Users\\GuoJawee\\Desktop\\資料集_\\訓練集1", "*.jpg");
	system("pause");
	//test("bpcharModel.xml","1.bmp");
}

字元識別/切割

（此處原為示意圖，圖片未隨文保留）

#include <iostream>

#include <stdlib.h>
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <map>
#include <time.h>

using namespace cv;
using namespace std;


// Side length of the square structuring element used for dilation in
// charRecognition().
// NOTE(review): the original comment described the size as (3,3), but the
// value here is 1 — confirm which one is intended.
int dilateValue = 1;
// Filters applied to each contour's bounding box in charRecognition():
// minimum area, and the accepted range of the width / height ratio.
double min_area = 200;
double min_width_div_height = 0.08;
double max_width_div_height = 2.0;


// Network shared by loadXML() (which loads it) and charClassify() (which
// runs predictions on it).
CvANN_MLP bp;


/**
 * Loads the classification model into the global network `bp`.
 *
 * @param xmlfile  Path of the model file to load.
 */
void loadXML(string xmlfile)
{
	// The path is passed directly; copying it into a fixed 1024-byte buffer
	// first could overflow on long paths.
	bp.load(xmlfile.c_str());
	if (bp.get_layer_count() == 0)
	{
		// A network without layers means nothing was read from the file.
		cerr << "loadXML: failed to load model from " << xmlfile << endl;
	}
}

/**
 * Classifies a single character image with the global network `bp`.
 *
 * The image is converted to grayscale, resized to 8x16, binarised with Otsu's
 * threshold and flattened row by row into the network input. The conversion
 * happens before the resize and uses BGR channel order, the order in which
 * imread() delivers colour images.
 *
 * All intermediate results go to separate matrices, so the pixels behind
 * `roi` (which may be a view into a larger image) are never modified.
 *
 * @param roi  Character image; colour (BGR) or already single-channel.
 * @return     '0'-'9' when the strongest output index is 0-9, otherwise the
 *             letter counted from 'A' for index 10.
 */
char charClassify(Mat roi)
{
	const int image_cols = 8;
	const int image_rows = 16;

	Mat gray;
	if (roi.channels() == 1)
	{
		gray = roi; // already grayscale; cvtColor would reject this input
	}
	else
	{
		cvtColor(roi, gray, CV_BGR2GRAY);
	}

	Mat resized;
	Mat binary;
	// INTER_AREA resamples by pixel area, which avoids moire when shrinking.
	resize(gray, resized, Size(image_cols, image_rows), 0, 0, CV_INTER_AREA);
	threshold(resized, binary, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

	Mat_<float> sampleMat(1, image_rows*image_cols);
	for (int i = 0; i < image_rows*image_cols; ++i)
	{
		sampleMat(0, i) = static_cast<float>(binary.at<uchar>(i / image_cols, i % image_cols));
	}

	Mat responseMat;
	bp.predict(sampleMat, responseMat);
	Point maxLoc;
	double maxVal = 0;
	minMaxLoc(responseMat, NULL, &maxVal, NULL, &maxLoc);

	// Output index -> character: 0-9 are digits, 10 and above are letters from 'A'.
	return static_cast<char>(maxLoc.x <= 9 ? '0' + maxLoc.x : 'A' + (maxLoc.x - 10));
}


/**
 * Segments the characters in an image and recognises each of them.
 *
 * The image is binarised (Otsu), inverted and dilated, then external contours
 * are extracted. A contour is kept when its bounding box has at least
 * `min_area` pixels and a width/height ratio within
 * [min_width_div_height, max_width_div_height]. Each kept box is classified
 * with charClassify() and the characters are joined in order of the box's
 * left x coordinate; boxes with the same x keep the order in which they were
 * found.
 *
 * @param imgfile   Path of the image to process.
 * @param showflag  When true, boxes and recognised characters are drawn on a
 *                  copy of the image, which is shown in a window and written
 *                  next to the input file with a "res" prefix on the file name.
 * @return          The recognised characters, or an empty string when the
 *                  image cannot be read.
 */
string charRecognition(string imgfile, bool showflag)
{
	const Mat srcImg = imread(imgfile);
	if (srcImg.empty())
	{
		cerr << "charRecognition: cannot read image " << imgfile << endl;
		return "";
	}

	Mat binaryImg;
	cvtColor(srcImg, binaryImg, CV_BGR2GRAY);
	threshold(binaryImg, binaryImg, 0, 255, CV_THRESH_OTSU); // automatic threshold
	binaryImg = 255 - binaryImg;                              // invert colours
	// MORPH_RECT selects a rectangular kernel (ellipse and cross also exist).
	Mat element = getStructuringElement(MORPH_RECT, Size(dilateValue, dilateValue));
	dilate(binaryImg, binaryImg, element);

	vector<vector<Point> > contours;
	findContours(binaryImg, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);

	Mat dstImg = srcImg.clone(); // annotated copy, only used for display/saving

	// Left x coordinate -> recognised character. A multimap keeps characters
	// whose boxes start at the same x instead of silently dropping them.
	multimap<int, char> charsByX;

	for (size_t i = 0; i < contours.size(); ++i)
	{
		const Rect bounds = boundingRect(Mat(contours[i]));

		const double areaValue = static_cast<double>(bounds.width) * static_cast<double>(bounds.height);
		const double width_div_height = static_cast<double>(bounds.width) / static_cast<double>(bounds.height);
		if (areaValue < min_area || width_div_height < min_width_div_height || width_div_height > max_width_div_height)
		{
			continue;
		}

		// The ROI is cut from the untouched source image: cutting it from the
		// annotated copy would let earlier boxes and labels leak into later
		// characters. clone() keeps the classifier from touching srcImg.
		const char ch = charClassify(srcImg(bounds).clone());
		charsByX.insert(make_pair(bounds.x, ch));

		if (showflag)
		{
			rectangle(dstImg, bounds.tl(), bounds.br(), Scalar(0, 0, 255), 2, 8);
			putText(dstImg, string(1, ch), bounds.tl(), FONT_HERSHEY_COMPLEX, 1, Scalar(0, 255, 0), 1, 8, 0);
		}
	}

	string resstr;
	for (const auto& node : charsByX)
	{
		resstr += node.second;
	}

	if (showflag)
	{
		imshow("結果圖", dstImg);

		// Put the "res" prefix in front of the file name rather than the whole
		// path, so an input like "dir/4.png" is written to "dir/res4.png".
		const string::size_type sep = imgfile.find_last_of("/\\");
		const string outPath = (sep == string::npos)
			? "res" + imgfile
			: imgfile.substr(0, sep + 1) + "res" + imgfile.substr(sep + 1);
		imwrite(outPath, dstImg);
	}
	return resstr;
}


int main()
{
	loadXML("./bpcharModel.xml");  //載入模型

	clock_t start = clock();

	string resstr = charRecognition("4.png", false);   //影象路徑  是否顯示
	cout << "識別字符結果 = " << resstr << endl << endl;

	clock_t ends = clock();
	cout << "程式執行時間 = " << (double)(ends - start) / CLOCKS_PER_SEC << endl;

	waitKey(0);
	system("pause");
	return 0;
}