Premature end of JPEG file 終極解決方案
Premature end of JPEG file
問題分析
我們在Caffe訓練中,常常遇到異常提示:Premature end of JPEG file;
如下圖中
這個異常通常是不會影響你正常訓練節奏的,但是它卻可能影響你的訓練效果(當然出現少,看不出明顯差距)
那麼這到底是個什麼異常呢?
答案是:JPEG影象格式破損;也就是你的訓練樣本JPEG影象格式出了毛病
JPEG格式
正常影象:
你用影象瀏覽器開啟,是正常的,如下圖
同樣影象用UltraEdit工具開啟,檢視其16進位制格式的首尾,如圖
檔案頭2個位元組:0xff,0xd8(JPEG檔案標識SOI)
檔案尾2個位元組:0xff,0xd9(JPEG檔案結束標識EOI)
異常影象
注意其右下角,存在灰色馬賽克,其末尾位元組0xd9,0x9c與標識不符,如下圖:
有些影象破損更嚴重,影象瀏覽器都無法開啟影象
解決方案
寫個小程式,check一遍資料集,將存在破損的JPEG影象挑出來
將這部分影象用OpenCV重寫一遍,因為有些破損影象重寫以後,在不影響訓練的情況下,可以繼續用作訓練集;如上圖的破損影象,重寫後內容與原影象一樣,但格式變得正常,而其右下角的馬賽克,對於影象分類、人臉檢測、人像分割等任務是沒有影響的,因此可以繼續放回訓練集。直接上程式碼:
#include "iostream" #include "opencv2\opencv.hpp" #include <string> #include <vector> #include <io.h> #include <fstream> #include <direct.h> #include <stdlib.h> using namespace std; using namespace cv; #pragma comment(lib,"opencv_world300.lib") using namespace std; void ReadDirPath(string basePath, vector<string>& dirList) { //dirList.push_back(basePath); //檔案控制代碼 long long hFile = 0; //檔案資訊 struct _finddata_t fileinfo; string p; if ((hFile = _findfirst(p.assign(basePath).append("\\*").c_str(), &fileinfo)) != -1) { do { //如果是目錄,迭代之 //如果不是,加入列表 if (strcmp(fileinfo.name, ".") == 0 || strcmp(fileinfo.name, "..") == 0) { continue; } else { if ((fileinfo.attrib & 0x10) == _A_SUBDIR) { string dir = p.assign(basePath).append("\\").append(fileinfo.name); dirList.push_back(dir); ReadDirPath(dir, dirList); } } } while (_findnext(hFile, &fileinfo) == 0); _findclose(hFile); } } void ReadImagePath(string basePath, vector<string>& imageList) { if (!imageList.empty()) { imageList.clear(); } vector<string> dirList; dirList.push_back(basePath); ReadDirPath(basePath, dirList); for (int i = 0; i < dirList.size(); i++) { long long hFile = 0; //檔案資訊 struct _finddata_t fileinfo; string p; if ((hFile = _findfirst(p.assign(dirList[i]).append("\\*.*").c_str(), &fileinfo)) != -1) { do { if (strcmp(fileinfo.name, ".") == 0 || strcmp(fileinfo.name, "..") == 0) { continue; } else { string name = fileinfo.name; if (name.size()<5) { continue; } name = name.substr(name.size() - 4, name.size()); if (name == ".jpg" || name == ".JPG" || name == ".png" || name == ".PNG" || name == "jpeg" || name == "JPEG") { string dir = p.assign(dirList[i]).append("\\").append(fileinfo.name); imageList.push_back(dir); } } } while (_findnext(hFile, &fileinfo) == 0); _findclose(hFile); } } } bool CheckJpeg(string file) { if (file.empty()) { return false; } ifstream in(file.c_str(), ios::in | ios::binary); if (!in.is_open()) { cout << "Error opening file!" 
<< endl; return false; } int start; in.read((char*)&start, 4); short int lstart = start << 16 >> 16; //cout << hex << lstart << " "; in.seekg(-4, ios::end); int end; in.read((char*)&end, 4); short int lend = end >> 16; //cout << hex << lend << endl; in.close(); if ((lstart != -9985) || (lend != -9729)) //0xd8ff 0xd9ff { return true; } return false; } int main(int argc, char* argv[]) { if (argc < 5) { cout << "Please Use: [image_path] [width] [hight] [isdemo]" << endl; system("pause"); return -1; } else if (argc >5) { cout << "Parameters too much" << endl; system("pause"); return -1; } string image_path = argv[1]; int nW = atoi(argv[2]); int nH = atoi(argv[3]); vector<string> image_list; ReadImagePath(image_path, image_list); if (image_list.size()<1) { cout << image_path <<": This path has no jpeg image!" << endl; system("pause"); return -1; } int num = image_list.size(); cout << "Check image plan: "<<endl; for (size_t i = 0; i < num; i++) { printf("%d/%d\r", i, num), fflush(stdout); string save_dir = image_path; string name = image_list[i]; name = name.substr(name.size() - 4, name.size()); bool isJpg = false; if (name == ".jpg" || name == ".JPG" || name == "jpeg" || name == "JPEG") { isJpg = CheckJpeg(image_list[i]); } if (isJpg) { save_dir += "_false"; } else { save_dir = save_dir + "_" + argv[2] + "x" + argv[3]; } Mat img = imread(image_list[i]); if (atoi(argv[4]) && !img.empty()) { imshow("img", img); cvWaitKey(0); } if (isJpg) //格式破損 { if (_access(save_dir.c_str(), 6) == -1) { _mkdir(save_dir.c_str()); } if (!img.empty()) { string image_name = image_list[i]; image_name = image_name.substr(image_name.rfind("\\")); imwrite(save_dir + image_name, img); } remove(image_list[i].c_str()); } else if (img.cols <= nW || img.rows <= nH) { string dir = image_list[i]; dir = dir.replace(0,image_path.size(), save_dir); dir = dir.substr(0,dir.rfind("\\")); if (_access(dir.c_str(), 6) == -1) { _mkdir(dir.c_str()); } if (!img.empty()) { string image_name = image_list[i]; 
image_name = image_name.substr(image_name.rfind("\\")); imwrite(dir + image_name, img); } remove(image_list[i].c_str()); } } cout << "finished!" << endl; //system("pause"); return 0; }
編譯為exe,可用run.bat啟動
本工具還加入了以影象大小分類影象的功能,命令列說明
“Please Use: [image_path] [width] [hight] [isdemo]”
分別表示:影象路徑資料夾;分類影象的寬度;分類影象的高度;是否顯示影象
當只使用Check功能時,將後面3個引數置0即可快速Check資料集,最終會將存在問題的影象重寫一遍,放在與你的影象路徑資料夾xxx同一層目錄下新建的xxx_false資料夾中。分類的話也會新建資料夾,當然讀者也可根據自己喜好更改。