Java 去掉 TXT 文字檔案的 BOM 頭資訊
阿新 • • 發佈:2019-02-16
import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PushbackInputStream; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; import javax.servlet.ServletConfig; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpSession; import com.jspsmart.upload.SmartUpload; import com.jspsmart.upload.SmartUploadException; /** * 一級標籤批量新增(上傳 *.txt 檔案的方式) * <p>Title:CMS</p> * <p>Description:TODO</p> * <p>Copyright (C): 2013</p> * <p>Company:Huawei</p> * <p>Date:Jun 25, 2013</p> * @author:bKF51722 */ public class UploadLabelServlet extends HttpServlet { /** * */ private static final long serialVersionUID = -4129735769163441128L; /** * 日誌工廠 */ private static final DebugLog log = LogFactory.getDebugLog("CONTENTTAG"); /** * Constructor of the object. */ public UploadLabelServlet() { super(); } /** * Servlet配置物件 */ private ServletConfig config; /** * 銷燬 * Destruction of the servlet. <br> */ public void destroy() { super.destroy(); // Just puts "destroy" string in log // Put your code here } /** * Initialization of the servlet. 
<br> * * @throws ServletException if an error occurs */ public void init(ServletConfig configs) throws ServletException { this.config = configs; } /** * Servlet的POST處理方法 * @param request HTTP請求物件 * @param response HTTP請求響應物件 * @throws ServletException * @throws IOException */ protected void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { request.setCharacterEncoding("UTF-8"); response.setCharacterEncoding("UTF-8"); //設定返回訊息 String msg = "message"; //管理上傳物件 SmartUpload labelUpload = new SmartUpload(); //初始化上傳物件 labelUpload.initialize(config, request, response); //從request請求中獲取session HttpSession session = request.getSession(false); //防止登入超時引起空指標異常 if(null == session || "".equals(session)) { //重定向到登入頁面 response.sendRedirect(request.getContextPath()+"/admin/adminlogin.action"); } //建立本地臨時目錄 String capLocalPath = Path.getWebRootPath() + "txtLabelNameFileTemp"; FileHelper.createDir(capLocalPath); //得到session裡面存放的使用者資訊 AdminInfo admin = (AdminInfo) session.getAttribute( Constant.SESSION_KEY_ADMIN_INFO); //操作員 String operator = admin.getOperator(); String fatherNodeId =""; try { //設定檔案上傳大小的限制(設定為3M) labelUpload.setMaxFileSize((1024*1024)*3L); //執行上傳操作 labelUpload.upload(); //從頁面獲取父節點ID fatherNodeId = labelUpload.getRequest().getParameter("fatherNodeId"); if("".equals(fatherNodeId) || null == fatherNodeId) { request.setAttribute(msg, "父節點為空!:"+fatherNodeId); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:父節點為空! request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } } catch (SmartUploadException upe) { request.setAttribute(msg, "檔案上傳出現異常!:"+upe.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action"); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案上傳出現異常! 
request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } catch (IOException ioe) { request.setAttribute(msg, "檔案讀寫錯誤!:"+ioe.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action"); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案讀寫錯誤! request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } catch(RuntimeException e) { request.setAttribute(msg, "檔案太大!檔案不能超過3M :"+e.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action"); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案上傳錯誤! request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } catch (Exception e) { request.setAttribute(msg, "檔案上傳錯誤!:"+e.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action"); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案上傳錯誤! request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } //檢查檔案上傳數量是否正確,只允許使用者一次上傳一個txt檔案 if(labelUpload.getFiles().getCount() > 0) { for (int i = 0; i < labelUpload.getFiles().getCount(); i++) { //獲取檔案 com.jspsmart.upload.File labelNameFile = labelUpload.getFiles().getFile(i); //如果檔案找不到或者檔案內容為空 if(labelNameFile.isMissing() || labelNameFile.getSize() <= 0) { request.setAttribute(msg, "沒有檔案!或者上傳的檔案中沒有資料!"); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:沒有選擇檔案 request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; //continue;//跳出當前的這一次迴圈,進入下一次迴圈 } //獲取檔名稱 String fileName = labelNameFile.getFileName().toUpperCase(Locale.getDefault()); //判斷後綴名是不是 .txt if(!fileName.endsWith(".TXT")) { request.setAttribute(msg, "檔案字尾名不對!上傳的檔案必須是.txt的文字檔案!"); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者:檔案字尾名稱不對 
request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } //檔案存放路徑 String filePath = capLocalPath+ "/" +getNewFileName(operator,labelNameFile.getFileName()); //上傳檔案到本地服務 try { labelNameFile.saveAs(filePath); } catch (SmartUploadException upe) { delete(filePath);//刪除檔案 request.setAttribute(msg, "檔案上傳到本地出現異常!:"+upe.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案上傳出現異常! request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } catch (Exception e) { delete(filePath);//刪除檔案 request.setAttribute(msg, "檔案上傳到本地錯誤!:"+e.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案上傳錯誤! request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } //獲取檔案編碼格式(因判斷檔案格式需要檔案的BOM頭資訊,如果該檔案沒有BOM頭資訊的話,則預設它不是UTF-8格式的) String codeFormat = getCharset(filePath); //如果不是UTF-8格式 if(!"UTF-8".equals(codeFormat)) { //返回錯誤頁面,提示使用者txt文字檔案的編碼格式必須是UTF-8 delete(filePath);//刪除檔案 request.setAttribute(msg, "上傳的TXT文字檔案的編碼格式必須是UTF-8 !"); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案上傳錯誤! 
request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } //獲取配置項中配置的最大標籤個數 String maxLabelNum = SystemConfig.getCmsValue("maxLabelNum"); //匹配是否正整數(0也算是正整數) //java.util.regex.Pattern.matches("^\\d+$", maxLabelNum); if(null == maxLabelNum || "".equals(maxLabelNum) || !java.util.regex.Pattern.matches("^\\d+$", maxLabelNum) || Integer.valueOf(maxLabelNum) > 10000) { maxLabelNum = "10000"; } //去掉utf-8格式的文字檔案的BOM頭資訊 trimBom(filePath); UploadResponseEvt uploadEvt = new UploadResponseEvt(); try { //讀取檔案內容,新增一級標籤 uploadEvt = readFile(filePath,fatherNodeId,request,uploadEvt,maxLabelNum); } catch (FileNotFoundException fe) { delete(filePath);//刪除檔案 request.setAttribute(msg, "讀取檔案內容錯誤,檔案沒找到!:"+fe.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:讀取檔案內容錯誤,檔案沒找到! request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } catch (IOException IOe) { delete(filePath);//刪除檔案 request.setAttribute(msg, "讀取檔案內容錯誤!:"+IOe.getMessage()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:讀取檔案內容錯誤! 
request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } //成功 if("0".equals(uploadEvt.getStatus())) { delete(filePath);//刪除檔案 request.setAttribute(msg, "上傳成功!總條數:"+uploadEvt.getNumber()[0]+" 入庫成功條數:"+uploadEvt.getNumber()[1]+" 重複的條數:"+(uploadEvt.getNumber()[0] - uploadEvt.getNumber()[1])); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳成功頁面,提示使用者上傳成功。 request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadSuccess.jsp").forward(request, response); return; } //超出最大行數 else if("1".equals(uploadEvt.getStatus())) { delete(filePath);//刪除檔案 request.setAttribute(msg, "一次最多允許上傳"+maxLabelNum+"個標籤!請修改後重新上傳。現總行數:"+uploadEvt.getRowsErr()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:一次最多允許上傳10000個標籤 request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } //有特殊字元 else if("2".equals(uploadEvt.getStatus())) { delete(filePath);//刪除檔案 request.setAttribute(msg, "檔案中有特殊字元!請修改後重新上傳。行數:"+uploadEvt.getRowsErr()+" 標籤:"+uploadEvt.getTagNameErr()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案中有特殊字元!請重新上傳。 request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } else//標籤名稱大於100個字 { delete(filePath);//刪除檔案 request.setAttribute(msg, "檔案中有標籤名稱大於100字元(中文、字母、數字,都算一個字元)!請修改後重新上傳。行數:"+uploadEvt.getRowsErr()+" 標籤:"+uploadEvt.getTagNameErr()); request.setAttribute("backUrl", "admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:檔案中有特殊字元!請重新上傳。 request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } } } else { request.setAttribute(msg, "沒有檔案!請先選擇需要上傳的txt文字檔案。"); request.setAttribute("backUrl", 
"admin/qeuryTagListPage.action?search=no&tagid="+fatherNodeId); //跳轉到上傳失敗頁面,提示使用者上傳失敗:沒有選擇檔案 request.getRequestDispatcher("/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp").forward(request, response); return; } } /** * 讀取檔案中的標籤名稱,然後入庫 * @param filePath 檔案的完整路徑及名稱 * @param fatherNodeId 父節點ID * @param request http請求物件 * @param uploadEvt 上傳實體Bean物件 * @param maxLabelNum 允許上傳的最大條數 * @throws FileNotFoundException,IOException */ private UploadResponseEvt readFile(String filePath,String fatherNodeId,HttpServletRequest request,UploadResponseEvt uploadEvt,String maxLabelNum) throws FileNotFoundException,IOException { //得到session裡面存放的使用者資訊 AdminInfo admin = (AdminInfo) request.getSession().getAttribute( Constant.SESSION_KEY_ADMIN_INFO); //操作員 String operator = admin.getOperator(); //定義讀取檔案的狀態 0:成功,1:超出最大行數,2:有特殊字元,3:標籤名稱大於100個字 String status = "0"; /** * 檔案內容格式校驗 * 判斷是否超出允許的最大行數 */ //BufferedReader maxNumbr = new BufferedReader(new FileReader(new File(filePath))); BufferedReader maxNumbr = new BufferedReader(new InputStreamReader(new FileInputStream(filePath),"UTF-8")); String lineCounts = maxNumbr.readLine(); //記錄有多少行資料 int count = 0; int allCount = 0;//記錄所有行數,包括空行 while(null != lineCounts) { allCount++; //去掉空行之後的有效行數 if(!lineCounts.trim().equals("")) { count++; //過濾特殊字元的方法 if(InvalidChar(lineCounts)) { status = "2";//有特殊字元 //返回方法前關閉流 if (maxNumbr != null) { maxNumbr.close(); } uploadEvt.setRowsErr(String.valueOf(allCount)); uploadEvt.setTagNameErr(lineCounts); uploadEvt.setStatus(status); //方法返回 return uploadEvt; //break;//跳出當前while迴圈(終止當前while迴圈執行下一行程式碼) } //判斷當前行是否大於100個字元(中文、字母、數字,都算一個字元) if(lineCounts.trim().length() > 100) { status = "3";//標籤名稱大於50個字元(中文、字母、數字,都算一個字元) //返回方法前關閉流 if (maxNumbr != null) { maxNumbr.close(); } uploadEvt.setRowsErr(String.valueOf(allCount)); uploadEvt.setTagNameErr(lineCounts); uploadEvt.setStatus(status); //方法返回 return uploadEvt; } } lineCounts = maxNumbr.readLine(); } if (maxNumbr != null) { maxNumbr.close(); } //超出允許的最大標籤個數 if(count > 
Integer.valueOf(maxLabelNum)) { uploadEvt.setRowsErr(String.valueOf(allCount)); status = "1";//超出最大行數 uploadEvt.setStatus(status); return uploadEvt; } /** * 讀取資料入庫 */ //BufferedReader br = new BufferedReader(new FileReader(new File(filePath))); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filePath),"UTF-8")); String line = br.readLine(); //獲取service物件 ContentTagService service = (ContentTagService) ContextHolder.getService("contentTagService"); int a = 0;//總行數 int b = 0;//匯入成功的總行數 while(null != line) { //去掉空行之後的有效行數 if(!"".equals(line.trim())) { //校驗該父節點下有沒有該標籤名,如果沒有則新增入庫 if(!service.queryTagByName(fatherNodeId, line.trim())) { //獲取序列 String labelId = service.getRESTagId(); //入庫操作(新增一級標籤) boolean flag = service.batchInsertTag(fatherNodeId, line.trim(),labelId); //新增標籤成功,記錄新增日誌 if(flag) { } flag = false; //清空序列 labelId = ""; b++; } a++; } line = br.readLine(); } if (br != null) { br.close(); } uploadEvt.setNumber(new int[]{a,b}); uploadEvt.setStatus(status); return uploadEvt; } /** * 獲取檔案的編碼格式 * @param fileName * @return * @throws IOException */ private String getCharset(String fileName) throws IOException{ BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName)); int p = (bin.read() << 8) +bin.read(); String code = null; switch (p) { case 0xefbb: code = "UTF-8"; break; case 0xfffe: code = "Unicode"; break; case 0xfeff: code = "UTF-16BE"; break; default: code = "GBK"; } return code; } /** * 讀取流中前面的字元,看是否有bom,如果有bom,將bom頭先讀掉丟棄 * * @param in * @return * @throws java.io.IOException */ public static InputStream getInputStream(InputStream in) throws IOException { PushbackInputStream testin = new PushbackInputStream(in); int ch = testin.read(); if (ch != 0xEF) { testin.unread(ch); } else if ((ch = testin.read()) != 0xBB) { testin.unread(ch); testin.unread(0xef); } else if ((ch = testin.read()) != 0xBF) { throw new IOException("錯誤的UTF-8格式檔案"); } else { // 不需要做,這裡是bom頭被讀完了 // System.out.println("still exist bom"); 
} return testin; } /** * 根據一個檔名,讀取完檔案,幹掉bom頭。 * * @param fileName * @throws java.io.IOException */ public static void trimBom(String fileName) throws IOException { FileInputStream fin = new FileInputStream(fileName); // 開始寫臨時檔案 InputStream in = getInputStream(fin); ByteArrayOutputStream bos = new ByteArrayOutputStream(); byte b[] = new byte[4096]; int len = 0; while (in.available() > 0) { len = in.read(b, 0, 4096); //out.write(b, 0, len); bos.write(b, 0, len); } in.close(); fin.close(); bos.close(); //臨時檔案寫完,開始將臨時檔案寫回本檔案。 FileOutputStream out = new FileOutputStream(fileName); out.write(bos.toByteArray()); out.close(); } /** * 利用系統的當前時間+原來上傳的標籤檔名建立一個唯一的臨時檔名 */ private String getNewFileName(String code, String oldFileName) { // 列印方法入口日誌 log.info(this.getClass().getName() + "method getNewFileName() start"); //獲取檔案字尾名稱 String ext = oldFileName.substring(oldFileName.lastIndexOf("."), oldFileName.length()); DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss"); // 構造臨時鈴音檔名: String tempfileName = code + df.format(new Date()) + ext; // 列印方法出口日誌 log.info(this.getClass().getName() + "method getNewFileName() end"); return tempfileName; } /** * 刪除上傳的檔案 * * @param file */ private void delete(String file) { // 列印方法入口日誌 log.info(this.getClass().getName() + "method delete() start"); File f = new File(file); if (f.exists()) { f.delete(); } // 列印方法出口日誌 log.info(this.getClass().getName() + "method delete() end"); } /** * 過濾特殊字元 * @param keyWord 需要判斷是否有特殊字元的字串 * @return true:有特殊字串;false:沒有特殊字串 */ public static boolean InvalidChar(String keyWord) { if(keyWord.trim().length() > 0) { for (int i = 0; i < keyWord.length(); i++) { int c = keyWord.charAt(i); /*if (c == '`' || c == '~' || c == '!' 
|| c == '@' || c == '#' || c == '$' || c == '^' || c == '&' || c == '*'|| c == ')'|| c == '(' || c == '{' || c == '}' || c == '/' || c == '[' || c == ']' || c == ':' || c == '”' || c == ';' || c == '’'|| c == '<' || c == '>'|| c == '?'|| c == ','|| c == '.'|| c == '\\' || c == '"'||c == '%' || c == '|') { return true; }*/ if (c == '<' || c == '>'|| c == '|') { return true; } } } else { return true; } return false; } }