1. 程式人生 > Java 去掉 TXT 文字檔案的 BOM 頭資訊

Java 去掉 TXT 文字檔案的 BOM 頭資訊


import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;

import com.jspsmart.upload.SmartUpload;
import com.jspsmart.upload.SmartUploadException;

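/**
 * Batch-creates first-level labels from an uploaded {@code *.txt} file.
 * <p>Title: CMS</p>
 * <p>Description: servlet that receives a label text file and imports each non-empty line as a label</p>
 * <p>Copyright (C): 2013</p>
 * <p>Company: Huawei</p>
 * <p>Date: Jun 25, 2013</p>
 * @author bKF51722
 */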
public class UploadLabelServlet extends HttpServlet {

	/**
	 * Serialization version identifier for this servlet class.
	 */
	private static final long serialVersionUID = -4129735769163441128L;
	
	/**
	 * Logger used by this servlet, obtained via
	 * {@code LogFactory.getDebugLog("CONTENTTAG")}.
	 */
	private static final DebugLog log = LogFactory.getDebugLog("CONTENTTAG");

	/**
	 * Constructor of the object.
	 */
	public UploadLabelServlet() {
		super();
	}
	
	/**
     * Servlet configuration object; assigned in {@code init} and passed to
     * {@code SmartUpload.initialize} in {@code service}.
     */
    private ServletConfig config;

	/**
	 * Destruction of the servlet. <br>
	 * Only delegates to the superclass implementation; this class contains
	 * no cleanup code of its own.
	 */
	public void destroy() {
		super.destroy(); // superclass teardown only
		// No servlet-specific cleanup is implemented here.
	}

	/**
	 * Initialization of the servlet. <br>
	 * Passes the configuration to the superclass first, so that the inherited
	 * {@code getServletConfig()}, {@code getServletContext()} and {@code log()}
	 * methods work, then keeps a local reference for use by {@code service}.
	 *
	 * @param configs servlet configuration supplied by the container
	 * @throws ServletException if an error occurs
	 */
	public void init(ServletConfig configs) throws ServletException {
		// Overriding init(ServletConfig) without this call leaves the
		// superclass without a stored ServletConfig.
		super.init(configs);
		this.config = configs;
	}

	/**
	 * Handles the upload of a {@code .txt} label file and imports the labels it lists.
	 * <p>
	 * Flow: check the admin session, receive the upload (3 MB per-file limit), validate
	 * the {@code fatherNodeId} parameter and the uploaded file (present, non-empty,
	 * {@code .txt} suffix, detected as UTF-8 by {@code getCharset}), save it to a temporary
	 * directory, strip the BOM, import the labels via {@code readFile}, and forward to the
	 * success or error JSP with {@code message} and {@code backUrl} request attributes.
	 * <p>
	 * Behavioural fixes compared with the previous revision:
	 * <ul>
	 * <li>processing stops after redirecting to the login page (previously it fell through
	 *     and dereferenced a null session);</li>
	 * <li>a session without admin information is also redirected to the login page;</li>
	 * <li>the temporary file is removed in a {@code finally} block, so it is also cleaned up
	 *     when charset detection or BOM stripping throws;</li>
	 * <li>an over-long numeric {@code maxLabelNum} setting falls back to 10000 instead of
	 *     raising {@code NumberFormatException}.</li>
	 * </ul>
	 * Only the first uploaded file is processed, as before: every path of the former loop
	 * body returned during its first iteration.
	 *
	 * @param request HTTP request object
	 * @param response HTTP response object
	 * @throws ServletException if forwarding or upload initialisation fails
	 * @throws IOException on redirect/forward I/O failure, or when charset detection or
	 *         BOM stripping of the saved file fails
	 */
	protected void service(HttpServletRequest request, HttpServletResponse response)
			throws ServletException, IOException {

		request.setCharacterEncoding("UTF-8");
		response.setCharacterEncoding("UTF-8");

		// Upload manager
		SmartUpload labelUpload = new SmartUpload();
		labelUpload.initialize(config, request, response);

		// Guard against an expired login: no session, or a session without admin info.
		HttpSession session = request.getSession(false);
		AdminInfo admin = null;
		if (null != session)
		{
			admin = (AdminInfo) session.getAttribute(Constant.SESSION_KEY_ADMIN_INFO);
		}
		if (null == admin)
		{
			response.sendRedirect(request.getContextPath()+"/admin/adminlogin.action");
			return;
		}

		// Local temporary directory for uploaded files
		String capLocalPath = Path.getWebRootPath() + "txtLabelNameFileTemp";
		FileHelper.createDir(capLocalPath);

		String operator = admin.getOperator();
		String fatherNodeId = "";
		try {
			// Per-file size limit: 3 MB
			labelUpload.setMaxFileSize((1024*1024)*3L);
			labelUpload.upload();

			// Parent node id submitted with the form
			fatherNodeId = labelUpload.getRequest().getParameter("fatherNodeId");

			if (null == fatherNodeId || "".equals(fatherNodeId))
			{
				forwardError(request, response, "父節點為空!:"+fatherNodeId, tagListUrl(fatherNodeId));
				return;
			}
		}
		catch (SmartUploadException upe)
		{
			forwardError(request, response, "檔案上傳出現異常!:"+upe.getMessage(), TAG_LIST_ACTION);
			return;
		}
		catch (IOException ioe)
		{
			forwardError(request, response, "檔案讀寫錯誤!:"+ioe.getMessage(), TAG_LIST_ACTION);
			return;
		}
		catch (RuntimeException e)
		{
			// Any runtime failure of the upload is reported as "file too large", as before.
			forwardError(request, response, "檔案太大!檔案不能超過3M :"+e.getMessage(), TAG_LIST_ACTION);
			return;
		}
		catch (Exception e)
		{
			forwardError(request, response, "檔案上傳錯誤!:"+e.getMessage(), TAG_LIST_ACTION);
			return;
		}

		String backUrl = tagListUrl(fatherNodeId);

		if (labelUpload.getFiles().getCount() <= 0)
		{
			forwardError(request, response, "沒有檔案!請先選擇需要上傳的txt文字檔案。", backUrl);
			return;
		}

		com.jspsmart.upload.File labelNameFile = labelUpload.getFiles().getFile(0);

		// File not found or empty
		if (labelNameFile.isMissing() || labelNameFile.getSize() <= 0)
		{
			forwardError(request, response, "沒有檔案!或者上傳的檔案中沒有資料!", backUrl);
			return;
		}

		// Suffix must be .txt (case-insensitive)
		String fileName = labelNameFile.getFileName().toUpperCase(Locale.getDefault());
		if (!fileName.endsWith(".TXT"))
		{
			forwardError(request, response, "檔案字尾名不對!上傳的檔案必須是.txt的文字檔案!", backUrl);
			return;
		}

		String filePath = capLocalPath+ "/" +getNewFileName(operator,labelNameFile.getFileName());
		try {
			importSavedFile(request, response, labelNameFile, filePath, fatherNodeId);
		}
		finally {
			// The temporary copy is never needed after the request, whatever the outcome.
			delete(filePath);
		}
	}

	/** Request attribute that carries the user-facing message. */
	private static final String MESSAGE_ATTR = "message";

	/** Request attribute that carries the "back" link target. */
	private static final String BACK_URL_ATTR = "backUrl";

	/** Action used as the "back" link target. */
	private static final String TAG_LIST_ACTION = "admin/qeuryTagListPage.action";

	/** JSP shown when the upload fails. */
	private static final String UPLOAD_ERROR_JSP = "/WEB-INF/jsp/ums/admin/contenttag/uploadError.jsp";

	/** JSP shown when the upload succeeds. */
	private static final String UPLOAD_SUCCESS_JSP = "/WEB-INF/jsp/ums/admin/contenttag/uploadSuccess.jsp";

	/** Builds the "back" link that returns to the tag list of the given parent node. */
	private static String tagListUrl(String fatherNodeId)
	{
		return TAG_LIST_ACTION + "?search=no&tagid=" + fatherNodeId;
	}

	/** Sets the message/backUrl attributes and forwards to the given JSP. */
	private void forwardTo(HttpServletRequest request, HttpServletResponse response,
			String jsp, String message, String backUrl) throws ServletException, IOException
	{
		request.setAttribute(MESSAGE_ATTR, message);
		request.setAttribute(BACK_URL_ATTR, backUrl);
		request.getRequestDispatcher(jsp).forward(request, response);
	}

	/** Forwards to the upload-error page with the given message and back link. */
	private void forwardError(HttpServletRequest request, HttpServletResponse response,
			String message, String backUrl) throws ServletException, IOException
	{
		forwardTo(request, response, UPLOAD_ERROR_JSP, message, backUrl);
	}

	/**
	 * Returns the configured maximum label count, or "10000" when the setting is missing,
	 * not a non-negative integer, larger than 10000, or too large to parse as an int.
	 */
	private static String normalizeMaxLabelNum(String configured)
	{
		if (null == configured || !java.util.regex.Pattern.matches("^\\d+$", configured))
		{
			return "10000";
		}
		try {
			if (Integer.parseInt(configured) > 10000)
			{
				return "10000";
			}
		}
		catch (NumberFormatException tooLarge)
		{
			// All digits but beyond the int range: treat like any other oversize value.
			return "10000";
		}
		return configured;
	}

	/**
	 * Saves the uploaded file to {@code filePath}, validates its encoding, strips the BOM,
	 * imports its labels and forwards to the matching result page. The caller is
	 * responsible for deleting {@code filePath} afterwards.
	 */
	private void importSavedFile(HttpServletRequest request, HttpServletResponse response,
			com.jspsmart.upload.File labelNameFile, String filePath, String fatherNodeId)
			throws ServletException, IOException
	{
		String backUrl = tagListUrl(fatherNodeId);

		// Store the upload on the local disk
		try {
			labelNameFile.saveAs(filePath);
		}
		catch (SmartUploadException upe)
		{
			forwardError(request, response, "檔案上傳到本地出現異常!:"+upe.getMessage(), backUrl);
			return;
		}
		catch (Exception e)
		{
			forwardError(request, response, "檔案上傳到本地錯誤!:"+e.getMessage(), backUrl);
			return;
		}

		// Encoding is detected from the BOM; a file without a BOM is not treated as UTF-8.
		String codeFormat = getCharset(filePath);
		if (!"UTF-8".equals(codeFormat))
		{
			forwardError(request, response, "上傳的TXT文字檔案的編碼格式必須是UTF-8 !", backUrl);
			return;
		}

		// Configured maximum number of labels per upload (capped at 10000)
		String maxLabelNum = normalizeMaxLabelNum(SystemConfig.getCmsValue("maxLabelNum"));

		// Remove the BOM so it does not end up in the first label name
		trimBom(filePath);

		UploadResponseEvt uploadEvt = new UploadResponseEvt();
		try {
			uploadEvt = readFile(filePath,fatherNodeId,request,uploadEvt,maxLabelNum);
		}
		catch (FileNotFoundException fe)
		{
			forwardError(request, response, "讀取檔案內容錯誤,檔案沒找到!:"+fe.getMessage(), backUrl);
			return;
		}
		catch (IOException IOe)
		{
			forwardError(request, response, "讀取檔案內容錯誤!:"+IOe.getMessage(), backUrl);
			return;
		}

		String status = uploadEvt.getStatus();
		if ("0".equals(status))
		{
			// Success
			forwardTo(request, response, UPLOAD_SUCCESS_JSP,
					"上傳成功!總條數:"+uploadEvt.getNumber()[0]+" 入庫成功條數:"+uploadEvt.getNumber()[1]+" 重複的條數:"+(uploadEvt.getNumber()[0] - uploadEvt.getNumber()[1]),
					backUrl);
		}
		else if ("1".equals(status))
		{
			// Too many labels
			forwardError(request, response,
					"一次最多允許上傳"+maxLabelNum+"個標籤!請修改後重新上傳。現總行數:"+uploadEvt.getRowsErr(),
					backUrl);
		}
		else if ("2".equals(status))
		{
			// Forbidden characters
			forwardError(request, response,
					"檔案中有特殊字元!請修改後重新上傳。行數:"+uploadEvt.getRowsErr()+" 標籤:"+uploadEvt.getTagNameErr(),
					backUrl);
		}
		else
		{
			// Label name longer than 100 characters
			forwardError(request, response,
					"檔案中有標籤名稱大於100字元(中文、字母、數字,都算一個字元)!請修改後重新上傳。行數:"+uploadEvt.getRowsErr()+" 標籤:"+uploadEvt.getTagNameErr(),
					backUrl);
		}
	}

	/**
	 * Validates the label file and, if it is valid, inserts its labels under the given
	 * parent node.
	 * <p>
	 * Pass 1 reads every line (UTF-8) and stops at the first non-blank line that contains
	 * a forbidden character (status "2") or whose trimmed length exceeds 100 (status "3").
	 * If the number of non-blank lines exceeds {@code maxLabelNum} the status is "1".
	 * Pass 2 inserts every non-blank line that does not already exist under the parent
	 * node and stores {@code {nonBlankLines, insertAttempts}} via {@code setNumber}
	 * (status "0").
	 * <p>
	 * Both readers are closed in {@code finally} blocks so the file handle is released
	 * even when reading or a service call fails.
	 *
	 * @param filePath full path and name of the file
	 * @param fatherNodeId parent node id
	 * @param request HTTP request object (no longer read by this method; kept so the
	 *        signature stays unchanged)
	 * @param uploadEvt result bean that is filled in and returned
	 * @param maxLabelNum maximum number of labels allowed, as a decimal string
	 * @return the {@code uploadEvt} argument with status and counters/error details set
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws IOException if reading the file fails
	 */
	private UploadResponseEvt readFile(String filePath,String fatherNodeId,HttpServletRequest request,UploadResponseEvt uploadEvt,String maxLabelNum) throws FileNotFoundException,IOException {

		// Status: 0 = success, 1 = too many lines, 2 = forbidden character,
		// 3 = label name longer than 100 characters
		String status = "0";

		// ---- Pass 1: format validation ----
		int count = 0;    // non-blank lines
		int allCount = 0; // all lines, blank ones included (used as the reported line number)
		BufferedReader checker = new BufferedReader(new InputStreamReader(new FileInputStream(filePath),"UTF-8"));
		try {
			String current;
			while (null != (current = checker.readLine()))
			{
				allCount++;
				if ("".equals(current.trim()))
				{
					continue;
				}
				count++;

				if (InvalidChar(current))
				{
					status = "2";
				}
				else if (current.trim().length() > 100)
				{
					status = "3";
				}

				if (!"0".equals(status))
				{
					uploadEvt.setRowsErr(String.valueOf(allCount));
					uploadEvt.setTagNameErr(current);
					uploadEvt.setStatus(status);
					return uploadEvt;
				}
			}
		}
		finally {
			checker.close();
		}

		// More labels than allowed
		if (count > Integer.valueOf(maxLabelNum))
		{
			uploadEvt.setRowsErr(String.valueOf(allCount));
			status = "1";
			uploadEvt.setStatus(status);
			return uploadEvt;
		}

		// ---- Pass 2: insert the labels ----
		ContentTagService service = (ContentTagService) ContextHolder.getService("contentTagService");
		int a = 0; // non-blank lines processed
		int b = 0; // lines for which an insert was issued
		BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filePath),"UTF-8"));
		try {
			String line;
			while (null != (line = br.readLine()))
			{
				String tagName = line.trim();
				if ("".equals(tagName))
				{
					continue;
				}
				// Insert only when the parent node does not already have this label
				if (!service.queryTagByName(fatherNodeId, tagName))
				{
					String labelId = service.getRESTagId();
					// NOTE(review): the boolean result of batchInsertTag is not checked, so b
					// counts attempted inserts, as in the original code — confirm intent.
					service.batchInsertTag(fatherNodeId, tagName, labelId);
					b++;
				}
				a++;
			}
		}
		finally {
			br.close();
		}

		uploadEvt.setNumber(new int[]{a,b});
		uploadEvt.setStatus(status);
		return uploadEvt;
	}

    /**
     * Guesses the encoding of a file from its byte-order mark.
     * <p>
     * Returns "UTF-8" for {@code EF BB BF}, "Unicode" for {@code FF FE},
     * "UTF-16BE" for {@code FE FF}, and "GBK" for everything else (including
     * files without a BOM and files shorter than the BOM).
     * <p>
     * The stream is always closed, the full three-byte UTF-8 BOM is required,
     * and end-of-file is compared explicitly instead of being folded into the
     * arithmetic (which used to misreport a one-byte file {@code FF} as UTF-16BE).
     *
     * @param fileName path of the file to inspect
     * @return one of "UTF-8", "Unicode", "UTF-16BE", "GBK"
     * @throws IOException if the file cannot be opened or read
     */
    private String getCharset(String fileName) throws IOException{

        int first;
        int second;
        int third;
        BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName));
        try {
            // read() yields 0..255, or -1 at end of file; -1 never equals a BOM byte.
            first = bin.read();
            second = bin.read();
            third = bin.read();
        } finally {
            bin.close();
        }

        if (first == 0xEF && second == 0xBB && third == 0xBF) {
            return "UTF-8";
        }
        if (first == 0xFF && second == 0xFE) {
            return "Unicode";
        }
        if (first == 0xFE && second == 0xFF) {
            return "UTF-16BE";
        }
        return "GBK";
    }

    /**
     * Wraps a stream so that a leading UTF-8 byte-order mark ({@code EF BB BF}),
     * if present, is skipped; otherwise the stream content is left untouched.
     * <p>
     * Up to three leading bytes are read. If they are exactly the BOM they are
     * discarded; in every other case the bytes that were read are pushed back in
     * their original order. The pushback buffer is sized for three bytes, nothing
     * is pushed back at end of stream, and data that merely begins with
     * {@code EF} or {@code EF BB} (valid UTF-8 lead bytes) is no longer rejected.
     *
     * @param in source stream, positioned at its start
     * @return a stream positioned after the BOM, or at the start when there is none
     * @throws java.io.IOException if reading from {@code in} fails
     */
    public static InputStream getInputStream(InputStream in) throws IOException {

        final int bomLength = 3;
        PushbackInputStream testin = new PushbackInputStream(in, bomLength);

        // Read up to three bytes; a single read may return fewer than requested.
        byte[] head = new byte[bomLength];
        int filled = 0;
        while (filled < bomLength) {
            int got = testin.read(head, filled, bomLength - filled);
            if (got < 0) {
                break;
            }
            filled += got;
        }

        boolean hasBom = filled == bomLength
                && (head[0] & 0xFF) == 0xEF
                && (head[1] & 0xFF) == 0xBB
                && (head[2] & 0xFF) == 0xBF;

        if (!hasBom && filled > 0) {
            // Not a BOM: restore exactly what was consumed.
            testin.unread(head, 0, filled);
        }
        return testin;

    }

    /**
     * Removes a leading UTF-8 byte-order mark ({@code EF BB BF}) from a file in place.
     * <p>
     * The whole file is read into memory; if it starts with the BOM, the file is
     * rewritten without those three bytes, otherwise it is left untouched.
     * <p>
     * The file is read until {@code read} reports end of stream (the previous loop
     * relied on {@code available()} and could pass a negative length to
     * {@code write}), the BOM check is done on the buffered bytes, and both streams
     * are closed even when an I/O error occurs.
     *
     * @param fileName path of the file to process
     * @throws java.io.IOException if the file cannot be read or rewritten
     */
    public static void trimBom(String fileName) throws IOException {

        // Buffer the complete file content first.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        FileInputStream fin = new FileInputStream(fileName);
        try {
            byte[] chunk = new byte[4096];
            int len;
            while ((len = fin.read(chunk)) != -1) {
                bos.write(chunk, 0, len);
            }
        } finally {
            fin.close();
        }

        byte[] content = bos.toByteArray();
        boolean hasBom = content.length >= 3
                && (content[0] & 0xFF) == 0xEF
                && (content[1] & 0xFF) == 0xBB
                && (content[2] & 0xFF) == 0xBF;
        if (!hasBom) {
            // Nothing to strip; rewriting would produce identical bytes.
            return;
        }

        // Write everything after the BOM back to the same file.
        FileOutputStream out = new FileOutputStream(fileName);
        try {
            out.write(content, 3, content.length - 3);
        } finally {
            out.close();
        }
    }
    
	/**
	 * Builds a temporary file name from the given code, the current time
	 * ({@code yyyyMMddHHmmss}) and the extension of the uploaded file.
	 * <p>
	 * A file name without a dot now yields an empty extension instead of
	 * failing with {@code StringIndexOutOfBoundsException}.
	 *
	 * @param code prefix of the generated name (the caller passes the operator)
	 * @param oldFileName original name of the uploaded file
	 * @return {@code code + timestamp + extension}, the extension including its dot
	 */
	private String getNewFileName(String code, String oldFileName)

	{
		// Method entry log
		log.info(this.getClass().getName() + "method getNewFileName() start");

		// Extension including the leading dot; empty when the name has none
		int dotIndex = oldFileName.lastIndexOf(".");
		String ext = dotIndex >= 0 ? oldFileName.substring(dotIndex) : "";

		DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss");
		String tempfileName = code + df.format(new Date()) + ext;

		// Method exit log
		log.info(this.getClass().getName() + "method getNewFileName() end");

		return tempfileName;
	}

    /**
     * Deletes the uploaded file if it exists.
     * <p>
     * A failed deletion is now logged instead of being silently ignored; no
     * exception is thrown, so callers keep their best-effort semantics.
     *
     * @param file path of the file to delete
     */
    private void delete(String file)
    {
        // Method entry log
        log.info(this.getClass().getName() + "method delete() start");

        File f = new File(file);

        if (f.exists() && !f.delete())
        {
            log.info(this.getClass().getName() + "method delete() could not delete file: " + file);
        }

        // Method exit log
        log.info(this.getClass().getName() + "method delete() end");

    }
	
    /**
     * Tells whether a label line must be rejected.
     * <p>
     * A line is rejected when it is blank after trimming, or when it contains
     * any of the characters {@code <}, {@code >} or {@code |}.
     *
     * @param keyWord the text to check; must not be {@code null}
     * @return {@code true} if the text is blank or contains a forbidden character,
     *         {@code false} otherwise
     */
	public static boolean InvalidChar(String keyWord) {
		// Blank input counts as invalid.
		if (keyWord.trim().length() == 0)
		{
			return true;
		}
		return keyWord.indexOf('<') >= 0
				|| keyWord.indexOf('>') >= 0
				|| keyWord.indexOf('|') >= 0;
	}
	
}