1. 程式人生 > >使用POI進行Excel匯入時解決的一些問題

使用POI進行Excel匯入時解決的一些問題

最近在用POI做Excel匯入,用了公司的共通,本以為應該很順利,但是在實際使用過程中遇到了不少問題。這裡做一下總結。

一.大檔案匯入

POI在匯入資料的資料量很大的時候會有問題,Excel壓根就讀不進來。

網站上查了一下,需要將Excel轉換成CSV後匯入。

將匯入方法稍作修改後的程式碼如下:

/*			ImportExcel ei = new ImportExcel(file, 0, 0);
			List<FpPkhImport> list = ei.getDataList(FpPkhImport.class);
			
			// 如果資料檔案格式有問題,比如說第一行欄位定義,共有多少欄位,每個欄位的名稱是否和預定的一致,則彈出提示框說明資料來源檔案格式不正確
			HashMap<Integer, String> headerList = getExcelHeader();
			for (int i=0;i<27;i++){
				if (!headerList.get(i).toString().equals(ei.getRow(0).getCell(i).toString())){
					failureMsg.append("<br/>第" + (i+1) + "列標題錯誤,正確:" + headerList.get(i).toString() + ", Excel中:" + ei.getRow(0).getCell(i).toString());
				}
			}
			if (!StringUtils.isBlank(failureMsg)){
				failureMsg.insert(0, "資料來源檔案格式錯誤:");
				restfulResult.setResult("Error");
				restfulResult.setMessage(failureMsg.toString());
		    	AjaxExchange.printDataJason(response, restfulResult);
		    	return;
			}*/
	    	
	    	// Controller fragment: read the uploaded workbook, validate its header row,
	    	// then map every data row onto an FpPkhImport bean.
	    	// Read the XLSX: sheet "Sheet1", 27 columns per row, from the saved upload path.
	    	List<String[]> excelList = XLSXCovertCSVReader.readerExcel(request.getSession().getServletContext().getRealPath("/") + savePath, "Sheet1", 27);  
	    	
	    	// Check the header: compare each of the 27 cells of the first row with the
	    	// expected title and collect one message per mismatch.
	    	// NOTE(review): the reader wraps string cells in double quotes and the header
	    	// cells are compared here without stripping them (data cells below do strip
	    	// them) — confirm getExcelHeader() returns values in the same quoted form.
	    	// NOTE(review): failureMsg is not inspected in this fragment after the loop —
	    	// presumably the caller aborts when it is non-blank; verify.
	    	HashMap<Integer, String> headerList = getExcelHeader();
			for (int i=0;i<27;i++){
				if (!headerList.get(i).toString().equals(excelList.get(0)[i])){
					failureMsg.append("<br/>第" + (i+1) + "列標題錯誤,正確:" + headerList.get(i).toString() + ", Excel中:" + excelList.get(0)[i]);
				}
			}
			
			// Build the data list: row 0 is the header, so start from row 1.
	    	List<FpPkhImport> list = new ArrayList<FpPkhImport>();
	    	for(int i=1;i<excelList.size();i++){
	    		FpPkhImport pkhImport = new FpPkhImport();
	    		
	    		// Every double-quote character is removed from the cell text before it is stored.
	    		pkhImport.setListNo(excelList.get(i)[0].replace("\"", ""));
	    		pkhImport.setCity(excelList.get(i)[1].replace("\"", ""));
	    		...
	    		pkhImport.setPhone(excelList.get(i)[26].replace("\"", ""));
	    		
	    		list.add(pkhImport);
	    	}

XLSXCovertCSVReader:

/**
 * Copyright &copy; 2012-2016 <a href="https://github.com/thinkgem/jeesite">JeeSite</a> All rights reserved.
 */
package com.thinkgem.jeesite.common.utils.excel;

import java.io.IOException;  
import java.io.InputStream;  
import java.io.PrintStream;  
import java.text.SimpleDateFormat;  
import java.util.ArrayList;  
import java.util.Date;  
import java.util.List;  
  
import javax.xml.parsers.ParserConfigurationException;  
import javax.xml.parsers.SAXParser;  
import javax.xml.parsers.SAXParserFactory;  
  
import org.apache.poi.hssf.usermodel.HSSFDateUtil;  
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;  
import org.apache.poi.openxml4j.opc.OPCPackage;  
import org.apache.poi.openxml4j.opc.PackageAccess;  
import org.apache.poi.ss.usermodel.BuiltinFormats;  
import org.apache.poi.ss.usermodel.DataFormatter;  
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;  
import org.apache.poi.xssf.eventusermodel.XSSFReader;  
import org.apache.poi.xssf.model.StylesTable;  
import org.apache.poi.xssf.usermodel.XSSFCellStyle;  
import org.apache.poi.xssf.usermodel.XSSFRichTextString;  
import org.xml.sax.Attributes;  
import org.xml.sax.InputSource;  
import org.xml.sax.SAXException;  
import org.xml.sax.XMLReader;  
import org.xml.sax.helpers.DefaultHandler;  

/**
 * Reads one named sheet of an XLSX workbook through POI's SAX event API
 * ({@link XSSFReader}) and returns its rows as string arrays, instead of
 * building the full workbook object model.
 * <p>
 * String-like cells (shared strings, inline strings, formulas, errors) are
 * returned wrapped in double quotes; numeric cells are returned as formatted
 * text. Callers are expected to strip the quotes themselves.
 *
 * @author Sun
 * @version 2017-12-15
 */
public class XLSXCovertCSVReader {

    /**
     * The type of the data value is indicated by an attribute on the cell. The
     * value is usually in a "v" element within the cell.
     */
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    /**
     * SAX handler that turns the XML of one worksheet into a list of rows.
     * See the XSSF SAX API how-to:
     * http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
     * <p/>
     * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
     * http://www.ecma-international.org/publications/standards/Ecma-376.htm
     * <p/>
     * A web-friendly version is http://openiso.org/Ecma/376/Part4
     */
    class MyXSSFSheetHandler extends DefaultHandler {

        /** Table with styles, used to look up a numeric cell's format. */
        private StylesTable stylesTable;

        /** Table with unique strings. */
        private ReadOnlySharedStringsTable sharedStringsTable;

        /** Destination for diagnostic messages. */
        private final PrintStream output;

        /** Number of columns kept per row, starting with the leftmost. */
        private final int minColumnCount;

        // Set when a "v" or "inlineStr" start element is seen.
        private boolean vIsOpen;

        // Set when the cell start element is seen;
        // used when the value close element is seen.
        private xssfDataType nextDataType;

        // Used to format numeric cell values.
        private short formatIndex;
        private String formatString;
        private final DataFormatter formatter;

        // Zero-based column of the cell being parsed; -1 before the first
        // cell of a row.
        private int thisColumn = -1;

        // Gathers characters as they are seen.
        private final StringBuilder value;
        // Buffer for the row currently being parsed; cleared after every row.
        private final String[] record;
        private List<String[]> rows = new ArrayList<String[]>();
        // True when a cell of the current row could not be converted.
        private boolean isCellNull = false;

        /**
         * Accepts objects needed while parsing.
         *
         * @param styles
         *            Table of styles
         * @param strings
         *            Table of shared strings
         * @param cols
         *            Number of columns to keep per row; cells to the right of
         *            this are ignored
         * @param target
         *            Sink for diagnostic output
         */
        public MyXSSFSheetHandler(StylesTable styles,
                ReadOnlySharedStringsTable strings, int cols, PrintStream target) {
            this.stylesTable = styles;
            this.sharedStringsTable = strings;
            this.minColumnCount = cols;
            this.output = target;
            this.value = new StringBuilder();
            this.nextDataType = xssfDataType.NUMBER;
            this.formatter = new DataFormatter();
            this.record = new String[this.minColumnCount];
        }

        /**
         * Starts collecting text for value elements and, for a cell element,
         * records its column, data type and number format.
         */
        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException {

            if ("inlineStr".equals(name) || "v".equals(name)) {
                vIsOpen = true;
                // Clear contents cache
                value.setLength(0);
            } else if ("c".equals(name)) {
                // c => cell
                thisColumn = resolveColumn(attributes.getValue("r"));

                // Set up defaults.
                this.nextDataType = xssfDataType.NUMBER;
                this.formatIndex = -1;
                this.formatString = null;
                String cellType = attributes.getValue("t");
                String cellStyleStr = attributes.getValue("s");
                if ("b".equals(cellType)) {
                    nextDataType = xssfDataType.BOOL;
                } else if ("e".equals(cellType)) {
                    nextDataType = xssfDataType.ERROR;
                } else if ("inlineStr".equals(cellType)) {
                    nextDataType = xssfDataType.INLINESTR;
                } else if ("s".equals(cellType)) {
                    nextDataType = xssfDataType.SSTINDEX;
                } else if ("str".equals(cellType)) {
                    nextDataType = xssfDataType.FORMULA;
                } else if (cellStyleStr != null) {
                    // It's a number, but almost certainly one
                    // with a special style or format
                    int styleIndex = Integer.parseInt(cellStyleStr);
                    XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                    this.formatIndex = style.getDataFormat();
                    this.formatString = style.getDataFormatString();
                    if (this.formatString == null) {
                        this.formatString = BuiltinFormats
                                .getBuiltinFormat(this.formatIndex);
                    }
                }
            }
        }

        /**
         * On the end of a value element stores the converted cell text in the
         * row buffer; on the end of a row element appends the buffered row to
         * the result (when it qualifies) and resets the per-row state.
         */
        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {

            if ("v".equals(name)) {
                // v => contents of a cell.
                // Do now, as characters() may be called more than once.
                String thisStr = formatCellValue();

                // A null result means the value could not be converted; the
                // whole row is then rejected. (The original code additionally
                // compared "" against the boolean flag itself, which can
                // never be true, so empty text never rejected a row.)
                if (thisStr == null) {
                    isCellNull = true;
                }
                // Cells outside the requested column range are ignored
                // instead of overflowing the row buffer.
                if (thisColumn >= 0 && thisColumn < record.length) {
                    record[thisColumn] = thisStr;
                }

            } else if ("row".equals(name)) {
                if (minColumnCount > 0 && !isCellNull && hasLeadingValues()) {
                    rows.add(record.clone());
                }
                // Always reset, even when the row was skipped; otherwise
                // stale cells leak into the next row and a single bad cell
                // would suppress every following row.
                resetRowState();
            }
        }

        public List<String[]> getRows() {
            return rows;
        }

        public void setRows(List<String[]> rows) {
            this.rows = rows;
        }

        /**
         * Captures characters only if a suitable element is open. Originally
         * was just "v"; extended for inlineStr also.
         */
        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            if (vIsOpen) {
                value.append(ch, start, length);
            }
        }

        /**
         * Derives the zero-based column from a cell reference such as "C12".
         * When the reference is missing or has no column letters, the cell is
         * taken to follow the previous cell of the same row.
         *
         * @param ref value of the cell's "r" attribute, may be {@code null}
         * @return zero-based column index
         */
        private int resolveColumn(String ref) {
            if (ref == null) {
                return thisColumn + 1;
            }
            int firstDigit = 0;
            while (firstDigit < ref.length()
                    && !Character.isDigit(ref.charAt(firstDigit))) {
                firstDigit++;
            }
            if (firstDigit == 0) {
                return thisColumn + 1;
            }
            return nameToColumn(ref.substring(0, firstDigit));
        }

        /**
         * Converts the text collected for the current cell according to the
         * data type announced on the cell element.
         *
         * @return the cell text, or {@code null} when a shared-string index
         *         could not be parsed
         */
        private String formatCellValue() {
            String thisStr = null;
            switch (nextDataType) {

            case BOOL:
                char first = value.charAt(0);
                thisStr = first == '0' ? "FALSE" : "TRUE";
                break;

            case ERROR:
                thisStr = "\"ERROR:" + value.toString() + '"';
                break;

            case FORMULA:
                // A formula could result in a string value,
                // so always add double-quote characters.
                thisStr = '"' + value.toString() + '"';
                break;

            case INLINESTR:
                // TODO: have seen an example of this, so it's untested.
                XSSFRichTextString rtsi = new XSSFRichTextString(
                        value.toString());
                thisStr = '"' + rtsi.toString() + '"';
                break;

            case SSTINDEX:
                String sstIndex = value.toString();
                try {
                    int idx = Integer.parseInt(sstIndex);
                    XSSFRichTextString rtss = new XSSFRichTextString(
                            sharedStringsTable.getEntryAt(idx));
                    thisStr = '"' + rtss.toString() + '"';
                } catch (NumberFormatException ex) {
                    output.println("Failed to parse SST index '" + sstIndex
                            + "': " + ex.toString());
                }
                break;

            case NUMBER:
                String n = value.toString();
                // Date check.
                // NOTE(review): the second argument of isADateFormat is
                // documented as the format string, but the raw cell text is
                // passed here, so presumably only built-in date format
                // indexes take this branch. Kept as-is because changing it
                // would alter the text produced for custom date formats —
                // confirm the intended output before correcting.
                if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) {
                    Double d = Double.parseDouble(n);
                    Date date = HSSFDateUtil.getJavaDate(d);
                    thisStr = formateDateToString(date);
                } else if (this.formatString != null) {
                    thisStr = formatter.formatRawCellContents(
                            Double.parseDouble(n), this.formatIndex,
                            this.formatString);
                } else {
                    thisStr = n;
                }
                break;

            default:
                thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                break;
            }
            return thisStr;
        }

        /**
         * A row counts as non-empty when its first two columns (or its only
         * column, when just one was requested) both hold a value.
         */
        private boolean hasLeadingValues() {
            int required = Math.min(2, record.length);
            for (int i = 0; i < required; i++) {
                if (record[i] == null) {
                    return false;
                }
            }
            return true;
        }

        /** Clears everything that must not survive from one row to the next. */
        private void resetRowState() {
            for (int i = 0; i < record.length; i++) {
                record[i] = null;
            }
            isCellNull = false;
            thisColumn = -1;
        }

        /**
         * Converts an Excel column name like "C" to a zero-based index.
         *
         * @param name
         * @return Index corresponding to the specified name
         */
        private int nameToColumn(String name) {
            int column = -1;
            for (int i = 0; i < name.length(); ++i) {
                int c = name.charAt(i);
                column = (column + 1) * 26 + c - 'A';
            }
            return column;
        }

        /** Formats a date as {@code yyyy-MM-dd HH:mm:ss}. */
        private String formateDateToString(Date date) {
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            return sdf.format(date);
        }

    }

    // /////////////////////////////////////

    private OPCPackage xlsxPackage;
    private int minColumns;
    private PrintStream output;
    private String sheetName;

    /**
     * Creates a new reader for one sheet of an XLSX package.
     *
     * @param pkg
     *            The XLSX package to process
     * @param output
     *            The PrintStream that receives diagnostic messages
     * @param sheetName
     *            Name of the sheet to read
     * @param minColumns
     *            Number of columns to read per row, starting with the
     *            leftmost; a positive value is expected
     */
    public XLSXCovertCSVReader(OPCPackage pkg, PrintStream output,
            String sheetName, int minColumns) {
        this.xlsxPackage = pkg;
        this.output = output;
        this.minColumns = minColumns;
        this.sheetName = sheetName;
    }

    /**
     * Parses the content of one sheet using the specified styles and
     * shared-strings tables. The stream is not closed by this method.
     *
     * @param styles
     *            Table of styles
     * @param strings
     *            Table of shared strings
     * @param sheetInputStream
     *            XML of the sheet
     * @return the rows collected from the sheet
     */
    public List<String[]> processSheet(StylesTable styles,
            ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
            throws IOException, ParserConfigurationException, SAXException {

        InputSource sheetSource = new InputSource(sheetInputStream);
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader sheetParser = saxParser.getXMLReader();
        MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings,
                this.minColumns, this.output);
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
        return handler.getRows();
    }

    /**
     * Walks the sheets of the package and parses the one whose name equals
     * the configured sheet name. Every sheet stream obtained from the
     * iterator is closed, whether or not it was parsed.
     *
     * @return the rows of the matching sheet, or {@code null} when no sheet
     *         has the configured name
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    public List<String[]> process() throws IOException, OpenXML4JException,
            ParserConfigurationException, SAXException {

        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(
                this.xlsxPackage);
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
        List<String[]> list = null;
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader
                .getSheetsData();
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            try {
                if (this.sheetName.equals(iter.getSheetName())) {
                    list = processSheet(styles, strings, stream);
                }
            } finally {
                stream.close();
            }
        }
        return list;
    }

    /**
     * Reads one sheet of an XLSX file.
     *
     * @param path
     *            file path
     * @param sheetName
     *            sheet name
     * @param minColumns
     *            number of columns per row
     * @return the rows of the sheet, or {@code null} when the sheet does not
     *         exist
     * @throws SAXException
     * @throws ParserConfigurationException
     * @throws OpenXML4JException
     * @throws IOException
     */
    public static List<String[]> readerExcel(String path, String sheetName,
            int minColumns) throws IOException, OpenXML4JException,
            ParserConfigurationException, SAXException {
        OPCPackage p = OPCPackage.open(path, PackageAccess.READ);
        try {
            XLSXCovertCSVReader xlsx2csv = new XLSXCovertCSVReader(p,
                    System.out, sheetName, minColumns);
            return xlsx2csv.process();
        } finally {
            // Release the file handle even when parsing fails.
            p.close();
        }
    }

}

二.資料匯入速度

資料量大,做的Check多,速度當然慢,所以如何提高速度又是一個大問題。

資料插入是無法提高速度的,只能在Check的時候想辦法。

我是將與Check相關的資料直接放到HashMap裡面,key-value的檢索方式速度既快,又能減少對資料庫的訪問。

畢竟一個是在記憶體,一個在硬碟,速度不是一個數量級。

但是在伺服器上執行的時候,某一天突然遇到了GC overhead limit exceeded。

這是因為大量資料放在記憶體,導致OOM,且JVM多次進行資源回收都沒能釋放資源導致的。

解決方法:提高JVM的記憶體。

我用的是Tomcat,在tomcat/bin下的catalina.bat中,

找到rem Guess CATALINA_HOME if not defined

在下面增加:
set JAVA_OPTS=-Xms256m -Xmx2048m

不過提高記憶體總是有限的,如果仍然OOM,就只能用SQL進行校驗了,但會慢很多。