1. 程式人生 > >Java POI SAX模式 讀取大資料Excel

Java POI SAX模式 讀取大資料Excel

業務描述:
字尾為.xlsx的Excel檔案,只有一個sheet頁,且該sheet頁對應資料庫中的1張表,從A1開始有資料,第1行的資料對應表的各個欄位,從第2行開始是要匯入的資料,將該Excel匯入到資料庫中
解決方案:
因為Excel包含大量資料,如果採用POI的使用者模式(usermodel),會將整個活頁簿載入記憶體,消耗大量記憶體,容易造成記憶體溢位

java.lang.OutOfMemoryError

所以這裡採用SAX模式(事件模式)讀取,需要注意的是SAX模式讀取,大部分格式的資料都可以讀取成String型別,日期格式不在此列,需要特殊處理,處理方式詳見下方code
code:

import java.io.InputStream;
import
java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.regex.Pattern; import org.apache.log4j.Logger; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import
org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import
org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; /** * <p>ClassName: ExampleEventUserModel</p> * <p>Description: 事件模式</p> * <p>Author: sloth</p> * <p>Date: 2018-02-28</p> */ public class ExampleEventUserModel { /** * <p>Field stylesTable: 單元格樣式</p> */ public static StylesTable stylesTable; /** * <p>Description: 處理單個sheet(本案例呼叫此方法)</p> * @param filename 檔名帶路徑 * @throws Exception 異常 */ public void processOneSheet(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); stylesTable = r.getStylesTable(); XMLReader parser = fetchSheetParser(sst); // To look up the Sheet Name / Sheet Order / rID, // you need to process the core Workbook stream. // Normally it's of the form rId# or rSheet# InputStream sheet = r.getSheet("rId1"); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } /** * <p>Description: 處理所有sheet</p> * @param filename 檔名帶路徑 * @throws Exception 異常 */ public void processAllSheets(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { System.out.println("Processing new sheet:\n"); InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); System.out.println(""); } } /** * <p>Description: 獲取XML訪問物件</p> * @param sst 共享字串表物件 * @return XMLReader XML訪問物件 * @throws SAXException SAX異常 */ public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser"); ContentHandler handler = new SheetHandler(sst); parser.setContentHandler(handler); return parser; } /** * <p>ClassName: 
SheetHandler</p> * <p>Description: sheet處理類</p> * <p>Author: sloth</p> * <p>Date: 2018-02-26</p> */ private static class SheetHandler extends DefaultHandler { /** * <p>Field logger: 日誌</p> */ private static Logger logger = Logger.getLogger(SheetHandler.class); /** * <p>Field sst: 共享字串表物件</p> */ private SharedStringsTable sst; /** * <p>Field lastContents: 單元格內容</p> */ private String lastContents; /** * <p>Field nextIsString: 是否是字串</p> */ private boolean nextIsString; /** * <p>Field nextIsDate: 是否是日期</p> */ private boolean nextIsDate; /** * <p>Field mapList: 讀取Excel資料集(該變數可去除,因為集合中只有一個map)</p> */ private ArrayList<HashMap<String, String>> mapList; /** * <p>Field map: 鍵值對{單元格列名,單元格值}</p> */ private HashMap<String, String> map; /** * <p>Field key: 單元格座標</p> */ private String key; /** * <p>Field value: 單元格值</p> */ private String value; /** * <p>Field index: 樣式index</p> */ private int index; /** * <p>Description: 建構函式初始化</p> * @param sst 共享字串表物件 */ private SheetHandler(SharedStringsTable sst) { this.sst = sst; this.mapList = new ArrayList<HashMap<String, String>>(); this.map = new HashMap<String, String>(); } /** * <p>Title: startElement</p> * <p>Description: </p> * @param uri uri * @param localName localName * @param name XML標籤名 * @param attributes XML標籤物件 * @throws SAXException SAX異常 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) */ public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => cell if ("c".equals(name)) { // Print the cell reference this.key = attributes.getValue("r"); this.value = ""; System.out.print(attributes.getValue("r") + " - "); // Figure out if the value is an index in the SST String cellType = attributes.getValue("t"); if (cellType != null && "s".equals(cellType)) { this.nextIsString = true; } else { /** 單元格是日期格式時c標籤中s屬性的值是數字 **/ cellType = attributes.getValue("s"); System.out.println(cellType); 
this.nextIsString = false; } /** 判斷是否是日期格式 **/ if (cellType != null && Pattern.compile("^[-\\+]?[\\d]*$").matcher(cellType).matches()) { this.index = Integer.parseInt(cellType); this.nextIsDate = true; } else { this.nextIsDate = false; } } // Clear contents cache this.lastContents = ""; } /** * <p>Title: endElement</p> * <p>Description: </p> * @param uri uri * @param localName localName * @param name XML標籤名 * @throws SAXException SAX異常 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String) */ public void endElement(String uri, String localName, String name) throws SAXException { // Process the last contents as required. // Do now, as characters() may be called more than once if (this.nextIsString) { int idx = Integer.parseInt(this.lastContents); this.lastContents = new XSSFRichTextString(this.sst.getEntryAt(idx)).toString(); this.nextIsString = false; } if (this.nextIsDate && !"".equals(this.lastContents)) { XSSFCellStyle style = stylesTable.getStyleAt(this.index); short formatIndex = style.getDataFormat(); String formatString = style.getDataFormatString(); if (formatString.contains("m/d/yy")) { formatString = "yyyy-MM-dd hh:mm:ss"; } DataFormatter formatter = new DataFormatter(); this.lastContents = formatter.formatRawCellContents(Double.parseDouble(this.lastContents), formatIndex, formatString); System.out.println(this.lastContents); this.nextIsDate = false; } // v => contents of a cell // Output after we've seen the string contents if ("v".equals(name)) { System.out.println(this.lastContents); this.value = this.lastContents; } else if ("c".equals(name)) { this.map.put(this.key.replaceAll("\\d+", ""), this.value); } else if ("row".equals(name)) { String nowRow = this.key.replaceAll(this.key.replaceAll("\\d+", ""), ""); if (!"1".equals(nowRow)) { this.mapList.add(this.map); logger.info(this.mapList); insert(nowRow); } /** 清空儲存集 **/ this.mapList.clear(); this.map.clear(); } } /** * <p>Description: 插入資料</p> * 
@param nowRow 插入資料所在行(用於檢查Excel哪一行資料插入報錯) */ private void insert(String nowRow) { /** 迭代資料,通過對應列,獲取資料庫欄位和值,拼接SQL **/ ...... /** 迭代資料,通過對應列,獲取資料庫欄位和值,拼接SQL **/ } /** * <p>Title: characters</p> * <p>Description: </p> * @param ch 字元陣列 * @param start 起始位 * @param length 長度 * @throws SAXException SAX異常 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int) */ public void characters(char[] ch, int start, int length) throws SAXException { this.lastContents += new String(ch, start, length); } } public static void main(String[] args) throws Exception { ExampleEventUserModel example = new ExampleEventUserModel(); // System.out.println("11"); example.processOneSheet("D:/" + "****" + ".xlsx"); // example.processAllSheets(args[0]); } }