Java POI SAX模式 讀取大資料Excel
阿新 • • 發佈:2019-02-09
業務描述:
字尾為.xlsx的Excel檔案,只有一個sheet頁,且該sheet頁對應資料庫中的1張表,從A1開始有資料,第1行的資料對應表的各個欄位,從第2行開始是要匯入的資料,將該Excel匯入到資料庫中
解決方案:
因為Excel包含大量資料,如果採用POI的使用者模式(usermodel),會把整個工作簿載入記憶體,消耗大量記憶體,容易造成記憶體溢位
java.lang.OutOfMemoryError
所以這裡採用SAX模式(事件模式)讀取。需要注意的是,SAX模式讀取時,大部分格式的資料都可以直接讀取成String型別;日期格式不在此列,因為日期在xlsx中是以數值序號儲存的,必須依照單元格樣式中的格式另行轉換,處理方式詳見下方code
code:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* <p>ClassName: ExampleEventUserModel</p>
* <p>Description: 事件模式</p>
* <p>Author: sloth</p>
* <p>Date: 2018-02-28</p>
*/
public class ExampleEventUserModel {
/**
 * Cell style table of the workbook being processed.
 * <p>Assigned before parsing starts (see {@code processOneSheet}) and read by
 * {@code SheetHandler.endElement} to look up the number/date format of a cell
 * from its style index.</p>
 * <p>NOTE(review): the field is static, so every instance of this class shares
 * one value; parsing two workbooks at the same time would presumably overwrite
 * each other's styles - confirm that usage is single-threaded.</p>
 */
public static StylesTable stylesTable;
/**
 * Parses a single worksheet of an .xlsx workbook with the SAX event API
 * (this is the method used by the import scenario described above).
 * <p>The sheet is looked up by the relationship id {@code "rId1"}. To find the
 * sheet name / sheet order / rId of an arbitrary sheet, the core workbook
 * stream has to be processed; ids are normally of the form {@code rId#} or
 * {@code rSheet#}.</p>
 * <p>The sheet stream and the package are released even when parsing fails.</p>
 *
 * @param filename path of the .xlsx file
 * @throws Exception if the package cannot be opened or the sheet cannot be parsed
 */
public void processOneSheet(String filename) throws Exception {
    OPCPackage pkg = OPCPackage.open(filename);
    try {
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        // The handler reads the styles through the static field while parsing.
        stylesTable = r.getStylesTable();
        XMLReader parser = fetchSheetParser(sst);
        InputStream sheet = r.getSheet("rId1");
        try {
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);
        } finally {
            sheet.close();
        }
    } finally {
        // open(String) opens the package read-write; revert() releases it
        // without writing anything back to the source file.
        pkg.revert();
    }
}
/**
 * Parses every worksheet of an .xlsx workbook, one after another, with the
 * SAX event API. The same parser (and therefore the same handler) is reused
 * for all sheets.
 * <p>The workbook's style table is published through the static
 * {@code stylesTable} field before parsing, because the handler needs it to
 * format numeric/date cells. Each sheet stream and the package are released
 * even when parsing fails.</p>
 *
 * @param filename path of the .xlsx file
 * @throws Exception if the package cannot be opened or a sheet cannot be parsed
 */
public void processAllSheets(String filename) throws Exception {
    OPCPackage pkg = OPCPackage.open(filename);
    try {
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        // Without this the handler would dereference a null (or stale) styles
        // table as soon as it meets a styled numeric cell.
        stylesTable = r.getStylesTable();
        XMLReader parser = fetchSheetParser(sst);
        Iterator<InputStream> sheets = r.getSheetsData();
        while (sheets.hasNext()) {
            System.out.println("Processing new sheet:\n");
            InputStream sheet = sheets.next();
            try {
                InputSource sheetSource = new InputSource(sheet);
                parser.parse(sheetSource);
            } finally {
                sheet.close();
            }
            System.out.println("");
        }
    } finally {
        // open(String) opens the package read-write; revert() releases it
        // without writing anything back to the source file.
        pkg.revert();
    }
}
/**
 * Creates the XML reader used to parse a worksheet part and wires a new
 * {@code SheetHandler} into it.
 * <p>The reader is obtained from the platform default instead of naming a
 * JDK-internal implementation class, and resolution of external entities is
 * switched off because the workbook may come from an untrusted source.</p>
 *
 * @param sst shared strings table of the workbook
 * @return XML reader whose content handler is a fresh {@code SheetHandler}
 * @throws SAXException if no reader can be created or the reader rejects the
 *         external-entity features
 */
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
    XMLReader parser = XMLReaderFactory.createXMLReader();
    // Worksheet XML never needs external entities; refusing them blocks XXE.
    parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
    parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    ContentHandler handler = new SheetHandler(sst);
    parser.setContentHandler(handler);
    return parser;
}
/**
 * SAX content handler for one worksheet part of an .xlsx workbook.
 * <p>For every row it collects a map of {column letters -&gt; cell text} and, for
 * every row except row 1 (the header row), hands that map to
 * {@link #insert(String)}. The map is cleared after each row, so memory use
 * does not grow with the number of rows.</p>
 * <p>Cell text is resolved when the {@code v} element closes:</p>
 * <ul>
 * <li>{@code t="s"}: the content is an index into the shared strings table;</li>
 * <li>numeric cells (no {@code t}, or {@code t="n"}) that carry a style index
 * {@code s}: the raw number is rendered with the style's data format, which is
 * how dates become readable text;</li>
 * <li>anything else: the raw content is used as is.</li>
 * </ul>
 * <p>Only the {@code v} element sets a cell's value, so formula text
 * ({@code f}) is ignored and inline strings ({@code is}/{@code t}) are not
 * captured.</p>
 * <p>Per-cell diagnostics are written to the logger at debug level.</p>
 */
private static class SheetHandler extends DefaultHandler {
    /** Logger. */
    private static final Logger logger = Logger.getLogger(SheetHandler.class);
    /** A usable style index consists of digits only; compiled once for all cells. */
    private static final Pattern STYLE_INDEX = Pattern.compile("\\d+");
    /** Shared strings table of the workbook. */
    private final SharedStringsTable sst;
    /** Renders raw numeric content with an Excel format string; reused for every cell. */
    private final DataFormatter formatter = new DataFormatter();
    /** Character data collected since the most recent start tag. */
    private final StringBuilder lastContents = new StringBuilder();
    /** Whether the current cell's content is a shared-string index. */
    private boolean nextIsString;
    /** Whether the current cell's content is a number to be formatted through its style. */
    private boolean nextIsDate;
    /** Rows read from the sheet (could be removed: it only ever holds the single current map). */
    private final ArrayList<HashMap<String, String>> mapList;
    /** Current row as {column letters, cell text}. */
    private final HashMap<String, String> map;
    /** Reference of the current cell, e.g. {@code B7}; null when the cell has no {@code r} attribute. */
    private String key;
    /** Text of the current cell. */
    private String value;
    /** Style index of the current cell; meaningful only while {@code nextIsDate} is true. */
    private int index;
    /** Value of the current row's {@code r} attribute; null when the attribute is absent. */
    private String currentRow;

    /**
     * Creates a handler bound to a workbook's shared strings.
     *
     * @param sst shared strings table of the workbook
     */
    private SheetHandler(SharedStringsTable sst) {
        this.sst = sst;
        this.mapList = new ArrayList<HashMap<String, String>>();
        this.map = new HashMap<String, String>();
    }

    /**
     * Records the row number when a row opens and the reference, type and
     * style of a cell when a cell opens; always resets the text buffer.
     *
     * @param uri namespace URI (unused)
     * @param localName local name (unused)
     * @param name qualified XML tag name
     * @param attributes attributes of the tag
     * @throws SAXException never thrown here; declared by the overridden method
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        if ("row".equals(name)) {
            this.currentRow = attributes.getValue("r");
            // Forget the previous row's last cell so it cannot leak into this row.
            this.key = null;
        } else if ("c".equals(name)) {
            // c => cell
            this.key = attributes.getValue("r");
            this.value = "";
            String cellType = attributes.getValue("t");
            String styleIndex = attributes.getValue("s");
            // t="s": the value is an index into the shared strings table.
            this.nextIsString = "s".equals(cellType);
            // Only numeric cells may be pushed through the number/date formatter;
            // t="str", "b", "e" or "inlineStr" content is not a parsable number.
            boolean numericCell = cellType == null || "n".equals(cellType);
            if (numericCell && styleIndex != null && STYLE_INDEX.matcher(styleIndex).matches()) {
                this.index = Integer.parseInt(styleIndex);
                this.nextIsDate = true;
            } else {
                this.nextIsDate = false;
            }
        }
        // Clear contents cache
        this.lastContents.setLength(0);
    }

    /**
     * Resolves the cell text when {@code v} closes, stores the cell when
     * {@code c} closes and flushes the row when {@code row} closes.
     *
     * @param uri namespace URI (unused)
     * @param localName local name (unused)
     * @param name qualified XML tag name
     * @throws SAXException never thrown here; declared by the overridden method
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        if ("v".equals(name)) {
            // v => contents of a cell. Converted here, once the element is
            // complete, because characters() may be called more than once.
            this.value = resolveCellValue(this.lastContents.toString());
        } else if ("c".equals(name)) {
            if (this.key != null) {
                if (logger.isDebugEnabled()) {
                    logger.debug(this.key + " - " + this.value);
                }
                // Strip the row digits so the map is keyed by column letters only.
                this.map.put(this.key.replaceAll("\\d+", ""), this.value);
            }
        } else if ("row".equals(name)) {
            String nowRow = resolveRowNumber();
            // Row 1 is the header; a row without any cell has nothing to insert.
            if (!"1".equals(nowRow) && !this.map.isEmpty()) {
                this.mapList.add(this.map);
                logger.info(this.mapList);
                insert(nowRow);
            }
            // Reset the per-row storage.
            this.mapList.clear();
            this.map.clear();
        }
    }

    /**
     * Returns the number of the row that is closing: the row's own {@code r}
     * attribute when present, otherwise the digits of the last cell reference
     * seen in this row.
     *
     * @return row number as text, or null when neither source is available
     */
    private String resolveRowNumber() {
        if (this.currentRow != null) {
            return this.currentRow;
        }
        if (this.key != null) {
            return this.key.replaceAll("\\D+", "");
        }
        return null;
    }

    /**
     * Turns the raw content of a {@code v} element into the cell text.
     *
     * @param raw character data of the {@code v} element
     * @return shared string, formatted number/date, or {@code raw} unchanged
     */
    private String resolveCellValue(String raw) {
        if (raw.length() == 0) {
            return raw;
        }
        if (this.nextIsString) {
            int idx = Integer.parseInt(raw);
            return new XSSFRichTextString(this.sst.getEntryAt(idx)).toString();
        }
        if (this.nextIsDate) {
            return formatNumeric(raw);
        }
        return raw;
    }

    /**
     * Renders a raw numeric value with the data format of the current cell's
     * style. Falls back to the raw text when the styles table, the style or
     * its format string is unavailable.
     *
     * @param raw numeric content of the cell
     * @return formatted text, or {@code raw} when no format can be determined
     */
    private String formatNumeric(String raw) {
        StylesTable styles = stylesTable;
        if (styles == null) {
            return raw;
        }
        XSSFCellStyle style = styles.getStyleAt(this.index);
        if (style == null) {
            return raw;
        }
        String formatString = style.getDataFormatString();
        if (formatString == null) {
            return raw;
        }
        short formatIndex = style.getDataFormat();
        if (formatString.contains("m/d/yy")) {
            // Replace the m/d/yy date formats with one fixed, sortable pattern.
            formatString = "yyyy-MM-dd hh:mm:ss";
        }
        return this.formatter.formatRawCellContents(Double.parseDouble(raw), formatIndex, formatString);
    }

    /**
     * Inserts the current row into the database.
     *
     * @param nowRow number of the row being inserted (used to report which
     *        Excel row failed)
     */
    private void insert(String nowRow) {
        /* Iterate over the row data, map each column to its database field and value, and build the SQL. */
        /* The line below is the elision placeholder of the original listing; replace it with the real code. */
        ......
    }

    /**
     * Appends a chunk of character data to the text buffer.
     *
     * @param ch character array
     * @param start offset of the first character
     * @param length number of characters
     * @throws SAXException never thrown here; declared by the overridden method
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        this.lastContents.append(ch, start, length);
    }
}
/**
 * Command-line entry point: imports one sheet of a workbook.
 *
 * @param args optional; {@code args[0]} is the path of the .xlsx file. When
 *        omitted, the placeholder path from the original example is used.
 * @throws Exception if the workbook cannot be opened or parsed
 */
public static void main(String[] args) throws Exception {
    ExampleEventUserModel example = new ExampleEventUserModel();
    String filename = args.length > 0 ? args[0] : "D:/" + "****" + ".xlsx";
    example.processOneSheet(filename);
}
}