java -excel-讀取較大的excel檔案防止記憶體溢位(相容Excel2003和2007)
阿新 • • 發佈:2019-01-23
如果覺得寫得可以 或者太差 就 評論一下或者贊一下唄,多謝支援!!
1. 需要的jar<!-- https://mvnrepository.com/artifact/org.apache.poi/poi excel 檔案 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.15</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.15</version>
</dependency>
2.程式碼
import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import javax.xml.parsers.ParserConfigurationException; import org.apache.poi.hssf.util.CellReference; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.util.SAXHelper; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFComment; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; /** * * @author lhy * */ public class ReadExcel2003_2007 { private List<String[]> rows = new ArrayList<String[]>(); private final OPCPackage xlsxPackage; private int minColumns; private class SheetToCSV implements SheetContentsHandler { private String[] record; private int minColumns; public SheetToCSV(int minColumns) { super(); this.minColumns = minColumns; } @Override public void startRow(int rowNum) { record=new String[this.minColumns]; } @Override public void endRow(int rowNum) { rows.add(this.record); } @Override public void cell(String cellReference, String formattedValue, XSSFComment comment) { int thisCol = (new CellReference(cellReference)).getCol(); record[thisCol]=formattedValue; } @Override public void headerFooter(String text, boolean isHeader, String tagName) { // Skip, no headers or footers in CSV } } public ReadExcel2003_2007(OPCPackage pkg, int minColumns) { this.xlsxPackage = pkg; this.minColumns = minColumns; } public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,InputStream 
sheetInputStream) throws IOException, ParserConfigurationException, SAXException { DataFormatter formatter = new DataFormatter(); InputSource sheetSource = new InputSource(sheetInputStream); try { XMLReader sheetParser = SAXHelper.newXMLReader(); ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); } catch (ParserConfigurationException e) { throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); } } public List<String[]> process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next(); String sheetName = iter.getSheetName(); processSheet(styles, strings, new SheetToCSV(this.minColumns), stream); stream.close(); ++index; } return this.rows; } /** * 得到excel的記錄 * @param excelPath * @param minColumns 輸出多少列 * @return * @throws Exception */ public static List<String[]> getRecords(String excelPath,int minColumns) throws Exception{ File xlsxFile = new File(excelPath); if (!xlsxFile.exists()) { System.err.println("Not found or not a file: " + xlsxFile.getPath()); return null; } OPCPackage p = OPCPackage.open(xlsxFile); ReadExcel2003_2007 xlsx2csv = new ReadExcel2003_2007(p,minColumns); List<String[]>list=xlsx2csv.process(); p.close(); return list; } public static void main(String[] args) throws Exception { //需要的引數 是 excel檔案的地址,和 一共有多少列 List<String[]>list=getRecords("d:/123.xlsx",1); //如果需要數組裡放陣列 ArrayList<ArrayList<String>> result = new ArrayList<>(); for(int i=0;i<list.size();i++) { ArrayList<String> arrayList = new ArrayList<>(); for(String 
a:list.get(i)) { arrayList.add(a); System.out.println(a); } result.add(arrayList); } } }
對於上面的程式碼 可以把最後的主函式 寫成一個工具類, 然後 傳入excel的地址 和 一共有多少列, 然後輸出一個 list, 這樣就可以獲取excel 裡的所有資料.
至於裡面具體如何實現的 沒有特殊要求 可以不做過多研究. 能用就可以.