使用 POI 的事件模式(SAX)解析大型 Excel 檔案,防止記憶體溢位
1.解析類
package com.cetc.ExcelPoi;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

/**
 * SAX content handler that reads one worksheet XML stream of an .xlsx file
 * and collects every row as a {@code String[]}.
 *
 * <p>Cell text is accumulated from {@code <v>} elements (and from
 * {@code <is><t>} for inline strings), converted according to the cell's
 * {@code t} attribute and number-format index, and stored at the column
 * derived from the cell reference. When a {@code </row>} is reached the
 * current row buffer is copied into the result list.
 *
 * <p>An instance keeps mutable parsing state and two {@link SimpleDateFormat}
 * objects, so it must not be shared between threads.
 *
 * Created by shea on 2018/10/12.
 */
public class BigExcelParse extends DefaultHandler {

    /** Kind of value carried by the cell currently being read. */
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    /** Table with styles, used to look up a numeric cell's data format. */
    private StylesTable stylesTable;

    /** Shared strings table; SSTINDEX cells hold an index into it. */
    private SharedStringsTable sharedStringsTable;

    /** Column count supplied by the caller; initial width of a row buffer. */
    private final int minColumnCount;

    /** True while character data should be appended to {@link #value}. */
    private boolean vIsOpen;

    /** True while inside an inline-string {@code <is>} element. */
    private boolean isIsOpen;

    private xssfDataType nextDataType;
    private short formatIndex;
    private String formatString;
    private final DataFormatter formatter;

    /** Zero-based column of the cell being read; -1 before the first cell of a row. */
    private int thisColumn = -1;

    /** Gathers characters as they are seen. */
    private StringBuffer value;

    /** Buffer holding the values of the row being read. */
    private String[] record;

    /** All rows collected so far. */
    private List<String[]> rows = new ArrayList<String[]>();

    // One formatter per pattern per handler instead of one allocation per cell.
    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    private final SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm");

    /**
     * Creates a handler.
     *
     * @param styles  styles table of the workbook
     * @param strings shared strings table of the workbook
     * @param cols    expected number of columns per row; rows are only
     *                collected when this is greater than zero
     * @throws IllegalArgumentException if {@code cols} is negative
     */
    public BigExcelParse(StylesTable styles, SharedStringsTable strings, int cols) {
        if (cols < 0) {
            throw new IllegalArgumentException("cols must not be negative: " + cols);
        }
        this.stylesTable = styles;
        this.sharedStringsTable = strings;
        this.minColumnCount = cols;
        this.value = new StringBuffer();
        this.nextDataType = xssfDataType.NUMBER;
        this.formatter = new DataFormatter();
        this.record = new String[this.minColumnCount];
    }

    @Override
    public void startElement(String uri, String localName, String name,
                             Attributes attributes) throws SAXException {
        if ("inlineStr".equals(name) || "v".equals(name)) {
            vIsOpen = true;
            // Clear contents cache
            value.setLength(0);
        } else if ("is".equals(name)) {
            // Inline string container: text arrives in one or more nested <t> runs.
            isIsOpen = true;
            value.setLength(0);
        } else if ("t".equals(name) && isIsOpen) {
            // Do not clear here so that several rich-text runs are concatenated.
            vIsOpen = true;
        } else if ("row".equals(name)) {
            // Restart sequential column numbering for cells that carry no reference.
            thisColumn = -1;
        } else if ("c".equals(name)) {
            // c => cell
            thisColumn = resolveColumn(attributes.getValue("r"), thisColumn + 1);

            // Set up defaults.
            this.nextDataType = xssfDataType.NUMBER;
            this.formatIndex = -1;
            this.formatString = null;

            String cellType = attributes.getValue("t");
            String cellStyleStr = attributes.getValue("s");
            if ("b".equals(cellType)) {
                nextDataType = xssfDataType.BOOL;
            } else if ("e".equals(cellType)) {
                nextDataType = xssfDataType.ERROR;
            } else if ("inlineStr".equals(cellType)) {
                nextDataType = xssfDataType.INLINESTR;
            } else if ("s".equals(cellType)) {
                nextDataType = xssfDataType.SSTINDEX;
            } else if ("str".equals(cellType)) {
                nextDataType = xssfDataType.FORMULA;
            } else if (cellStyleStr != null) {
                // It's a number, but almost certainly one
                // with a special style or format
                int styleIndex = Integer.parseInt(cellStyleStr);
                XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                this.formatIndex = style.getDataFormat();
                this.formatString = style.getDataFormatString();
                if (this.formatString == null) {
                    this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
                }
            }
        }
    }

    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if ("v".equals(name)) {
            // v => contents of a cell.
            // Process now, as characters() may have been called more than once.
            vIsOpen = false;
            storeCell(convertValue());
        } else if ("t".equals(name) && isIsOpen) {
            vIsOpen = false;
        } else if ("is".equals(name)) {
            isIsOpen = false;
            vIsOpen = false;
            if (nextDataType == xssfDataType.INLINESTR) {
                storeCell(convertValue());
            }
        } else if ("row".equals(name)) {
            // End of row: save a copy of the buffer and reset it for the next row.
            if (minColumnCount > 0) {
                rows.add(record.clone());
                Arrays.fill(record, null);
            }
        }
    }

    /** Returns the list of rows collected so far (the live internal list). */
    public List<String[]> getRows() {
        return rows;
    }

    /** Replaces the list into which rows are collected. */
    public void setRows(List<String[]> rows) {
        this.rows = rows;
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (vIsOpen) {
            value.append(ch, start, length);
        }
    }

    /**
     * Determines the zero-based column for a cell reference such as "AB12".
     * Falls back to {@code fallback} when the reference is absent or has no
     * leading column letters followed by a row number.
     */
    private int resolveColumn(String ref, int fallback) {
        if (ref == null) {
            return fallback;
        }
        int firstDigit = -1;
        for (int i = 0; i < ref.length(); ++i) {
            if (Character.isDigit(ref.charAt(i))) {
                firstDigit = i;
                break;
            }
        }
        if (firstDigit <= 0) {
            return fallback;
        }
        return nameToColumn(ref.substring(0, firstDigit));
    }

    /** Converts the accumulated text to the string stored for the current cell. */
    private String convertValue() {
        switch (nextDataType) {
            case BOOL:
                if (value.length() == 0) {
                    return null;
                }
                return value.charAt(0) == '0' ? "FALSE" : "TRUE";
            case ERROR:
                return "\"ERROR:" + value.toString() + '"';
            case FORMULA:
                // Cached string result of a formula.
                return value.toString();
            case INLINESTR:
                return value.toString();
            case SSTINDEX:
                String sstIndex = value.toString();
                try {
                    int idx = Integer.parseInt(sstIndex);
                    XSSFRichTextString rtss = new XSSFRichTextString(
                            sharedStringsTable.getEntryAt(idx));
                    return rtss.toString();
                } catch (NumberFormatException ex) {
                    System.out.println("Failed to parse SST index '" + sstIndex
                            + "': " + ex.toString());
                    return null;
                }
            case NUMBER:
                return formatNumber(value.toString());
            default:
                return "(TODO: Unexpected type: " + nextDataType + ")";
        }
    }

    /**
     * Formats the raw text of a numeric cell: as yyyy-MM-dd or HH:mm when the
     * format index is one of the recognised date/time indices, as a plain
     * decimal when the text is in scientific notation, otherwise unchanged.
     */
    private String formatNumber(String raw) {
        if (raw.isEmpty()) {
            return raw;
        }
        if (isDateFormat(formatIndex)) {
            return formatSerial(raw, dateFormat);
        }
        if (isTimeFormat(formatIndex)) {
            return formatSerial(raw, timeFormat);
        }
        if (raw.indexOf('E') >= 0 || raw.indexOf('e') >= 0) {
            // Expand the exponent instead of stripping it, so 1.23E+10 -> 12300000000.
            try {
                return new BigDecimal(raw).toPlainString();
            } catch (NumberFormatException ex) {
                return raw;
            }
        }
        return raw;
    }

    /** Format indices this handler renders as a date. */
    private static boolean isDateFormat(int idx) {
        return idx == 14 || idx == 31 || idx == 57 || idx == 58
                || (176 <= idx && idx <= 178)
                || (182 <= idx && idx <= 196)
                || (210 <= idx && idx <= 213)
                || idx == 208;
    }

    /** Format indices this handler renders as a time (checked after the date indices). */
    private static boolean isTimeFormat(int idx) {
        return idx == 20 || idx == 32 || idx == 183
                || (200 <= idx && idx <= 209);
    }

    /** Renders a serial date number with the given formatter; returns the raw text if it cannot be converted. */
    private static String formatSerial(String raw, SimpleDateFormat fmt) {
        try {
            Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(raw));
            return date == null ? raw : fmt.format(date);
        } catch (NumberFormatException ex) {
            return raw;
        }
    }

    /** Stores a cell value in the row buffer, widening the buffer when the column lies beyond it. */
    private void storeCell(String cellText) {
        if (thisColumn < 0) {
            return;
        }
        if (thisColumn >= record.length) {
            record = Arrays.copyOf(record, thisColumn + 1);
        }
        record[thisColumn] = cellText;
    }

    /** Converts column letters ("A", "Z", "AA", ...) to a zero-based column index. */
    private int nameToColumn(String name) {
        int column = -1;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            column = (column + 1) * 26 + c - 'A';
        }
        return column;
    }
}
2.呼叫類
package com.cetc.ExcelPoi;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.*;
import java.util.HashMap;
import java.util.List;

/**
 * Driver for {@link BigExcelParse}: opens an .xlsx package read-only, finds a
 * sheet by name and parses it with the SAX handler.
 *
 * Created by shea on 2018/10/12.
 */
public class TestBigExcelParse {

    private OPCPackage xlsxPackage;
    private int minColumns;
    private PrintStream output;
    private String sheetName;

    /**
     * Stores the given arguments in fields; the static methods of this class
     * do not read them.
     */
    public TestBigExcelParse(OPCPackage pkg, PrintStream output, String sheetName, int minColumns) {
        this.xlsxPackage = pkg;
        this.output = output;
        this.minColumns = minColumns;
        this.sheetName = sheetName;
    }

    /**
     * Parses one named sheet of an .xlsx file.
     *
     * @param path       path of the file
     * @param sheetName  name of the sheet to parse
     * @param minColumns number of columns passed to {@link BigExcelParse}
     * @return a map with keys "success" (Boolean) and "msg" (String), plus
     *         "rows" ({@code List<String[]>}) when the sheet was found
     * @throws Exception if the package cannot be opened or parsing fails
     */
    public static HashMap<String, Object> parseSheet(String path, String sheetName, int minColumns) throws Exception {
        HashMap<String, Object> map = new HashMap<String, Object>();

        OPCPackage pkg = OPCPackage.open(path, PackageAccess.READ);
        try {
            XSSFReader r = new XSSFReader(pkg);
            XSSFReader.SheetIterator sheetsData = (XSSFReader.SheetIterator) r.getSheetsData();
            SharedStringsTable sst = r.getSharedStringsTable();
            StylesTable styles = r.getStylesTable();

            XMLReader parser = XMLReaderFactory.createXMLReader();
            BigExcelParse handler = new BigExcelParse(styles, sst, minColumns);
            parser.setContentHandler(handler);

            // Walk the sheets until the requested name is found.
            while (sheetsData.hasNext()) {
                InputStream in = sheetsData.next();
                try {
                    if (sheetName != null && sheetName.equals(sheetsData.getSheetName())) {
                        parser.parse(new InputSource(in));
                        List<String[]> rows = handler.getRows();
                        map.put("success", true);
                        map.put("msg", "解析完成!");
                        map.put("rows", rows);
                        break;
                    }
                } finally {
                    // Close every sheet stream, not only the matching one.
                    in.close();
                }
            }
        } finally {
            // The package was opened read-only, so discard it rather than save it.
            pkg.revert();
        }

        if (map.isEmpty()) {
            map.put("success", false);
            map.put("msg", "解析失敗,沒有找到相應的sheet表!");
        }
        return map;
    }

    /**
     * Sample invocation: parses a sheet and writes it as tab-separated text.
     *
     * @param args unused
     * @throws Exception if parsing or writing fails
     */
    public static void main(String[] args) throws Exception {
        HashMap<String, Object> res = parseSheet("C:\\Users\\shea\\Desktop\\測試2.xlsx", "Sheet1", 285);
        System.out.println((String) res.get("msg"));
        if (!Boolean.TRUE.equals(res.get("success"))) {
            // No "rows" entry exists when the sheet was not found.
            return;
        }

        @SuppressWarnings("unchecked") // parseSheet stores a List<String[]> under "rows"
        List<String[]> rows = (List<String[]>) res.get("rows");

        BufferedWriter out = new BufferedWriter(new FileWriter("C:\\Users\\shea\\Desktop\\測試_bigExcel.txt"));
        try {
            for (String[] row : rows) {
                for (String cell : row) {
                    // Empty cells are written as an empty field, not the text "null".
                    out.write((cell == null ? "" : cell) + "\t");
                }
                out.newLine();
            }
            out.flush();
        } finally {
            out.close();
        }
    }
}
相關推薦
poi通過事件模式sax解析大excel檔案,防止記憶體溢位
1.解析類 package com.cetc.ExcelPoi; import org.apache.
淺談Android多圖(包括大圖)上傳時的記憶體處理,防止記憶體溢位。
Android多圖上傳時,為了防止記憶體溢位,基本只要做好兩點就好了,一是及時釋放已經上傳完的圖片,以及在對圖片處理時,必須是一張一張來,因為對圖片的處理過程是最容易OOM的。 下面有簡單的程式碼說明下, 1、首先,圖片的model, ImageBean model中
poi 通過模板反射通用導出excel
gets 集合 save red 返回 workbook get sfc lag package util;import java.io.FileInputStream;import java.io.FileOutputStream;import java.lang.ref
Excel大檔案時讀取記憶體溢位的解決方案,測試50M的Excel通過。
1.大檔案xlsx的檔案解析,本案例僅僅只是讀取Excel檔案的內容為例。具體需求可按自己的需求改。 package com.sundy.parse.util; import java.io.File; import java.io.IOException; import
sax解析xml文件,封裝到對象中
XP test xmlparse AI set version clas args java 創建User.java類 public class User { private String id; private String name; pr
springmvc下載excel檔案,通過get方式傳少量引數
Html <el-button @click=downloadExcel>資料匯出</el-button> export default { methods:{ downloadExcel(){ window.location.href='/api
golang 解析大xml檔案
golang 解析很大的xml in, err := os.Open(os.Args[1]) defer in.Close() decoder := xml.NewDecoder(in) var t xml.Token var text bool for t
POI 複製不同Sheet合成Excel檔案,完美解決單元格樣式和角標問題
專案背景: 1、需要從不同的Excel模板中讀取Sheet,生成到彙總的Excel檔案中 2、Excel格式是 Office2007版本,xlsx格式 3、需要將模板檔案的內容和格式,拷貝到新的檔案 4、解決POI 複製格式異常問題 工具程式碼: package com.sw
POI匯出Excel檔案,瀏覽器點選可下載
說明:使用SpringMVC+POI 1:服務端程式碼 /** * 匯出日誌查詢列表 */ @RequestMapping(value = "/log_excel") public void exportLogList(HttpS
java使用POI讀取excel檔案,相容xls和xlsx
public List<Double> readExcels(InputStream is)throws Exception{List<Double> xlsxList = new ArrayList<Double>(); try { if(i
java poi 讀取有合併單元格的EXCEL檔案
資料的樣子是這樣的 public String addReportByExcel(Long userId,InputStream inputStream,String fileName) throws BusinessException{ String mes
Python解析大XML檔案及讀取XML不全的問題
之前用python的minidom寫過解析xml的指令碼檔案,在前期是比較好用的,因為xml檔案比較小。但是當xml檔案超過了70M的時候,minidom不僅效率低,而且會佔用非常大的記憶體空間,因為他是將整個xml讀入進去並且按照整個xml樹進行建樹(雖然這樣寫程式碼邏輯
java -excel-讀取較大的excel檔案防止記憶體溢位(相容Excel2003和2007)
如果覺得寫得可以 或者太差 就 評論一下或者贊一下唄,多謝支援!!1. 需要的jar<!-- https://mvnrepository.com/artifact/org.apache.poi/poi excle 檔案 --><dependency>
js解析Excel檔案,解析後的資料用Echarts折線圖展示
不多說了,直接上程式碼 <!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title>js解析Excel</title> <
POI實現大資料EXCEL匯入匯出,解決記憶體溢位問題
/** * 抽象Excel2007讀取器,excel2007的底層資料結構是xml檔案,採用SAX的事件驅動的方法解析 * xml,需要繼承DefaultHandler,在遇到檔案內容時,事件會觸發,這種做法可以大大降低 * 記憶體的耗費,特別使用於大資料量的檔案。 * */ public cl
poi匯入匯出excel檔案,相容.xls和.xlsx兩種格式
這個是測試類: package com.fishroad.util; import java.io.File; import java.io.FileInputStream; import java.math.BigDecimal; import java.text.Si
Java通過cmd呼叫FFmpeg實現大視訊檔案的分段切割
由於公司業務需要,就用java寫了這麼個小程式,其實挺簡單的,但是也算是弄了半天,所以就發表出來吧~ VideoFileOperate .java package xyz.leo; import java.io.*; import java.ut
flask上傳excel檔案,無須儲存,直接讀取內容
import xlrd from flask import Flask, request app = Flask(__name__) @app.route("/", methods=['POST', 'GET']) def filelist1(): print(request.files)
dede織夢系統後臺的文章或自定義模型中的資料庫內容匯出到excel檔案,解決亂碼。
dede織夢繫統後臺的文章或自定義模型中的資料庫內容到匯出excel檔案,解決亂碼。好品牌小編下面分享的開發過程。 1、在後臺目錄建立一個php檔案toexcel.php,在最上面加入程式碼: require_once(dirname(__FILE__).'/confi
Struts2+Spring讀取csv和excel檔案,FormData Ajax提交
1.完成目標 讀取.csv .xls .xlsx檔案型別的檔案,根據表頭在表中建立表,根據資料插入表中 2.實現 2.1 ivy.xml相關依賴包: <dependency org="ossjava" n