Java使用poi讀取excel資料(excel可能很大,先轉換為csv再讀取)
阿新 • • 發佈:2019-01-07
————————————配置———————————— |
jdbc.properties中加入: excelUrl=/……xlsx檔案目錄路徑/ (excelUrl + “xxxx.xlsx” 為完整路徑) |
匯入poi-3.16下的6個jar包,poi-3.16/lib下的5個jar包,poi-3.16/ooxml-lib下的2個jar包 |
將Excel_reader.java 和 XLSX2CSV.java 匯入專案 |
————————————方法———————————— |
Excel_reader類中的: |
xlsx_reader(String excel_name , ArrayList<Object> args) |
//excel_name為要讀取的xlsx檔名(帶字尾) , args為要獲取的列號的列表 |
//返回二維陣列ArrayList<ArrayList<String>> 第一維表示xlsx的行,第二維表示xlsx中該行的單元格 |
//空單元格返回null,需要自行處理成「---」或 0 |
//args可以填 int 或者 String ,若args[i]為int,那麼返回的二維陣列的第i列為xlsx中的第args[i]列(列號從0開始計數) |
//若args[i]為String,那麼返回的二維陣列的第i列固定為該字串常量 |
//例如 xlsx_reader(“崇明縣-表15:“夏淡”綠葉菜種植補貼-2014.xlsx”,args) |
// 其中 args=[7,8,9,"綠肥"] |
//那麼返回的二維陣列內容如下: |
[小明 , 350401219948383**** , null , 綠肥] |
[小紅 , 645354354354323**** , null , 綠肥] |
[小蘭 , 445353453425643**** , null , 綠肥] |
。。。。。。 |
XLSX2CSV.java
import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import javax.xml.parsers.ParserConfigurationException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.ss.util.CellAddress; import org.apache.poi.ss.util.CellReference; import org.apache.poi.util.SAXHelper; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFComment; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; /** * A rudimentary XLSX -> CSV processor modeled on the * POI sample program XLS2CSVmra from the package * org.apache.poi.hssf.eventusermodel.examples. * As with the HSSF version, this tries to spot missing * rows and cells, and output empty entries for them. * <p/> * Data sheets are read using a SAX parser to keep the * memory footprint relatively small, so this should be * able to read enormous workbooks. The styles table and * the shared-string table must be kept in memory. The * standard POI styles table class is used, but a custom * (read-only) class is used for the shared string table * because the standard POI SharedStringsTable grows very * quickly with the number of unique strings. * <p/> * For a more advanced implementation of SAX event parsing * of XLSX files, see {@link XSSFEventBasedExcelExtractor} * and {@link XSSFSheetXMLHandler}. Note that for many cases, * it may be possible to simply use those with a custom * {@link SheetContentsHandler} and no SAX code needed of * your own! 
*/ public class XLSX2CSV { /** * Uses the XSSF Event SAX helpers to do most of the work * of parsing the Sheet XML, and outputs the contents * as a (basic) CSV. */ private class SheetToCSV implements SheetContentsHandler { private boolean firstCellOfRow = false; private int currentRow = -1; private int currentCol = -1; private void outputMissingRows(int number) { for (int i = 0; i < number; i++) { curstr = new ArrayList<String>(); for (int j = 0; j < minColumns; j++) { curstr.add(null); } output.add(curstr); } } @Override public void startRow(int rowNum) { curstr = new ArrayList<String>(); // If there were gaps, output the missing rows outputMissingRows(rowNum - currentRow - 1); // Prepare for this row firstCellOfRow = true; currentRow = rowNum; currentCol = -1; } @Override public void endRow(int rowNum) { // Ensure the minimum number of columns for (int i = currentCol; i < minColumns ; i++) { curstr.add(null); } output.add(curstr); } @Override public void cell(String cellReference, String formattedValue, XSSFComment comment) { // if (firstCellOfRow) { // firstCellOfRow = false; // } else { // curstr.append(','); // } // gracefully handle missing CellRef here in a similar way as XSSFCell does if (cellReference == null) { cellReference = new CellAddress(currentRow, currentCol).formatAsString(); } // Did we miss any cells? int thisCol = (new CellReference(cellReference)).getCol(); int missedCols = thisCol - currentCol - 1; for (int i = 0; i < missedCols; i++) { curstr.add(null); } currentCol = thisCol; // Number or string? 
try { Double.parseDouble(formattedValue); curstr.add(formattedValue); } catch (NumberFormatException e) { // output.append('"'); curstr.add(formattedValue); // output.append('"'); } } @Override public void headerFooter(String text, boolean isHeader, String tagName) { // Skip, no headers or footers in CSV } } /////////////////////////////////////// private final OPCPackage xlsxPackage; /** * Number of columns to read starting with leftmost */ private final int minColumns; /** * Destination for data */ private ArrayList<ArrayList<String>> output; private ArrayList<String> curstr; public ArrayList<ArrayList<String>> get_output(){ return output; } /** * Creates a new XLSX -> CSV converter * * @param pkg The XLSX package to process * @param output The PrintStream to output the CSV to * @param minColumns The minimum number of columns to output, or -1 for no minimum */ public XLSX2CSV(OPCPackage pkg, int minColumns) { this.xlsxPackage = pkg; this.minColumns = minColumns; } /** * Parses and shows the content of one sheet * using the specified styles and shared-strings tables. * * @param styles * @param strings * @param sheetInputStream */ public void processSheet( StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException { DataFormatter formatter = new DataFormatter(); InputSource sheetSource = new InputSource(sheetInputStream); try { XMLReader sheetParser = SAXHelper.newXMLReader(); ContentHandler handler = new XSSFSheetXMLHandler( styles, null, strings, sheetHandler, formatter, false); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); } catch (ParserConfigurationException e) { throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); } } /** * Initiates the processing of the XLS workbook file to CSV. 
* * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); int index = 0; while (iter.hasNext()) { output = new ArrayList<ArrayList<String>> (); InputStream stream = iter.next(); String sheetName = iter.getSheetName(); System.out.println("正在讀取sheet: "+sheetName + " [index=" + index + "]:"); processSheet(styles, strings, new SheetToCSV(), stream); System.out.println("sheet 讀取完成!"); stream.close(); ++index; } } // public static void main(String[] args) throws Exception { // /* if (args.length < 1) { // System.err.println("Use:"); // System.err.println(" XLSX2CSV <xlsx file> [min columns]"); // return; // }*/ // // File xlsxFile = new File("F:\\8月資料.xlsx"); // if (!xlsxFile.exists()) { // System.err.println("Not found or not a file: " + xlsxFile.getPath()); // return; // } // // int minColumns = -1; // if (args.length >= 2) // minColumns = Integer.parseInt(args[1]); // // // The package open is instantaneous, as it should be. // OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ); // XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns); // xlsx2csv.process(); // p.close(); // } }
Excel_reader.java
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Reads selected columns of an .xlsx file into a list of rows.
 * <p>
 * The directory holding the files is taken from the {@code excelUrl}
 * entry of {@code jdbc.properties} on the classpath; the file itself is
 * parsed with {@link XLSX2CSV}.
 */
public class Excel_reader {

    /** Value passed to {@link XLSX2CSV} as its minimum number of columns per row. */
    private static final int MIN_COLUMNS = 20;

    /**
     * Reads the given xlsx file and builds one output row per sheet row.
     * <p>
     * Each entry of {@code args} defines one output column: an
     * {@code Integer} selects the cell at that 0-based column index of the
     * sheet row, a {@code String} is copied into every output row as a
     * constant. Empty cells, and column indexes past the end of a row, are
     * returned as {@code null}.
     *
     * @param excel_name file name (with extension), appended to {@code excelUrl}
     * @param args       column specifications, {@code Integer} or {@code String}
     * @return the selected columns row by row; an empty list if the file
     *         could not be parsed; {@code null} if {@code excelUrl} is not
     *         configured, the file does not exist, or {@code args} contains
     *         an unsupported type
     * @throws IOException if {@code jdbc.properties} is missing or cannot be read
     */
    public static ArrayList<ArrayList<String>> xlsx_reader(String excel_name, ArrayList<Object> args)
            throws IOException {
        String excelUrl = loadExcelDirectory();
        if (excelUrl == null) {
            System.err.println("excelUrl is not configured in jdbc.properties");
            return null;
        }

        File xlsxFile = new File(excelUrl + excel_name);
        if (!xlsxFile.exists()) {
            System.err.println("Not found or not a file: " + xlsxFile.getPath());
            return null;
        }

        ArrayList<ArrayList<String>> sheetRows = readRows(xlsxFile);
        System.out.println(excel_name + " 讀取完畢");
        return selectColumns(sheetRows, args);
    }

    /** Loads {@code jdbc.properties} from the classpath and returns its {@code excelUrl} entry. */
    private static String loadExcelDirectory() throws IOException {
        Properties properties = new Properties();
        // try-with-resources closes the stream; a null resource is simply skipped
        try (InputStream inStream =
                JDBCTools.class.getClassLoader().getResourceAsStream("jdbc.properties")) {
            if (inStream == null) {
                throw new IOException("jdbc.properties not found on the classpath");
            }
            properties.load(inStream);
        }
        return properties.getProperty("excelUrl");
    }

    /**
     * Parses the workbook and returns its collected rows. Any failure is
     * printed and results in an empty list (best effort, as before).
     */
    private static ArrayList<ArrayList<String>> readRows(File xlsxFile) {
        // try-with-resources releases the package even when parsing fails
        try (OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ)) {
            XLSX2CSV xlsx2csv = new XLSX2CSV(p, MIN_COLUMNS);
            xlsx2csv.process();
            ArrayList<ArrayList<String>> rows = xlsx2csv.get_output();
            if (rows != null) {
                return rows;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return new ArrayList<ArrayList<String>>();
    }

    /**
     * Builds the output rows from the sheet rows according to {@code args}.
     * Returns null as soon as an entry that is neither Integer nor String is met.
     */
    private static ArrayList<ArrayList<String>> selectColumns(
            ArrayList<ArrayList<String>> sheetRows, ArrayList<Object> args) {
        ArrayList<ArrayList<String>> ans = new ArrayList<ArrayList<String>>();
        for (ArrayList<String> sheetRow : sheetRows) {
            ArrayList<String> picked = new ArrayList<String>(args.size());
            for (Object spec : args) {
                if (spec instanceof String) {
                    picked.add((String) spec);
                } else if (spec instanceof Integer) {
                    int index = (Integer) spec;
                    // A row shorter than the requested index has no such cell: report it as empty
                    picked.add(index < sheetRow.size() ? sheetRow.get(index) : null);
                } else {
                    System.out.print("型別錯誤!");
                    return null;
                }
            }
            ans.add(picked);
        }
        return ans;
    }
}