1. 程式人生 > >Java使用poi讀取excel資料(excel可能很大,先轉換為csv再讀取)

Java使用poi讀取excel資料(excel可能很大,先轉換為csv再讀取)

————————————配置————————————
jdbc.properties中加入: excelUrl=/……xlsx檔案目錄路徑/ (excelUrl + “xxxx.xlsx” 為完整路徑)
匯入poi-3.16下的6個jar包,poi-3.16/lib下的5個jar包,poi-3.16/ooxml-lib下的2個jar包
將Excel_reader.java 和 XLSX2CSV.java 匯入專案
————————————方法————————————
Excel_reader類中的:
xlsx_reader(String excel_name , ArrayList<Object> args)
//excel_name為要讀取的xlsx檔名(帶字尾) , args為要獲取的列號的列表
//返回二維陣列ArrayList<ArrayList<String>> 第一維表示xlsx的行,第二維表示xlsx中該行的單元格
//空單元格返回null,需要自己處理成—--或0
//args可以填 int 或者 String ,若args[i]為int,那麼返回的二維陣列的第I列為xlsx中的第args[i]列
//若args[i]為String,那麼返回的二維陣列的第i列為改字元常量
//例如 xlsx_reader(“崇明縣-表15:“夏淡”綠葉菜種植補貼-2014.xlsx”,args)
// 其中 args=[7,8,9,”綠肥”]
//那麼返回的二維陣列內容如下:
[小明 , 350401219948383**** , null , 綠肥]
[小紅 , 645354354354323**** , null , 綠肥]
[小蘭 , 445353453425643**** , null , 綠肥]
。。。。。。

XLSX2CSV.java

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;  
  
/** 
 * A rudimentary XLSX -> CSV processor modeled on the 
 * POI sample program XLS2CSVmra from the package 
 * org.apache.poi.hssf.eventusermodel.examples. 
 * As with the HSSF version, this tries to spot missing 
 * rows and cells, and output empty entries for them. 
 * <p/> 
 * Data sheets are read using a SAX parser to keep the 
 * memory footprint relatively small, so this should be 
 * able to read enormous workbooks.  The styles table and 
 * the shared-string table must be kept in memory.  The 
 * standard POI styles table class is used, but a custom 
 * (read-only) class is used for the shared string table 
 * because the standard POI SharedStringsTable grows very 
 * quickly with the number of unique strings. 
 * <p/> 
 * For a more advanced implementation of SAX event parsing 
 * of XLSX files, see {@link XSSFEventBasedExcelExtractor} 
 * and {@link XSSFSheetXMLHandler}. Note that for many cases, 
 * it may be possible to simply use those with a custom 
 * {@link SheetContentsHandler} and no SAX code needed of 
 * your own! 
 */  
public class XLSX2CSV {  
    /** 
     * Uses the XSSF Event SAX helpers to do most of the work 
     * of parsing the Sheet XML, and outputs the contents 
     * as a (basic) CSV. 
     */  
    private class SheetToCSV implements SheetContentsHandler {  
        private boolean firstCellOfRow = false;  
        private int currentRow = -1;  
        private int currentCol = -1;  
  
        private void outputMissingRows(int number) {  
            for (int i = 0; i < number; i++) {  
            	curstr = new ArrayList<String>();
                for (int j = 0; j < minColumns; j++) {  
                	curstr.add(null);  
                }  
                output.add(curstr);  
            }  
        }  
  
        @Override  
        public void startRow(int rowNum) {  
        	curstr = new ArrayList<String>();
            // If there were gaps, output the missing rows  
            outputMissingRows(rowNum - currentRow - 1);  
            // Prepare for this row  
            firstCellOfRow = true;  
            currentRow = rowNum;  
            currentCol = -1;  
        }  
  
        @Override  
        public void endRow(int rowNum) {  
            // Ensure the minimum number of columns  
            for (int i = currentCol; i < minColumns ; i++) {  
                curstr.add(null);  
            }  
            output.add(curstr);  
        }  
  
        @Override  
        public void cell(String cellReference, String formattedValue,  
                         XSSFComment comment) {  
//            if (firstCellOfRow) {  
//                firstCellOfRow = false;  
//            } else {  
//                curstr.append(',');  
//            }  
  
            // gracefully handle missing CellRef here in a similar way as XSSFCell does  
            if (cellReference == null) {  
                cellReference = new CellAddress(currentRow, currentCol).formatAsString();  
            }  
  
            // Did we miss any cells?  
            int thisCol = (new CellReference(cellReference)).getCol();  
            int missedCols = thisCol - currentCol - 1;  
            for (int i = 0; i < missedCols; i++) {  
                curstr.add(null);  
            }  
            currentCol = thisCol;  
  
            // Number or string?  
            try {  
                Double.parseDouble(formattedValue);  
                curstr.add(formattedValue);  
            } catch (NumberFormatException e) {  
               // output.append('"');  
            	curstr.add(formattedValue);  
             //   output.append('"');  
            }  
        }  
  
        @Override  
        public void headerFooter(String text, boolean isHeader, String tagName) {  
            // Skip, no headers or footers in CSV  
        }  
    }  
  
  
    ///////////////////////////////////////  
  
    private final OPCPackage xlsxPackage;  
  
    /** 
     * Number of columns to read starting with leftmost 
     */  
    private final int minColumns;  
  
    /** 
     * Destination for data 
     */  
    
    private ArrayList<ArrayList<String>> output;
    private ArrayList<String> curstr;
    
    public  ArrayList<ArrayList<String>> get_output(){
    	return output;
    }
    
    /** 
     * Creates a new XLSX -> CSV converter 
     * 
     * @param pkg        The XLSX package to process 
     * @param output     The PrintStream to output the CSV to 
     * @param minColumns The minimum number of columns to output, or -1 for no minimum 
     */  
    public XLSX2CSV(OPCPackage pkg, int minColumns) {  
        this.xlsxPackage = pkg;  
        this.minColumns = minColumns;  
    }  
    
  
    /** 
     * Parses and shows the content of one sheet 
     * using the specified styles and shared-strings tables. 
     * 
     * @param styles 
     * @param strings 
     * @param sheetInputStream 
     */  
    public void processSheet(  
            StylesTable styles,  
            ReadOnlySharedStringsTable strings,  
            SheetContentsHandler sheetHandler,  
            InputStream sheetInputStream)  
            throws IOException, ParserConfigurationException, SAXException {  
        DataFormatter formatter = new DataFormatter();  
        InputSource sheetSource = new InputSource(sheetInputStream);  
        try {  
            XMLReader sheetParser = SAXHelper.newXMLReader();  
            ContentHandler handler = new XSSFSheetXMLHandler(  
                    styles, null, strings, sheetHandler, formatter, false);  
            sheetParser.setContentHandler(handler);  
            sheetParser.parse(sheetSource);  
        } catch (ParserConfigurationException e) {  
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());  
        }  
    }  
  
    /** 
     * Initiates the processing of the XLS workbook file to CSV. 
     * 
     * @throws IOException 
     * @throws OpenXML4JException 
     * @throws ParserConfigurationException 
     * @throws SAXException 
     */  
    public void process()  
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {  
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);  
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);  
        StylesTable styles = xssfReader.getStylesTable();  
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();  
        int index = 0;  
        while (iter.hasNext()) {  
        	output = new ArrayList<ArrayList<String>> ();
            InputStream stream = iter.next();  
            String sheetName = iter.getSheetName();  
            System.out.println("正在讀取sheet: "+sheetName + " [index=" + index + "]:");  
            processSheet(styles, strings, new SheetToCSV(), stream);  
            System.out.println("sheet 讀取完成!");
            stream.close();  
            ++index;  
        }  
    }  
  
    
//    public static void main(String[] args) throws Exception {  
//      /*  if (args.length < 1) { 
//            System.err.println("Use:"); 
//            System.err.println("  XLSX2CSV <xlsx file> [min columns]"); 
//            return; 
//        }*/  
//  
//        File xlsxFile = new File("F:\\8月資料.xlsx");  
//        if (!xlsxFile.exists()) {  
//            System.err.println("Not found or not a file: " + xlsxFile.getPath());  
//            return;  
//        }  
//  
//        int minColumns = -1;  
//        if (args.length >= 2)  
//            minColumns = Integer.parseInt(args[1]);  
//  
//        // The package open is instantaneous, as it should be.  
//        OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);  
//        XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);  
//        xlsx2csv.process();  
//        p.close();  
//    }  
}  

Excel_reader.java
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class Excel_reader {

	// *************xlsx檔案讀取函式************************
	// 在jdbc.properties上加上 excelUrl:xlsx檔案的目錄
	// excel_name為檔名,arg為需要查詢的列號(輸入數字則返回對應列 , 輸入字串則固定返回這個字串)
	// 返回
	@SuppressWarnings({ "resource", "unused" })
	public static ArrayList<ArrayList<String>> xlsx_reader(String excel_name, ArrayList<Object> args)
			throws IOException {
		// 讀取excel資料夾url
		Properties properties = new Properties();
		InputStream inStream = JDBCTools.class.getClassLoader().getResourceAsStream("jdbc.properties");
		properties.load(inStream);
		String excelUrl = properties.getProperty("excelUrl");

		File xlsxFile = new File(excelUrl + excel_name);
		if (!xlsxFile.exists()) {
			System.err.println("Not found or not a file: " + xlsxFile.getPath());
			return null;
		}
		ArrayList<ArrayList<String>> excel_output = new ArrayList<ArrayList<String>>();
		try {
			OPCPackage p;
			p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
			XLSX2CSV xlsx2csv = new XLSX2CSV(p, 20); // 20代表最大列數
			xlsx2csv.process();
			excel_output = xlsx2csv.get_output();
			p.close();   //釋放
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}

		System.out.println(excel_name + " 讀取完畢");

		// //讀取xlsx檔案
		// XSSFWorkbook xssfWorkbook = null;
		// //尋找目錄讀取檔案
		// System.out.println("開始讀取 "+excel_name);
		// File excelFile = new File(excelUrl+excel_name);
		// InputStream is = new FileInputStream(excelFile);
		// xssfWorkbook = new XSSFWorkbook(is);
		//
		// if(xssfWorkbook==null){
		// System.out.println("未讀取到內容,請檢查路徑!");
		// return null;
		// }else{
		// System.out.println(excel_name+" 讀取完畢");
		// }

		ArrayList<ArrayList<String>> ans = new ArrayList<ArrayList<String>>();
		// 遍歷xlsx中的sheet

		// 對於每個sheet,讀取其中的每一行
		for (int rowNum = 0; rowNum < excel_output.size(); rowNum++) {
			ArrayList<String> cur_output = excel_output.get(rowNum);
			ArrayList<String> curarr = new ArrayList<String>();
			for (int columnNum = 0; columnNum < args.size(); columnNum++) {
				Object obj = args.get(columnNum);
				if (obj instanceof String) {
					curarr.add(obj.toString());
				} else if (obj instanceof Integer) {
					String cell = cur_output.get((int) obj);
					curarr.add(cell);
				} else {
					System.out.print("型別錯誤!");
					return null;
				}
			}
			ans.add(curarr);
		}

		return ans;
	}

//	// 判斷後綴為xlsx的excel檔案的資料類
//	@SuppressWarnings("deprecation")
//	private static String getValue(XSSFCell xssfRow) {
//		if (xssfRow == null) {
//			return null;
//		}
//		if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BOOLEAN) {
//			return String.valueOf(xssfRow.getBooleanCellValue());
//		} else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_NUMERIC) {
//			double cur = xssfRow.getNumericCellValue();
//			long longVal = Math.round(cur);
//			Object inputValue = null;
//			if (Double.parseDouble(longVal + ".0") == cur)
//				inputValue = longVal;
//			else
//				inputValue = cur;
//			return String.valueOf(inputValue);
//		} else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BLANK
//				|| xssfRow.getCellType() == xssfRow.CELL_TYPE_ERROR) {
//			return "";
//		} else {
//			return String.valueOf(xssfRow.getStringCellValue());
//		}
//	}

}