Java 常用的工具類—判斷Excel版本&讀取Excel/CSV檔案
阿新 • • 發佈:2018-12-12
以下內容均來自實際專案需求,記錄一下。網上找的讀取Excel程式碼會出現不少問題,下面程式碼是經過好多次測試改進、符合現在專案需求的程式碼,如有不同要求,請自行修改,程式碼均只去掉了包名。 注:我們的Excel 第一行是表頭,其他行是資料。1、第一行遇到空列,後面的內容自動忽略掉; 2、如果資料中間有一行空白行,繼續讀,只有連續兩行或者以上是空白行,下面的才忽略掉不讀取。
完整程式碼如下
WDWUtil.java
/**
 * Determines the Excel file format from the extension of a file name.
 * Created by Administrator on 2018/7/4.
 */
public class WDWUtil {

    // Fully-qualified Pattern is used on purpose: this file has no import section.
    // The expressions are the same ones previously passed to String.matches, compiled once.

    /** At least one character, a dot, then "xls" in any letter case. */
    private static final java.util.regex.Pattern EXCEL_2003 =
            java.util.regex.Pattern.compile("^.+\\.(?i)(xls)$");

    /** At least one character, a dot, then "xlsx" in any letter case. */
    private static final java.util.regex.Pattern EXCEL_2007 =
            java.util.regex.Pattern.compile("^.+\\.(?i)(xlsx)$");

    /**
     * Checks whether the path names an Excel 2003 ({@code .xls}) file.
     *
     * @param filePath file name or path; may be {@code null}
     * @return {@code true} if the name ends with ".xls" (case-insensitive) and has at least one
     *         character before the dot; {@code false} otherwise, including for {@code null}
     */
    public static boolean isExcel2003(String filePath) {
        return filePath != null && EXCEL_2003.matcher(filePath).matches();
    }

    /**
     * Checks whether the path names an Excel 2007 ({@code .xlsx}) file.
     *
     * @param filePath file name or path; may be {@code null}
     * @return {@code true} if the name ends with ".xlsx" (case-insensitive) and has at least one
     *         character before the dot; {@code false} otherwise, including for {@code null}
     */
    public static boolean isExcel2007(String filePath) {
        return filePath != null && EXCEL_2007.matcher(filePath).matches();
    }
}
ExcelUtils
此類適用於預覽資料和真正上傳資料(預覽資料時讀取前一百條資料,正常上傳讀取全部資料)
其中包含空行空列的處理邏輯,程式碼中均加了註釋
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import java.io.*; import java.text.DecimalFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * Excel 檔案的處理 * Created by Administrator on 2018/7/4. */ public class ExcelUtils { //總行數 private int totalRows = 0; //總條數 private int totalCells = 0; //錯誤資訊接收器 private String errorMsg; // 是否是預覽 private boolean isPriview = true; //構造方法 public ExcelUtils() { } //構造方法 public ExcelUtils(boolean isPriview) { this.isPriview=isPriview; } //獲取總行數 public int getTotalRows() { return totalRows; } //獲取總列數 public int getTotalCells() { return totalCells; } //獲取錯誤資訊 public String getErrorInfo() { return errorMsg; } /** * 驗證EXCEL檔案 * * @param filePath * @return */ public boolean validateExcel(String filePath) { if (filePath == null || !(WDWUtil.isExcel2003(filePath) || WDWUtil.isExcel2007(filePath))) { errorMsg = "檔名不是excel格式"; return false; } return true; } /** * 讀EXCEL檔案 * * @param * @return */ public Map<String, Object> getExcelInfo(String fileName, String tmpFilePath) { Map<String, Object> result = new HashMap<String, Object>(); File fa = new File(tmpFilePath); InputStream is = null; try { is= new FileInputStream(fa); } catch (FileNotFoundException e) { e.printStackTrace(); } try { //驗證檔名是否合格 if (!validateExcel(fileName)) { errorMsg = "檔案不是excel格式"; return null; } //根據檔名判斷檔案是2003版本還是2007版本 boolean isExcel2003 = true; if (WDWUtil.isExcel2007(fileName)) { isExcel2003 = false; } // 獲取excel內容 Workbook wb = getExcelInfo(is, isExcel2003); List customerList = null; List titleList = null; Map columnstypes = null; // 讀取標題資訊 其中也設定了有效列數量 titleList = readExcelTitle(wb); //讀取Excel資訊 customerList = readExcelValue(wb); if(isPriview){ 
columnstypes = getColumnType(wb); customerList.add(0, columnstypes); } result.put("error", errorMsg); result.put("tablename", fileName.substring(0, fileName.lastIndexOf('.'))); result.put("schema", titleList); result.put("data", customerList); result.put("columnstypes", columnstypes); is.close(); } catch (Exception e) { e.printStackTrace(); } finally { if (is != null) { try { is.close(); } catch (IOException e) { is = null; e.printStackTrace(); } } } return result; } /** * 根據excel裡面的內容 * * @param is 輸入流 * @param isExcel2003 excel是2003還是2007版本 * @return */ public Workbook getExcelInfo(InputStream is, boolean isExcel2003) { /** 根據版本選擇建立Workbook的方式 */ Workbook wb = null; try { //當excel是2003時 if (isExcel2003) { wb = new HSSFWorkbook(is); } else { //當excel是2007時 wb = new XSSFWorkbook(is); } return wb; } catch (IOException e) { e.printStackTrace(); } return wb; } /** * 讀取Excel內容 * * @param wb * @return */ private List readExcelValue(Workbook wb) { //得到第一個shell Sheet sheet = wb.getSheetAt(0); //得到Excel的行數 this.totalRows = sheet.getPhysicalNumberOfRows(); //得到Excel的列數(前提是有行數) // 0816 已經在獲取標題的時候設定了有效列 totalCells if (isPriview && totalRows > 100) { totalRows = 101; } // 記錄空行 規則 如果空行大於1行 下面的視為垃圾資料 忽略 20180820 yunguang modified int blankLine=0; List valueList = new ArrayList(); //迴圈Excel行數,從第二行開始。標題不入庫 for (int r = 1; r < totalRows; r++) { Row row = sheet.getRow(r); if (row == null) { // 遇到空白行 獲取的行數加1 this.totalRows++; blankLine++; if (blankLine > 1) { // totalrows 重新定義總行數 20180820 yunguang modified this.totalRows = r; break; } continue; } else { // 無空白行 重置計數器 blankLine = 0; } List temp = new ArrayList(); // 標記是否為插入的空白行 識別規則 插入的資料後第一個單元格為空 boolean addFlag = false; //迴圈Excel的列 for (int c = 0; c < this.totalCells; c++) { Cell cell = row.getCell(c); if (null != cell) { String cellValue = getCellValue(cell); // 針對又見插入的行 poi預設它不算空行 判斷該行如果有一個 不為空 該條記錄視為有效 20180820 yunguang modified if ("".equals(cellValue) && (!addFlag)) { addFlag = false; } else { addFlag = true; } 
if("".equals(cellValue)){ temp.add("\\N"); } else { temp.add(cellValue); } } else { temp.add("\\N"); } } if (addFlag) { // 判斷是否為有效資料 valueList.add(temp); } } return valueList; } /** * 讀取Excel表頭 * * @param wb * @return */ private List readExcelTitle(Workbook wb) { //得到第一個shell Sheet sheet = wb.getSheetAt(0); //得到Excel的行數 this.totalRows = sheet.getPhysicalNumberOfRows(); //得到Excel的列數(前提是有行數) if (totalRows >= 1 && sheet.getRow(0) != null) { this.totalCells = sheet.getRow(0).getPhysicalNumberOfCells(); } List titleList = new ArrayList(); // 讀取標題 Row row = sheet.getRow(0); if (row == null) return null; //迴圈Excel的列 for (int c = 0; c < this.totalCells; c++) { Map temp = new HashMap(); Cell cell = row.getCell(c); if (null != cell) { temp.put("name", getCellValue(cell)); titleList.add(temp); } else { // 0816 遇到一個空白標題 結束 this.totalCells=c; break; } } return titleList; } /** * 讀取Excel表頭 * * @param wb * @return */ private Map getColumnType(Workbook wb) { //得到第一個shell Sheet sheet = wb.getSheetAt(0); //得到Excel的行數 this.totalRows = sheet.getPhysicalNumberOfRows(); //得到Excel的列數(前提是有行數) if (totalRows >= 1 && sheet.getRow(0) != null) { this.totalCells = sheet.getRow(0).getPhysicalNumberOfCells(); } if (this.totalRows > 101) { totalRows = 101; } // 0,string Map rowColumns = new HashMap(); // 記錄空行 規則 如果空行大於1行 下面的視為垃圾資料 忽略 20180820 yunguang modified int blankLine=0; //迴圈Excel行數,從第二行開始。標題不入庫 for (int r = 1; r < totalRows; r++) { Row row = sheet.getRow(r); if (row == null) { this.totalRows++; blankLine ++; if (blankLine > 1) { // totalrows 重新定義總行數 20180820 yunguang modified this.totalRows = r; break; } continue; } else { // 無空白行 重置計數器 blankLine = 0; } //迴圈Excel的列 for (int c = 0; c < this.totalCells; c++) { Cell cell = row.getCell(c); if (null != cell) { String cellValue = getCellValue(cell); Object value = rowColumns.get(c); String val = (String) value; String valType =FileOperateUtil.getType(cellValue); if (!"string".equals(val)) { if("string".equals(valType)){ rowColumns.put(c,valType); 
} else if(!"double".equals(val)){ rowColumns.put(c,valType); } } } else { rowColumns.put(c,"string"); } } } return rowColumns; } private String getCellValue(Cell cell) { String value = ""; SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss"); switch (cell.getCellTypeEnum()) { case STRING: value = cell.getRichStringCellValue().getString(); break; case NUMERIC: if ("General".equals(cell.getCellStyle().getDataFormatString())) { // 資料格式 DecimalFormat df = new DecimalFormat("#.########"); value = df.format(cell.getNumericCellValue())+""; } else if ("m/d/yy".equals(cell.getCellStyle().getDataFormatString())) { value = sdf.format(cell.getDateCellValue())+""; } else { // 針對十位數以上的數字出現科學記數法的處理 20180820 yunguang modified value = new DecimalFormat("#").format(cell.getNumericCellValue()); } break; case BOOLEAN: value = cell.getBooleanCellValue() + ""; break; case BLANK: value = ""; break; default: value = cell.toString(); break; } return value; } }
CSVUtils
import com.csvreader.CsvReader; import info.monitorenter.cpdetector.io.*; import lombok.extern.slf4j.Slf4j; import java.io.*; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * CSV 檔案的操作類 * Created by Administrator on 2018/7/10. */ @Slf4j public class CSVUtils { // 預覽或者正式上傳(true 為預覽) private boolean isPriview = true; public CSVUtils() { } public CSVUtils(boolean isPriview) { this.isPriview = isPriview; } /** * 匯出** * * @param file @param file csv檔案(路徑+檔名),csv檔案不存在會自動建立 * @param dataList 資料 * @return */ public static boolean exportCsv(File file, List<String> dataList) { boolean isSucess = false; FileOutputStream out = null; OutputStreamWriter osw = null; BufferedWriter bw = null; try { out = new FileOutputStream(file); osw = new OutputStreamWriter(out, "UTF-8"); bw = new BufferedWriter(osw); if (dataList != null && !dataList.isEmpty()) { for (String data : dataList) { bw.append(data).append("\r"); } } isSucess = true; } catch (Exception e) { isSucess = false; } finally { if (bw != null) { try { bw.close(); bw = null; } catch (IOException e) { e.printStackTrace(); } } if (osw != null) { try { osw.close(); osw = null; } catch (IOException e) { e.printStackTrace(); } } if (out != null) { try { out.close(); out = null; } catch (IOException e) { e.printStackTrace(); } } } return isSucess; } /** * 匯入 * * @param file csv檔案(路徑+檔案) * @return */ public static List<String> importCsv(File file) { List<String> dataList = new ArrayList<String>(); BufferedReader br = null; try { InputStreamReader reader = new InputStreamReader(new FileInputStream(file), "UTF-8"); br = new BufferedReader(reader); String line = ""; while ((line = br.readLine()) != null) { dataList.add(line); } } catch (Exception e) { e.printStackTrace(); } finally { if (br != null) { try { br.close(); br = null; } catch (IOException e) { e.printStackTrace(); } } } return dataList; } /** * 呼叫該方法的模組: * 本地呼叫 * 功能描述: * 獲取該檔案內容的編碼格式 * 
@param: * @return: * @auther: solmyr * @date: 2018/8/16 下午3:29 */ private Charset getFileEncode(String filePath) { try { File file = new File(filePath); CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance(); detector.add(new ParsingDetector(false)); detector.add(JChardetFacade.getInstance()); detector.add(ASCIIDetector.getInstance()); detector.add(UnicodeDetector.getInstance()); Charset charset = null; charset = detector.detectCodepage(file.toURI().toURL()); if (charset != null) { return charset; } } catch (Exception e) { log.error("get file encode error, filePath: " + filePath, e); } return Charset.forName("UTF-8"); } /** * 獲取 csv 檔案資訊 * * @param fileName 檔名 * @param tmpFilePath 接收到的檔案物件 * @return */ public Map<String, Object> getCSVInfo(String fileName, String tmpFilePath) { Map<String, Object> result = new HashMap<String, Object>(); String filePath = tmpFilePath; List titleList = new ArrayList(); List valueList = new ArrayList(); Map rowColumns = new HashMap(); try { Charset fileEncode = getFileEncode(filePath); File fa = new File(filePath); FileInputStream fi = new FileInputStream(fa); CsvReader cr = new CsvReader(fi, fileEncode); int i = 0; while (cr.readRecord()) { if (i == 0) { String[] rs = cr.getValues(); for (String s : rs) { Map temp = new HashMap(); temp.put("name", s); titleList.add(temp); } } else { if (isPriview && i > 100) break; List temp = new ArrayList(); String[] rs = cr.getValues(); int k = 0; for (String s : rs) { Object value = rowColumns.get(k); String val = (String) value; if (!"string".equals(val)) { if(!"double".equals(val)){ rowColumns.put(k, FileOperateUtil.getType(s)); } } temp.add(s); k++; } valueList.add(temp); } i++; } cr.close(); fi.close(); } catch (IOException e) { e.printStackTrace(); } if (isPriview){ valueList.add(0, rowColumns); } result.put("error", null); result.put("tablename", fileName.substring(0, fileName.lastIndexOf('.'))); result.put("schema", titleList); result.put("data", valueList); 
result.put("columnstypes", rowColumns); return result; } }
呼叫程式碼介面:
import org.springframework.web.multipart.MultipartFile;
import java.util.Map;
/**
 * Service for storing and parsing uploaded Excel/CSV files.
 * Created by Administrator on 2018/7/4.
 */
public interface ExcelCsvFileParserService {
/**
 * Stores the uploaded file in the user's temporary upload directory.
 *
 * @param userId ID of the uploading user; used as the name of the per-user directory
 * @param file   the file uploaded by the user
 * @return the full path under which the file is stored
 */
String getUploadPath(String userId,MultipartFile file);
/**
 * Removes the temporary file of an upload, for use when the upload was cancelled
 * or has been completed.
 *
 * @param userId   ID of the uploading user
 * @param fileName name of the file the user uploaded
 * @return {@code true} if the temporary file was deleted, {@code false} otherwise
 */
boolean handlePreviewCancel(String userId,String fileName);
/**
 * Parses a stored file and returns the parsed result.
 *
 * @param isPreview {@code true} to parse in preview mode, {@code false} to parse the whole file
 * @param filename  name of the uploaded file
 * @param fullPath  full path of the stored file
 * @return the parse result as a map
 */
Map<String, Object> HandlerFile(boolean isPreview,String filename,String fullPath);
}
實現類如下:
import com.easou.datasource.service.util.CSVUtils;
import com.easou.datasource.service.ExcelCsvFileParserService;
import com.easou.datasource.service.util.ExcelUtils;
import com.easou.datasource.service.util.FileOperateUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import java.util.Map;
/**
* Created by Administrator on 2018/7/4.
*/
@Service
@Slf4j
public class ExcelCsvFileParserServiceImpl implements ExcelCsvFileParserService {
@Override
public Map<String, Object> HandlerFile(boolean isPreview,String filename,String fullPath) {
// 判斷檔案型別 是excel 檔案還是csc
if (".csv".equals(filename.toLowerCase().substring(filename.toLowerCase().lastIndexOf('.')))) {
return new CSVUtils(isPreview).getCSVInfo(filename, fullPath);
} else {
return new ExcelUtils(isPreview).getExcelInfo(filename, fullPath);
}
}
@Override
public boolean handlePreviewCancel(String userId,String fileName){
boolean isDelete = false;
// 獲取上傳目錄
File upload = FileOperateUtil.getAbsoluteUploadPath();
// 臨時存放檔案目錄
String tmpPath= upload + File.separator +userId+File.separator;
String fullPath = tmpPath + fileName;
File tempFilePath = new File(fullPath);
if (tempFilePath.exists()) {
isDelete = tempFilePath.delete();
}
return isDelete;
}
@Override
public String getUploadPath(String userId, MultipartFile file) {
String filename = file.getOriginalFilename();
// 獲取上傳目錄
File upload = FileOperateUtil.getAbsoluteUploadPath();
// 臨時存放檔案目錄
String tmpPath= upload + File.separator +userId+File.separator;
File tempFilePath = new File(tmpPath);
if (!tempFilePath.exists()) tempFilePath.mkdirs();
String fullPath = tmpPath + filename;
try {
// 儲存臨時檔案
file.transferTo(new File(fullPath));
} catch (IOException e) {
e.printStackTrace();
}
return fullPath;
}
}
如有效能問題或者隱含bug,期待評論拍磚!!!