1. 程式人生 > 其它 >java解析CSV檔案三種方法(openCSV)

java解析CSV檔案三種方法(openCSV)

一、簡介
1、pom.xml
<!-- csv檔案解析依賴 -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>5.4</version>
</dependency>
二、手動解析CSV檔案
// 解析csv檔案並轉成bean(方法一)
/**
 * Parses a CSV upload by hand (method 1): reads it line by line, splits each line
 * on commas and copies the first six columns into a {@code CsvFile}
 * (name, title, number, type, personnel, time - in that order).
 *
 * <p>Limitations of this manual approach: a comma inside a quoted field is still
 * treated as a separator, and every line is mapped - a header row, if present, is
 * NOT skipped (NOTE(review): the openCSV-based variants do skip it; confirm which
 * behaviour callers expect).
 *
 * @param file uploaded CSV file, decoded as UTF-8
 * @return one bean per non-blank line; if an {@link IOException} interrupts reading,
 *         the rows read so far are returned
 * @throws IllegalArgumentException if a non-blank line has fewer than six columns
 */
public static List<CsvFile> getCsvDataMethod1(MultipartFile file) {
    final int expectedColumns = 6;
    List<CsvFile> csvFileList = new ArrayList<>();

    // try-with-resources closes the reader (and the upload stream behind it) on every path.
    try (BufferedReader bufferedReader = new BufferedReader(
            new InputStreamReader(file.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        int lineNumber = 0;
        while ((line = bufferedReader.readLine()) != null) {
            lineNumber++;
            if (line.trim().isEmpty()) {
                continue; // blank lines carry no record
            }
            // Limit -1 keeps trailing empty columns; the default split would drop them
            // and make a row such as a,b,c,d,e, look one column short.
            String[] split = line.split(",", -1);
            if (split.length < expectedColumns) {
                throw new IllegalArgumentException("CSV line " + lineNumber + " has " + split.length
                        + " column(s), expected at least " + expectedColumns);
            }
            CsvFile csvFile = new CsvFile();
            csvFile.setName(splitResult(split[0]));
            csvFile.setTitle(splitResult(split[1]));
            csvFile.setNumber(splitResult(split[2]));
            csvFile.setType(splitResult(split[3]));
            csvFile.setPersonnel(splitResult(split[4]));
            csvFile.setTime(splitResult(split[5]));
            csvFileList.add(csvFile);
        }
    } catch (IOException e) {
        // Best effort, as before: report the failure and return what was read so far.
        e.printStackTrace();
    }
    return csvFileList;
}

去除引號""


/**
 * Removes every double-quote character from a raw CSV field.
 *
 * @param once raw field text; must not be {@code null}
 * @return the field with all {@code "} characters removed, other characters unchanged
 */
private static String splitResult(String once) {
    // One library call instead of += in a loop, which re-copies the growing string per character.
    return once.replace("\"", "");
}
三、openCSV解析CSV檔案
/**
 * Reads a CSV upload with openCSV and returns its data rows as raw string arrays (method 2).
 *
 * <p>The first record is treated as the header and left out of the result.
 *
 * @param file uploaded CSV file, decoded as UTF-8
 * @return every record after the header; if reading fails part-way, the records
 *         collected so far (possibly none) are returned
 */
public static List<String[]> getCsvDataMethod2(MultipartFile file) {
    List<String[]> list = new ArrayList<>();

    // try-with-resources: the reader chain was previously never closed.
    try (CSVReader csvReader = new CSVReaderBuilder(
            new BufferedReader(
                    new InputStreamReader(file.getInputStream(), java.nio.charset.StandardCharsets.UTF_8)))
            .build()) {
        boolean headerPending = true;
        for (String[] row : csvReader) {
            if (headerPending) {
                // Skip the header record; data starts at the second record.
                headerPending = false;
                continue;
            }
            list.add(row);
        }
    } catch (Exception e) {
        // Keep the best-effort contract but say what actually went wrong.
        System.out.println("CSV檔案讀取異常: " + e);
    }
    return list;
}

四、openCSV解析CSV檔案(結果為實體類)
工具類:

/**
 * Reads a CSV upload with openCSV and binds every data row to a bean of the given
 * type, matching CSV header names to the bean's bound columns (method 3).
 *
 * @param file  uploaded CSV file, decoded as UTF-8
 * @param clazz bean class to instantiate for each row
 * @param <T>   bean type
 * @return the parsed beans, or {@code null} if the file could not be read or converted
 */
public static <T> List<T> getCsvDataMethod3(MultipartFile file, Class<T> clazz) {
    // The reader is closed on every path; previously it was never closed.
    try (InputStreamReader in = new InputStreamReader(
            file.getInputStream(), java.nio.charset.StandardCharsets.UTF_8)) {
        HeaderColumnNameMappingStrategy<T> strategy = new HeaderColumnNameMappingStrategy<>();
        strategy.setType(clazz);
        CsvToBean<T> csvToBean = new CsvToBeanBuilder<T>(in).withMappingStrategy(strategy).build();
        // parse() is the actual conversion step, so it belongs inside the handler that
        // reports "conversion failed" - and it must run while the reader is still open.
        return csvToBean.parse();
    } catch (Exception e) {
        // Log the cause as well; the bare message gave no hint about what failed.
        logger.error("資料轉化失敗", e);
        return null;
    }
}

實體類:

import com.opencsv.bean.CsvBindByName;
import lombok.Data;

/**
 * Row model for the sample CSV file: one String field per column.
 *
 * <p>Each field carries a {@code @CsvBindByName} annotation naming the CSV column it is
 * bound to. No accessors are written out here; they are presumably supplied by Lombok's
 * {@code @Data} (the manual parser in this article calls {@code setName}, {@code setTitle}, ...).
 */
@Data
public class CsvFile {

/** Value of the CSV column "name". */
@CsvBindByName(column = "name")
private String name;

/** Value of the CSV column "title". */
@CsvBindByName(column = "title")
private String title;

/** Value of the CSV column "number"; kept as text, not converted to a numeric type. */
@CsvBindByName(column = "number")
private String number;

/** Value of the CSV column "type". */
@CsvBindByName(column = "type")
private String type;

/** Value of the CSV column "personnel". */
@CsvBindByName(column = "personnel")
private String personnel;

/** Value of the CSV column "time"; kept as text, not converted to a date/time type. */
@CsvBindByName(column = "time")
private String time;
}

五、整理完成的CsvUtils

import com.lydms.testopencsv.domain.CsvFile;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.bean.CsvToBean;
import com.opencsv.bean.CsvToBeanBuilder;
import com.opencsv.bean.HeaderColumnNameMappingStrategy;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.web.multipart.MultipartFile;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
 * CSV parsing helpers for uploaded files, showing three approaches: manual line
 * splitting, openCSV's raw reader, and openCSV's bean binding.
 *
 * <p>All methods decode the upload as UTF-8 and close the readers they open.
 */
public class CsvUtils {
    private static final Logger logger = LogManager.getLogger(CsvUtils.class);

    /** Number of leading columns the manual parser copies into a {@code CsvFile}. */
    private static final int CSV_FILE_COLUMNS = 6;

    /**
     * Reads a CSV upload with openCSV and returns its data rows as raw string arrays (method 2).
     *
     * <p>The first record is treated as the header and left out of the result.
     *
     * @param file uploaded CSV file
     * @return every record after the header; if reading fails part-way, the records
     *         collected so far (possibly none) are returned
     */
    public static List<String[]> getCsvDataMethod2(MultipartFile file) {
        List<String[]> list = new ArrayList<>();

        // try-with-resources: the reader chain was previously never closed.
        try (CSVReader csvReader = new CSVReaderBuilder(
                new BufferedReader(
                        new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8)))
                .build()) {
            boolean headerPending = true;
            for (String[] row : csvReader) {
                if (headerPending) {
                    // Skip the header record; data starts at the second record.
                    headerPending = false;
                    continue;
                }
                list.add(row);
            }
        } catch (Exception e) {
            // Same best-effort contract as before, but through the class logger and with the cause.
            logger.error("CSV檔案讀取異常", e);
        }
        return list;
    }

    /**
     * Reads a CSV upload with openCSV and binds every data row to a bean of the given
     * type, matching CSV header names to the bean's bound columns (method 3).
     *
     * @param file  uploaded CSV file
     * @param clazz bean class to instantiate for each row
     * @param <T>   bean type
     * @return the parsed beans, or {@code null} if the file could not be read or converted
     */
    public static <T> List<T> getCsvDataMethod3(MultipartFile file, Class<T> clazz) {
        try (InputStreamReader in = new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8)) {
            HeaderColumnNameMappingStrategy<T> strategy = new HeaderColumnNameMappingStrategy<>();
            strategy.setType(clazz);
            CsvToBean<T> csvToBean = new CsvToBeanBuilder<T>(in).withMappingStrategy(strategy).build();
            // parse() is the actual conversion step, so it belongs inside the handler that
            // reports "conversion failed" - and it must run while the reader is still open.
            return csvToBean.parse();
        } catch (Exception e) {
            logger.error("資料轉化失敗", e);
            return null;
        }
    }

    /**
     * Parses a CSV upload by hand (method 1): reads it line by line, splits each line on
     * commas and copies the first six columns into a {@code CsvFile}
     * (name, title, number, type, personnel, time - in that order).
     *
     * <p>Limitations of this manual approach: a comma inside a quoted field is still
     * treated as a separator, and every line is mapped - a header row, if present, is
     * NOT skipped (NOTE(review): methods 2 and 3 do skip it; confirm which behaviour
     * callers expect).
     *
     * @param file uploaded CSV file
     * @return one bean per non-blank line; if an {@link IOException} interrupts reading,
     *         the rows read so far are returned
     * @throws IllegalArgumentException if a non-blank line has fewer than six columns
     */
    public static List<CsvFile> getCsvDataMethod1(MultipartFile file) {
        List<CsvFile> csvFileList = new ArrayList<>();

        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            int lineNumber = 0;
            while ((line = bufferedReader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue; // blank lines carry no record
                }
                // Limit -1 keeps trailing empty columns; the default split would drop them
                // and make a row such as a,b,c,d,e, look one column short.
                String[] split = line.split(",", -1);
                if (split.length < CSV_FILE_COLUMNS) {
                    throw new IllegalArgumentException("CSV line " + lineNumber + " has " + split.length
                            + " column(s), expected at least " + CSV_FILE_COLUMNS);
                }
                CsvFile csvFile = new CsvFile();
                csvFile.setName(splitResult(split[0]));
                csvFile.setTitle(splitResult(split[1]));
                csvFile.setNumber(splitResult(split[2]));
                csvFile.setType(splitResult(split[3]));
                csvFile.setPersonnel(splitResult(split[4]));
                csvFile.setTime(splitResult(split[5]));
                csvFileList.add(csvFile);
            }
        } catch (IOException e) {
            // Best effort, as before: report the failure and return what was read so far.
            logger.error("CSV檔案讀取異常", e);
        }
        return csvFileList;
    }

    /**
     * Removes every double-quote character from a raw CSV field.
     *
     * @param once raw field text; must not be {@code null}
     * @return the field with all {@code "} characters removed
     */
    private static String splitResult(String once) {
        // One library call instead of += in a loop, which re-copies the string per character.
        return once.replace("\"", "");
    }
}

六、相關地址

參考地址:https://www.cnblogs.com/xhj99/p/13536465.html

git地址:https://github.com/li395092734/test-opencsv

csv地址:https://files.cnblogs.com/files/blogs/604830/csvfile.zip

------------------------------------------------------------------------------------------------------------------------

Java解壓縮.gz .zip .tar.gz等格式的壓縮包方法總結

 

1、.gz檔案是linux下常見的壓縮格式。解壓使用 java.util.zip.GZIPInputStream 即可,壓縮則使用 java.util.zip.GZIPOutputStream。
/**
 * Decompresses a gzip file and writes the result next to it.
 *
 * <p>The output name is the source path with its last extension removed and then,
 * if the file name still has one, one further extension removed: {@code data.gz}
 * and {@code data.xml.gz} both produce {@code data}.
 * NOTE(review): stripping the second extension is kept from the original code;
 * confirm that dropping e.g. ".xml" is really wanted.
 *
 * <p>Failures are reported on {@code System.err}; nothing is thrown.
 *
 * @param sourcedir path of the .gz file to decompress
 */
public static void unGzipFile(String sourcedir) {
    String outputFile = stripExtension(stripExtension(sourcedir));
    if (outputFile.equals(sourcedir)) {
        // Without an extension the output would be the source itself and opening it
        // for writing would truncate the archive before it is read.
        System.err.println("Cannot derive an output name from '" + sourcedir
                + "': the file name has no extension");
        return;
    }

    // try-with-resources closes all three streams even when reading or writing fails.
    try (FileInputStream fin = new FileInputStream(sourcedir);
         GZIPInputStream gzin = new GZIPInputStream(fin);
         FileOutputStream fout = new FileOutputStream(outputFile)) {
        byte[] buf = new byte[1024];
        int num;
        while ((num = gzin.read(buf, 0, buf.length)) != -1) {
            fout.write(buf, 0, num);
        }
    } catch (Exception ex) {
        System.err.println(ex.toString());
    }
}

/**
 * Removes the last extension from the file-name part of a path.
 * Dots in directory names and a leading dot of the file name are left alone,
 * so a path without an extension is returned unchanged.
 */
private static String stripExtension(String path) {
    int nameStart = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\')) + 1;
    int dot = path.lastIndexOf('.');
    return dot > nameStart ? path.substring(0, dot) : path;
}

2、zip檔案,使用java.util.zip.ZipEntry 和 java.util.zip.ZipFile
/**
 * Extracts a zip archive into a directory, creating the directory and any
 * sub-directories as needed. Entry names are decoded with the CP866 charset
 * unless the archive marks them as UTF-8. Each entry name is printed to
 * {@code System.out} as it is processed.
 *
 * @param file      the zip file to extract
 * @param outputDir directory to extract into
 * @throws IOException if the archive cannot be read or written out, or if an entry
 *                     would be written outside {@code outputDir}; the original failure
 *                     is attached as the cause
 */
public static void unZip(File file, String outputDir) throws IOException {
    Charset cp866 = Charset.forName("CP866"); // alternative (non UTF-8) charset for entry names

    // try-with-resources closes the archive without a throwing finally block, which
    // could replace the real extraction error with a close error.
    try (ZipFile zipFile = new ZipFile(file, cp866)) {
        File root = new File(outputDir);
        root.mkdirs();
        File canonicalRoot = root.getCanonicalFile();

        Enumeration<? extends ZipEntry> entries = zipFile.entries();
        while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            System.out.println("解壓." + entry.getName());

            File target = new File(outputDir + "/" + entry.getName());
            // Zip Slip guard: an entry name such as ../../x must not leave the output directory.
            if (!target.getCanonicalFile().toPath().startsWith(canonicalRoot.toPath())) {
                throw new IOException("Zip entry is outside of the output directory: " + entry.getName());
            }

            if (entry.isDirectory()) {
                target.mkdirs();
                continue;
            }

            File parent = target.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }
            // Both streams are closed even if one close() fails.
            try (InputStream in = zipFile.getInputStream(entry);
                 OutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[2048];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    out.write(buffer, 0, length);
                }
            }
        }
    } catch (IOException e) {
        throw new IOException("解壓縮檔案出現異常", e);
    }
}

/**
 * Creates a directory, including any missing parent directories.
 *
 * @param outputDir base directory
 * @param subDir    optional path below {@code outputDir}; when {@code null} or blank,
 *                  {@code outputDir} itself is created
 */
public static void createDirectory(String outputDir, String subDir) {
    File file = (subDir == null || subDir.trim().isEmpty())
            ? new File(outputDir)
            : new File(outputDir + "/" + subDir);
    // mkdirs() creates missing ancestors itself. The former explicit getParentFile()
    // handling added nothing and threw a NullPointerException for a single-segment
    // relative path such as "out", which has no parent.
    if (!file.exists()) {
        file.mkdirs();
    }
}

3、.tar.gz檔案可以看做先用tar打包,再使用gz進行壓縮。
使用org.apache.tools.tar.TarEntry; org.apache.tools.tar.TarInputStream 和 org.apache.tools.tar.TarOutputStream

需要匯入pom檔案:

<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant</artifactId>
<version>1.10.7</version>
</dependency>
package com.asiainfo.utils;

import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.zip.GZIPInputStream;

/**
 * Extracts {@code .tar.gz} archives using the tar classes shipped with Apache Ant.
 */
public class TarGzipParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(TarGzipParser.class);

    /**
     * Extracts a tar.gz archive into a directory, creating the directory and any
     * sub-directories as needed.
     *
     * @param file      the tar.gz file to extract
     * @param outputDir directory to extract into
     * @return the last regular file that was extracted, or {@code null} if the archive
     *         contained no regular files
     * @throws IOException if the archive cannot be read or written out, or if an entry
     *                     would be written outside {@code outputDir}; the original failure
     *                     is attached as the cause
     */
    public static File unTarGz(File file, String outputDir) throws IOException {
        File tmpFile = null;

        // try-with-resources closes the streams without a throwing finally block, which
        // could replace the real extraction error with a close error.
        try (FileInputStream fileIn = new FileInputStream(file);
             TarInputStream tarIn = new TarInputStream(
                     new GZIPInputStream(new BufferedInputStream(fileIn)), 1024 * 2)) {

            createDirectory(outputDir, null);
            File canonicalRoot = new File(outputDir).getCanonicalFile();

            TarEntry entry;
            while ((entry = tarIn.getNextEntry()) != null) {
                // File(parent, child) uses the platform separator; the former hard-coded
                // "\\" only produced nested paths on Windows.
                File target = new File(outputDir, entry.getName());
                // Path traversal guard: an entry such as ../../x must stay inside outputDir.
                if (!target.getCanonicalFile().toPath().startsWith(canonicalRoot.toPath())) {
                    throw new IOException("Tar entry is outside of the output directory: " + entry.getName());
                }

                if (entry.isDirectory()) {
                    createDirectory(outputDir, entry.getName());
                    continue;
                }

                String parent = target.getParent();
                if (parent != null) {
                    createDirectory(parent, null);
                }
                tmpFile = target;
                try (OutputStream out = new FileOutputStream(target)) {
                    byte[] buffer = new byte[2048];
                    int length;
                    // Reads only the current entry's bytes; -1 marks the end of the entry.
                    while ((length = tarIn.read(buffer)) != -1) {
                        out.write(buffer, 0, length);
                    }
                }
            }
        } catch (IOException ex) {
            throw new IOException("解壓歸檔檔案出現異常", ex);
        }

        return tmpFile;
    }

    /**
     * Creates a directory, including any missing parent directories.
     *
     * @param outputDir base directory
     * @param subDir    optional path below {@code outputDir}; when {@code null} or blank,
     *                  {@code outputDir} itself is created
     */
    public static void createDirectory(String outputDir, String subDir) {
        File file = (subDir == null || subDir.trim().isEmpty())
                ? new File(outputDir)
                : new File(outputDir, subDir);
        // mkdirs() creates missing ancestors itself. The former explicit getParentFile()
        // handling threw a NullPointerException for a path without a parent.
        if (!file.exists()) {
            file.mkdirs();
        }
    }

}

------------------------------------------------------------------------------------------------------------------------

應用場景

在大資料的工作中,每天必不可少的就是和資料打交道,我們需要從我們的業務方將資料採集過來,然後根據我們的業務邏輯將資料解析並轉換成我們所需要的格式!大資料分析往往資料量都是非常大的,一天幾十T都是很正常,如果按正常的來採集的話,估計就是採集都要花費不少時間,最常用的方式就是將資料進行壓縮之後再進行傳輸,這樣的效率是比較高的,也節省了頻寬資源!舉一個簡單的例子,我們的邏輯是xml原始檔案先壓縮成一個gz檔案,再將上百個gz檔案再二次壓縮成一個tar.gz檔案!一個壓縮檔案的大小大概是200M,但是解壓出來就差不多20G!!!此篇文章就記錄一下實現功能需求的過程!!!!

依賴

<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-compress</artifactId>
    <version>1.5</version>
</dependency>

 實現程式碼

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 import lombok.extern.slf4j.Slf4j; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.utils.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory;   import java.io.*; import java.util.zip.GZIPInputStream;     @Slf4j public class FileUtils {     private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);         public static void main(String[] args) {         deCompressGZipFile("path1""dir1");     }       /**      * Tar檔案解壓方法      *      * @param tarGzFile 要解壓的壓縮檔名稱(絕對路徑名稱)      * @param destDir   解壓後文件放置的路徑名(絕對路徑名稱)當路徑不存在,會自動建立      * @return 解壓出的檔案列表      */     public static void deCompressGZipFile(String tarGzFile, String destDir) {           // 建立輸出流,用於將從壓縮檔案中讀出的檔案流寫入到磁碟         TarArchiveEntry entry = null;         TarArchiveEntry[] subEntries = null;         File subEntryFile = null;         try (FileInputStream fis = new FileInputStream(tarGzFile);              GZIPInputStream gis = new GZIPInputStream(fis);              TarArchiveInputStream taris = new TarArchiveInputStream(gis);) {             while ((entry = taris.getNextTarEntry()) != null) {                 StringBuilder entryFileName = new StringBuilder();                 entryFileName.append(destDir).append(File.separator).append(entry.getName());                 File 
entryFile = new File(entryFileName.toString());                 if (entry.isDirectory()) {                     if (!entryFile.exists()) {                         entryFile.mkdir();                     }                     subEntries = entry.getDirectoryEntries();                     for (int i = 0; i < subEntries.length; i++) {                         try (OutputStream out = new FileOutputStream(subEntryFile)) {                             subEntryFile = new File(entryFileName + File.separator + subEntries[i].getName());                             IOUtils.copy(taris, out);                         catch (Exception e) {                             LOGGER.error("deCompressing file failed:" + subEntries[i].getName() + "in" + tarGzFile);                         }                     }                 else {                     checkFileExists(entryFile);                     OutputStream out = new FileOutputStream(entryFile);                     IOUtils.copy(taris, out);                     out.close();                     //如果是gz檔案進行遞迴解壓                     if (entryFile.getName().endsWith(".gz")) {                         String namepath = entryFile.getAbsolutePath();                         compressGZ(namepath);                     }                 }             }             //如果需要刪除之前解壓的gz檔案,在這裡進行             File dir = new File("dir1");             File[] files = dir.listFiles();             for (File f:files) {                 if (f.getName().split("\\.").length==3){                     f.delete();                 }             }             catch (Exception e) {             LOGGER.warn("decompress failed", e);         }     }       /**      * 解壓GZ檔案      * @param pwd      */     public static void compressGZ(String pwd){         if (!getExtension(pwd).equalsIgnoreCase("gz")) {             System.err.println("File name must have extension of \".gz\"");             System.exit(1);         }           GZIPInputStream in = null;         try {             in = new 
GZIPInputStream(new FileInputStream(pwd));         catch(FileNotFoundException e) {             System.err.println("File not found. " + pwd);             System.exit(1);         catch (IOException e) {             e.printStackTrace();         }           String outFileName = getFileName(pwd);         FileOutputStream out = null;         try {             out = new FileOutputStream(outFileName);         catch (FileNotFoundException e) {             System.err.println("Could not write to file. " + outFileName);             System.exit(1);         }           try {             byte[] buf = new byte[1024];             int len;             while((len = in.read(buf)) > 0) {                 out.write(buf, 0, len);             }             in.close();             out.close();         catch (IOException e) {             e.printStackTrace();         }     }         /**      * Used to extract and return the extension of a given file.      * @param f Incoming file to get the extension of      * @return <code>String</code> representing the extension of the incoming      *         file.      */     public static String getExtension(String f) {         String ext = "";         int i = f.lastIndexOf('.');           if (i > 0 &&  i < f.length() - 1) {             ext = f.substring(i+1);         }         return ext;     }       /**      * Used to extract the filename without its extension.      * @param f Incoming file to get the filename      * @return <code>String</code> representing the filename without its      *         extension.      
*/     public static String getFileName(String f) {         String fname = "";         int i = f.lastIndexOf('.');           if (i > 0 &&  i < f.length() - 1) {             fname = f.substring(0,i);         }         return fname;     }       public static void checkFileExists(File file) {         //判斷是否是目錄         if (file.isDirectory()) {             if (!file.exists()) {                 file.mkdir();             }         else {             //判斷父目錄是否存在,如果不存在,則建立             if (file.getParentFile() != null && !file.getParentFile().exists()) {                 file.getParentFile().mkdirs();             }             try {                 file.createNewFile();             catch (IOException e) {                 e.printStackTrace();             }         }     } }

 這樣就實現了將tar.gz檔案中的xml原始檔案全部解壓出來!之後就可以將xml檔案中的資料拿出來做解析和分析了!java解析xml檔案的方式有多種,這裡使用dom4j!!

依賴

<dependency>
    <groupId>org.dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>2.1.3</version>
</dependency>

 實現程式碼

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader;   import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.List;   /**  * java  DOM4j  xml解析  */   public class JavaXMLTest {     public static void main(String[] args) throws IOException, DocumentException {           SAXReader reader = new SAXReader();         Document xmlDocument = reader.read(new FileInputStream(new File("2.xml")));         //System.out.println(doc==null?"未讀取到xml檔案":"已讀取到xml檔案");           //獲取根節點         Element rootElement = xmlDocument.getRootElement();         //獲取根節點下的直接子節點的個數和名字         List<Element> list = rootElement.elements("fileHeader");           //System.out.println("根節點下有"+list.size()+"直接子節點");           //獲取根節點下  fileHeader節點得value值         for (Element elemet:list) {             String reportTime = elemet.attributeValue("reportTime");             String startTime = elemet.attributeValue("startTime");             String endTime = elemet.attributeValue("endTime");         }             //獲取根節點下所有得  子節點         List<Element> list1 = rootElement.elements("header");         //System.out.println("根節點下header有"+list1.size()+"直接子節點");           //由於只有一個節點所以取get(0)         Element header = list1.get(0);         //獲取header節點得value值         String id = header.attributeValue("id");         //System.out.println("id是"+id);           //獲取header節點下所有  measure 節點         Element measure = header.elements("measure").get(0);         //獲取measurement節點下   smr節點         Element  sm = measure.elements("sm").get(0);         //獲取smr節點的value值         String stringValue = sm.getStringValue();         //按照空格進行拆分         String[] objj = stringValue.split(" ");         
//System.out.println("stringvalue===="+stringValue);         //List<Element> smlist = smr.elements("obj");         //獲取measure節點下所有的  obj 節點         List<Element> objlist = measurement.elements("obj");         //Map<String,String> map = new HashMap();           //遍歷所有  obj節點         for (Element ob:objectlist) {             //System.out.println(objj.length);             //獲取所有 obj節點 下的 v 節點             List<Element> vlist = ob.elements("v");             //遍歷  v  節點             for (Element v:vlist) {                 //System.out.println("v得value值是"+v.getStringValue());                 //獲取v節點的value值                 String[] vv = v.getStringValue().split(" ");                 //System.out.println(vv.length);                 StringBuilder sb = new StringBuilder();                 for (int i=0;i<objj.length;i++){                     sb.append(objj[i]+"="+vv[i]);                 }                 System.out.println(sb.toString());                 sb=null;             }         }     } }     

 至此,利用java就完成了對tar.gz檔案的解壓,並對xml原始檔案進行資料解析!!!這個方法對於小資料量是可以實現的!但是大資料都是基於分散式檔案系統Hadoop構建的!我們的資料都是儲存在hdfs上的,而且資料也非常大,這樣解壓檔案寫到本地檔案系統中,再解析其中的資料上傳至hdfs!同時也是要消耗頻寬的!最終在測試的時候是不行的!這個方案就被否定了!