java讀取csv檔案內容(逗號分隔、回車換行文字)
最近在做專案時,需要每日更新大小額支援的銀行列表資訊。該檔案從核心下載,並使用java專案解析後放到資料庫中。該檔案是文字檔案,一行為一條記錄,記錄中以逗號分隔欄位,欄位以雙引號包裹。起初以為按這種規則自己寫一個小程式也能用,於是寫了幾句程式碼,確實能執行並解析出大部分內容,但有些特殊情況沒有解析出來,例如:雙引號包裹的欄位內容裡面也有逗號,但這個逗號並不是欄位之間的分隔符,而是欄位內容的一部分。起初並不知道這其實是一種標準的csv格式,過程雖然曲折,但最終還是整理好了,在此記錄一下。
package com.schedule;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.sql.DataSource;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import com.ynet.ifp.eams.core.util.FtpUtils;
import com.ynet.ifp.eams.utils.file.UncompressFileGZIP;
import org.springframework.stereotype.Component;
/**
 * Scheduled job that refreshes the large/small-value payment bank-code table
 * ({@code YC_PXDZF}); tentatively scheduled to run every night at 11 p.m.
 *
 * <p>The job reads the GBK-encoded export file {@code PXDZF.unl} from the
 * configured download directory. Each line is one record whose fields are
 * comma-separated and may be wrapped in double quotes (a quoted field may
 * itself contain commas). The existing table content is deleted and the
 * first {@value #COLUMN_COUNT} fields of every record are inserted.
 */
@Component("firstUpdatePxdzfInfoSchedule")
public class FirstUpdatePxdzfInfoScheduleImpl extends EamsScheduledService {

    private static final Logger log = Logger.getLogger(FirstUpdatePxdzfInfoScheduleImpl.class);

    /** Name of the export file inside {@link #downPath}. */
    private static final String DATA_FILE_NAME = "PXDZF.unl";

    /** Character encoding of the export file. */
    private static final String DATA_FILE_CHARSET = "GBK";

    /** Number of leading fields of each record that are bound to the insert statement. */
    private static final int COLUMN_COUNT = 30;

    /** Number of rows sent to the database (and committed) per batch. */
    private static final int BATCH_SIZE = 100;

    private static final String DELETE_SQL = "delete from yc_pxdzf";

    private static final String INSERT_SQL =
            "insert into YC_PXDZF (YC_FQHHO2, YC_ZHUANT, YC_JIGULB, YC_HANBDM, YC_C2ZCHH, YC_BHSJCY, YC_C2RHDM, YC_FBHHO2, YC_QSHHO2, YC_JIEDDM,"
            + " YC_FKHMC1, YC_CXUMC1, YC_YLIUBZ, YC_SUSDDM, YC_DIZHI1, YC_YOUZBM, YC_TFDESC, YC_BYZDBE, YC_SXIORQ, YC_SHIXRQ, YC_BEIZXX, YC_BEIY40,"
            + " YC_BYBZ01, YC_BYBZ02, YC_WEIHRQ, YC_WEIHSJ, YC_WEIHGY, YC_ROWIDD, YC_SHJNCH, YC_JILUZT,YC_ID) values "
            + "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?," + "yc_id_sequence.nextVal" + ")";

    private static final String UPDATE_SQL =
            "update BATCH_JOB_EXECUTION e set e.end_time = sysdate where e.END_TIME is NULL and e.job_instance_id in (select i.job_instance_id from BATCH_JOB_INSTANCE i where i.job_name = 'updatePxdzfInfoSchedule')";

    /** Any character except a double quote, comma, newline or space. */
    private static final String SPECIAL_CHAR_A = "[^\",\\n ]";

    /** Any character except a double quote, comma or newline. */
    private static final String SPECIAL_CHAR_B = "[^\",\\n]";

    /**
     * Matches one field per {@code find()}. The four alternatives are, in order:
     * a quoted field followed by a comma, an unquoted field followed by a comma,
     * a quoted field without a comma, and an unquoted field without a comma.
     * Compiled once because the expression is identical for every line.
     */
    private static final Pattern CSV_FIELD_PATTERN = Pattern.compile(
            "\"((" + SPECIAL_CHAR_A + "*[,\\n ])*(" + SPECIAL_CHAR_A + "*\"{2})*)*" + SPECIAL_CHAR_A + "*\"[ ]*,[ ]*"
            + "|" + SPECIAL_CHAR_B + "*[ ]*,[ ]*"
            + "|\"((" + SPECIAL_CHAR_A + "*[,\\n ])*(" + SPECIAL_CHAR_A + "*\"{2})*)*" + SPECIAL_CHAR_A + "*\"[ ]*"
            + "|" + SPECIAL_CHAR_B + "*[ ]*");

    @Autowired
    private com.ynet.ifp.core.utils.TxUtils txUtils;
    @Autowired
    private DataSource dataSource;
    @Value("${hx.bank.host}")
    private String host;
    @Value("${hx.bank.port}")
    private int port;
    @Value("${hx.bank.path}")
    private String path;
    @Value("${hx.bank.name}")
    private String name;
    @Value("${hx.bank.pwd}")
    private String pwd;
    @Value("${hx.bank.fileName}")
    private String fileName;
    @Value("${hx.bank.down.path}")
    private String downPath;

    /**
     * Returns the display name of this scheduled task.
     *
     * @return the task's (Chinese) display name
     */
    public String getCnName() {
        return "定時任務--更新大小額聯行號資訊";
    }

    /**
     * Reloads {@code YC_PXDZF} from the export file.
     *
     * <p>Steps: open the file, delete (and commit) the current table content,
     * insert the records in batches of {@value #BATCH_SIZE} with a commit after
     * each batch, then stamp the end time of the matching
     * {@code BATCH_JOB_EXECUTION} rows.
     *
     * <p>The file is opened <em>before</em> the table is cleared, so a missing
     * file leaves the existing data untouched. Blank lines are ignored and
     * records with fewer than {@value #COLUMN_COUNT} fields are skipped with a
     * warning. As before, failures are logged and not propagated to the caller;
     * uncommitted work is rolled back and every resource is closed.
     *
     * @throws Exception declared by the overridden method; this implementation
     *         handles failures internally
     */
    @Override
    protected void doService() throws Exception {
        log.info("updatePxdzfInfoSchedule schedule is going to start.....");
        String filePath = downPath + DATA_FILE_NAME;

        FileInputStream rawIn = null;
        BufferedReader reader = null;
        Connection con = null;
        PreparedStatement deleteStat = null;
        PreparedStatement insertStat = null;
        PreparedStatement updateStat = null;
        try {
            try {
                rawIn = new FileInputStream(new File(filePath));
            } catch (FileNotFoundException e) {
                // Abort before touching the database: without a source file
                // the delete below would leave the table empty.
                log.error("Data file not found, YC_PXDZF left unchanged: " + filePath, e);
                return;
            }
            reader = new BufferedReader(new InputStreamReader(rawIn, DATA_FILE_CHARSET));

            // Obtain the connection and clear the current table content.
            con = dataSource.getConnection();
            con.setAutoCommit(false);
            deleteStat = con.prepareStatement(DELETE_SQL);
            log.info(DELETE_SQL);
            deleteStat.execute();
            con.commit();

            insertStat = con.prepareStatement(INSERT_SQL);
            int lineNo = 0;
            int loaded = 0;
            int skipped = 0;
            int pending = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNo++;
                if (line.trim().length() == 0) {
                    continue;
                }
                List<String> fields = parseCsvLine(line);
                if (fields.size() < COLUMN_COUNT) {
                    skipped++;
                    log.warn("Skipping line " + lineNo + " of " + filePath + ": expected at least "
                            + COLUMN_COUNT + " fields but found " + fields.size());
                    continue;
                }
                for (int i = 0; i < COLUMN_COUNT; i++) {
                    insertStat.setString(i + 1, fields.get(i));
                }
                insertStat.addBatch();
                loaded++;
                if (++pending == BATCH_SIZE) {
                    insertStat.executeBatch();
                    con.commit();
                    pending = 0;
                }
            }
            if (pending > 0) {
                insertStat.executeBatch();
            }

            updateStat = con.prepareStatement(UPDATE_SQL);
            updateStat.execute();
            con.commit();
            log.info("YC_PXDZF reload finished: " + loaded + " rows loaded, " + skipped + " lines skipped");
        } catch (Exception e) {
            log.error("Failed to reload YC_PXDZF from " + filePath, e);
            rollbackQuietly(con);
        } finally {
            closeStatement(deleteStat);
            closeStatement(insertStat);
            closeStatement(updateStat);
            closeConnection(con);
            // Closing the reader also closes the underlying stream; fall back
            // to the raw stream if the reader could not be constructed.
            if (reader != null) {
                closeStream(reader);
            } else {
                closeStream(rawIn);
            }
        }
    }

    /**
     * Splits one record into its field values, preserving field positions.
     *
     * <p>For every field the surrounding whitespace, the trailing delimiter
     * comma and all double-quote characters are removed (the loader has always
     * stored values without any quotes). An empty field yields an empty string
     * at its position instead of being dropped, so later columns do not shift.
     * A line that ends with a comma yields one additional trailing empty field.
     *
     * @param line one line of the export file, without its line terminator
     * @return the field values in file order; never {@code null}
     */
    static List<String> parseCsvLine(String line) {
        List<String> fields = new ArrayList<String>();
        Matcher matcher = CSV_FIELD_PATTERN.matcher(line);
        // True at the start of the line and right after a delimiter comma,
        // i.e. whenever the next match occupies a real field position.
        boolean expectField = true;
        while (matcher.find()) {
            String token = matcher.group().trim();
            boolean delimited = token.endsWith(",");
            if (delimited) {
                token = token.substring(0, token.length() - 1);
            }
            String value = token.replace("\"", "").trim();

            boolean positional = expectField;
            expectField = delimited;
            // The pattern can match the empty string, so once the last field
            // has been consumed the matcher still reports an empty match at
            // the end of the line. Such residue is not a field; non-empty
            // residue of a malformed line is kept, as it always was.
            if (positional || value.length() > 0) {
                fields.add(value);
            }
        }
        return fields;
    }

    /** Rolls back the connection's pending work, logging (not throwing) any failure. */
    private static void rollbackQuietly(Connection connection) {
        if (connection == null) {
            return;
        }
        try {
            connection.rollback();
        } catch (Exception e) {
            log.warn("Rollback failed", e);
        }
    }

    /** Closes a statement if it was opened, logging (not throwing) any failure. */
    private static void closeStatement(PreparedStatement statement) {
        if (statement == null) {
            return;
        }
        try {
            statement.close();
        } catch (Exception e) {
            log.warn("Failed to close statement", e);
        }
    }

    /** Closes the connection if it was opened, logging (not throwing) any failure. */
    private static void closeConnection(Connection connection) {
        if (connection == null) {
            return;
        }
        try {
            connection.close();
        } catch (Exception e) {
            log.warn("Failed to close connection", e);
        }
    }

    /** Closes a stream or reader if it was opened, logging (not throwing) any failure. */
    private static void closeStream(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            log.warn("Failed to close data file", e);
        }
    }
}