1. 程式人生 > >CSV檔案準確讀取兩種思路




package xufei;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
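import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * File format rules
 *
 * Microsoft's format is the simplest. Comma-separated values are either "raw"
 * (just whatever lies between the commas) or enclosed in double quotes (in which
 * case a double quote inside the data is written as a pair of double quotes).
 *
 *   Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K
 *
 * This line contains seven fields:
 *   Ten Thousand
 *   10000
 *   2710
 *   (empty field)
 *   10,000
 *   It's "10 Grand", baby
 *   10K
 *
 * - Each record occupies one line.
 * - The comma is the separator.
 * - Spaces before and after a comma are ignored.
 * - A field that contains a comma must be enclosed in double quotes
 *   (full-width commas are not a problem).
 * - A field that contains a line break must be enclosed in double quotes.
 * - A field with leading or trailing spaces must be enclosed in double quotes.
 * - A double quote inside a field is written as two double quotes.
 * - A field that contains a double quote must be enclosed in double quotes.
 * - The first record may hold the field names.
 */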
/**
 * Title: xufei.CSVAnalysis.java
 * Description: reads a comma-separated file into a list of rows, each row being a
 * list of cell strings.
 * Copyright: Copyright (c) 2006
 * Company: technodia
 *
 * @author 徐飛
 * @version 1.0
 * createDate Aug 11, 2008
 * Revision history
 * Date / Author / Reason
 */
public class CSVAnalysis {

    /** Field separator. */
    private static final char SEPARATOR = ',';

    /** Quote character; a doubled quote inside a quoted section is a literal quote. */
    private static final char QUOTE = '"';

    private InputStreamReader fr = null;

    private BufferedReader br = null;

    /**
     * Opens the given file for reading.
     *
     * <p>The file is decoded with the platform default charset, exactly as before.
     *
     * @param f path of the CSV file
     * @throws IOException if the file cannot be opened
     */
    public CSVAnalysis(String f) throws IOException {
        fr = new InputStreamReader(new FileInputStream(f));
    }

    /**
     * Parses the CSV file into a list of rows.
     *
     * <p>Every cell is a {@code String}, every record is a {@code List<String>}, and
     * all records are collected into the returned list. The reader is closed when this
     * method returns, so it can be called only once per instance.
     *
     * <p>Parsing rules:
     * <ul>
     *   <li>Cells are separated by commas; a comma inside double quotes is data.</li>
     *   <li>Inside a quoted section two consecutive double quotes stand for one
     *       literal double quote.</li>
     *   <li>A quoted section that is still open at the end of a line continues on the
     *       next line; the line break becomes {@code '\n'} in the cell.</li>
     *   <li>The last cell of a record no longer needs a trailing comma. A trailing
     *       comma merely terminates the last cell and does not add an empty one, so
     *       input that already ended every line with a comma yields the same rows as
     *       before.</li>
     *   <li>A blank line yields an empty row.</li>
     *   <li>Whitespace around cells is kept as is.</li>
     * </ul>
     *
     * @return the parsed rows, never {@code null}
     * @throws IOException if reading the file fails
     */
    public List<List<String>> readCSVFile() throws IOException {
        br = new BufferedReader(fr);
        List<List<String>> listFile = new ArrayList<List<String>>();
        try {
            String rec; // first physical line of the current record
            while ((rec = br.readLine()) != null) {
                listFile.add(readRecord(rec));
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped InputStreamReader.
            br.close();
        }
        return listFile;
    }

    /**
     * Splits one record into cells, pulling further lines from the reader while a
     * quoted section is still open.
     *
     * @param firstLine the first physical line of the record, without line terminator
     * @return the cells of the record
     * @throws IOException if reading a continuation line fails
     */
    private List<String> readRecord(String firstLine) throws IOException {
        List<String> cells = new ArrayList<String>();
        StringBuilder cell = new StringBuilder();
        boolean inQuotes = false;
        boolean quoted = false; // true once the current cell has seen a quote
        String line = firstLine;

        while (true) {
            int len = line.length();
            for (int i = 0; i < len; i++) {
                char c = line.charAt(i);
                if (c == QUOTE) {
                    if (inQuotes && i + 1 < len && line.charAt(i + 1) == QUOTE) {
                        // Escaped quote: emit one quote and skip its twin.
                        cell.append(QUOTE);
                        i++;
                    } else {
                        inQuotes = !inQuotes;
                        quoted = true;
                    }
                } else if (c == SEPARATOR && !inQuotes) {
                    cells.add(cell.toString());
                    cell.setLength(0);
                    quoted = false;
                } else {
                    cell.append(c);
                }
            }
            if (!inQuotes) {
                break;
            }
            String next = br.readLine();
            if (next == null) {
                break; // unterminated quote at end of file: keep what was read
            }
            cell.append('\n');
            line = next;
        }

        // Emit the final cell when it has content or was explicitly quoted (""); an
        // empty remainder after a trailing comma is not a cell.
        if (cell.length() > 0 || quoted) {
            cells.add(cell.toString());
        }
        return cells;
    }

    public static void main(String[] args) throws Throwable {
        CSVAnalysis parser = new CSVAnalysis("c:/test2.csv");
        parser.readCSVFile();
    }
}


import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/**
 * A very simple CSV reader released under a commercial-friendly license.
 *
 * @author Glen Smith
 */
public class CSVReader implements Closeable {

    /*
     * Line-oriented CSV reader with configurable separator, quote and escape
     * characters. A quoted element may span several physical lines; the line breaks
     * inside it are returned as '\n'.
     */

    private BufferedReader br;

    /** Becomes false once the underlying reader has returned end of input. */
    private boolean hasNext = true;

    private final char separator;

    private final char quotechar;

    private final char escape;

    /** Number of leading lines to discard before the first record. */
    private int skipLines;

    /** True once the leading lines have been discarded. */
    private boolean linesSkiped;

    /** The default separator to use if none is supplied to the constructor. */
    public static final char DEFAULT_SEPARATOR = ',';

    /** Initial capacity of the buffer used to assemble one element. */
    public static final int INITIAL_READ_SIZE = 64;

    /**
     * The default quote character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /**
     * The default escape character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_ESCAPE_CHARACTER = '\\';

    /**
     * The default line to start reading.
     */
    public static final int DEFAULT_SKIP_LINES = 0;

    /**
     * Constructs CSVReader using a comma for the separator.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     */
    public CSVReader(Reader reader) {
        this(reader, DEFAULT_SEPARATOR);
    }

    /**
     * Constructs CSVReader with supplied separator.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries.
     */
    public CSVReader(Reader reader, char separator) {
        this(reader, separator, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     */
    public CSVReader(Reader reader, char separator, char quotechar) {
        this(reader, separator, quotechar, DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and escape char.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param escape
     *            the character to use for escaping a separator or quote
     */
    public CSVReader(Reader reader, char separator,
            char quotechar, char escape) {
        this(reader, separator, quotechar, escape, DEFAULT_SKIP_LINES);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param line
     *            the number of leading lines to skip before reading
     */
    public CSVReader(Reader reader, char separator, char quotechar, int line) {
        this(reader, separator, quotechar, DEFAULT_ESCAPE_CHARACTER, line);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char, escape char and
     * number of lines to skip.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param escape
     *            the character to use for escaping a separator or quote
     * @param line
     *            the number of leading lines to skip before reading
     */
    public CSVReader(Reader reader, char separator, char quotechar, char escape, int line) {
        this.br = new BufferedReader(reader);
        this.separator = separator;
        this.quotechar = quotechar;
        this.escape = escape;
        this.skipLines = line;
    }

    /**
     * Reads the entire file into a List with each element being a String[] of
     * tokens.
     *
     * @return a List of String[], with each String[] representing a line of the
     *         file.
     * @throws IOException
     *             if bad things happen during the read
     */
    public List<String[]> readAll() throws IOException {

        List<String[]> allElements = new ArrayList<String[]>();
        while (hasNext) {
            String[] nextLineAsTokens = readNext();
            if (nextLineAsTokens != null) {
                allElements.add(nextLineAsTokens);
            }
        }
        return allElements;

    }

    /**
     * Reads the next line from the buffer and converts to a string array.
     *
     * @return a string array with each comma-separated element as a separate
     *         entry, or null once the end of input has been reached.
     * @throws IOException
     *             if bad things happen during the read
     */
    public String[] readNext() throws IOException {

        String nextLine = getNextLine();
        return hasNext ? parseLine(nextLine) : null;
    }

    /**
     * Reads the next line from the file, discarding the configured number of
     * leading lines on the first call.
     *
     * @return the next line from the file without trailing newline, or null at
     *         end of input
     * @throws IOException
     *             if bad things happen during the read
     */
    private String getNextLine() throws IOException {
        if (!this.linesSkiped) {
            for (int i = 0; i < skipLines; i++) {
                br.readLine();
            }
            this.linesSkiped = true;
        }
        String nextLine = br.readLine();
        if (nextLine == null) {
            hasNext = false;
        }
        return hasNext ? nextLine : null;
    }

    /**
     * Parses an incoming String and returns an array of elements. When the line
     * ends inside a quoted section, further lines are read and joined with '\n'
     * until the quote is closed or the input ends.
     *
     * @param nextLine
     *            the string to parse
     * @return the comma-tokenized list of elements, or null if nextLine is null
     * @throws IOException if bad things happen during the read
     */
    private String[] parseLine(String nextLine) throws IOException {

        if (nextLine == null) {
            return null;
        }

        List<String> tokensOnThisLine = new ArrayList<String>();
        StringBuilder sb = new StringBuilder(INITIAL_READ_SIZE);
        boolean inQuotes = false;
        do {
            if (inQuotes) {
                // continuing a quoted section, reappend newline
                sb.append("\n");
                nextLine = getNextLine();
                if (nextLine == null) {
                    break; // input ended inside a quoted section
                }
            }
            for (int i = 0; i < nextLine.length(); i++) {

                char c = nextLine.charAt(i);
                if (c == this.escape) {
                    if (isEscapable(nextLine, inQuotes, i)) {
                        // keep the escaped character, drop the escape itself
                        sb.append(nextLine.charAt(i + 1));
                        i++;
                    } else {
                        i++; // ignore the escape
                    }
                } else if (c == quotechar) {
                    if (isEscapedQuote(nextLine, inQuotes, i)) {
                        // doubled quote inside quotes: emit one literal quote
                        sb.append(nextLine.charAt(i + 1));
                        i++;
                    } else {
                        inQuotes = !inQuotes;
                        // the tricky case of an embedded quote in the middle: a,bc"d"ef,g
                        if (i > 2 //not on the beginning of the line
                                && nextLine.charAt(i - 1) != this.separator //not at the beginning of an escape sequence
                                && nextLine.length() > (i + 1)
                                && nextLine.charAt(i + 1) != this.separator //not at the end of an escape sequence
                        ) {
                            sb.append(c);
                        }
                    }
                } else if (c == separator && !inQuotes) {
                    tokensOnThisLine.add(sb.toString());
                    sb = new StringBuilder(INITIAL_READ_SIZE); // start work on next token
                } else {
                    sb.append(c);
                }
            }
        } while (inQuotes);
        tokensOnThisLine.add(sb.toString());
        return tokensOnThisLine.toArray(new String[0]);

    }

    /**
     * precondition: the current character is a quote or an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i current index in line
     * @return true if the following character is a quote
     */
    private boolean isEscapedQuote(String nextLine, boolean inQuotes, int i) {
        return inQuotes  // we are in quotes, therefore there can be escaped quotes in here.
            && nextLine.length() > (i + 1)  // there is indeed another character to check.
            && nextLine.charAt(i + 1) == quotechar;
    }

    /**
     * precondition: the current character is an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i current index in line
     * @return true if the following character is a quote or an escape
     */
    private boolean isEscapable(String nextLine, boolean inQuotes, int i) {
        return inQuotes  // we are in quotes, therefore there can be escaped quotes in here.
            && nextLine.length() > (i + 1)  // there is indeed another character to check.
            && (nextLine.charAt(i + 1) == quotechar || nextLine.charAt(i + 1) == this.escape);
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the close fails
     */
    public void close() throws IOException {
        br.close();
    }
}
