程式人生 > java讀取各型別的檔案

java讀取各型別的檔案

用到的幾個包

bcmail-jdk14-132.jar/bcprov-jdk14-132.jar/checkstyle-all-4.2.jar/FontBox-0.1.0-dev.jar/lucene-core-2.0.0.jar/PDFBox-0.7.3.jar/poi-3.0-alpha3-20061212.jar/poi-contrib-3.0-alpha3-20061212.jar/poi-scratchpad-3.0-alpha3-20061212.jar

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.util.PDFTextStripper;

public class Test {

/**
* @param args
*/
public static void p(Object obj) {
   System.out.println(obj);
}

public static void main(String[] args) {
   try {
    p(readPpt("src/1.dps"));
   } catch (Exception e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
   }

}
//讀取ppt
public static String readPpt(String path) throws Exception {

   StringBuffer content = new StringBuffer("");
   try {
    SlideShow ss = new SlideShow(new HSLFSlideShow(path));// path為檔案的全路徑名稱,建立SlideShow
    Slide[] slides = ss.getSlides();// 獲得每一張幻燈片
    for (int i = 0; i < slides.length; i++) {
     TextRun[] t = slides[i].getTextRuns();// 為了取得幻燈片的文字內容,建立TextRun
     for (int j = 0; j < t.length; j++) {
      content.append(t[j].getText());// 這裡會將文字內容加到content中去
     }
     content.append(slides[i].getTitle());
    }
   } catch (Exception ex) {
    System.out.println(ex.toString());
   }
   return content.toString().trim();

}
// 讀取xls
public static String readXls(String path) throws Exception {
   StringBuffer content = new StringBuffer("");// 文件內容
   HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(path));
   int sheetCount = workbook.getNumberOfSheets();// excel幾張表
   for (int i = 0; i < sheetCount; i++) {// 遍歷excel表
    HSSFSheet sheet = workbook.getSheetAt(i);// 對excel的第一個表引用
    int rowCount = sheet.getLastRowNum();// 取得最後一行的下標
    for (int j = 0; j < rowCount; j++) {// 迴圈每一行
     HSSFRow row = sheet.getRow(j);// 引用行
     if (row == null) {
      continue;
     } else {
      short cellNum = row.getLastCellNum();
      for (short m = 0; m < cellNum; m++) {
       HSSFCell cell = row.getCell(m);// 引用行中的一個單元格
       if (cell != null) {
        int cellType = cell.getCellType();
        // CELL_TYPE_NUMERIC 0 數字
        // CELL_TYPE_STRING 1 字串
        // CELL_TYPE_FORMULA 2 公式
        // CELL_TYPE_BLANK 3 空格
        // CELL_TYPE_BOOLEAN 4 布林值
        // CELL_TYPE_ERROR 5 錯誤
        switch (cellType) {
        // 單元格型別為數字
        case HSSFCell.CELL_TYPE_NUMERIC:
         // 取數字單元格的值
         double d = cell.getNumericCellValue();
         content.append(String.valueOf(d) + "   ");
         break;
        // 單元格型別為字串
        case HSSFCell.CELL_TYPE_STRING:
         String str = cell.getStringCellValue().trim();
         if (!str.equals("")) {
          content.append(str + "   ");
         }
         break;
        // 單元格型別為公式
        case HSSFCell.CELL_TYPE_FORMULA:
         // 不讀取公式
         // String formula = cell.getCellFormula();
         // content = content + formula+" ";
         break;
        // 單元格型別為空白
        case HSSFCell.CELL_TYPE_BLANK:
         break;
        // 單元格型別為布林值
        case HSSFCell.CELL_TYPE_BOOLEAN:
         // boolean bool = cell.getBooleanCellValue();
         // content = content + bool+" ";
         break;
        // 單元格型別為錯誤
        case HSSFCell.CELL_TYPE_ERROR:
         // byte errorCode = cell.getErrorCellValue();
         // content = content + errorCode+" ";
         break;
        default:
         break;
        }
       } else {
        // content = content + "..." +" ";//沒有資料的單元格使用...填充
       }
      }
     }
     content.append("\r");
    }
   }
   return content.toString().trim();
}

// 讀取pdf
public static String readPdf(String path) throws Exception {
   StringBuffer content = new StringBuffer("");// 文件內容
   FileInputStream fis = new FileInputStream(path);
   PDFParser p = new PDFParser(fis);
   p.parse();
   PDFTextStripper ts = new PDFTextStripper();
   content.append(ts.getText(p.getPDDocument()));
   fis.close();
   return content.toString().trim();
}

// 讀取word,只能讀取文字內容 圖片不行
public static String readWord(String path) throws Exception {

   StringBuffer content = new StringBuffer("");// 文件內容
   HWPFDocument doc = new HWPFDocument(new FileInputStream(path));
   Range range = doc.getRange();
   int paragraphCount = range.numParagraphs();// 段落
   for (int i = 0; i < paragraphCount; i++) {// 遍歷段落讀取資料
    Paragraph pp = range.getParagraph(i);
    content.append(pp.text());
   }
   return content.toString().trim();
}

// 讀取text
public static String readTxt(String path) {
   StringBuffer content = new StringBuffer("");// 文件內容
   try {
    FileReader reader = new FileReader(path);
    BufferedReader br = new BufferedReader(reader);
    String s1 = null;

    while ((s1 = br.readLine()) != null) {
     content.append(s1 + "\r");
    }
    br.close();
    reader.close();
   } catch (IOException e) {
    e.printStackTrace();
   }
   return content.toString().trim();
}

}