1. 程式人生 > 其它 >java中使用apache poi 讀取 doc,docx,ppt,pptx,xls,xlsx,txt,csv格式的檔案示例程式碼

java中使用apache poi 讀取 doc,docx,ppt,pptx,xls,xlsx,txt,csv格式的檔案示例程式碼

本文示範如何在 Java 中使用 Apache POI 讀取 doc、docx、ppt、pptx、xls、xlsx、txt、csv 格式的檔案,並附上示例程式碼。

1、maven依賴新增

在專案的 pom.xml 檔案的 <dependencies> 區段中新增如下依賴:

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.4</version>
</dependency>

2、檔案讀取程式碼示例

doc 格式檔案

// --------- doc -----------
File file = new File("E:\\search-file\\22.doc");
FileInputStream fis = null;
HWPFDocument document = null;
WordExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new HWPFDocument(fis);
    extractor = new WordExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

docx 格式檔案

// --------- docx -----------
File file = new File("E:\\search-file\\11.docx");
FileInputStream fis = null;
XWPFDocument document = null;
XWPFWordExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new XWPFDocument(fis);
    extractor = new XWPFWordExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

pptx 格式檔案

// --------- pptx -----------
File file = new File("E:\\search-file\\33.pptx");
FileInputStream fis = null;
XMLSlideShow document = null;
SlideShowExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new XMLSlideShow(fis);
    extractor = new SlideShowExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

ppt 格式檔案

// --------- ppt -----------
File file = new File("E:\\search-file\\44.ppt");
FileInputStream fis = null;
HSLFSlideShow document = null;
SlideShowExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new HSLFSlideShow(fis);
    extractor = new SlideShowExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

xlsx 格式檔案

// --------- xlsx -----------

File file = new File("E:\\search-file\\55.xlsx");
FileInputStream fis = null;
XSSFWorkbook document = null;
XSSFExcelExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new XSSFWorkbook(fis);
    extractor = new XSSFExcelExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

xls 格式檔案

// --------- xls -----------
File file = new File("E:\\search-file\\66.xls");
FileInputStream fis = null;
HSSFWorkbook document = null;
ExcelExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new HSSFWorkbook(fis);
    extractor = new ExcelExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

txt,csv 格式檔案

// --------- txt,csv -----------
// Reads a UTF-8 text (or CSV) file line by line and logs the accumulated content.
File file = new File("E:\\search-file\\77.txt");
// The accumulator is confined to this snippet, so the unsynchronized StringBuilder is sufficient.
StringBuilder buffer = new StringBuilder();
// The charset constant replaces the "utf8" name lookup; it is written fully qualified
// so the snippet needs no additional import.
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // readLine() strips the line terminator, so every line is re-terminated with '\n'.
        buffer.append(line).append('\n');
    }
} catch (Exception e) {
    // Report through the logger instead of printing the stack trace to stderr.
    log.error("Failed to read " + file, e);
}
// Whatever was read before a failure (possibly nothing) is still logged.
log.info("txt-context:{}", buffer);