Java 中使用 Apache POI 讀取 doc、docx、ppt、pptx、xls、xlsx、txt、csv 格式檔案的示例程式碼
阿新 • • 發佈:2022-12-06
Java 使用 Apache POI 讀取 doc、docx、ppt、pptx、xls、xlsx、txt、csv 格式檔案的示例程式碼
1、maven依賴新增
在專案的 pom.xml 檔案中新增如下依賴:
<!-- Apache POI core: OLE2 based formats (xls) and shared infrastructure. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- OOXML based formats (docx, pptx, xlsx). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- Schema classes used by poi-ooxml. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- Legacy binary formats other than xls (doc, ppt). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- NOTE(review): ooxml-schemas appears to be the complete schema jar that poi-ooxml-schemas
     is a subset of, so declaring both is probably redundant; confirm which one is needed. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.4</version>
</dependency>
2、檔案讀取程式碼示例
doc 格式檔案
// --------- doc ----------- File file = new File("E:\\search-file\\22.doc"); FileInputStream fis = null; HWPFDocument document = null; WordExtractor extractor = null; try { fis = new FileInputStream(file); document = new HWPFDocument(fis); extractor = new WordExtractor(document); log.info("extractor.getText:{}", extractor.getText()); } catch (Exception e) { e.printStackTrace(); }
docx 格式檔案
// --------- docx ----------- File file = new File("E:\\search-file\\11.docx"); FileInputStream fis = null; XWPFDocument document = null; XWPFWordExtractor extractor = null; try { fis = new FileInputStream(file); document = new XWPFDocument(fis); extractor = new XWPFWordExtractor(document); log.info("extractor.getText:{}", extractor.getText()); } catch (Exception e) { e.printStackTrace(); }
pptx 格式檔案
// --------- pptx -----------
File file = new File("E:\\search-file\\33.pptx");
FileInputStream fis = null;
XMLSlideShow document = null;
SlideShowExtractor extractor = null;
try {
fis = new FileInputStream(file);
document = new XMLSlideShow(fis);
extractor = new SlideShowExtractor(document);
log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
e.printStackTrace();
}
ppt 格式檔案
// --------- ppt -----------
File file = new File("E:\\search-file\\44.ppt");
FileInputStream fis = null;
HSLFSlideShow document = null;
SlideShowExtractor extractor = null;
try {
fis = new FileInputStream(file);
document = new HSLFSlideShow(fis);
extractor = new SlideShowExtractor(document);
log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
e.printStackTrace();
}
xlsx 格式檔案
// --------- xlsx -----------
File file = new File("E:\\search-file\\55.xlsx");
FileInputStream fis = null;
XSSFWorkbook document = null;
XSSFExcelExtractor extractor = null;
try {
fis = new FileInputStream(file);
document = new XSSFWorkbook(fis);
extractor = new XSSFExcelExtractor(document);
log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
e.printStackTrace();
}
xls 格式檔案
// --------- xls -----------
File file = new File("E:\\search-file\\66.xls");
FileInputStream fis = null;
HSSFWorkbook document = null;
ExcelExtractor extractor = null;
try {
fis = new FileInputStream(file);
document = new HSSFWorkbook(fis);
extractor = new ExcelExtractor(document);
log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
e.printStackTrace();
}
txt,csv 格式檔案
// --------- txt,csv -----------
// Reads a plain-text file (txt or csv) as UTF-8, line by line, and logs the collected content.
File file = new File("E:\\search-file\\77.txt");
// The buffer is a local used by a single thread, so the unsynchronized StringBuilder suffices.
StringBuilder buffer = new StringBuilder();
// The charset constant avoids a by-name lookup; it is fully qualified so that no extra
// import is needed. try-with-resources closes the reader and the wrapped stream.
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // readLine() strips the line terminator, so every line is re-terminated with '\n'.
        buffer.append(line).append('\n');
    }
} catch (Exception e) {
    // Report through the logger (stack trace included) instead of printing to stderr.
    log.error("Failed to read file: {}", file, e);
}
// Logged even after a failure; the buffer then holds whatever was read before the error.
log.info("txt-context:{}", buffer);