java-poi3.17讀取word文字及圖片
阿新 • • 發佈:2019-01-05
package per.qy.dexter.fileoperate;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.UUID;

import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.junit.Test;

/**
 * Demonstrates reading the text and the embedded pictures of a Word file with Apache POI.
 *
 * <p>Files ending in {@code .doc} are read through {@link HWPFDocument}; files ending in
 * {@code .docx} are read through {@link XWPFDocument}. The extracted text is printed to standard
 * output and every embedded picture is written to disk under a random (UUID-based) file name.
 */
public class WordTest {

    /**
     * Reads the hard-coded sample file, prints its text and dumps its pictures to disk.
     *
     * <p>If the sample file is missing, a note is printed to standard error and the method
     * returns without doing anything else.
     *
     * @throws IllegalStateException if reading the document or writing a picture fails; the
     *         underlying {@link IOException} is preserved as the cause
     */
    @Test
    public void testWord() {
        // Use "D:\\temp\\temp\\test.doc" instead to exercise the legacy .doc branch.
        String path = "D:\\temp\\temp\\test.docx";
        File file = new File(path);
        if (!file.exists() || !file.isFile()) {
            // Make the skipped run visible instead of finishing without any output.
            System.err.println("Word file not found, nothing to read: " + path);
            return;
        }

        try {
            String content;
            if (hasExtension(path, ".doc")) {
                content = readDoc(file);
            } else if (hasExtension(path, ".docx")) {
                content = readDocx(file);
            } else {
                System.out.println("此檔案不是word檔案!");
                return;
            }
            System.out.println(content);
        } catch (IOException e) {
            // Surface I/O failures (unreadable input, missing output directory, ...) so the
            // test fails instead of passing silently.
            throw new IllegalStateException("Failed to process Word file: " + path, e);
        }
    }

    /**
     * Tells whether {@code path} ends with {@code extension}, ignoring case.
     *
     * @param path the file path to inspect
     * @param extension the extension to look for, including the leading dot (e.g. {@code ".doc"})
     * @return {@code true} if the path ends with the extension
     */
    private static boolean hasExtension(String path, String extension) {
        int offset = path.length() - extension.length();
        return offset >= 0 && path.regionMatches(true, offset, extension, 0, extension.length());
    }

    /**
     * Reads a binary {@code .doc} file, writes its pictures to disk and returns its text.
     *
     * @param file the {@code .doc} file to read
     * @return the document text
     * @throws IOException if the file cannot be read or a picture cannot be written
     */
    private static String readDoc(File file) throws IOException {
        try (InputStream is = new FileInputStream(file);
                HWPFDocument doc = new HWPFDocument(is)) {
            String content = doc.getDocumentText();

            PicturesTable picturesTable = doc.getPicturesTable();
            List<Picture> pictures = picturesTable.getAllPictures();
            for (Picture picture : pictures) {
                // NOTE(review): this branch writes to D:\temp while the .docx branch writes to
                // D:\temp\temp - confirm the difference is intended.
                File target = new File(
                        "D:\\temp\\" + UUID.randomUUID() + "." + picture.suggestFileExtension());
                // try-with-resources closes the stream even when writing the picture fails.
                try (OutputStream out = new FileOutputStream(target)) {
                    picture.writeImageContent(out);
                }
            }
            return content;
        }
    }

    /**
     * Reads an OOXML {@code .docx} file, writes its pictures to disk and returns its text.
     *
     * @param file the {@code .docx} file to read
     * @return the document text
     * @throws IOException if the file cannot be read or a picture cannot be written
     */
    private static String readDocx(File file) throws IOException {
        // Resources are closed in reverse order: extractor, document, then the input stream.
        try (InputStream is = new FileInputStream(file);
                XWPFDocument docx = new XWPFDocument(is);
                POIXMLTextExtractor extractor = new XWPFWordExtractor(docx)) {
            String content = extractor.getText();

            List<XWPFPictureData> pictures = docx.getAllPictures();
            for (XWPFPictureData picture : pictures) {
                byte[] data = picture.getData();
                // The output name is the UUID followed directly by the picture's own file name.
                try (OutputStream out = new FileOutputStream(
                        "D:\\temp\\temp\\" + UUID.randomUUID() + picture.getFileName())) {
                    out.write(data);
                }
            }
            return content;
        }
    }
}