POI3.8讀取WORD(簡潔例項)
阿新 • • 發佈:2019-02-13
目前最新版POI為3.8:
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
Java程式碼
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
Java程式碼
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.IOException;
-
import java.io.InputStream;
- import javax.servlet.ServletException;
- import javax.servlet.http.HttpServlet;
- import javax.servlet.http.HttpServletRequest;
- import javax.servlet.http.HttpServletResponse;
- import org.apache.poi.hwpf.HWPFDocument;
- import org.apache.poi.hwpf.extractor.WordExtractor;
-
import org.apache.poi.hwpf.model.PicturesTable;
- import org.apache.poi.hwpf.usermodel.CharacterRun;
- import org.apache.poi.hwpf.usermodel.Picture;
- import org.apache.poi.hwpf.usermodel.Range;
- public class WordDemo extends HttpServlet {
- private static final long serialVersionUID = 1L;
-
public void doGet(HttpServletRequest request, HttpServletResponse response)
- throws ServletException, IOException {
- this.doPost(request, response);
- }
- public void doPost(HttpServletRequest request, HttpServletResponse response)
- throws ServletException, IOException {
- //從硬碟讀取一個doc文件
- InputStream in = new FileInputStream("F:\\test.doc");
- //類從word文件中提取文字,非特殊情況下,都將使用getParagraphText()與getText()
- WordExtractor word = new WordExtractor(in);
- //獲取段文字
- String [] strArray = word.getParagraphText();
- String str = word.getText();
- for(int i=0 ; i<strArray.length ; i++){
- System.out.println(strArray[i]+"\ti迴圈:"+i);
- }
- System.out.println(str +"\t --");
- //這個建構函式從InputStream中載入Word文件。
- HWPFDocument doc = new HWPFDocument((InputStream)new FileInputStream("F:\\test.doc"));
- //這個類為HWPF物件模型,對文件範圍段操作
- Range range = doc.getRange(); //
- //看看此文件有多少個段落
- int num = range.numParagraphs();
- System.out.println(num+"段");
- //得到word資料流
- byte [] dataStream = doc.getDataStream();
- System.out.println("資料流長度:"+dataStream.length);
- //用於在一段範圍內獲得段落數
- int numChar = range.numCharacterRuns();
- System.out.println("CharacterRuns 數:"+numChar);
- //負責影象提取 和 確定一些檔案某塊是否包含嵌入的影象。
- PicturesTable table = new PicturesTable(doc, dataStream, null);
- for(int j=0 ; j<numChar ; j++){
- //這個類表示一個文字執行,有著共同的屬性。
- CharacterRun run = range.getCharacterRun(j);
- //是否存在圖片
- boolean bool = table.hasPicture(run);
- System.out.println("是否存在圖片:"+bool);
- if(bool){
- //返回圖片物件繫結到指定的CharacterRun
- Picture pic = table.extractPicture(run, true);
- //圖片的內容位元組寫入到指定的輸出流。
- pic.writeImageContent(new FileOutputStream("F:\\"+j+".bmp"));
- System.out.println("成功提取圖片"+j+":");
- }
- }
- request.getRequestDispatcher("ok.jsp").forward(request, response);
- }
- }