Java 讀取 txt、doc、docx 檔案
阿新 • • 發佈:2019-02-15
import java.io.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
/**
 * Demo that reads the text content of a plain-text file, a Word 2003
 * (.doc) file and a Word 2007 (.docx) file and prints each to standard
 * output. The two Word formats are read through Apache POI extractors.
 *
 * <p>Created 2009-07-25.
 *
 * @author Carl He
 */
public class WorldReard {

    /**
     * Reads the three sample files under {@code C://test} in turn and prints
     * their text. The first failure is reported with a stack trace and the
     * remaining files are skipped.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // txt: the helper already terminates every line, so print (not println).
            System.out.print(readTextFile("C://test//text2.txt"));

            // Word 2003 (original author's note: images are not read).
            System.out.println(readDoc("C://test//text2.doc"));

            // Word 2007 (original author's note: images are not read, and table
            // data ends up at the end of the extracted string).
            OPCPackage opcPackage = POIXMLDocument.openPackage("C://test//text2.docx");
            try {
                POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
                String text2007 = extractor.getText();
                System.out.println(text2007);
            } finally {
                // revert() releases the package WITHOUT saving; close() on a
                // read-write package would write the file back to disk.
                opcPackage.revert();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a whole text file using the platform default charset (the charset
     * {@link FileReader} uses).
     *
     * @param path location of the text file
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readTextFile(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator, so put one back.
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            // Close even when reading fails part-way through.
            reader.close();
        }
    }

    /**
     * Extracts the text of a Word 2003 (.doc) file.
     *
     * @param path location of the .doc file
     * @return the text returned by the POI {@code WordExtractor}
     * @throws IOException if the file cannot be opened or parsed
     */
    private static String readDoc(String path) throws IOException {
        InputStream in = new FileInputStream(new File(path));
        try {
            WordExtractor extractor = new WordExtractor(in);
            return extractor.getText();
        } finally {
            in.close();
        }
    }
}
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
/**
 * Demo that reads the text content of a plain-text file, a Word 2003
 * (.doc) file and a Word 2007 (.docx) file and prints each to standard
 * output. The two Word formats are read through Apache POI extractors.
 *
 * <p>Created 2009-07-25.
 *
 * @author Carl He
 */
public class WorldReard {

    /**
     * Reads the three sample files under {@code C://test} in turn and prints
     * their text. The first failure is reported with a stack trace and the
     * remaining files are skipped.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // txt: the helper already terminates every line, so print (not println).
            System.out.print(readTextFile("C://test//text2.txt"));

            // Word 2003 (original author's note: images are not read).
            System.out.println(readDoc("C://test//text2.doc"));

            // Word 2007 (original author's note: images are not read, and table
            // data ends up at the end of the extracted string).
            OPCPackage opcPackage = POIXMLDocument.openPackage("C://test//text2.docx");
            try {
                POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
                String text2007 = extractor.getText();
                System.out.println(text2007);
            } finally {
                // revert() releases the package WITHOUT saving; close() on a
                // read-write package would write the file back to disk.
                opcPackage.revert();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a whole text file using the platform default charset (the charset
     * {@link FileReader} uses).
     *
     * @param path location of the text file
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readTextFile(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator, so put one back.
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            // Close even when reading fails part-way through.
            reader.close();
        }
    }

    /**
     * Extracts the text of a Word 2003 (.doc) file.
     *
     * @param path location of the .doc file
     * @return the text returned by the POI {@code WordExtractor}
     * @throws IOException if the file cannot be opened or parsed
     */
    private static String readDoc(String path) throws IOException {
        InputStream in = new FileInputStream(new File(path));
        try {
            WordExtractor extractor = new WordExtractor(in);
            return extractor.getText();
        } finally {
            in.close();
        }
    }
}