POI將word轉化為html
阿新 • • 發佈:2018-11-07
參考資料
1.POI包依賴:https://poi.apache.org/components/index.html
2.包版本問題:https://bbs.csdn.net/topics/392208805 並沒有使用其中的3.9的版本,使用的為3.13
最開始使用的為3.17的版本,但在轉換成html時出現錯誤:java.lang.NoSuchMethodError,使用3.9版本也出現了類似的問題:
java.lang.NoSuchMethodError:org.apache.poi.POIXMLDocumentPart.getPackageRelationship()
3.整體的參考:http://www.cnblogs.com/always-online/p/4800131.html
4.在釋出到伺服器上時,<img>標籤的src圖片路徑問題:
https://blog.csdn.net/B888888888888/article/details/78113527?locationNum=7&fps=1
相關說明:3.17版本在excel設定樣式時API發生了變化
pom.xml—對應包
<!-- 測試poi需要的先決條件包 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.xmlbeans</groupId>
<artifactId>xmlbeans</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.10</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
<!-- <dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency> -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.0</version>
</dependency>
<!-- 測試poi需要的先決條件包 -->
<dependency>
<groupId>com.github.virtuald</groupId>
<artifactId>curvesapi</artifactId>
<version>1.04</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-examples -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-examples</artifactId>
<version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-excelant -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-excelant</artifactId>
<version>3.13</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.core</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>
程式碼如下:
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Static helpers that convert Word documents to HTML files on disk.
 *
 * <p>{@link #docToHtml} handles the binary {@code .doc} format through POI's
 * {@code WordToHtmlConverter}; {@link #docxToHtml} handles {@code .docx} through
 * the xdocreport {@code XHTMLConverter}. Neither method closes the caller's
 * input stream; every output stream opened here is closed before returning,
 * including when the conversion fails.
 */
public class WordToHtml {

    /**
     * Converts a {@code .doc} document to an HTML file.
     *
     * @param ins               stream containing the {@code .doc} document; not closed by this method
     * @param imageAbsolutePath local path prefix under which extracted pictures are written; the
     *                          suggested picture name is appended directly, so the value must end
     *                          with a path separator when it denotes a directory
     * @param webImagePath      prefix used for the {@code src} attribute of {@code <img>} tags in the
     *                          generated HTML; the suggested picture name is appended directly
     * @param htmlPath          local path of the HTML file to write
     * @throws IOException                  if the document cannot be read or the HTML file cannot be written
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException         if serializing the DOM tree to HTML fails
     */
    public static void docToHtml(InputStream ins, String imageAbsolutePath, String webImagePath, String htmlPath)
            throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(ins);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // Decide where embedded pictures are stored and how the HTML refers to them.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                File file = new File(imageAbsolutePath + suggestedName);
                // try-with-resources closes the stream even when write() fails.
                try (OutputStream imageOut = new FileOutputStream(file)) {
                    imageOut.write(content);
                } catch (IOException e) {
                    // Best effort: a picture that cannot be saved must not abort the
                    // whole conversion, so report the failure and carry on.
                    // FileNotFoundException is an IOException and is handled here too.
                    System.err.println("Failed to save picture to " + file);
                    e.printStackTrace();
                }
                // webImagePath is the address the <img> tag reads from on the server,
                // e.g. <img src="/csdn/a.png"/>. It is deliberately different from
                // imageAbsolutePath, which is the full path used when saving locally.
                return webImagePath + suggestedName;
            }
        });

        wordToHtmlConverter.processDocument(wordDocument);
        org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // Open the target file only once the serializer is ready, and always close it,
        // including when transform() throws.
        try (OutputStream out = new FileOutputStream(new File(htmlPath))) {
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        }
    }

    /**
     * Converts a {@code .docx} document to an HTML fragment file.
     *
     * @param ins          stream containing the {@code .docx} document; not closed by this method
     * @param imageUrl     local directory handed to the image extractor for storing pictures
     * @param webImagePath base used for the {@code src} attribute of {@code <img>} tags in the
     *                     generated HTML (original author's note: a {@code /word/media} folder is
     *                     created automatically under the configured folder when pictures are
     *                     extracted)
     * @param fileUrl      local path of the HTML file to write
     * @throws IOException if the document cannot be read or the HTML file cannot be written
     */
    public static void docxToHtml(InputStream ins, String imageUrl, String webImagePath, String fileUrl)
            throws IOException {
        // 1: load the document into an XWPFDocument.
        XWPFDocument document = new XWPFDocument(ins);

        // 2: configure where pictures are extracted to and how they are referenced.
        File imgFile = new File(imageUrl);
        XHTMLOptions options = XHTMLOptions.create();
        options.setExtractor(new FileImageExtractor(imgFile));
        // Original author's note: use BasicURIResolver for relative paths;
        // FileURIResolver can be used for absolute paths.
        options.URIResolver(new BasicURIResolver(webImagePath));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        // 3: convert the XWPFDocument to XHTML; the output stream is closed even if
        // the conversion fails.
        try (OutputStream out = new FileOutputStream(new File(fileUrl))) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
}