1. 程式人生 > >POI將word轉化為html

POI將word轉化為html

參考資料

1.POI包依賴:https://poi.apache.org/components/index.html
2.包版本問題:https://bbs.csdn.net/topics/392208805 (本文並未使用該帖中提到的3.9版本,實際使用的是3.13版本)
最開始使用的是3.17版本,但在轉換為html時出現錯誤:java.lang.NoSuchMethodError;改用3.9版本也出現了類似的問題:
java.lang.NoSuchMethodError:org.apache.poi.POIXMLDocumentPart.getPackageRelationship()
3.整體的參考:

http://www.cnblogs.com/always-online/p/4800131.html
4.釋出到伺服器上時,<img>標籤src的圖片路徑問題:
https://blog.csdn.net/B888888888888/article/details/78113527?locationNum=7&fps=1

相關說明:3.17版本中,在excel設定樣式時所用的API發生了變化
pom.xml—對應包

<!-- 測試poi需要的先決條件包 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-math3</artifactId>
    <version>3.6.1</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>commons-codec</groupId>
    <artifactId>commons-codec</artifactId>
    <version>1.10</version>
</dependency>
<dependency>
    <groupId>commons-logging</groupId>
    <artifactId>commons-logging</artifactId>
    <version>1.2</version>
</dependency>
<!--
<dependency>
    <groupId>commons-collections</groupId>
    <artifactId>commons-collections</artifactId>
    <version>3.2.2</version>
</dependency>
-->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.0</version>
</dependency>
<!-- 測試poi需要的先決條件包 -->
<dependency>
    <groupId>com.github.virtuald</groupId>
    <artifactId>curvesapi</artifactId>
    <version>1.04</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-examples -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-examples</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-excelant -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-excelant</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>

程式碼如下:

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Converts Microsoft Word documents to HTML files.
 *
 * <p>{@link #docToHtml} handles the binary {@code .doc} format through
 * {@link WordToHtmlConverter}; {@link #docxToHtml} handles {@code .docx} through
 * {@link XHTMLConverter}. Both methods write the generated HTML and the extracted
 * pictures to the local file system.
 */
public class WordToHtml {
    /**
     * Converts a {@code .doc} document to an HTML file.
     *
     * <p>Each embedded picture is written to {@code imageAbsolutePath + suggestedName} and
     * referenced from the HTML as {@code webImagePath + suggestedName}. Both prefixes are
     * joined to the file name by plain string concatenation, so they must already end with
     * the appropriate separator.
     *
     * @param ins stream providing the {@code .doc} content; it is not closed by this method
     * @param imageAbsolutePath local path prefix under which extracted pictures are stored
     * @param webImagePath prefix used for the {@code src} attribute of the generated
     *        {@code <img>} tags, e.g. {@code /csdn/} yields {@code <img src="/csdn/a.png"/>};
     *        use the address the server exposes, not the local storage path
     * @param htmlPath local path of the HTML file to write
     * @throws IOException if the document cannot be read or the HTML file cannot be written
     * @throws ParserConfigurationException if the DOM document builder cannot be created
     * @throws TransformerException if serializing the HTML DOM fails
     */
    public static void docToHtml(InputStream ins, String imageAbsolutePath, String webImagePath,
            String htmlPath) throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(ins);

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Store every embedded picture on disk and tell the converter which URL to emit for it.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {

            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                File file = new File(imageAbsolutePath + suggestedName);
                // try-with-resources closes the stream even when write() fails.
                try (OutputStream out = new FileOutputStream(file)) {
                    out.write(content);
                } catch (IOException e) {
                    // Best effort: this callback cannot throw checked exceptions, so a picture
                    // that fails to save is reported and the conversion continues.
                    // FileNotFoundException is an IOException and is handled here as well.
                    e.printStackTrace();
                }
                // The returned value becomes the <img> src in the generated HTML; it is the
                // server-facing address, not the local path the picture was stored under.
                return webImagePath + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // Close the output file on every path, including a failing transform().
        try (OutputStream out = new FileOutputStream(new File(htmlPath))) {
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        }
    }

    /**
     * Converts a {@code .docx} document to an HTML file.
     *
     * @param ins stream providing the {@code .docx} content; this method does not
     *        explicitly close it
     * @param imageUrl local directory handed to the image extractor for storing pictures
     * @param webImagePath base value for the {@code src} attribute of the generated
     *        {@code <img>} tags; according to the original author's note the extractor
     *        creates a {@code /word/media} sub-folder under the configured folder
     *        (NOTE(review): confirm against the xdocreport version in use)
     * @param fileUrl local path of the HTML file to write
     * @throws IOException if the document cannot be read or the HTML file cannot be written
     */
    public static void docxToHtml(InputStream ins, String imageUrl, String webImagePath,
            String fileUrl) throws IOException {

        // 1: load the document into an XWPFDocument
        XWPFDocument document = new XWPFDocument(ins);

        // 2: extract pictures into the given directory
        File imgFile = new File(imageUrl);
        XHTMLOptions options = XHTMLOptions.create();
        options.setExtractor(new FileImageExtractor(imgFile));

        // BasicURIResolver is used for relative paths; per the original author's note,
        // FileURIResolver can be used for absolute paths.
        options.URIResolver(new BasicURIResolver(webImagePath));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        // 3: convert the XWPFDocument to XHTML, closing the output file on every path
        // (the original never closed this stream).
        try (OutputStream out = new FileOutputStream(new File(fileUrl))) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
}