1. 程式人生 > java 解析/拆分 xml

java 解析/拆分 xml

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package jcoapp;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.XMLWriter;
import java.io.*;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.dom4j.*;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.xml.sax.InputSource;

/**
 * dom4j helper routines: looking up a value in a record-style XML document,
 * dumping an element tree as text, generating and modifying a sample catalog
 * document, and splitting a document into one file per record.
 *
 * <p>NOTE(review): every parser created here uses dom4j's default settings.
 * If any XML handled by this class can come from an untrusted source, DTD and
 * external-entity processing should be disabled on the reader (XXE).
 *
 * @author luolai
 */
public class XmlDom4J {

    private static final Logger LOGGER = Logger.getLogger(XmlDom4J.class.getName());

    /** Article title written by {@link #generateDocument()} and matched by {@link #modifyDocument(File)}. */
    private static final String ARTICLE_TITLE = "Java configuration with XML Schema";

    /** Sample document parsed by {@link #main(String[])}. */
    private static final String SAMPLE_XML = "<?xml version=\"1.0\" encoding=\"utf-8\"?><STOREMASTERBRIDGE><STOREMASTER><WHSE>NT1</WHSE><STORE_NBR>0022410038</STORE_NBR><STOREDETAIL><NAME>聯盛</NAME><ADDR_LINE_1>N/A</ADDR_LINE_1><ADDR_LINE_2>N/A</ADDR_LINE_2><ADDR_LINE_3>N/A</ADDR_LINE_3><CITY>N/A</CITY><ZIP>N/A</ZIP><CNTRY>CN</CNTRY></STOREDETAIL><STOREFIELDS><CONTACT>N/A</CONTACT><EMAIL>聯盛_物流專用</EMAIL><DFLT_CO>2100</DFLT_CO><DFLT_DIV>001</DFLT_DIV><STAT_CODE>0</STAT_CODE><GRP>SAP</GRP><RTE_ATTR>聯盛_物流專用</RTE_ATTR><RTE_TO>0022410038</RTE_TO></STOREFIELDS></STOREMASTER><STOREMASTER><WHSE>NT1</WHSE><STORE_NBR>0022410039</STORE_NBR><STOREDETAIL><NAME>恆興聯運</NAME><ADDR_LINE_1>N/A</ADDR_LINE_1><ADDR_LINE_2>N/A</ADDR_LINE_2><ADDR_LINE_3>N/A</ADDR_LINE_3><CITY>N/A</CITY><ZIP>N/A</ZIP><CNTRY>CN</CNTRY></STOREDETAIL><STOREFIELDS><CONTACT>N/A</CONTACT><EMAIL>恆興聯運_物流專用</EMAIL><DFLT_CO>2100</DFLT_CO><DFLT_DIV>001</DFLT_DIV><STAT_CODE>0</STAT_CODE><GRP>SAP</GRP><RTE_ATTR>恆興聯運_物流專用</RTE_ATTR><RTE_TO>0022410039</RTE_TO></STOREFIELDS></STOREMASTER></STOREMASTERBRIDGE>";

    LogInfo loginfo = new LogInfo();

    /**
     * Reads an XML file and returns the first non-empty, trimmed text of the
     * child element {@code node2} found in the record elements under the root.
     *
     * <p>The "record" elements are the children of the root that share the name
     * of the root's first child. Records whose {@code node2} child is missing or
     * empty are skipped, matching the String-based overloads.
     *
     * @param inputXml the XML file to read
     * @param node2    name of the child element whose text is wanted
     * @return the trimmed text, or {@code ""} when the file cannot be parsed,
     *         the root has no children, or no record carries a non-empty value
     */
    public String parseDocument(File inputXml, String node2) {
        return findFirstValue(readDocument(inputXml), null, node2, false);
    }

    /**
     * Parses XML text and returns the first non-empty, trimmed text of the
     * child element {@code node2} found in the record elements under the root.
     *
     * @param inputXmlString the XML document as text
     * @param node2          name of the child element whose text is wanted
     * @return the trimmed text, or {@code ""} when the text cannot be parsed,
     *         the root has no children, or no record carries a non-empty value
     */
    public String parseDocument(String inputXmlString, String node2) {
        return findFirstValue(readDocument(inputXmlString), null, node2, true);
    }

    /**
     * Same lookup as {@link #parseDocument(String, String)}, but only performed
     * when the root's first child is named {@code node1} (case-insensitive).
     *
     * @param inputXmlString the XML document as text
     * @param node1          expected name of the root's first child; {@code null}
     *                       disables the check
     * @param node2          name of the child element whose text is wanted
     * @return the trimmed text, or {@code ""} when the text cannot be parsed,
     *         the first child has a different name, or no value is found
     */
    public String parseDocument(String inputXmlString, String node1, String node2) {
        return findFirstValue(readDocument(inputXmlString), node1, node2, true);
    }

    /**
     * Parses XML text and renders its element tree as text: one
     * {@code name:text} entry per leaf element and one {@code <<[name]>>}
     * entry per element that has children, each terminated by a carriage return.
     *
     * @param xml the XML document as text
     * @return the rendered tree, or {@code null} when {@code xml} is
     *         {@code null} or cannot be parsed
     */
    public static String getXmlElmentValue(String xml) {
        if (xml == null) {
            return null;
        }
        try {
            Element root = DocumentHelper.parseText(xml).getRootElement();
            StringBuilder dump = new StringBuilder();
            appendElementValues(root, dump);
            return dump.toString();
        } catch (DocumentException e) {
            LOGGER.log(Level.SEVERE, "Cannot parse XML text", e);
            return null;
        }
    }

    /**
     * Builds the sample "catalog" document and writes it to
     * {@code d:/catalog.xml} as UTF-8. I/O failures are logged, not thrown.
     */
    public void generateDocument() {
        Document document = DocumentHelper.createDocument();
        Element catalog = document.addElement("catalog");
        catalog.addComment("An XML Catalog");
        catalog.addProcessingInstruction("target", "text");

        Element journal = catalog.addElement("journal");
        journal.addAttribute("title", "XML Zone");
        journal.addAttribute("publisher", "IBM developerWorks");

        Element article = journal.addElement("article");
        article.addAttribute("level", "Intermediate");
        article.addAttribute("date", "December-2001");
        article.addElement("title").setText(ARTICLE_TITLE);

        Element author = article.addElement("author");
        author.addElement("firstname").setText("Marcello");
        author.addElement("lastname").setText("Vitaletti");

        try {
            writeDocument(document, new File("d:/catalog.xml"), new OutputFormat());
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Cannot write d:/catalog.xml", e);
        }
    }

    /**
     * Reads a catalog document, replaces the sample attribute values and texts
     * produced by {@link #generateDocument()} with new ones, and writes the
     * result to {@code c:/catalog/catalog-modified.xml} as UTF-8.
     * Parse and I/O failures are logged, not thrown.
     *
     * @param inputXml the catalog file to read
     */
    public void modifyDocument(File inputXml) {
        try {
            Document document = new SAXReader().read(inputXml);

            replaceAttributeValue(document, "//article/@level", "Intermediate", "Introductory");
            replaceAttributeValue(document, "//article/@date", "December-2001", "October-2002");

            for (Object article : document.selectNodes("//article")) {
                replaceChildText((Element) article, "title",
                        ARTICLE_TITLE, "Create flexible and extensible XML schema");
            }
            for (Object author : document.selectNodes("//article/author")) {
                replaceChildText((Element) author, "firstname", "Marcello", "Ayesha");
                replaceChildText((Element) author, "lastname", "Vitaletti", "Malik");
            }

            writeDocument(document, new File("c:/catalog/catalog-modified.xml"), new OutputFormat());
        } catch (DocumentException e) {
            LOGGER.log(Level.SEVERE, "Cannot parse " + inputXml, e);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Cannot write c:/catalog/catalog-modified.xml", e);
        }
    }

    /**
     * Splits an XML file of the shape {@code root / container / record / field}
     * into one file per record. Only the root's first child is treated as the
     * container. Each output file repeats the root and container element names
     * and holds a single record with its direct child elements (name and text
     * only; attributes and deeper nesting are not copied). Records without
     * child elements produce no file.
     *
     * <p>Output files are named after the input with {@code _<n>} (1-based record
     * position) inserted before the extension of the file name, and are written
     * pretty-printed as UTF-8.
     *
     * @param fileName path of the XML file to split
     * @return {@code true} when every record was written; {@code false} when the
     *         input cannot be parsed, the root has no children, or a write fails
     */
    public boolean splitXML(String fileName) {
        Document document = readDocument(new File(fileName));
        if (document == null) {
            return false;
        }
        Element root = document.getRootElement();
        System.out.println("root:" + root.getName());

        List<?> topLevel = root.elements();
        if (topLevel.isEmpty()) {
            return false;
        }
        Element container = (Element) topLevel.get(0);
        List<?> records = container.elements();
        System.out.println("root node number:" + records.size());

        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("UTF-8");
        format.setIndent("  ");

        for (int i = 0; i < records.size(); i++) {
            Element record = (Element) records.get(i);
            System.out.println(record.getName());

            List<?> fields = record.elements();
            if (fields.isEmpty()) {
                // Nothing to export for a record without fields.
                continue;
            }

            Document exportDoc = DocumentHelper.createDocument();
            Element exportRecord = exportDoc.addElement(root.getName())
                    .addElement(container.getName())
                    .addElement(record.getName());
            for (Object field : fields) {
                Element source = (Element) field;
                exportRecord.addElement(source.getName()).setText(source.getText());
            }

            File target = splitFileFor(fileName, i + 1);
            try {
                // One write per record, after all of its fields have been copied.
                writeDocument(exportDoc, target, format);
            } catch (IOException e) {
                LOGGER.log(Level.SEVERE, "Cannot write " + target, e);
                return false;
            }
        }
        return true;
    }

    /**
     * Demo entry point: parses the built-in sample document looking for the
     * tag {@code NAME1}. When a file path is given as the first argument, the
     * {@code id}/{@code name} attributes of that file's element tree are
     * printed as well.
     *
     * @param argv optional: path of an XML file whose node tree should be printed
     */
    public static void main(String[] argv) {
        XmlDom4J dom4j = new XmlDom4J();
        dom4j.parseDocument(SAMPLE_XML, "NAME1");

        if (argv != null && argv.length > 0) {
            Document document = readDocument(new File(argv[0]));
            if (document != null) {
                dom4j.nodeByNodes(document.getRootElement());
            }
        }
        System.exit(0);
    }

    /** Parses a file; logs the failure and returns {@code null} when it cannot be parsed. */
    private static Document readDocument(File inputXml) {
        try {
            return new SAXReader().read(inputXml);
        } catch (DocumentException ex) {
            LOGGER.log(Level.SEVERE, "Cannot parse XML file " + inputXml, ex);
            return null;
        }
    }

    /** Parses XML text; logs the failure and returns {@code null} when it is missing or malformed. */
    private static Document readDocument(String xml) {
        if (xml == null) {
            LOGGER.log(Level.WARNING, "No XML text supplied");
            return null;
        }
        try {
            return new SAXReader().read(new InputSource(new StringReader(xml)));
        } catch (DocumentException ex) {
            LOGGER.log(Level.SEVERE, "Cannot parse XML text", ex);
            return null;
        }
    }

    /**
     * Shared lookup behind the {@code parseDocument} overloads: walks the root's
     * children that share the first child's name and returns the first
     * non-empty trimmed text of their {@code node2} child.
     *
     * @param document       parsed document, or {@code null} if parsing failed
     * @param node1          required name of the first child (case-insensitive),
     *                       or {@code null} for no requirement
     * @param node2          name of the child element to read
     * @param echoFirstChild whether to print the first child's name to stdout
     */
    private String findFirstValue(Document document, String node1, String node2, boolean echoFirstChild) {
        if (document == null) {
            return "";
        }
        Element root = document.getRootElement();
        report("root:" + root.getName());

        List<?> children = root.elements();
        if (children.isEmpty()) {
            return "";
        }
        String recordName = ((Element) children.get(0)).getName();
        if (node1 != null && !node1.equalsIgnoreCase(recordName)) {
            return "";
        }
        if (echoFirstChild) {
            System.out.println(recordName);
        }

        for (Iterator<?> records = root.elementIterator(recordName); records.hasNext();) {
            Element record = (Element) records.next();
            String value = record.elementTextTrim(node2);
            if (value != null && value.length() > 0) {
                report(node2 + ":" + value);
                return value;
            }
        }
        return "";
    }

    /** Writes a progress line to stdout and to the application log. */
    private void report(String message) {
        System.out.println(message);
        loginfo.appendLog(message);
    }

    /** Appends the text rendering of {@code element} and its descendants to {@code out}. */
    private static void appendElementValues(Element element, StringBuilder out) {
        List<?> children = element.elements();
        if (children.isEmpty()) {
            // The character class is a range from \n to \r, so it also strips
            // vertical tab and form feed along with the line breaks.
            out.append(element.getName()).append(':')
                    .append(element.getText().replaceAll("[\n-\r]", ""))
                    .append('\r');
            return;
        }
        out.append("<<[").append(element.getName()).append("]>>").append('\r');
        for (Object child : children) {
            appendElementValues((Element) child, out);
        }
    }

    /** Sets every attribute selected by {@code xpath} whose value equals {@code from} to {@code to}. */
    private static void replaceAttributeValue(Document document, String xpath, String from, String to) {
        for (Object selected : document.selectNodes(xpath)) {
            Attribute attribute = (Attribute) selected;
            if (attribute.getValue().equals(from)) {
                attribute.setValue(to);
            }
        }
    }

    /** Sets the text of every {@code childName} child of {@code parent} whose text equals {@code from} to {@code to}. */
    private static void replaceChildText(Element parent, String childName, String from, String to) {
        for (Iterator<?> children = parent.elementIterator(childName); children.hasNext();) {
            Element child = (Element) children.next();
            if (child.getText().equals(from)) {
                child.setText(to);
            }
        }
    }

    /**
     * Serialises {@code document} to {@code target}. The bytes are produced by
     * the XMLWriter from the format's encoding, so they agree with the encoding
     * named in the XML declaration; the stream is closed even when writing fails.
     */
    private static void writeDocument(Document document, File target, OutputFormat format) throws IOException {
        OutputStream out = new FileOutputStream(target);
        try {
            XMLWriter writer = new XMLWriter(out, format);
            writer.write(document);
            writer.flush();
        } finally {
            out.close();
        }
    }

    /**
     * Derives the output file for record {@code index}: {@code _<index>} is
     * inserted before the extension of the last path segment, or appended when
     * that segment has no extension.
     */
    private static File splitFileFor(String fileName, int index) {
        int separator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        int dot = fileName.lastIndexOf('.');
        if (dot <= separator) {
            // The last dot (if any) belongs to a directory name, not to the file.
            dot = fileName.length();
        }
        return new File(fileName.substring(0, dot) + "_" + index + fileName.substring(dot));
    }

    /**
     * Prints the {@code id} and {@code name} attributes of {@code node}, then
     * recurses into its children named {@code node}.
     *
     * <p>NOTE(review): recursion only happens when the element has a child with
     * its own name, yet the children visited are those named {@code "node"};
     * the two only agree for elements that are themselves called {@code node}.
     * Confirm the intended document shape.
     */
    private void nodeByNodes(Element node) {
        System.out.print(node.attributeValue("id") + "-------");
        System.out.println(node.attributeValue("name"));

        if (node.element(node.getName()) == null) {
            return;
        }
        for (Iterator<?> children = node.elementIterator("node"); children.hasNext();) {
            nodeByNodes((Element) children.next());
        }
    }
}