1. 程式人生 > >libmxml數據結構(源碼分析)

libmxml數據結構(源碼分析)

attrs def device same next 下層 election str 一個

  libmxml是一個開源、小巧的C語言xml庫。這裏簡單分析一下它是用什麼樣的數據結構來保存解析過的xml文檔。

  mxml的關鍵結構體mxml_node_t是這樣實現的:

/*
 * One node of a parsed XML document.
 *
 * Nodes are linked into a tree: `parent` points upwards, `child` and
 * `last_child` point at the two ends of the list of children, and
 * `next`/`prev` chain together the nodes that share the same parent.
 */
struct mxml_node_s
{
  mxml_type_t         type;         /* Kind of node */
  struct mxml_node_s  *next;        /* Following sibling (same parent) */
  struct mxml_node_s  *prev;        /* Preceding sibling (same parent) */
  struct mxml_node_s  *parent;      /* Node this one hangs under */
  struct mxml_node_s  *child;       /* Head of the child list */
  struct mxml_node_s  *last_child;  /* Tail of the child list */
  mxml_value_t        value;        /* Payload of the node */
  int                 ref_count;    /* Use count */
  void                *user_data;   /* Caller-supplied data pointer */
};

/* Convenience name for an XML node. */
typedef struct mxml_node_s mxml_node_t;

  

  它使用左孩子右兄弟的樹形結構來描述xml報文:即下層節點登記在child鏈表,兄弟節點登記在next鏈表。如果某個節點下面有N個子節點,則child指向第一個子節點,該子節點的next指向同一父節點下的下一個子節點。比較特殊的是,mxml把xml節點值也視為一個子節點。例如<group>value</group>,其中value是一個獨立的子節點(本文示例中type是MXML_OPAQUE,實際類型取決於載入文檔時指定的回調函數),掛載在group節點(type是MXML_ELEMENT)下面。另外,空白符(空格、回車換行、制表符)和註釋雖然對xml報文無實質意義,但mxml還是把它們作為節點存儲起來。

  由於mxml只是使用簡單的鏈表存儲xml元素,元素節點個數比較多時,查找元素的效率比較低。為此libmxml提供了索引查找的函數(mxmlIndexNew、mxmlIndexFind等):它先遍歷xml元素樹,生成一個排序過的數組,再在該數組上查找,從而加快查找速度。

  為了方便大家理解,我寫了一個函數打印xml結構體。

/*
 * Return a pointer that is always safe to hand to printf's %s.
 * A NULL input is replaced by the literal "(null)" instead of being
 * passed through, because %s with a null pointer is undefined behaviour.
 */
static const char *printableStr(const char *str)
{
    return (str != NULL) ? str : "(null)";
}

/*
 * Print `node` and every following sibling, descending into each node's
 * children before moving on to its next sibling (depth first, pre-order).
 *
 *   node     - first node of the sibling list to print; may be NULL
 *   parentSn - serial number reported as the parent of every node in the list
 *   level    - depth reported for every node in the list
 *   lastSn   - serial number handed out most recently; shared by the whole
 *              traversal and incremented once per visited node
 *
 * Siblings are walked with a loop, so recursion depth follows the depth of
 * the tree rather than the number of nodes that share a parent.
 */
static void printNodeList(mxml_node_t *node, int parentSn, int level, int *lastSn)
{
    for (; node != NULL; node = node->next)
    {
        /* Each newly visited node takes the next serial number. */
        int nodeSn = ++*lastSn;

        printf("[%- 3d -> %- 3d] ", nodeSn, parentSn);

        switch (node->type)
        {
        case MXML_ELEMENT:
            {
                int i;

                printf("level %d MXML_ELEMENT [%s]", level,
                       printableStr(node->value.element.name));
                for (i = 0; i < node->value.element.num_attrs; ++i)
                {
                    printf(" %s=%s",
                           printableStr(node->value.element.attrs[i].name),
                           printableStr(node->value.element.attrs[i].value));
                }
                printf("\n");
            }
            break;
        case MXML_INTEGER:
            printf("level %d MXML_INTEGER %d\n", level, node->value.integer);
            break;
        case MXML_OPAQUE:
            printf("level %d MXML_OPAQUE [%s]\n", level,
                   printableStr(node->value.opaque));
            break;
        case MXML_REAL:
            printf("level %d MXML_REAL %lf\n", level, node->value.real);
            break;
        case MXML_TEXT:
            printf("level %d MXML_TEXT [%s]\n", level,
                   printableStr(node->value.text.string));
            break;
        case MXML_CUSTOM:
            printf("level %d MXML_CUSTOM\n", level);
            break;
        default:
            /* Cast so the argument type matches %d whatever the enum's
               underlying type is. */
            printf("unknown node type %d\n", (int)node->type);
            break;
        }

        /* Children report this node's serial number as their parent and
           sit one level deeper. */
        if (node->child != NULL)
        {
            printNodeList(node->child, nodeSn, level + 1, lastSn);
        }

        /* The loop step then moves to the next sibling, which keeps the
           same parent serial number and the same level. */
    }
}

/*
 * Dump the tree structure starting at `node` to stdout, one line per node.
 *
 * Every line starts with "[sn -> parentSn]", where sn is the node's serial
 * number in visiting order (starting at 1) and parentSn is the serial number
 * of its parent, followed by the level, the node type and the node's value.
 *
 *   node    - first node to print; its following siblings and all of their
 *             descendants are printed as well. NULL prints nothing.
 *   nNodeSn - serial number to report as the parent of `node` and of its
 *             siblings
 *   level   - level to report for `node` and its siblings
 *
 * Numbering restarts at 1 on every call, so the function can be called more
 * than once and keeps no state between calls.
 */
void printNode(mxml_node_t *node, int nNodeSn, int level)
{
    int lastSn = 0;

    printNodeList(node, nNodeSn, level, &lastSn);
}

  運行示例如下:

  xml源如下:

<?xml version="1.0" encoding="GBK" ?>
<group>
    <option>122334 我們
        <string>我們</string>45677
        <keyword type="opaque">InputSlot</keyword>
        <default type="opaque">Auto</default>
        <text>Media Source</text>
        <order type="real">10.000000</order>
        <choice>
            <keyword type="opaque">Auto</keyword>
            <text>Auto Tray Selection</text>
            <code type="opaque" />
        </choice>
        <choice>
            <keyword type="opaque">Upper</keyword>
            <text>Tray 1</text>
            <code type="opaque">&lt;&lt;/MediaPosition 0&gt;&gt;setpagedevice</code>
        </choice>
        <choice>
            <keyword type="opaque">Lower</keyword>
            <text>Tray 2</text>
            <code type="opaque">&lt;&lt;/MediaPosition 1&gt;&gt;setpagedevice</code>
        </choice>
    </option> 我 12334545 050504550
    <integer>123</integer>
    <string>Now is the time for all good men to come to the aid of their
country.</string>
    <!-- this is a comment -->
    <![CDATA[this is CDATA 0123456789ABCDEF]]>
</group>

  用我這個printNode分析結果如下:

說明:[ 1  ->  0 ],代表本節點序號是1,其父節點序號是0,level 0代表本節點是最頂層節點。

[ 1  ->  0 ] level 0 MXML_ELEMENT [?xml version="1.0" encoding="GBK" ?]
[ 2  ->  1 ] level 1 MXML_OPAQUE [
]
[ 3  ->  1 ] level 1 MXML_ELEMENT [group]
[ 4  ->  3 ] level 2 MXML_OPAQUE [
        ]
[ 5  ->  3 ] level 2 MXML_ELEMENT [option]
[ 6  ->  5 ] level 3 MXML_OPAQUE [122334 我們
                ]
[ 7  ->  5 ] level 3 MXML_ELEMENT [string]
[ 8  ->  7 ] level 4 MXML_OPAQUE [我們]
[ 9  ->  5 ] level 3 MXML_OPAQUE [45677
                ]
[ 10 ->  5 ] level 3 MXML_ELEMENT [keyword] type=opaque
[ 11 ->  10] level 4 MXML_OPAQUE [InputSlot]
[ 12 ->  5 ] level 3 MXML_OPAQUE [
                ]
[ 13 ->  5 ] level 3 MXML_ELEMENT [default] type=opaque
[ 14 ->  13] level 4 MXML_OPAQUE [Auto]
[ 15 ->  5 ] level 3 MXML_OPAQUE [
                ]
[ 16 ->  5 ] level 3 MXML_ELEMENT [text]
[ 17 ->  16] level 4 MXML_OPAQUE [Media Source]
[ 18 ->  5 ] level 3 MXML_OPAQUE [
                ]
[ 19 ->  5 ] level 3 MXML_ELEMENT [order] type=real
[ 20 ->  19] level 4 MXML_OPAQUE [10.000000]
[ 21 ->  5 ] level 3 MXML_OPAQUE [
                ]
[ 22 ->  5 ] level 3 MXML_ELEMENT [choice]
[ 23 ->  22] level 4 MXML_OPAQUE [
                        ]
[ 24 ->  22] level 4 MXML_ELEMENT [keyword] type=opaque
[ 25 ->  24] level 5 MXML_OPAQUE [Auto]
[ 26 ->  22] level 4 MXML_OPAQUE [
                        ]
[ 27 ->  22] level 4 MXML_ELEMENT [text]
[ 28 ->  27] level 5 MXML_OPAQUE [Auto Tray Selection]
[ 29 ->  22] level 4 MXML_OPAQUE [
                        ]
[ 30 ->  22] level 4 MXML_ELEMENT [code] type=opaque
[ 31 ->  22] level 4 MXML_OPAQUE [
                ]
[ 32 ->  5 ] level 3 MXML_OPAQUE [
                ]
[ 33 ->  5 ] level 3 MXML_ELEMENT [choice]
[ 34 ->  33] level 4 MXML_OPAQUE [
                        ]
[ 35 ->  33] level 4 MXML_ELEMENT [keyword] type=opaque
[ 36 ->  35] level 5 MXML_OPAQUE [Upper]
[ 37 ->  33] level 4 MXML_OPAQUE [
                        ]
[ 38 ->  33] level 4 MXML_ELEMENT [text]
[ 39 ->  38] level 5 MXML_OPAQUE [Tray 1]
[ 40 ->  33] level 4 MXML_OPAQUE [
                        ]
[ 41 ->  33] level 4 MXML_ELEMENT [code] type=opaque
[ 42 ->  41] level 5 MXML_OPAQUE [<</MediaPosition 0>>setpagedevice]
[ 43 ->  33] level 4 MXML_OPAQUE [
                ]
[ 44 ->  5 ] level 3 MXML_OPAQUE [
                ]
[ 45 ->  5 ] level 3 MXML_ELEMENT [choice]
[ 46 ->  45] level 4 MXML_OPAQUE [
                        ]
[ 47 ->  45] level 4 MXML_ELEMENT [keyword] type=opaque
[ 48 ->  47] level 5 MXML_OPAQUE [Lower]
[ 49 ->  45] level 4 MXML_OPAQUE [
                        ]
[ 50 ->  45] level 4 MXML_ELEMENT [text]
[ 51 ->  50] level 5 MXML_OPAQUE [Tray 2]
[ 52 ->  45] level 4 MXML_OPAQUE [
                        ]
[ 53 ->  45] level 4 MXML_ELEMENT [code] type=opaque
[ 54 ->  53] level 5 MXML_OPAQUE [<</MediaPosition 1>>setpagedevice]
[ 55 ->  45] level 4 MXML_OPAQUE [
                ]
[ 56 ->  5 ] level 3 MXML_OPAQUE [
        ]
[ 57 ->  3 ] level 2 MXML_OPAQUE [ 我12334545 050504550
        ]
[ 58 ->  3 ] level 2 MXML_ELEMENT [integer]
[ 59 ->  58] level 3 MXML_OPAQUE [123]
[ 60 ->  3 ] level 2 MXML_OPAQUE [
        ]
[ 61 ->  3 ] level 2 MXML_ELEMENT [string]
[ 62 ->  61] level 3 MXML_OPAQUE [Now is the time for all good men to come to the aid of their
country.]
[ 63 ->  3 ] level 2 MXML_OPAQUE [
        ]
[ 64 ->  3 ] level 2 MXML_ELEMENT [!-- this is a comment --]
[ 65 ->  3 ] level 2 MXML_OPAQUE [
        ]
[ 66 ->  3 ] level 2 MXML_ELEMENT [![CDATA[this is CDATA 0123456789ABCDEF]]]
[ 67 ->  3 ] level 2 MXML_OPAQUE [
]

  

libmxml數據結構(源碼分析)