xpath基本操作用法
阿新 • • 發佈:2018-12-13
查詢 python lxml 庫用法:https://lxml.de/
from lxml import etree

# Walkthrough of basic XPath queries with lxml.
#
# Commented-out snippets are alternative examples kept for reference;
# uncomment one at a time to try it. The live statements at the bottom
# demonstrate the XPath node axes against the local file ./2.html.

# Sample markup. It is only referenced by the commented-out
# etree.HTML(text) example below.
text = (
    ' <div> <ul> '
    '<li class="item-0"><a href="link1.html">first item</a></li> '
    '<li class="item-1"><a href="link2.html">second item</a></li> '
    '<li class="item-inactive"><a href="link3.html">third item</a></li> '
    '<li class="item-1"><a href="link4.html">fourth item</a></li> '
    '<li class="item-0"><a href="link5.html">fifth item</a></li> '
    '</ul> </div> '
)

# Initialise from an HTML string, building an object that can be queried
# with XPath.
# html = etree.HTML(text)
# .tostring() produces the corrected HTML as bytes.
# result = etree.tostring(html)
# print(result.decode('utf-8'))

# Read a local HTML file.
# html = etree.parse('./res.html', etree.HTMLParser())
# result = etree.tostring(html)
# print(result.decode('utf-8'))

# Select every node.
# html = etree.parse('./res.html', etree.HTMLParser())
# result = html.xpath('//*')
# print(result)

html = etree.parse('./2.html', etree.HTMLParser())

# 5. All nodes of one kind: every <li> node in the local HTML file.
# result = html.xpath('//li')
# print(result[0])

# 6. Child nodes: the <a> nodes directly inside <li>.
# result = html.xpath('//li/a')

# 7. Parent node: the class attribute of the parent of the <a> node whose
#    href is link3.html.
# result = html.xpath('//a[@href="link3.html"]/../@class')

# 8. Attribute matching: <li> nodes whose class is item-0.
# result = html.xpath('//li[@class="item-0"]')

# 9. Text: the text of the <a> nodes under every <li>.
# result = html.xpath('//li/a/text()')
# result = html.xpath('//li[@class="item-0"]//text()')

# 10. Attribute values.
# result = html.xpath('//li/a/@href')

# 11. Matching an attribute that holds several values.
# result = html.xpath('//li[contains(@class,"class_1")]/a/text()')

# 12. Matching on several attributes at once.
# result = html.xpath('//li[contains(@class,"class_1") and @name="item"]/a/text()')

# 13. Selecting by position.
# result1 = html.xpath('//li[1]/a/text()')
# result2 = html.xpath('//li[last()]/a/text()')
# result3 = html.xpath('//li[position()<3]/a/text()')
# result4 = html.xpath('//li[last()-2]/a/text()')
# print(result1)
# print(result2)
# print(result3)
# print(result4)

# 14. Node axes. Each query below rebinds `result`; add print(result)
#     after a query to inspect its output.

# ancestor: all ancestor nodes.
result = html.xpath('//li[1]/ancestor::*')

# ancestor: only the <div> among the ancestors.
result = html.xpath('//li[1]/ancestor::div')

# attribute: all attributes of the node.
# The axis name was misspelled "atrribute", which is not a valid XPath
# axis and made lxml reject the expression.
result = html.xpath('//li[1]/attribute::*')

# child: direct children, restricted here to <a href="link1.html">.
result = html.xpath('//li[1]/child::a[@href="link1.html"]')

# descendant: all descendants, restricted here to <span>.
result = html.xpath('//li[1]/descendant::span')

# following: nodes after the current node, restricted here to the second
# one.
result = html.xpath('//li[1]/following::*[2]')

# following-sibling: all sibling nodes after the current node.
result = html.xpath('//li[1]/following-sibling::*')