1. 程式人生 > >python26 re正則表示式

python26 re正則表示式

 

#!/usr/bin/python
# coding: utf-8
"""
Walk-through of the standard-library ``re`` module.

2018-11-25
dinghanhua

Every section below applies one ``re`` API (match, search, sub, compile,
findall, finditer, split) to the same JSON-like sample string and prints
the result.
"""
import re

# Sample text: a fragment of a JSON book record.  It is assembled from
# adjacent literals so that it stays a single line of text at runtime; the
# URL piece is a raw string because it contains literal "\/" sequences,
# which are not valid escape sequences in a normal string literal.
teststr = (
    '"id":"2994925","publisher":"Yahoo Press",'
    '"isbn10":"0596517742","isbn13":"9780596517748",'
    '"title":"JavaScript",'
    r'"url":"https:\/\/api.douban.com\/v2\/book\/2994925",'
    '"alt_title":"",'
    '"author_intro":"Douglas Crockford is a Senior JavaScript Architect '
    'at Yahoo!. He is the maintainer of the JSON format, and a regular '
    'speaker at conferences on advanced JavaScript topic. He is also on '
    'the JavaScript 2.0 committee at ECMA."'
)


# --- re.match(): matches only at the beginning of the string ---------------
pattern = r'\d+'
# Prints None: the sample starts with a double quote, not a digit.
print(re.match(pattern, teststr))
pattern = r'"id":"(.*)","publisher'
matchobj = re.match(pattern, teststr)
print(matchobj.group(0))   # the whole matched text
print(matchobj.groups())   # tuple of all captured groups
print(matchobj.group(1))   # first captured group only
print(matchobj.span())     # (start, end) indexes of the match
print(matchobj.start(), matchobj.end())


# --- re.search(): returns the first match anywhere in the string -----------
pattern = r'\d+'
print(re.search(pattern, teststr))
# Non-greedy groups (.*?) stop at the first closing quote.
pattern = r'"id":"(.*?)".*"title":"(.*?)"'
matchobj = re.search(pattern, teststr)
print(matchobj.group(0))
print(matchobj.groups())
print(matchobj.group(1, 2))  # several groups at once, returned as a tuple
print(matchobj.span())
print(matchobj.start(), matchobj.end())


# --- re.sub(): replace matches ----------------------------------------------
# repl is the replacement text; count limits how many matches are replaced
# (the default, 0, replaces all of them).
pattern = r'\d+'
teststr2 = re.sub(pattern, repl='1111', string=teststr, count=1)
print(teststr2)

pattern = r'\D+'
teststr2 = re.sub(pattern, "", teststr)  # strip every non-digit character
print(teststr2)


# --- re.compile(): build a reusable pattern object ---------------------------
pattern = re.compile(r'"(\w+)":"(\w+)"')
matchobj = pattern.match(teststr)
print(matchobj.groups())
# Compiled patterns accept start/end positions: scan from index 10 up to 100.
matchobj = pattern.search(teststr, 10, 100)
print(matchobj.groups())


# --- findall(): every match, returned as a list ------------------------------
pattern = r'"(\w+)":"(\d+)"'
matchlist = re.findall(pattern, teststr)
print(matchlist)
# NOTE: \D matches any non-digit, including quotes and commas, so the second
# group can run past the closing quote of a value into the following field.
pattern = re.compile(r'"(\w+)":"(\D+)"')
matchlist = pattern.findall(teststr, 10)  # start scanning at index 10
print(matchlist)


# --- re.finditer(): every match, returned lazily as an iterator --------------
pattern = r'"(\w+)":"(\d+)"'
matchiter = re.finditer(pattern, teststr)
print(matchiter)
for m in matchiter:
    print(m.groups())


# --- re.split(): split on a regular expression -------------------------------
pattern = r'[^a-zA-Z]+'  # split on runs of non-letter characters
splitlist = re.split(pattern, teststr)
print(splitlist)