re模組,hashlib模組
阿新 • • 發佈:2018-12-06
import re
# Plain text: findall returns every non-overlapping occurrence of the pattern.
print(re.findall('alex', 'hahahah alex is alex is dsb'))
# pattern: alex

# \w matches one word character: a letter, a digit or an underscore.
print(re.findall(r'\w', 'Aah123 +-_'))
# pattern: \w
print(re.findall(r'\w\w', 'Aah123 +-_'))
# pattern: \w\w
# result: ['Aa', 'h1', '23']
print(re.findall(r'\w9\w', 'Aa9h123 aaa9c+-_'))
# pattern: \w9\w
# result: ['a9h', 'a9c']

# \W matches one character that is NOT a letter, digit or underscore.
print(re.findall(r'\W', 'Aah123 +-_'))
# \s matches any whitespace character.
print(re.findall(r'\s', 'Aah\t12\n3 +-_'))
# \S matches any non-whitespace character.
print(re.findall(r'\S', 'Aah\t12\n3 +-_'))
# \d matches any digit.
print(re.findall(r'\d', 'Aah\t12\n3 +-_'))
# \D matches any non-digit.
print(re.findall(r'\D', 'Aah\t12\n3 +-_'))
print(re.findall(r'\w\w\d\d', 'asfdasdfegon001adfadfegon002asdfxx01 yy02'))

# \s matches every kind of whitespace (here: tab, newline and space) ...
print(re.findall(r'\s', 'Aah\t12\n3 +-_'))
# ... whereas \t matches only a tab ...
print(re.findall('\t', 'Aah\t12\n3 +-_'))
# ... and \n matches only a newline.
print(re.findall('\n', 'Aah\t12\n3 +-_'))
# ^ anchors the match at the very start of the string.  The subject below
# begins with a space, so nothing matches.
print(re.findall('^alex', ' alex is alex is alex'))
# pattern: ^alex

# $ anchors the match at the very end of the string.  The subject below ends
# with "alex1", so nothing matches.
print(re.findall('alex$', ' alex is alex is alex1'))
# pattern: alex$

# . matches any single character except a newline; passing re.DOTALL (as done
# here) lets it match a newline as well, which is why 'a\nc' is in the result.
print(re.findall('a.c', 'a a1c aaac a c asfdsaf a\nc', re.DOTALL))
# pattern: a.c
# result: ['a1c', 'aac', 'a c', 'a\nc']

# [...] matches exactly one character taken from the set written inside it.
print(re.findall('a[0-9]c', 'a,c a a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))
print(re.findall('a[a-zA-Z]c', 'a,c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))
print(re.findall('a[a-zA-Z]c', 'a,c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))
# A "-" written last in the set, or escaped as \-, is a literal hyphen rather
# than a range operator.
print(re.findall('a[+*/-]c', 'a,c a+c a-c a*c a/c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))
print(re.findall(r'a[+*\-/]c', 'a,c a+c a-c a*c a/c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))
# [^...] negates the set: any one character that is NOT listed.
print(re.findall('a[^0-9]c', 'a,c a a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))
# Repetition (quantifiers)

# ?: the character to its left may occur 0 or 1 times.
print(re.findall('ab?', 'a ab abb abbbb a123b a123bbbb'))
# pattern: ab?
# result: ['a', 'ab', 'ab', 'ab', 'a', 'a']

# *: the character to its left may occur 0 or more times.
print(re.findall('ab*', 'a ab abb abbbb a123b a123bbbb'))
# pattern: ab*
# result: ['a', 'ab', 'abb', 'abbbb', 'a', 'a']

# +: the character to its left must occur 1 or more times.
print(re.findall('ab+', 'a ab abb abbbb a123b a123bbbb'))
# pattern: ab+
# result: ['ab', 'abb', 'abbbb']

# {n,m}: the character to its left must occur between n and m times.
print(re.findall('ab{1,3}', 'a ab abb abbbb a123b a123bbbb'))
# result: ['ab', 'abb', 'abbb']

# {1,} has no upper bound and is therefore equivalent to +.
print(re.findall('ab{1,}', 'a ab abb abbbb a123b a123bbbb'))
print(re.findall('ab+', 'a ab abb abbbb a123b a123bbbb'))
# {0,} is equivalent to *.
print(re.findall('ab{0,}', 'a ab abb abbbb a123b a123bbbb'))
print(re.findall('ab*', 'a ab abb abbbb a123b a123bbbb'))
# {n}: exactly n occurrences.
print(re.findall('ab{3}', 'a ab abb abbbb a123b a123bbbb'))
# .* matches any run of zero or more characters, greedily (as long as possible).
print(re.findall('a.*c', 'a123213123asdfasdfc123123123123+-0)((c123123'))
# pattern: a.*c

# .*? matches any run of zero or more characters, non-greedily (as short as
# possible).
print(re.findall('a.*?c', 'a123213123asdfasdfc123123123123+-0)((c123123'))

# |: alternation ("or").
print(re.findall('companies|company', 'Too many companies have gone bankrupt,c and the next one is my company'))
# pattern: companies|company

# (...): grouping.  When the pattern contains a group, findall returns only
# the text captured by the group.  Writing the group as (?:...) makes it
# non-capturing, so findall returns the whole match again.
print(re.findall('compan(?:ies|y)', 'Too many companies have gone bankrupt,c and the next one is my company'))
# capturing form of the same pattern: compan(ies|y)
print(re.findall('href="(.*?)"', '<p>動感視訊</p><a href="https://www.douniwan.com/1.mp4">逗你玩呢</a><a href="https://www.xxx.com/2.mp4">葫蘆娃</a>'))
# the same pattern without a group: href=".*?"

# Matching a literal backslash: the regex engine must receive a\\c.  In a
# normal string literal every backslash has to be doubled again ('a\\\\c');
# a raw string avoids that second level of escaping (r'a\\c').
print(re.findall('a\\\\c', r'a\c aac'))
print(re.findall(r'a\\c', r'a\c aac'))

# re.I (re.IGNORECASE): match without regard to letter case.
print(re.findall('alex', 'my name is alex Alex is dsb aLex ALeX', re.I))
print(re.findall('alex', 'my name is alex Alex is dsb aLex ALeX', re.I))
msg = """
my name is egon
asdfsadfadfsadf egon
123123123123123egon
"""
# re.M (re.MULTILINE): "\n" separates lines, so $ matches at the end of every
# line instead of only at the end of the whole string.
# msg is '\nmy name is egon\nasdfsadfadfsadf egon\n123123123123123egon\n'
print(re.findall('egon$', msg, re.M))
# Other functions of the re module

# With more than one group in the pattern, findall returns one tuple of group
# contents per match.
res = re.findall('(href)="(.*?)"', '<p>動感視訊</p><a href="https://www.douniwan.com/1.mp4">逗你玩呢</a><a href="https://www.xxx.com/2.mp4">葫蘆娃</a>')
print(res)

# re.search stops at the first successful match and returns a match object
# (holding the matched text and its position), or None when nothing matches.
res = re.search('(href)="(.*?)"', '<p>動感視訊</p><a href="https://www.douniwan.com/1.mp4">逗你玩呢</a><a href="https://www.xxx.com/2.mp4">葫蘆娃</a>')
print(res)
print(res.group(0))  # group 0 is the entire match, not a sub-group
print(res.group(1))  # first group only
print(res.group(2))  # second group only
# re.match only tries the pattern at the beginning of the string and stops at
# the first hit; it behaves like re.search('^abc', '123abc').
res = re.match('abc', '123abc')
print(res)

# The same pattern and text through the three entry points, for comparison.
for _lookup in (re.findall, re.search, re.match):
    print(_lookup('alex', 'alex is alex is alex'))
# re.compile builds a pattern object once so that the same expression can be
# reused through its findall / search / match methods.
pattern = re.compile('alex')
print(pattern.findall('alex is alex is alex'))
print(pattern.search('alex is alex is alex'))
print(pattern.match('alex is alex is alex'))
# Pull every number out of an arithmetic expression, keeping a leading minus
# only where it is a sign rather than a subtraction operator.
# expected result: ['1', '2', '60', '-40.35', '5', '-40', '3']
msg = "1-2*(60+(-40.35/5)-(-40*3))"
# \D? swallows one optional non-digit in front of the number, so the "-" in
# "1-2" is consumed there and is not captured as a sign; only the
# parenthesised group is returned by findall.
print(re.findall(r'\D?(-?\d+\.?\d*)', msg))
# pattern without the group: \D?-?\d+\.?\d*
hashlib模組
'''
1. 什麼是hash
hash是一種演算法,該演算法接受一系列的資料,經過運算會得到一個hash值,
hash值具備三大特性:
1. 只要傳入的內容一樣,那麼得到的hash值一定是一樣
2. 只要採用hash演算法固定,無論傳入的內容多大,hash值的長度是固定
3. hash值不可逆,即不能通過hash值逆推出內容
2. 為何要用hash
特性1+2=>檔案完整性校驗
特性3==>密碼的密文儲存(單向雜湊,嚴格來說並非可逆的「加密」)
'''
特性1:
import hashlib
# Feed the text to a single md5 object in two successive update() calls;
# the digest covers the concatenation of everything fed in.
m = hashlib.md5()
for _piece in ('你好', 'hello'):
    m.update(_piece.encode('utf-8'))
print(m.hexdigest())  # 65c83c71cb3b2e2882f99358430679c3
特性2:
# Hash the whole text '你好hello' in one go; an md5 hex digest is always
# 32 characters long, whatever the size of the input.
m1 = hashlib.md5('你好hello'.encode('utf-8'))
_md5_hex = m1.hexdigest()
print(_md5_hex)  # 65c83c71cb3b2e2882f99358430679c3
print(len(_md5_hex))  # 32
特性3:
# Same idea with a different algorithm: sha512 yields a longer digest
# (printed together with its length).
m2 = hashlib.sha512(b'asdfassssssssssssssssssssssssssss')
_sha_hex = m2.hexdigest()
print(_sha_hex)
print(len(_sha_hex))
# File integrity check: open the file in binary mode and feed it to md5 one
# line at a time, then print the digest of the whole file.
# NOTE(review): the original indentation was lost; the nesting below is the
# reconstruction that makes the snippet valid Python.
with open(r'D:\脫產5期內容\day17\今日內容', mode='rb') as f:
    m = hashlib.md5()
    for line in f:
        m.update(line)
    print(m.hexdigest())
# Salted password digest: a fixed string is hashed before the password and
# another one after it, then the combined md5 digest is printed.
pwd = input('password>>> ').strip()
m = hashlib.md5()
for _part in ('天王蓋地虎', pwd, '一行白鷺上青天'):
    m.update(_part.encode('utf-8'))
print(m.hexdigest())