第18次全天課筆記 20181125
全天課筆記-20181125
寫一個類,能夠統計某個檔案的純數字字元個數,統計非空白個數,空白個數,行數
能夠讀取檔案中的某一行
通過繼承方式,增加一個方法,列印所有的統計資訊
import os.path
class FileInfo(object):
    """Collect simple character and line statistics for a text file.

    Offers counts of ASCII digit characters, non-whitespace characters,
    whitespace characters and lines, plus access to the raw content.
    Every statistic re-reads the file through ``get_file_content``.
    """

    def __init__(self, file_path, encoding_type="utf-8"):
        """Store the path and encoding, prompting until the path exists.

        Args:
            file_path: Path of the file to analyse.
            encoding_type: Text encoding used when the file is read.
        """
        self.file_path = file_path
        self.encoding_type = encoding_type
        # Keep asking on stdin until an existing path has been supplied.
        while not os.path.exists(self.file_path):
            self.file_path = input(
                "例項化的檔案路徑不存在,請重新輸入:")

    def get_file_content(self):
        """Return the whole file decoded with ``self.encoding_type``."""
        with open(self.file_path, encoding=self.encoding_type) as fp:
            return fp.read()

    def count_number_str(self):
        """Return how many characters in the file are ASCII digits 0-9."""
        return sum(1 for ch in self.get_file_content() if "0" <= ch <= "9")

    def count_not_space_str(self):
        """Return how many characters in the file are not whitespace."""
        return sum(1 for ch in self.get_file_content() if not ch.isspace())

    def count_space_str(self):
        """Return how many characters in the file are whitespace."""
        return sum(1 for ch in self.get_file_content() if ch.isspace())

    def count_lines(self):
        """Return the number of lines in the file.

        A newline at the very end terminates the last line instead of
        opening an extra empty one, and an empty file has zero lines.
        """
        content = self.get_file_content()
        line_total = content.count("\n")
        # A final line that lacks a terminating newline still counts.
        if content and not content.endswith("\n"):
            line_total += 1
        return line_total
class Advanced_FileInfo(FileInfo):
    """FileInfo extended with single-line lookup and a printed summary."""

    def __init__(self, file_path, encoding_type="utf-8"):
        """Initialise the base class with the given path and encoding.

        Args:
            file_path: Path of the file to analyse.
            encoding_type: Text encoding used when the file is read.
        """
        # Forward the caller's encoding instead of a hard-coded "utf-8".
        FileInfo.__init__(self, file_path, encoding_type=encoding_type)

    def get_content_by_line_num(self, line_number):
        """Return the text of the 1-based line ``line_number``.

        Returns:
            The line without its trailing newline, or ``None`` when
            ``line_number`` is not an integer, is smaller than 1, or is
            past the end of the file.
        """
        # Reject 0 and negatives explicitly: negative list indexes would
        # otherwise wrap around and return a line counted from the end.
        if not isinstance(line_number, int) or line_number < 1:
            return None
        lines = self.get_file_content().split("\n")
        try:
            return lines[line_number - 1]
        except IndexError:
            return None

    def print_file_info(self):
        """Print the digit, non-whitespace, whitespace and line counts."""
        print("檔案的統計資訊如下:")
        print("檔案中包含的數字數量:%s" % self.count_number_str())
        print("檔案中包含的非空白字元數量:%s" % self.count_not_space_str())
        print("檔案中包含的空白字元數量:%s" % self.count_space_str())
        print("檔案中包含的行數:%s" % self.count_lines())
# Demo run: analyse e:\a.txt, show its first line, then print all statistics.
fi = Advanced_FileInfo("e:\\a.txt")
first_line = fi.get_content_by_line_num(1)
print("獲取第一行的檔案內容:", first_line)
fi.print_file_info()
正則表示式
>>> import re
>>> re.match(r".","abc")
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.match(r"..","abc")
<_sre.SRE_Match object; span=(0, 2), match='ab'>
>>> re.match(r".....","abc")
>>> print(re.match(r".....","abc"))
None
>>> print(re.match(r"...","a\nc"))
None
>>> print(re.match(r"...","a\nc",re.DOTALL))
<_sre.SRE_Match object; span=(0, 3), match='a\nc'>
>>> """I AM
... FSDFDS
... FSDW
... """
'I AM\nFSDFDS\nFSDW\n'
>>>
>>> print(re.match(r"[^abc]","hxxx"))
<_sre.SRE_Match object; span=(0, 1), match='h'>
>>> print(re.search(r"abc","sssssabc"))
<_sre.SRE_Match object; span=(5, 8), match='abc'>
>>> print(re.search(r"^abc","sssssabc"))
None
>>> print(re.match(r"\d","123"))
<_sre.SRE_Match object; span=(0, 1), match='1'>
>>> print(re.match(r"\d+","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> print(re.match(r"\d*","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>>
>>> print(re.match(r"\d*","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> print(re.match(r"\d*","a123"))
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> print(re.match(r"\d*?","123"))
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> print(re.match(r"\d+?","123"))
<_sre.SRE_Match object; span=(0, 1), match='1'>
>>> re.search(r"\d+","abc123dee").group()
'123'
>>> sorted(['5', '12', '123', '1234'],key=lambda x:len(x))[-1]
'1234'
>>> map(len,set(["a","abc"]))
<map object at 0x0000000002606198>
>>> list(map(len,set(["a","abc"])))
[1, 3]
>>> re.findall(r"\d+","1a12b123c1234d")
['1', '12', '123', '1234']
>>> re.findall(r"[a-zA-Z]+","1a12b123c1234d")
['a', 'b', 'c', 'd']
>>> re.findall(r"[a-zA-Z]+","1ab12bc123cd1234dA")
['ab', 'bc', 'cd', 'dA']
>>> re.findall(r"[A-Z]+[a-z]+|[a-z]+","ABBBossssAA abc")
['ABBBossss', 'abc']
>>> re.findall(r"[A-Z]+[a-z]*|[a-z]+","ABBBossssAA abc ABC")
['ABBBossss', 'AA', 'abc', 'ABC']
>>> re.search(r"\s","ab cd")
<_sre.SRE_Match object; span=(2, 3), match=' '>
>>> re.search(r"\s+","ab\t \r\ncd")
<_sre.SRE_Match object; span=(2, 6), match='\t \r\n'>
>>> re.findall(r"\S+","ab cd\t ef\nhi")
['ab', 'cd', 'ef', 'hi']
>>> "".join(re.findall(r"\S+","ab cd\t ef\nhi"))
'abcdefhi'
>>> re.search(r"\w+","aaaZAW0123_")
<_sre.SRE_Match object; span=(0, 11), match='aaaZAW0123_'>
>>> re.search(r"\w+","aaaZAW0123_").group()
'aaaZAW0123_'
>>> re.search(r"\W+","aaaZAW0123_-").group()
'-'
>>> re.search(r"\d?","a7").group()
''
? 匹配前一個字元0次或1次
* 匹配前一個字元0次或多次
+ 匹配前一個字元1次或多次
>>> re.search(r"\d{3}","123456789").group()
'123'
>>> re.search(r"\d{1,3}","123456789").group()
'123'
>>> re.search(r"\d{1,3}?","123456789").group()
'1'
>>> re.search(r"^abc","abcdddabc") #從開頭匹配^
<_sre.SRE_Match object; span=(0, 3), match='abc'>
>>> re.search(r"^abc","dddabc")
>>> re.search(r"^\d+","133dddabc")
<_sre.SRE_Match object; span=(0, 3), match='133'>
>>> re.search(r"\d+$","133dddabc5555")
<_sre.SRE_Match object; span=(9, 13), match='5555'>
>>> re.search(r"^123$","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"^123$","123sss") #無匹配;非多行模式下 ^123$ 的效果近似 \A123\Z,即相當於re.search(r"\A123\Z","123sss")
>>> re.search(r"^123$","ss123")
>>> re.search(r"\A123\Z","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"\d(\D+)\d","1abc3").group(1)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(1)
'1'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(2)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(2)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(3)
'3'
>>> pattern = re.compile(r"\d+")
>>> pattern.search("abc123")
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> result = pattern.search("123ddddd")
>>> if result:
... print("匹配到了")
... else:
... print("沒有匹配到!")
...
匹配到了
匹配由3個數字和3個字母組成的字串:3個數字開頭後接3個字母,或3個字母開頭後接3個數字
>>> re.match(r"\d{3}[a-zA-Z]{3}$|[a-zA-Z]{3}\d{3}$","abc123")
<_sre.SRE_Match object; span=(0, 6), match='abc123'>
>>> re.match(r".","\ndb",re.DOTALL)
<_sre.SRE_Match object; span=(0, 1), match='\n'>
>>> re.DOTALL
<RegexFlag.DOTALL: 16>
>>> re.match(r".","\ndb",16)
<_sre.SRE_Match object; span=(0, 1), match='\n'>
>>> re.match(r"ABc","abc",re.I) #re.I 忽略大小寫
<_sre.SRE_Match object; span=(0, 3), match='abc'>
>>> re.match(r"ABc","abc")
# DOTALL makes "." match newlines, so the named group "sign" can span lines.
p = re.compile(r'(\w+) (\w+)(?P<sign>.*)', flags=re.DOTALL)
# Pattern.groups is the number of capturing groups in the expression.
print("p.groups: ", p.groups)
>>> pattern =re.compile(r"abc")
>>> pattern.match("123abc")
>>> pattern.match("123abc",3)
<_sre.SRE_Match object; span=(3, 6), match='abc'>
>>> pattern.match("123abc",3,6)
<_sre.SRE_Match object; span=(3, 6), match='abc'>
>>> re.findall(r"\d+","1a2b3c")
['1', '2', '3']
>>> re.findall(r"[a-z](\d+)","1a2b3c")
['2', '3']
>>> re.findall(r"([a-z])(\d+)","1a2b3c")
[('a', '2'), ('b', '3')]
#有分組的情況下,只返回分組裡面的內容,將分組通過元組的形式返回
>>> re.findall(r"(([a-z])(\d+)([a-z]))","a1ab2bc3c")
[('a1a', 'a', '1', 'a'), ('b2b', 'b', '2', 'b'), ('c3c', 'c', '3', 'c')]
>>> s="a1a\nb2b\nc2c\n"
>>> re.search(r"[a-z]$",s)
<_sre.SRE_Match object; span=(10, 11), match='c'>
>>> re.search(r"[a-z]$",s,re.M)
<_sre.SRE_Match object; span=(2, 3), match='a'>
>>> re.findall(r"[a-z]$",s)
['c']
>>> re.findall(r"[a-z]$",s,re.M)
['a', 'b', 'c']
# finditer yields one match object per alphabetic run; print each one
# followed by a single space.
for found in re.finditer(r'[A-Za-z]+', 'one12two34three56four'):
    print(found.group(0), end=" ")
# Split on runs of digits; maxsplit=0 (the default) means no split limit.
p = re.compile(r'\d+')
resList = p.split('one1two2three3four4', maxsplit=0)
print(resList)
>>> resList = p.split("a 2 b 2 c 5 d",2) #指定切割次數
>>> print(resList)
['a ', ' b ', ' c 5 d']
>>> "aabbcc".replace("bb","**")
'aa**cc'
re.sub / re.subn(substitute,替換)
>>> re.sub(r"\d+","**","aa11bb22cc")
'aa**bb**cc'
>>> re.subn(r"[ \t\r]+","","aa 11b b22 \n \t cc")
('aa11bb22\ncc', 4)
p = re.compile(r'(\w+) (\w+)')
s = 'i say, hello world!'
# \2 and \1 are backreferences to the second and first groups, so every
# matched pair of words is written back in swapped order.
print(re.sub(p, r'\2 \1', s))
#當repl為方法時,將匹配的結果m傳入方法
def func(m):
    """Print groups 1 and 2 of match ``m`` and return them re-joined.

    The returned string is group 1 in title case, a space, then group 2.
    """
    first, second = m.group(1, 2)
    print("group1:", first)
    print("group2:", second)
    return " ".join((first.title(), second))
# With a callable replacement, each match object is handed to the callable
# and its return value is substituted.
print(re.sub(p, func, s))
import re
def multiply(m):
    """Return the integer matched by ``m`` doubled, as a string.

    The whole match (group 0) is parsed as an int, so it must be an
    integer literal.
    """
    return str(2 * int(m.group()))
# Pass multiply as the replacement argument: re.sub calls it with the match
# object of every number found and substitutes the string it returns.
# The pattern is a raw string so "\d" is not parsed as a string escape.
result = re.sub(r"\d+", multiply, "10 20 30 40 50")
print(result)
>>> re.search(r"(?P<num>\d+)","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"(?P<num>\d+)","123").group(1)
'123'
>>> re.search(r"(?P<num>\d+)","123").group("num")
'123'
>>> re.search(r"(?P<num>\d+) (?P=num)","123 123").group()
'123 123'
>>> re.search(r"(?P<num>\d+) (?P=num)","123 456").group()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>> re.search(r"(\d+) \1","123 123").group(1)
'123'
>>> re.search(r"(\d+) \1","123 456").group(1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>> re.search(r"((\d+) (\d+))","123 456").group()
'123 456'
>>> re.search(r"((\d+) (\d+))","123 456").group(1)
'123 456'
>>> re.search(r"((\d+) (\d+))","123 456").group(2)
'123'
>>> re.search(r"((\d+) (\d+))","123 456").group(3)
'456'
m = re.compile(r'(\w+)! (\w+) (\w+)').search('HMan! gloryroad train')
# expand() fills the template's \1-\3 backreferences from the match groups.
print(m.expand(r'resut:\3 \2 \1'))
group(0) 和 group() 等價,都返回整個正則匹配到的內容
group(1) 返回第一個分組匹配到的內容
import re
# Verbose mode: whitespace and "#" comments inside the pattern are ignored.
a = re.compile(r"""\d+ # 匹配至少1個連續的數字,自定義註釋
\. # 匹配點(.)
\d* # 匹配數字至少0個""", re.VERBOSE)
# b is the compact, single-line spelling of the same expression as a.
b = re.compile(r"\d+\.\d*")
print(a.search("test12.58 2.0 abc 3.8").group(0))
>>> re.search(r"((?<=abc)\d+)","abc123deb")
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> re.search(r"((?<=abc)\d+)","abc123deb").group()
'123'
>>> re.search(r"(\d+(?=abc))","xbc123abc").group()
'123'
>>> re.search(r"(\d+(?=abc))","xbc123abc").group()
'123'
>>> re.search(r"(?<=xbc)(\d+(?=abc))","xbc123abc").group()
'123'
>>> re.search(r"(?<!xbc)\d+","abc123abc").group()
'123'
>>> re.search(r"(?<!xbc)\d+","xbc123abc").group()
'23'
>>> re.search(r"(?<!xbc)\d+?","xbc123abc").group()
'2'
>>> re.search(r"\d+(?!xbc)","123abc").group()
'123'
>>> re.search(r"\d+(?!xbc)","123xbc").group()
'12'