1. 程式人生 > 實用技巧 >python正則表示式的分組

python正則表示式的分組

(...)分組
import re

# (...) creates a capturing group.
string_01 = 'apple567WRAP899WRAP223funny2356'
pattern_01 = '([A-Z]{4})[0-9]+'
res_01 = re.compile(pattern_01).search(string_01)
# Index 0 is the whole match; index 1 is the first capturing group.
print(res_01, res_01[0], res_01[1])

# Output: <re.Match object; span=(8, 15), match='WRAP899'> WRAP899 WRAP
(?P<name>)給分組起別名
import re

# (?P<name>...) gives a capturing group an alias (a named group).
string_02 = 'apple567WRAP899WRAP223funny2356'
pattern_02 = '(?P<name_01>[A-Z]{4})(?P<name_02>[0-9]+).'
res_02 = re.search(pattern_02, string_02)
print(res_02, res_02.groupdict())
# Named groups are still numbered, so they can be fetched by index...
print(res_02.group(), res_02.group(1), res_02.group(2))
# ...or by their alias.
print(res_02.group('name_01'), res_02.group('name_02'))

# Output:
# <re.Match object; span=(8, 16), match='WRAP899W'> {'name_01': 'WRAP', 'name_02': '899'}
# WRAP899W WRAP 899
# WRAP 899
(?P=name)引用分組
import re

# (?P=name) is a backreference to a named group.
# The backreference must match exactly the same text that the referenced
# group matched.
string_03 = 'apple567WRAP899WRAP223funny2356'
pattern_03 = '(?P<name_01>[A-Z]{4}).*(?P=name_01)'
res_03 = re.search(pattern_03, string_03)
print(res_03, res_03.groupdict())
# A backreference is not itself a capturing group, so res_03.group(2)
# would raise "IndexError: no such group".
print(res_03.group(), res_03.group(1))
print(res_03.group('name_01'))

# Output:
# <re.Match object; span=(8, 19), match='WRAP899WRAP'> {'name_01': 'WRAP'}
# WRAP899WRAP WRAP
# WRAP
(?#...) 在正則中寫註釋:括號裡的內容會被忽略掉
import re

# (?#...) is an inline comment; the regex engine ignores its contents.
string_04 = 'apple567WRAP899WRAP223funny2356'
pattern_04 = '(?#這是註釋)([A-Z]{4})[0-9]+'
res_04 = re.compile(pattern_04).search(string_04)
# The comment group captures nothing, so group 1 is still ([A-Z]{4}).
print(res_04, res_04[0], res_04[1])

# Output: <re.Match object; span=(8, 15), match='WRAP899'> WRAP899 WRAP
(?:...) 表示這個分組不會被捕獲: 所以下面例子group(1)捕獲到的是第二個分組
import re

# (?:...) groups without capturing, so the first numbered group is the
# second pair of parentheses.
string_05 = 'apple567WRAP899WRAP223funny2356'
pattern_05 = '(?:[A-Z]{4})([0-9]+)'
res_05 = re.compile(pattern_05).search(string_05)
print(res_05, res_05[1])

# Output: <re.Match object; span=(8, 15), match='WRAP899'> 899
(?=...) 後向界定(即 positive lookahead,正向先行斷言):括號內的內容只用於檢查,不會包含在匹配結果中
import re

# (?=...) is a positive lookahead: the ... inside the parentheses is the text
# that must come right after the part you want to match. It is checked but
# not included in the match.
string_06 = 'apple567WRAP899WRAP223funny2356'
# Raw string so that \d reaches the regex engine instead of being treated
# as an (invalid) string escape sequence.
pattern_06 = r'[A-Z]{4}\d{2}(?=3)'
res_06 = re.search(pattern_06, string_06)
print(res_06)

# Output: <re.Match object; span=(15, 21), match='WRAP22'>
(?!...) 後向非界定(即 negative lookahead,負向先行斷言):和上面的後向界定相反,就相當於 not
import re

# (?!...) is a negative lookahead: the match succeeds only when the text
# right after it is NOT the ... inside the parentheses.
string_07 = 'apple567WRAP899WRAP223funny2356'
# Raw string so that \d reaches the regex engine instead of being treated
# as an (invalid) string escape sequence.
pattern_07 = r'[A-Z]{4}\d{2}(?!9)'
res_07 = re.search(pattern_07, string_07)
# 'WRAP89' is rejected because it is followed by '9'; 'WRAP22' is accepted.
print(res_07)

# Output: <re.Match object; span=(15, 21), match='WRAP22'>
(?<=...) 前向界定(即 positive lookbehind,正向後行斷言):括號內的內容只用於檢查,不會包含在匹配結果中
import re

# (?<=...) is a positive lookbehind: the ... inside the parentheses is the
# text that must come right before the part you want to match. It is checked
# but not included in the match.
string_08 = 'apple567WRAP899WRAP223funny2356'
# Raw string so that \d reaches the regex engine instead of being treated
# as an (invalid) string escape sequence.
pattern_08 = r'(?<=99)[A-Z]{4}\d'
res_08 = re.search(pattern_08, string_08)
print(res_08)

# Output: <re.Match object; span=(15, 20), match='WRAP2'>
(?<!...) 前向非界定(即 negative lookbehind,負向後行斷言):與上面前向界定相反
import re

# (?<!...) is a negative lookbehind: the match succeeds only when the text
# right before it is NOT the ... inside the parentheses.
string_09 = 'apple567WRAP899WRAP223funny2356'
# Raw string so that \d reaches the regex engine instead of being treated
# as an (invalid) string escape sequence.
pattern_09 = r'(?<!567)[A-Z]{4}\d'
res_09 = re.search(pattern_09, string_09)
# The first 'WRAP8' is rejected because it is preceded by '567'.
print(res_09)

# Output: <re.Match object; span=(15, 20), match='WRAP2'>