学习笔记
一、正则表达式–元字符
re 模块使 Python 语言拥有全部的正则表达式功能
1. 数量词
import re

# Quantifier {3,5}: collect every run of 3 to 5 consecutive ASCII letters.
# 'Lr' is only two letters long, so it is not matched.
a = 'Excel 12345Word23456PPT12Lr'
letter_run = re.compile('[a-zA-Z]{3,5}')
r = letter_run.findall(a)
print(r)  # ['Excel', 'Word', 'PPT']
import re

a = 'exce0excell3excel3'

# '*' applies only to the preceding 'l': "exce" followed by zero or more 'l'.
r = re.findall('excel*', a)
print(r)  # ['exce', 'excell', 'excel']

# '.*' is greedy: from the first "excel" it swallows the rest of the string.
r = re.findall('excel.*', a)
print(r)  # ['excell3excel3']
import re

# '+' requires at least one 'l' after "exce", so the leading "exce0" is skipped.
a = 'exce0excell3excel3'
one_or_more_l = re.compile('excel+')
r = one_or_more_l.findall(a)
print(r)  # ['excell', 'excel']
import re

# '?' makes the trailing 'l' optional: "exce" plus at most one 'l'.
a = 'exce0excell3excel3'
optional_l = re.compile('excel?')
r = optional_l.findall(a)
print(r)  # ['exce', 'excel', 'excel']
2. 字符匹配
line = 'xyz,xcz.xfc.xdz,xaz,xez,xec'

# [de]: the middle character must be 'd' or 'e'.
in_set = re.compile('x[de]z')
r = in_set.findall(line)
print(r)  # ['xdz', 'xez']

# [^de]: the middle character must be anything except 'd' or 'e'.
not_in_set = re.compile('x[^de]z')
r = not_in_set.findall(line)
print(r)  # ['xyz', 'xcz', 'xaz']
import re

a = 'Excel 12345Word\n23456_PPT12lr'
# \w matches word characters (letters, digits, underscore).
# The pattern is a raw string so that '\w' reaches the regex engine as-is
# instead of being parsed as an (invalid) string escape sequence.
r = re.findall(r'\w', a)
print(r)
import re

a = 'Excel 12345Word\n23456_PPT12lr'
# \W is the complement of \w: here only the space and the newline match.
# The pattern is a raw string so that '\W' reaches the regex engine as-is
# instead of being parsed as an (invalid) string escape sequence.
r = re.findall(r'\W', a)
print(r)  # [' ', '\n']
3. 边界匹配
import re

tel = '13811115888'
# ^ and $ anchor the pattern to the start and end of the string, so the
# whole string must consist of 8 to 11 digits.
# Raw string: keeps '\d' from being parsed as an invalid string escape.
r = re.findall(r'^\d{8,11}$', tel)
print(r)  # ['13811115888']
4. 组
import re

a = 'abcabcabcxyzabcabcxyzabc'

# (abc){2}: the group "abc" repeated twice in a row. With one capture group,
# findall returns the text of the group, not the whole match.
r = re.findall('(abc){2}', a)
print(r)  # ['abc', 'abc']

# (abc){3}: only the leading "abcabcabc" has three repetitions.
r = re.findall('(abc){3}', a)
print(r)  # ['abc']
5. 匹配模式参数
import re

# re.I (re.IGNORECASE): the lowercase pattern also matches the uppercase "FBI".
a = 'abcFBIabcCIAabc'
case_insensitive = re.compile('fbi', re.IGNORECASE)
r = case_insensitive.findall(a)
print(r)  # ['FBI']
import re

# Flags are combined with '|'. re.S (re.DOTALL) lets '.' match the newline
# that follows "FBI"; re.I (re.IGNORECASE) ignores letter case.
a = 'abcFBI\nabcCIAabc'
combined_flags = re.IGNORECASE | re.DOTALL
r = re.findall('fbi.{1}', a, flags=combined_flags)
print(r)  # ['FBI\n']
二、方法
re.findall
- 匹配出字符串中所有与指定值相关的值
- 以列表的形式返回
- 未匹配则返回空列表
import re
re.findall(pattern, string, flags=0)
pattern.findall(string[ , pos[ , endpos]])
import re

# findall returns every single-digit match as its own list element.
line = "111aaabbb222小呼噜奥利奥"
single_digit = re.compile('[0-9]')
r = single_digit.findall(line)
print(r)  # ['1', '1', '1', '2', '2', '2']
re.match
- re.match 尝试从字符串的起始位置匹配一个模式
- 如果不是起始位置匹配成功的话,match()就返回 None。
re.match(pattern, string, flags=0)
# re.match only succeeds when the pattern matches at position 0 of the string.
url = 'www.xxxx.com'
at_start = re.match('www', url)
print(at_start)                # the match object
print(at_start.span())         # (start, end) of the match
print(re.match('com', url))    # 'com' is not at the start, so this is None
<re.Match object; span=(0, 3), match='www'>
(0, 3)
None
group匹配对象
import re

# group(0) is the whole match; group(n) is the n-th parenthesised capture.
a = 'life is short,i use python,i love python'
between_words = re.compile('life(.*)python(.*)python')
r = between_words.search(a)
for index in (0, 1, 2):
    print(r.group(index))
print(r.group(0, 1, 2))  # several indices at once give a tuple
print(r.groups())        # only the captured groups, as a tuple
import re

line = "Cats are smarter than dogs"

# Each pattern splits the sentence around a different word.
# (.*) is greedy, (.*?) is lazy and stops at the first following space.
matchObj1 = re.match(r'(.*) are (.*?) .*', line, re.M | re.I)
matchObj2 = re.match(r'(.*) smarter (.*?) .*', line, re.M | re.I)
matchObj3 = re.match(r'(.*) than (.*)', line, re.M | re.I)
print(matchObj1)
print(matchObj2)
print(matchObj3)

# re.match returns None when nothing matches, so test the result before
# calling group() on it.
if matchObj1:
    print("matchObj1.group() : ", matchObj1.group())
    print("matchObj1.group(1) : ", matchObj1.group(1))
    print("matchObj1.group(2) : ", matchObj1.group(2))
else:
    print("No match!!")

if matchObj2:
    print("matchObj2.group() : ", matchObj2.group())
    print("matchObj2.group(1) : ", matchObj2.group(1))
    print("matchObj2.group(2) : ", matchObj2.group(2))
else:
    print("No match!!")

if matchObj3:
    print("matchObj3.group() : ", matchObj3.group())
    print("matchObj3.group(1) : ", matchObj3.group(1))
    print("matchObj3.group(2) : ", matchObj3.group(2))
else:
    print("No match!!")
import re

# Named `text` rather than `str` so the builtin str type is not shadowed.
text = "a b a b"

# Greedy (.*) extends to the last 'b'; lazy (.*?) stops at the first 'b'.
matchObj1 = re.match(r'a(.*)b', text, re.M | re.I)
matchObj2 = re.match(r'a(.*?)b', text, re.M | re.I)
print("matchObj1.group() : ", matchObj1.group())  # a b a b
print("matchObj2.group() : ", matchObj2.group())  # a b
re.search
扫描整个字符串并返回第一个成功的匹配。
re.search(pattern, string, flags=0)
import re

line = "cats are smarter than dogs"

# re.match anchors at the start of the string; re.search scans the whole string.
matchObj = re.match(r'dogs', line, re.M | re.I)        # None: "dogs" is not at position 0
matchObj1 = re.search(r'dogs', line, re.M | re.I)      # finds "dogs" at the end
matchObj2 = re.match(r'(.*) dogs', line, re.M | re.I)  # matches from position 0

# Both functions return None on failure, so test the result before group().
if matchObj:
    print("match --> matchObj.group() : ", matchObj.group())
else:
    print("No match!!")

if matchObj1:
    print("match --> matchObj1.group() : ", matchObj1.group())
else:
    print("No match!!")

if matchObj2:
    print("match --> matchObj2.group() : ", matchObj2.group())
else:
    print("No match!!")
re.compile
- re.compile是将正则表达式转换为模式对象
- 这样可以更有效率匹配。使用compile转换一次之后,以后每次使用模式时就不用进行转换
三、检索和替换
re.sub 替换字符串
re.sub('被替换的','替换成的',a)
import re

# re.sub replaces every occurrence of the pattern with the replacement text.
a = 'abcFBIabcCIAabc'
r = re.sub(pattern='FBI', repl='BBQ', string=a)
print(r)  # abcBBQabcCIAabc
import re

a = 'abcFBIabcFBIaFBICIAabc'
# count limits how many matches are replaced (here only the first one).
# It is passed by keyword: positional count/flags are deprecated since 3.13.
r = re.sub('FBI', 'BBQ', a, count=1)
print(r)  # abcBBQabcFBIaFBICIAabc
import re

a = 'abcFBIabcFBIaFBICIAabc'


def 函数名(形参):
    """Return the matched text wrapped in dollar signs.

    re.sub calls this once per match and passes the match object; the
    returned string replaces that match.
    """
    分段获取 = 形参.group()
    return '$' + 分段获取 + '$'


# The replacement argument may be a callable instead of a string.
r = re.sub('FBI', 函数名, a)
print(r)  # abc$FBI$abc$FBI$a$FBI$CIAabc
|