xexexe

简单的正则表达式 Python 实现

这是正则表达式在 Python 中的简单用法示例。正则表达式在文本分析中用得很多，编写爬虫时也会用到。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import re
# Demo of the basic `re` entry points: match, search, findall, and capturing
# groups. Each "# ->" comment shows what the preceding print emits.
# Every print() call takes exactly one argument, so the script produces the
# same output under Python 2 and Python 3.

# --- Plain pattern against a simple string ---
# Named `text` rather than `str` so the builtin `str` type is not shadowed.
text = 'dog cat dog'
r_str = r'dog'

# re.match only attempts the match at the beginning of the string.
match = re.match(r_str, text)
print(match.group(0))
# -> dog

# re.search scans the string and returns the first location that matches.
match = re.search(r_str, text)
print(match.group(0))
# -> dog
print(match.start())
# -> 0
print(match.end())
# -> 3

# re.findall returns all non-overlapping matches as a list of strings.
List = re.findall(r_str, text)
print(List)
# -> ['dog', 'dog']

# --- Pattern without groups: only the whole match is available ---
contactInfo = 'Doe, John: 555-1212'
r_str = r'\w+, \w+: \S+'
match = re.search(r_str, contactInfo)
print(match.group(0))
# -> Doe, John: 555-1212

# --- Numbered groups: group(0) is the whole match, group(1..n) the parts ---
r_group_str = r'(\w+), (\w+): (\S+)'
match = re.search(r_group_str, contactInfo)
print(match.group(0))
# -> Doe, John: 555-1212
print(match.group(1))
# -> Doe
print(match.group(2))
# -> John
print(match.group(3))
# -> 555-1212

# --- Named groups: (?P<name>...) lets each part be fetched by name ---
r_group_name_str = r'(?P<last>\w+), (?P<first>\w+): (?P<phone>\S+)'
match = re.search(r_group_name_str, contactInfo)
print(match.group('last'))
# -> Doe
print(match.group('first'))
# -> John
print(match.group('phone'))
# -> 555-1212

# With several groups in the pattern, findall returns a list of tuples,
# one tuple of group values per match.
List = re.findall(r_group_str, contactInfo)
print(List)
# -> [('Doe', 'John', '555-1212')]