July 2007
Intermediate to advanced
128 pages
2h 39m
English
Example 1-17. Simple match
# Find Spider-Man, Spiderman, SPIDER-MAN, etc.
import re

dailybugle = 'Spider-Man Menaces City!'

# "spider", an optional hyphen or space, "man", then any single character.
# The trailing "." means the name must be followed by at least one more
# character for the pattern to match.
pattern = r'spider[- ]?man.'

# re.match only tries the pattern at the start of the string;
# re.IGNORECASE makes the letters match in any case.
if re.match(pattern, dailybugle, re.IGNORECASE):
    # Parenthesized form works as a print statement in Python 2 and as the
    # print function in Python 3.
    print(dailybugle)

# Example 1-18. Match and capture group
# Match dates formatted like MM/DD/YYYY, MM-DD-YY,...
import re

date = '12/30/1969'

# Three capture groups: two-digit month, two-digit day, and a two- or
# four-digit year. Each separator may be "-" or "/" independently, and the
# ^...$ anchors require the whole string to be a date.
regex = re.compile(r'^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$')

match = regex.match(date)
if match:
    # month, day and year are only bound when the date matched.
    month = match.group(1)  # 12
    day = match.group(2)  # 30
    year = match.group(3)  # 1969

# Example 1-19. Simple substitution
# Convert <br> to <br /> for XHTML compliance
import re

text = 'Hello world. <br>'

# re.IGNORECASE so that <BR> and <Br> are converted as well.
regex = re.compile(r'<br>', re.IGNORECASE)
repl = r'<br />'

# sub() replaces every non-overlapping match and returns a new string;
# text itself is left unchanged.
result = regex.sub(repl, text)
Example 1-20. Harder substitution
# urlify - turn URLs into HTML links
import re

text = 'Check the web site, http://www.oreilly.com/catalog/regexppr.'

# The pattern is compiled with re.VERBOSE below, so the whitespace and the
# "#" comments inside it are ignored (except inside character classes, where
# "#" is a literal). Group 1 is the whole URL; group 2 is the scheme only.
pattern = r'''
\b # start at word boundary
( # capture to \1
(https?|telnet|gopher|file|wais|ftp) :
# resource and colon
[\w/#~:.?+=&%@!\-] +? # one or more valid chars
# take little as possible
)
(?= # lookahead
[.:?\-] * # for possible punc
(?: [^\w/#~:.?+=&%@!\-] # invalid character
| $ ) # or end of string
)'''

# Combine flags with bitwise OR, as the re documentation specifies; "+" only
# gives the right value when the flags happen to be distinct.
regex = re.compile(pattern, re.IGNORECASE | re.VERBOSE)

# The lookahead leaves trailing punctuation (such as a sentence-ending ".")
# outside the link.
# NOTE: the matched URL is inserted verbatim; characters such as "&" are not
# HTML-escaped in the generated markup.
result = regex.sub(r'<a href="\1">\1</a>', text)