A comprehensive regular expression to match many kinds of URLs.
import re
# Pattern for locating URLs inside free text: one of the listed schemes, a
# colon, then the shortest run of URL characters that is followed by optional
# trailing punctuation and then either a non-URL character or the end of the
# input.  Trailing punctuation such as a sentence-ending "." is therefore left
# out of the match.
# re.VERBOSE is required: the blank after "(?:" is ignored only because of it.
url_re = re.compile(
    r"\b"                                           # start on a word boundary
    r"(?: http|telnet|gopher|file|wais|ftp|https)"  # one of the accepted schemes
    r":"                                            # scheme separator
    r"[\w/#~:.?+=&%@!\-.:?\-]+?"                    # URL characters, as few as possible
    r"(?="                                          # look ahead without consuming:
    r"[.:?\-]*"                                     #   any trailing punctuation,
    r"(?:[^\w/#~:.?+=&%@!\-.:?\-]|$)"               #   then a non-URL character or the end
    r")",
    re.VERBOSE,
)
# Matches an "@" followed by a run of word characters; group 1 holds that run.
# The quantifier is lazy, but the trailing \b can only succeed at the end of
# the run, so the capture is always the complete run of word characters.
reply_re = re.compile(
    r"@"         # literal at-sign
    r"([\w]+?)"  # group 1: word characters following the "@"
    r"\b"        # stop where the run of word characters ends
)
© Copyright 2001-2010 Taylan Pince. All rights reserved.