[cpif] r258 - trunk/frontend-web
svn at argo.es
svn at argo.es
Wed Jul 11 11:22:35 CEST 2007
Author: alvaro
Date: Wed Jul 11 11:22:34 2007
New Revision: 258
Log:
Corregida y clarificada la expresión regular para las
URLs.
Modified:
trunk/frontend-web/parser_urls.py
Modified: trunk/frontend-web/parser_urls.py
==============================================================================
--- trunk/frontend-web/parser_urls.py (original)
+++ trunk/frontend-web/parser_urls.py Wed Jul 11 11:22:34 2007
@@ -1,6 +1,16 @@
# $Id$
-allowed_urls = ['http://', 'ftp://']
+# Everybody stand back!!!
+domain = r"[a-zA-Z]{2,4}"
+subdomain = r"(?:[a-zA-Z0-9_\-]+\.)+"
+string = r"(?:[a-zA-Z0-9_\-]+)+"
+port = r"(?:\:[0-9]+)?"
+document = r"(?:/\S+)*"
+
+allowed_urls = [
+ '(?:http|ftp)://'+subdomain+domain+port+document+'/{0,1}',
+ 'mailto:'+string+"@"+subdomain+domain,
+ ]
def url_o_matic(url):
return """<a href="%(url)s" title="%(url)s">%(url)s</a>""" % locals()
@@ -9,8 +19,7 @@
if context == "a":
return None, [(True,text,context)]
import re
- # Everybody stand back!!!
- regexp = re.compile("|".join(["(\s|\A)(%s(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}(?:/\S+)*/{0,1})(\s|\Z)" % i for i in allowed_urls]))
+ regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
list = regexp.split(text)
tokens = []
for i in list:
@@ -29,9 +38,15 @@
'http://example.com/foo/bar/baz/gazonk',
'http://example.com/foo?bar=baz',
'http://example.com/foo?bar=baz&gazonk&gadaf',
+ 'http://example.com:8080',
+ 'http://example.com:8080/test',
+ 'http://www.example.com:8080',
+ 'http://www.example.com:8080/test',
'http://www.example.com',
'http://www.www.example.com',
'http://buh.bih.bah.beh.example.com',
+ 'mailto:test@example.com',
+ 'mailto:test@sub.example.com',
)
badurls = (
'htp://example.com',
@@ -52,7 +67,7 @@
def testURLbad(self):
import re
- regexp = re.compile("|".join(["(?:\s|\A)(%s(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}(?:/\S+)*)/{0,1}(?:\s|\Z)" % i for i in allowed_urls]))
+ regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
for i in self.badurls:
self.failIf(regexp.findall(i))
import random
@@ -65,7 +80,7 @@
def testURLok(self):
import re
- regexp = re.compile("|".join(["(\s|\A)(%s(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}(?:/\S+)*/{0,1})(\s|\Z)" % i for i in allowed_urls]))
+ regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
for i in self.urls:
self.assert_(regexp.findall(i))
import random
More information about the cpif
mailing list