[cpif] r258 - trunk/frontend-web

Wed Jul 11 11:22:35 CEST 2007

Author: alvaro
Date: Wed Jul 11 11:22:34 2007
New Revision: 258

Log:
Corregida y clarificada la expresión regular para las
URLs.


Modified:
   trunk/frontend-web/parser_urls.py

Modified: trunk/frontend-web/parser_urls.py
==============================================================================

--- trunk/frontend-web/parser_urls.py	(original)
+++ trunk/frontend-web/parser_urls.py	Wed Jul 11 11:22:34 2007
@@ -1,6 +1,16 @@
 # $Id$
 
-allowed_urls = ['http://', 'ftp://']
+# Everybody stand back!!!
+domain = r"[a-zA-Z]{2,4}"
+subdomain = r"(?:[a-zA-Z0-9_\-]+\.)+"
+string = r"(?:[a-zA-Z0-9_\-]+)+"
+port = r"(?:\:[0-9]+)?"
+document = r"(?:/\S+)*"
+
+allowed_urls = [
+    '(?:http|ftp)://'+subdomain+domain+port+document+'/{0,1}',
+    'mailto:'+string+"@"+subdomain+domain,
+    ]
 
 def url_o_matic(url):
   return """<a href="%(url)s" title="%(url)s">%(url)s</a>""" % locals()
@@ -9,8 +19,7 @@
   if context == "a":
     return None, [(True,text,context)]
   import re
-  # Everybody stand back!!!
-  regexp = re.compile("|".join(["(\s|\A)(%s(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}(?:/\S+)*/{0,1})(\s|\Z)" % i for i in allowed_urls]))
+  regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
   list = regexp.split(text)
   tokens = []
   for i in list:
@@ -29,9 +38,15 @@
       'http://example.com/foo/bar/baz/gazonk',
       'http://example.com/foo?bar=baz',
       'http://example.com/foo?bar=baz&gazonk&gadaf',
+      'http://example.com:8080',
+      'http://example.com:8080/test',
+      'http://www.example.com:8080',
+      'http://www.example.com:8080/test',
       'http://www.example.com',
       'http://www.www.example.com',
       'http://buh.bih.bah.beh.example.com',
+      'mailto:test@example.com',
+      'mailto:test@sub.example.com',
   )
   badurls = (
       'htp://example.com',
@@ -52,7 +67,7 @@
 
   def testURLbad(self):
     import re
-    regexp = re.compile("|".join(["(?:\s|\A)(%s(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}(?:/\S+)*)/{0,1}(?:\s|\Z)" % i for i in allowed_urls]))
+    regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
     for i in self.badurls:
         self.failIf(regexp.findall(i))
         import random
@@ -65,7 +80,7 @@
 
   def testURLok(self):
     import re
-    regexp = re.compile("|".join(["(\s|\A)(%s(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}(?:/\S+)*/{0,1})(\s|\Z)" % i for i in allowed_urls]))
+    regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
     for i in self.urls:
       self.assert_(regexp.findall(i))
       import random