[cpif] r429 - branches/alvaro/frontend-web

svn at argo.es svn at argo.es
Thu Oct 25 19:34:49 CEST 2007

Author: alvaro
Date: Thu Oct 25 19:34:47 2007
New Revision: 429

Parseador para la nueva versión...


Modified: branches/alvaro/frontend-web/parser_urls.py
--- branches/alvaro/frontend-web/parser_urls.py	(original)
+++ branches/alvaro/frontend-web/parser_urls.py	Thu Oct 25 19:34:47 2007
@@ -13,81 +13,21 @@
 def url_o_matic(url):
-  return """<a href="%(url)s" title="%(url)s">%(url)s</a>""" % locals()
+  return """ <a href="%(url)s" title="%(url)s">%(url)s</a> """ % locals()
 def parse(text, context = None):
-  if context == "a":
-    return None, [(True,text,context)]
+  if context == "a" or context == "url":
+#    print "URLS->",len(text), text, None, None, None
+    return len(text), text, None, None, None
   import re
-  regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
-  list = regexp.split(text)
-  tokens = []
-  for i in list:
-    if i and regexp.match(i):
-      tokens.append((False, url_o_matic(i), "a"))
-    elif i:
-      tokens.append((True, i, context))
-  return None, tokens
+  regexp = re.compile("|".join("(?:\s|\A)(%s)(?:\s|\Z)" % i for i in allowed_urls))
+  ret = regexp.split(text,1)
+  if len(ret) < 2:
+#    print "URLS->",len(text), text, None, None, None
+    return len(text), text, None, None, None
+  before = ret[0]
+  url = url_o_matic(ret[1])
+  after = "".join(i if i else "" for i in ret[2:])
+#  print "URLS->",len(before), "".join([before,url]), None, None, after
+  return len(before), "".join([before,url]), None, None, after
-import unittest
-class Test(unittest.TestCase):
-  urls = (
-      'http://example.com',
-      'http://example.com/',
-      'http://example.com/foo/',
-      'http://example.com/foo/bar/baz/gazonk',
-      'http://example.com/foo?bar=baz',
-      'http://example.com/foo?bar=baz&gazonk&gadaf',
-      'http://example.com:8080',
-      'http://example.com:8080/test',
-      'http://www.example.com:8080',
-      'http://www.example.com:8080/test',
-      'http://www.example.com',
-      'http://www.www.example.com',
-      'http://buh.bih.bah.beh.example.com',
-      'mailto:test at example.com',
-      'mailto:test at sub.example.com',
-  )
-  badurls = (
-      'htp://example.com',
-      'http:/example.com',
-      'http//example.com',
-      'http/example.com',
-      'http://example.c/',
-      'http://example.c/',
-      'http://.com',
-      'hattp://example.comcomcom',
-  )
-  def testConversion(self):
-    retval, tokens = parse("http://example.com")
-    if retval:
-      self.fail()
-    self.assertEqual(tokens[0][1], """<a href="http://example.com" title="http://example.com">http://example.com</a>""")
-  def testURLbad(self):
-    import re
-    regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
-    for i in self.badurls:
-        self.failIf(regexp.findall(i))
-        import random
-        rnd = random.sample("""?=)(/&%$"!'0987654321+-.,..,;:_"**$foo """,10)
-        self.failIf(regexp.findall("%s%s%s" % (rnd, i, rnd)))
-    for i in self.urls:
-        import random
-        rnd = random.sample("""?=)(/&%$"!'0987654321+-.,..,;:_"**$foo """,10)
-        self.failIf(regexp.findall("%s%s%s" % (rnd, i, rnd)))
-  def testURLok(self):
-    import re
-    regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
-    for i in self.urls:
-      self.assert_(regexp.findall(i))
-      import random
-      rnd = random.sample("""?=)(/&%$"!'0987654321+-.,..,;:_"**$%foo """,10)
-      self.assert_(regexp.findall("%s %s %s" % (rnd, i, rnd)))
-if __name__ == "__main__":
-  import unittest
-  unittest.main()

More information about the cpif mailing list