[cpif] r429 - branches/alvaro/frontend-web
svn at argo.es
svn at argo.es
Thu Oct 25 19:34:49 CEST 2007
Author: alvaro
Date: Thu Oct 25 19:34:47 2007
New Revision: 429
Log:
Parseador para la nueva version...
Modified:
branches/alvaro/frontend-web/parser_urls.py
Modified: branches/alvaro/frontend-web/parser_urls.py
==============================================================================
--- branches/alvaro/frontend-web/parser_urls.py (original)
+++ branches/alvaro/frontend-web/parser_urls.py Thu Oct 25 19:34:47 2007
@@ -13,81 +13,21 @@
]
def url_o_matic(url):
- return """<a href="%(url)s" title="%(url)s">%(url)s</a>""" % locals()
+ return """ <a href="%(url)s" title="%(url)s">%(url)s</a> """ % locals()
def parse(text, context = None):
- if context == "a":
- return None, [(True,text,context)]
+ if context == "a" or context == "url":
+# print "URLS->",len(text), text, None, None, None
+ return len(text), text, None, None, None
import re
- regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
- list = regexp.split(text)
- tokens = []
- for i in list:
- if i and regexp.match(i):
- tokens.append((False, url_o_matic(i), "a"))
- elif i:
- tokens.append((True, i, context))
- return None, tokens
+ regexp = re.compile("|".join("(?:\s|\A)(%s)(?:\s|\Z)" % i for i in allowed_urls))
+ ret = regexp.split(text,1)
+ if len(ret) < 2:
+# print "URLS->",len(text), text, None, None, None
+ return len(text), text, None, None, None
+ before = ret[0]
+ url = url_o_matic(ret[1])
+ after = "".join(i if i else "" for i in ret[2:])
+# print "URLS->",len(before), "".join([before,url]), None, None, after
+ return len(before), "".join([before,url]), None, None, after
-import unittest
-class Test(unittest.TestCase):
- urls = (
- 'http://example.com',
- 'http://example.com/',
- 'http://example.com/foo/',
- 'http://example.com/foo/bar/baz/gazonk',
- 'http://example.com/foo?bar=baz',
- 'http://example.com/foo?bar=baz&gazonk&gadaf',
- 'http://example.com:8080',
- 'http://example.com:8080/test',
- 'http://www.example.com:8080',
- 'http://www.example.com:8080/test',
- 'http://www.example.com',
- 'http://www.www.example.com',
- 'http://buh.bih.bah.beh.example.com',
- 'mailto:test@example.com',
- 'mailto:test@sub.example.com',
- )
- badurls = (
- 'htp://example.com',
- 'http:/example.com',
- 'http//example.com',
- 'http/example.com',
- 'http://example.c/',
- 'http://example.c/',
- 'http://.com',
- 'hattp://example.comcomcom',
- )
-
- def testConversion(self):
- retval, tokens = parse("http://example.com")
- if retval:
- self.fail()
- self.assertEqual(tokens[0][1], """<a href="http://example.com" title="http://example.com">http://example.com</a>""")
-
- def testURLbad(self):
- import re
- regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
- for i in self.badurls:
- self.failIf(regexp.findall(i))
- import random
- rnd = random.sample("""?=)(/&%$"!'0987654321+-.,..,;:_"**$foo """,10)
- self.failIf(regexp.findall("%s%s%s" % (rnd, i, rnd)))
- for i in self.urls:
- import random
- rnd = random.sample("""?=)(/&%$"!'0987654321+-.,..,;:_"**$foo """,10)
- self.failIf(regexp.findall("%s%s%s" % (rnd, i, rnd)))
-
- def testURLok(self):
- import re
- regexp = re.compile("|".join("(\s|\A)(%s)(\s|\Z)" % i for i in allowed_urls))
- for i in self.urls:
- self.assert_(regexp.findall(i))
- import random
- rnd = random.sample("""?=)(/&%$"!'0987654321+-.,..,;:_"**$%foo """,10)
- self.assert_(regexp.findall("%s %s %s" % (rnd, i, rnd)))
-
-
-if __name__ == "__main__":
- import unittest
- unittest.main()
More information about the cpif mailing list