[cpif] r203 - trunk/frontend-web

svn at argo.es svn at argo.es
Sat Jun 30 00:09:27 CEST 2007


Author: alvaro
Date: Sat Jun 30 00:09:19 2007
New Revision: 203

Log:
Reestructuracion del codigo. Para hacer un parseo hay
que llamar a convert_to_html(text).

Cada modulo define la funcion parse(text, allow=True). El segundo
parametro, opcional, dice al parser si debe escapar todo, o debe por
el contrario hacer la conversion a HTML (se usa para cuando no se quiere
tener HTML y/o BBCode en el sitio, mediante las variables globales
correspondientes).

Cambiados los test de sitio.


Modified:
   trunk/frontend-web/parser_bbcode.py
   trunk/frontend-web/parser_html.py
   trunk/frontend-web/parsers.py

Modified: trunk/frontend-web/parser_bbcode.py
==============================================================================
--- trunk/frontend-web/parser_bbcode.py	(original)
+++ trunk/frontend-web/parser_bbcode.py	Sat Jun 30 00:09:19 2007
@@ -1,5 +1,5 @@
 #!/bin/python2.5
-
+"""BBCode to HTML parser"""
 # $Id$
 
 # Allow BBCode syntax errors?
@@ -7,22 +7,22 @@
 allow_errors = False
 
 allowed_html = {
-    "a": ["href", "title"],
-    "p": [],
-    "b": [],
-    "s": [],
-    "i": [],
-    "em": [],
-    "strong": [],
+    "a": ["href", "title"], 
+    "p": [], 
+    "b": [], 
+    "s": [], 
+    "i": [], 
+    "em": [], 
+    "strong": [], 
     "img": ["src", "alt", "title"]
 }
 
 allowed_tags = {
-    "b": ('<strong>', '</strong>', False),
-    "u": ('<span style="text-decoration:underline">', '</span>', False),
-    "s": ('<strike>', '</strike>', False),
-    "size": ('<span style="font-size:%(arg)s">', '</span>', True),
-    "i": ('<em>', '</em>', False),
+    "b": ('<strong>', '</strong>', False), 
+    "u": ('<span style="text-decoration:underline">', '</span>', False), 
+    "s": ('<strike>', '</strike>', False), 
+    "size": ('<span style="font-size:%(arg)s">', '</span>', True), 
+    "i": ('<em>', '</em>', False), 
     "url": ('<a href="%(arg)s">', '</a>', True), 
     "quote*": ('<blockquote>', '</blockquote>', False), 
     "quote": ('<blockquote><h4>%(arg)s escribi&oacute;:</h4>', '</blockquote>', True), 
@@ -31,18 +31,21 @@
 
 from parser_html import escape
 
+
 class BBCodeParser:
   """A BBCode to HTML parser"""
 
   _open = "["
   _close = "]"
+  _regexp_start = "\%s.*?\%s" % (_open, _close)
+  _regexp_end = "\%s(.*?)\%s" % (_open, _close)
 
-  def __init__(self):
+  def __init__(self, allow = True):
+    self._allow = allow
     self._text = ""
     self._tokens = []
     self._tags = []
     self._parsed = []
-
   def _reset(self):
     """Resets the internal arrays."""
     self._tokens = []
@@ -57,8 +60,8 @@
     """Tokenize the string and the tags in two separates lists
     Should be called after before _parse."""
     import re
-    self._tokens = re.split("\%s.*?\%s" % (self._open, self._close), self._text)
-    self._tags = re.findall("\%s(.*?)\%s" % (self._open, self._close), self._text)
+    self._tokens = re.split(self._regexp_start, self._text)
+    self._tags = re.findall(self._regexp_end, self._text)
 
   def _close_tag_to_html(self, tag):
     """Translate the BBCode tag to HTML anc close it."""
@@ -90,6 +93,10 @@
     self._parsed.append(self._tokens.pop(0))
     while self._tags:
       tag = self._tags.pop(0)
+      if not self._allow:
+        self._parsed.append("%s%s%s"%(self._open, escape(tag), self._close))
+        self._parsed.append(self._tokens.pop(0))
+        continue
       tag = tag.split("=", 1)
       tag[0] = tag[0].strip()
       if tag[0].startswith("/") and tag[0][1:] in allowed_tags.keys() and stack:
@@ -100,7 +107,8 @@
         else: # Si alguien activa allow_errors mereceria la mas lenta y dolorosa muerte
               # Esto es una chapuza para usuarios estupidos y chapuceros
           self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
-        if self._tokens and self._tag_closes(tag): self._parsed.append(self._tokens.pop(0))
+        if self._tokens and self._tag_closes(tag):
+          self._parsed.append(self._tokens.pop(0))
 
       elif tag[0] in allowed_tags.keys():
         tag.append(None)
@@ -112,11 +120,13 @@
         self._parsed.append(self._tag_to_html(tag))
         if not self._tag_closes(tag) and self._tokens:
           self._tokens.pop(0)
-        if self._tokens: self._parsed.append(self._tokens.pop(0))
+        if self._tokens: 
+          self._parsed.append(self._tokens.pop(0))
 
       else: # Si no conozco el tag se lo paso tal cual
         self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
-        if self._tokens: self._parsed.append(self._tokens.pop(0))
+        if self._tokens:
+          self._parsed.append(self._tokens.pop(0))
     
     if stack and allow_errors:
       while stack: # No me responsabilizo de lo que salga aqui...
@@ -133,3 +143,152 @@
     if not e:
       return e, "".join(self._parsed)
     return e or "Error, HTML no valido", None
+
+def parse(text, allow = True):
+  """Parses the text w/ BBCode into HTML. The allow argument is optional and
+tells the parser if the BBCode should be translated or no."""
+  parser = BBCodeParser(allow)
+  parser.feed(text)
+  return parser.parse()
+
+import unittest
+class TestBBCode(unittest.TestCase):
+  """Test Case for the BBCode parser"""
+  correct_bbcode = (
+      ('[ b]hola[/b]', '<strong>hola</strong>'), 
+      ('[b ]hola[/b]', '<strong>hola</strong>'), 
+      ('[ b ]hola[/b]', '<strong>hola</strong>'), 
+
+      ('[b]hola[ /b]', '<strong>hola</strong>'), 
+      ('[b]hola[/b ]', '<strong>hola</strong>'), 
+      ('[b]hola[ /b ]', '<strong>hola</strong>'), 
+
+
+      ('[b]hola[/b]', '<strong>hola</strong>'), 
+      ('antes [b]hola[/b]', 'antes <strong>hola</strong>'), 
+      ('[b]hola[/b] despues', '<strong>hola</strong> despues'), 
+      ('antes [b]hola[/b] despues', 'antes <strong>hola</strong> despues'), 
+
+      ('[url]la direccion[/url]', '<a href="la direccion">la direccion</a>'), 
+      ('antes [url]la direccion[/url]', 'antes <a href="la direccion">la direccion</a>'), 
+      ('[url]la direccion[/url] despues', '<a href="la direccion">la direccion</a> despues'), 
+      ('antes [url]la direccion[/url] despues', 'antes <a href="la direccion">la direccion</a> despues'), 
+
+      ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'), 
+      ('antes [url="la direccion"]el texto[/url]', 'antes <a href="la direccion">el texto</a>'), 
+      ('[url="la direccion"]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'), 
+      ('antes [url="la direccion"]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'), 
+
+      ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'), 
+
+      ('[url=la direccion]el texto[/url]', '<a href="la direccion">el texto</a>'), 
+      ('antes [url=la direccion]el texto[/url]', 'antes <a href="la direccion">el texto</a>'), 
+      ('[url =la direccion]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'), 
+      ('antes [url=la direccion]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'), 
+
+      ('[quote=paco]Hola[/quote]', '<blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote>'), 
+      ('antes [quote=paco]Hola[/quote]', 'antes <blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote>'), 
+      ('[quote=paco]Hola[/quote] despues', '<blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote> despues'), 
+      ('antes [quote=paco]Hola[/quote] despues', 'antes <blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote> despues'), 
+
+      ('[quote]Hola[/quote]', '<blockquote>Hola</blockquote>'), 
+      ('antes [quote]Hola[/quote]', 'antes <blockquote>Hola</blockquote>'), 
+      ('[quote]Hola[/quote] despues', '<blockquote>Hola</blockquote> despues'), 
+      ('antes [quote]Hola[/quote] despues', 'antes <blockquote>Hola</blockquote> despues'), 
+  
+      ('[u]hola[/u]', '<span style="text-decoration:underline">hola</span>'), 
+      
+      ('[size=20px]hola[/size]', '<span style="font-size:20px">hola</span>'), 
+
+      ('[n]hola[/n]', '[n]hola[/n]'), 
+
+      ("[img]laimagen1[/img]", '<img src="laimagen1" />'), 
+      ("antes [img]laimagen2[/img]", 'antes <img src="laimagen2" />'), 
+      ("[img]laimagen3[/img] despues", '<img src="laimagen3" /> despues'), 
+      ("antes [img]laimagen4[/img] despues", 'antes <img src="laimagen4" /> despues')
+  )
+
+  incorrect_bbcode = (
+      '[i]bla bla bla [b]hola[/n][/i]', 
+      '[b]Ay[/ b ]', 
+      '[b]Ay[ / b ]', 
+      '[b]Ay[/]', 
+      '[b]Ay, se me ha olvidado cerrar', 
+      '[url][/url]', 
+      '[b][/url]hola[/url]', 
+      '[b]caca[url]hola[/b][/url]', 
+      """Un texto [b]mas[/b] largo, con [b]negritas y [n]cursivas[/i]
+       [/b] e incluso [url="laurl"]enlaces [/b]con negritas[/b] dentro[/url]""")
+
+  xss = (
+      """[url="hola'"]adios[/url]""", 
+      """[url="hola''"'"'"]adios[/url]""", 
+)
+
+
+  def testCorrectBBCode(self):
+    """Test if the translation goes well."""
+    bbcode_list = []
+    html_list = []
+    global allow_errors
+    allow_errors = False
+#    print
+    for bbcode, html in self.correct_bbcode:
+      bbcode_list.append(bbcode)
+      html_list.append(html)
+      retval, result = parse(bbcode)
+#      print "-->", bbcode
+#      print "<--", html
+#      print "<--", result
+#      print
+      assert not retval
+      self.assertEqual(html, result)
+      retval, result = parse(bbcode)
+      assert not retval
+      self.assertEqual(html, result)
+    retval, result = parse("".join(bbcode_list))
+    assert not retval
+    self.assertEqual("".join(html_list), result)
+  
+  def test_NoBBCode(self):
+    """Test if the translation goes well in the case that we do not allow BBCode."""
+    list = []
+    global allow_errors
+    allow_errors = False
+    import globales
+    globales.allow_bbcode = False
+    globales.allow_html = True
+    for code, dummy in self.correct_bbcode:
+      list.append(code)
+      retval, result = parse(code, False)
+      assert not retval
+      self.assertEqual(escape(code), result)
+    retval, result = parse("".join(list), False)
+    assert not retval
+    self.assertEqual(escape("".join(list)), result)
+
+  def testIncorrectBBCode(self):
+    """Test if the translation fails using bad formed BBCode."""
+    global allow_errors
+    allow_errors = False
+    for bbcode in self.incorrect_bbcode:
+      retval, result = parse(bbcode)
+#      print "-->", bbcode
+#      print "<--", retval, result
+      assert retval
+      retval, result = parse(bbcode)
+      assert retval
+
+  def testIncorrectXSS(self):
+    """Test if the code is XSS safe. Uncomment the print statents"""
+    global allow_errors
+    allow_errors = False
+    for xss in self.xss:
+      retval, result = parse(xss)
+#      print "-->", xss
+#      print "<--", retval, result
+#      assert retval
+
+if __name__ == "__main__":
+  unittest.main()
+

Modified: trunk/frontend-web/parser_html.py
==============================================================================
--- trunk/frontend-web/parser_html.py	(original)
+++ trunk/frontend-web/parser_html.py	Sat Jun 30 00:09:19 2007
@@ -3,29 +3,45 @@
 # $Id$
 
 allowed_html = {
-    "a": ["href", "title"],
-    "p": [],
-    "b": [],
-    "s": [],
-    "i": [],
-    "em": [],
-    "strong": [],
+    "a": ["href", "title"], 
+    "p": [], 
+    "b": [], 
+    "s": [], 
+    "i": [], 
+    "em": [], 
+    "strong": [], 
     "img": ["src", "alt", "title"]
 }
 
 def escape(text):
-  entities = {'"': '&quot;', "'": "&apos;"}
+  entities = {
+      '<':'&lt;', 
+      '>':'&gt;', 
+      '"': '&quot;', 
+      "'": "&apos;", 
+      "&": "&amp;"
+      }
+  import re
+  pat = "(%s)" % "|".join( map(re.escape, entities.keys())  )
+  return re.sub( pat, lambda m:entities[m.group()], text )
   import xml.sax.saxutils
   return xml.sax.saxutils.escape(text, entities)
-    
-from sgmllib import SGMLParser,SGMLParseError
+
+from sgmllib import SGMLParser, SGMLParseError
 class HTMLParser(SGMLParser):
+  def __init__(self, allow=True):
+    self.allow = allow
+    SGMLParser.__init__(self)
   def reset(self):                       
     self.text = []
     self._stack = []
     SGMLParser.reset(self)
 
   def do_img(self, attrs):
+    if not self.allow:
+      strattrs = "".join([' %s="%s"' % (a, v) for a, v in attrs])
+      self.text.append(escape("<img%(strattrs)s />" % locals()))
+      return
     if not attrs: raise SGMLParseError, "Error, argumentos para <img> no validos"
     attrs_list = []
     for a, v in attrs:
@@ -40,16 +56,19 @@
     pass
 
   def do_br(self):
+    if not self.allow:
+      self.text.append(escape("<br />" % locals()))
+      return
     self.text.append("<br />")
 
   def end_br(self):
     pass
 
   def unknown_starttag(self, tag, attrs):
-    if tag in allowed_html.keys():
+    if tag in allowed_html.keys() and self.allow:
       attrs_list = []
       for a, v in attrs:
-        if a in allowed_html[tag] and v:
+        if a in allowed_html[tag] and v and self.allow:
           attrs_list.append(' %s="%s"' % (a, escape(v)))
         else:
           raise SGMLParseError, "Error, argumentos para <%s> no validos" % tag
@@ -61,7 +80,7 @@
       self.text.append(escape("<%(tag)s%(strattrs)s>" % locals()))
 
   def unknown_endtag(self, tag):         
-    if tag in allowed_html.keys():
+    if tag in allowed_html.keys() and self.allow:
       self.text.append("</%(tag)s>" % locals())
       if self._stack and tag == self._stack[-1]:
         self._stack.pop(-1)
@@ -88,3 +107,120 @@
     if self._stack: 
       raise SGMLParseError, "Error, tag <%s> sin cerrar" % self._stack.pop(-1)
     return "".join(self.text)
+
+def parse(text, allow = True):
+  parser = HTMLParser(allow)
+  parser.reset()
+  try:
+    parser.feed(text)
+    text = parser.get_text()
+    if not text: raise SGMLParseError, "HTML no valido"
+  except SGMLParseError, e:
+    return (e or "HTML no valido", None)
+  return False, text
+
+import unittest
+class TestHTML(unittest.TestCase):
+  correct_html = (
+      ('&amp;', '&amp;amp;'), 
+      ('<b>caca</b>', '<b>caca</b>'), 
+      ('b>', 'b&gt;'), 
+      ('<script>', '&lt;script&gt;'), 
+      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>', 
+       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2">bla bla</a>'), 
+      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>', 
+       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2" title="caca">bla bla </a>'), 
+      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />', 
+       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" />'), 
+      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">', 
+       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />'), 
+      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>', 
+       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />')
+  )
+  
+  correct_html2 = (
+      '&amp;', 
+      '<b>caca</b>', 
+      'b>', 
+      '<script>', 
+      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>', 
+      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>', 
+      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />', 
+      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />', 
+      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />', 
+  )
+
+  incorrect_html = (
+      '<b>', 
+      '<b', 
+      '</b>', 
+      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla', 
+      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>', 
+      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>', 
+      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>', 
+      '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />', 
+      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">', 
+  )
+
+  xss = (
+      """'';!--"<XSS>=&{()}""", 
+      """<?pi ?>""", 
+      """<?php ?>""", 
+      """';alert(String.fromCharCode(88, 83, 83))//\';alert(String.fromCharCode(88, 83, 83))//";alert(String.fromCharCode(88, 83, 83))//\";alert(String.fromCharCode(88, 83, 83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88, 83, 83))</SCRIPT>""", 
+      "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", 
+      """<IMG SRC="javascript:alert('XSS');">""", 
+      """<IMG SRC=javascript:alert('XSS')>""", 
+      """<IMG SRC=JaVaScRiPt:alert('XSS')>""", 
+      """<IMG SRC=javascript:alert(&quot;XSS&quot;)>""", 
+      """<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>""", 
+      '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">', 
+)
+
+  def test_NoBBHTML(self):
+    """Checks if the HTML is escaped when it is not allowed."""
+    html_list = []
+    import globales
+    for code in self.correct_html2:
+      html_list.append(code)
+      retval, result = parse(code, False)
+      assert not retval
+      self.assertEqual(escape(code), result)
+    retval, result = parse("".join(html_list), False)
+    assert not retval
+    self.assertEqual(escape("".join(html_list)), result)
+
+  def testCorrectHTML(self):
+    """Checks if the conversion goes well."""
+    html_list = []
+    parsed_html_list = []
+    for html, parsed_html in self.correct_html:
+      retval, result = parse(html)
+      html_list.append(html)
+      parsed_html_list.append(parsed_html)
+      assert not retval
+      self.assertEqual(parsed_html, result)
+      retval, result = parse(html)
+      assert not retval
+      self.assertEqual(parsed_html, result)
+    retval, result = parse("".join(html_list))
+    assert not retval
+    self.assertEqual("".join(parsed_html_list), result)
+
+  def testIncorrectHTML(self):
+    """Checks if the conversion fails w/ bad formed HTML."""
+    for html in self.incorrect_html:
+      retval, result = parse(html)
+      assert retval
+      retval, result = parse(html)
+      assert retval
+
+  def testIncorrectXSS(self):
+    """Checks if the generated HTML is XSS safe. Uncomment the print statements."""
+    for xss in self.xss:
+      retval, result = parse(xss)
+#      print "-->", xss
+#      print "<--", retval, result
+#      assert retval
+
+if __name__ == "__main__":
+  unittest.main()

Modified: trunk/frontend-web/parsers.py
==============================================================================
--- trunk/frontend-web/parsers.py	(original)
+++ trunk/frontend-web/parsers.py	Sat Jun 30 00:09:19 2007
@@ -2,319 +2,19 @@
 
 # $Id$
 
-import unittest
 import parser_bbcode
 import parser_html
-from parser_html import escape
 
 def convert_to_html(text):
+  """Converts the text into valid HTML"""
   from globales import allow_bbcode, allow_html
-  if allow_html:
-    parser = parser_html.HTMLParser()
-    parser.reset()
-    try:
-      parser.feed(text)
-      text = parser.get_text()
-      if not text: raise parser_html.SGMLParseError, "HTML no valido"
-    except parser_html.SGMLParseError, e:
-      return e or "HTML no valido", None
-  else:
-    text = escape(text)
-
-  if allow_bbcode:
-    parser = parser_bbcode.BBCodeParser()
-    parser.feed(text)
-    return parser.parse()
-  
-  return False, text
-
-class TestHTMLParser(unittest.TestCase):
-  correct_html = (
-      ('&amp;', '&amp;amp;'),
-      ('<b>caca</b>', '<b>caca</b>'),
-      ('b>', 'b&gt;'),
-      ('<script>', '&lt;script&gt;'),
-      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
-       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2">bla bla</a>'),
-      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
-       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2" title="caca">bla bla </a>'),
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" />'),
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />'),
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />')
-  )
-
-  incorrect_html = (
-      '<b>',
-      '<b',
-      '</b>',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
-      '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
-      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
-  )
-
-  def testCorrectHTML(self):
-    parser = parser_html.HTMLParser()
-    html_list = []
-    parsed_html_list = []
-    global allow_errors
-    allow_errors = False
-    print
-    for html, parsed_html in self.correct_html:
-      parser.reset()
-      parser.feed(html)
-      html_list.append(html)
-      parsed_html_list.append(parsed_html)
-      result = parser.get_text()
-#      print "-->", html
-#      print "<--", parsed_html
-#      print "<--", result
-#      print
-      assert result
-      self.assertEqual(parsed_html, result)
-    parser.reset()
-    parser.feed("".join(html_list))
-    result = parser.get_text()
-    assert result
-    self.assertEqual("".join(parsed_html_list), result)
-
-class TestBBCode(unittest.TestCase):
-  correct_bbcode = (
-      ('[ b]hola[/b]', '<strong>hola</strong>'),
-      ('[b ]hola[/b]', '<strong>hola</strong>'),
-      ('[ b ]hola[/b]', '<strong>hola</strong>'),
-
-      ('[b]hola[ /b]', '<strong>hola</strong>'),
-      ('[b]hola[/b ]', '<strong>hola</strong>'),
-      ('[b]hola[ /b ]', '<strong>hola</strong>'),
-
-
-      ('[b]hola[/b]', '<strong>hola</strong>'),
-      ('antes [b]hola[/b]', 'antes <strong>hola</strong>'),
-      ('[b]hola[/b] despues', '<strong>hola</strong> despues'),
-      ('antes [b]hola[/b] despues', 'antes <strong>hola</strong> despues'),
-
-      ('[url]la direccion[/url]', '<a href="la direccion">la direccion</a>'),
-      ('antes [url]la direccion[/url]', 'antes <a href="la direccion">la direccion</a>'),
-      ('[url]la direccion[/url] despues', '<a href="la direccion">la direccion</a> despues'),
-      ('antes [url]la direccion[/url] despues', 'antes <a href="la direccion">la direccion</a> despues'),
-
-      ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
-      ('antes [url="la direccion"]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
-      ('[url="la direccion"]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
-      ('antes [url="la direccion"]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
-      ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
-
-      ('[url=la direccion]el texto[/url]', '<a href="la direccion">el texto</a>'),
-      ('antes [url=la direccion]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
-      ('[url =la direccion]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
-      ('antes [url=la direccion]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
-      ('[quote=paco]Hola[/quote]', '<blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote>'),
-      ('antes [quote=paco]Hola[/quote]', 'antes <blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote>'),
-      ('[quote=paco]Hola[/quote] despues', '<blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote> despues'),
-      ('antes [quote=paco]Hola[/quote] despues', 'antes <blockquote><h4>paco escribi&oacute;:</h4>Hola</blockquote> despues'),
-
-      ('[quote]Hola[/quote]', '<blockquote>Hola</blockquote>'),
-      ('antes [quote]Hola[/quote]', 'antes <blockquote>Hola</blockquote>'),
-      ('[quote]Hola[/quote] despues', '<blockquote>Hola</blockquote> despues'),
-      ('antes [quote]Hola[/quote] despues', 'antes <blockquote>Hola</blockquote> despues'),
-  
-      ('[u]hola[/u]', '<span style="text-decoration:underline">hola</span>'),
-      
-      ('[size=20px]hola[/size]', '<span style="font-size:20px">hola</span>'),
-
-      ('[n]hola[/n]', '[n]hola[/n]'),
-
-      ("[img]laimagen1[/img]", '<img src="laimagen1" />'),
-      ("antes [img]laimagen2[/img]", 'antes <img src="laimagen2" />'),
-      ("[img]laimagen3[/img] despues", '<img src="laimagen3" /> despues'),
-      ("antes [img]laimagen4[/img] despues", 'antes <img src="laimagen4" /> despues')
-  )
-
-  correct_html = (
-      ('&amp;', '&amp;amp;'),
-      ('<b>caca</b>', '<b>caca</b>'),
-      ('b>', 'b&gt;'),
-      ('<script>', '&lt;script&gt;'),
-      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
-       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2">bla bla</a>'),
-      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
-       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2" title="caca">bla bla </a>'),
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" />'),
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />'),
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />')
-  )
-
-  incorrect_html = (
-      '<b>',
-      '<b',
-      '</b>',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
-      '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
-      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
-  )
-
-  incorrect_bbcode = (
-      '[i]bla bla bla [b]hola[/n][/i]',
-      '[b]Ay[/ b ]',
-      '[b]Ay[ / b ]',
-      '[b]Ay[/]',
-      '[b]Ay, se me ha olvidado cerrar',
-      '[url][/url]',
-      '[b][/url]hola[/url]',
-      '[b]caca[url]hola[/b][/url]',
-      """Un texto [b]mas[/b] largo, con [b]negritas y [n]cursivas[/i]
-       [/b] e incluso [url="laurl"]enlaces [/b]con negritas[/b] dentro[/url]""")
-
-  xss = (
-      """[url="hola'"]adios[/url]""",
-      """[url="hola''"'"'"]adios[/url]""",
-      """'';!--"<XSS>=&{()}""",
-      """<?pi ?>""",
-      """<?php ?>""",
-      """';alert(String.fromCharCode(88,83,83))//\';alert(String.fromCharCode(88,83,83))//";alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>""",
-      "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
-      """<IMG SRC="javascript:alert('XSS');">""",
-      """<IMG SRC=javascript:alert('XSS')>""",
-      """<IMG SRC=JaVaScRiPt:alert('XSS')>""",
-      """<IMG SRC=javascript:alert(&quot;XSS&quot;)>""",
-      """<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>""",
-      '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">',
-)
-
-
-  def testCorrectBBCode(self):
-    bbcode_list = []
-    html_list = []
-    global allow_errors
-    allow_errors = False
-    print
-    for bbcode, html in self.correct_bbcode:
-      bbcode_list.append(bbcode)
-      html_list.append(html)
-      retval, result = convert_to_html(bbcode)
-#      print "-->", bbcode
-#      print "<--", html
-#      print "<--", result
-#      print
-      assert not retval
-      self.assertEqual(html, result)
-      retval, result = convert_to_html(bbcode)
-      assert not retval
-      self.assertEqual(html, result)
-    retval, result = convert_to_html("".join(bbcode_list))
-    assert not retval
-    self.assertEqual("".join(html_list), result)
-  
-  def test_NoBBCodeYesHTML(self):
-    list = []
-    global allow_errors
-    allow_errors = False
-    import globales
-    globales.allow_bbcode = False
-    globales.allow_html = True
-    for code, dummy in self.correct_bbcode:
-      list.append(code)
-      retval, result = convert_to_html(code)
-      assert not retval
-      self.assertEqual(code,result)
-    retval, result = convert_to_html("".join(list))
-    assert not retval
-    self.assertEqual("".join(list),result)
-
-  def test_NoBBCodeNoHTML(self):
-    list = []
-    global allow_errors
-    allow_errors = False
-    import globales
-    globales.allow_bbcode = False
-    globales.allow_html = False
-    for code, dummy in self.correct_bbcode:
-      list.append(code)
-      retval, result = convert_to_html(code)
-      assert not retval
-      self.assertEqual(escape(code),result)
-    retval, result = convert_to_html("".join(list))
-    assert not retval
-    self.assertEqual(escape("".join(list)),result)
-    for code, dummy in self.correct_html:
-      list.append(code)
-      retval, result = convert_to_html(code)
-      assert not retval
-      self.assertEqual(escape(code),result)
-    retval, result = convert_to_html("".join(list))
-    assert not retval
-    self.assertEqual(escape("".join(list)),result)
-
-  def testCorrectHTML(self):
-    html_list = []
-    parsed_html_list = []
-    global allow_errors
-    allow_errors = False
-    print
-    for html, parsed_html in self.correct_html:
-      retval, result = convert_to_html(html)
-      html_list.append(html)
-      parsed_html_list.append(parsed_html)
-#      print "-->", html
-#      print "<--", parsed_html
-#      print "<--", result
-#      print
-      assert not retval
-      self.assertEqual(parsed_html, result)
-      retval, result = convert_to_html(html)
-      assert not retval
-      self.assertEqual(parsed_html, result)
-    retval, result = convert_to_html("".join(html_list))
-    assert not retval
-    self.assertEqual("".join(parsed_html_list), result)
-
-  def testIncorrectBBCode(self):
-    global allow_errors
-    allow_errors = False
-    print
-    for bbcode in self.incorrect_bbcode:
-      retval, result = convert_to_html(bbcode)
-#      print "-->", bbcode
-#      print "<--", retval, result
-      assert retval
-      retval, result = convert_to_html(bbcode)
-      assert retval
-
-  def testIncorrectHTML(self):
-    global allow_errors
-    allow_errors = False
-    print
-    for html in self.incorrect_html:
-      retval, result = convert_to_html(html)
-      print "-->", html
-      print "<--", retval, result
-      assert retval
-      retval, result = convert_to_html(html)
-      assert retval
-
-  def testIncorrectXSS(self):
-    global allow_errors
-    allow_errors = False
-    print
-    for xss in self.xss:
-      retval, result = convert_to_html(xss)
-      print "-->", xss
-      print "<--", retval, result
-#      assert retval
+  retval, text = parser_html.parse(text, allow_html)
+  if retval:
+    return retval, None
+  return parser_bbcode.parse(text, allow_bbcode)
 
 if __name__ == "__main__":
+  test1 = parser_html.TestHTML
+  test2 = parser_bbcode.TestBBCode
+  import unittest
   unittest.main()



More information about the cpif mailing list