[cpif] r203 - trunk/frontend-web
svn at argo.es
svn at argo.es
Sat Jun 30 00:09:27 CEST 2007
Author: alvaro
Date: Sat Jun 30 00:09:19 2007
New Revision: 203
Log:
Reestructuracion del codigo. Para hacer un parseo hay
que llamar a convert_to_html(text).
Cada modulo define la funcion parse(text, allow=True). El segundo
parametro, opcional, dice al parser si debe escapar todo, o debe por
el contrario hacer la conversion a HTML (se usa para cuando no se quiere
tener HTML y/o BBCode en el sitio, mediante las variables globales
correspondientes).
Cambiados los test de sitio.
Modified:
trunk/frontend-web/parser_bbcode.py
trunk/frontend-web/parser_html.py
trunk/frontend-web/parsers.py
Modified: trunk/frontend-web/parser_bbcode.py
==============================================================================
--- trunk/frontend-web/parser_bbcode.py (original)
+++ trunk/frontend-web/parser_bbcode.py Sat Jun 30 00:09:19 2007
@@ -1,5 +1,5 @@
#!/bin/python2.5
-
+"""BBCode to HTML parser"""
# $Id$
# Allow BBCode syntax errors?
@@ -7,22 +7,22 @@
allow_errors = False
allowed_html = {
- "a": ["href", "title"],
- "p": [],
- "b": [],
- "s": [],
- "i": [],
- "em": [],
- "strong": [],
+ "a": ["href", "title"],
+ "p": [],
+ "b": [],
+ "s": [],
+ "i": [],
+ "em": [],
+ "strong": [],
"img": ["src", "alt", "title"]
}
allowed_tags = {
- "b": ('<strong>', '</strong>', False),
- "u": ('<span style="text-decoration:underline">', '</span>', False),
- "s": ('<strike>', '</strike>', False),
- "size": ('<span style="font-size:%(arg)s">', '</span>', True),
- "i": ('<em>', '</em>', False),
+ "b": ('<strong>', '</strong>', False),
+ "u": ('<span style="text-decoration:underline">', '</span>', False),
+ "s": ('<strike>', '</strike>', False),
+ "size": ('<span style="font-size:%(arg)s">', '</span>', True),
+ "i": ('<em>', '</em>', False),
"url": ('<a href="%(arg)s">', '</a>', True),
"quote*": ('<blockquote>', '</blockquote>', False),
"quote": ('<blockquote><h4>%(arg)s escribió:</h4>', '</blockquote>', True),
@@ -31,18 +31,21 @@
from parser_html import escape
+
class BBCodeParser:
"""A BBCode to HTML parser"""
_open = "["
_close = "]"
+ _regexp_start = "\%s.*?\%s" % (_open, _close)
+ _regexp_end = "\%s(.*?)\%s" % (_open, _close)
- def __init__(self):
+ def __init__(self, allow = True):
+ self._allow = allow
self._text = ""
self._tokens = []
self._tags = []
self._parsed = []
-
def _reset(self):
"""Resets the internal arrays."""
self._tokens = []
@@ -57,8 +60,8 @@
"""Tokenize the string and the tags in two separates lists
Should be called after before _parse."""
import re
- self._tokens = re.split("\%s.*?\%s" % (self._open, self._close), self._text)
- self._tags = re.findall("\%s(.*?)\%s" % (self._open, self._close), self._text)
+ self._tokens = re.split(self._regexp_start, self._text)
+ self._tags = re.findall(self._regexp_end, self._text)
def _close_tag_to_html(self, tag):
"""Translate the BBCode tag to HTML anc close it."""
@@ -90,6 +93,10 @@
self._parsed.append(self._tokens.pop(0))
while self._tags:
tag = self._tags.pop(0)
+ if not self._allow:
+ self._parsed.append("%s%s%s"%(self._open, escape(tag), self._close))
+ self._parsed.append(self._tokens.pop(0))
+ continue
tag = tag.split("=", 1)
tag[0] = tag[0].strip()
if tag[0].startswith("/") and tag[0][1:] in allowed_tags.keys() and stack:
@@ -100,7 +107,8 @@
else: # Si alguien activa allow_errors mereceria la mas lenta y dolorosa muerte
# Esto es una chapuza para usuarios estupidos y chapuceros
self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
- if self._tokens and self._tag_closes(tag): self._parsed.append(self._tokens.pop(0))
+ if self._tokens and self._tag_closes(tag):
+ self._parsed.append(self._tokens.pop(0))
elif tag[0] in allowed_tags.keys():
tag.append(None)
@@ -112,11 +120,13 @@
self._parsed.append(self._tag_to_html(tag))
if not self._tag_closes(tag) and self._tokens:
self._tokens.pop(0)
- if self._tokens: self._parsed.append(self._tokens.pop(0))
+ if self._tokens:
+ self._parsed.append(self._tokens.pop(0))
else: # Si no conozco el tag se lo paso tal cual
self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
- if self._tokens: self._parsed.append(self._tokens.pop(0))
+ if self._tokens:
+ self._parsed.append(self._tokens.pop(0))
if stack and allow_errors:
while stack: # No me responsabilizo de lo que salga aqui...
@@ -133,3 +143,152 @@
if not e:
return e, "".join(self._parsed)
return e or "Error, HTML no valido", None
+
+def parse(text, allow = True):
+ """Parses the text w/ BBCode into HTML. The allow argument is optional and
+tells the parser if the BBCode should be translated or no."""
+ parser = BBCodeParser(allow)
+ parser.feed(text)
+ return parser.parse()
+
+import unittest
+class TestBBCode(unittest.TestCase):
+ """Test Case for the BBCode parser"""
+ correct_bbcode = (
+ ('[ b]hola[/b]', '<strong>hola</strong>'),
+ ('[b ]hola[/b]', '<strong>hola</strong>'),
+ ('[ b ]hola[/b]', '<strong>hola</strong>'),
+
+ ('[b]hola[ /b]', '<strong>hola</strong>'),
+ ('[b]hola[/b ]', '<strong>hola</strong>'),
+ ('[b]hola[ /b ]', '<strong>hola</strong>'),
+
+
+ ('[b]hola[/b]', '<strong>hola</strong>'),
+ ('antes [b]hola[/b]', 'antes <strong>hola</strong>'),
+ ('[b]hola[/b] despues', '<strong>hola</strong> despues'),
+ ('antes [b]hola[/b] despues', 'antes <strong>hola</strong> despues'),
+
+ ('[url]la direccion[/url]', '<a href="la direccion">la direccion</a>'),
+ ('antes [url]la direccion[/url]', 'antes <a href="la direccion">la direccion</a>'),
+ ('[url]la direccion[/url] despues', '<a href="la direccion">la direccion</a> despues'),
+ ('antes [url]la direccion[/url] despues', 'antes <a href="la direccion">la direccion</a> despues'),
+
+ ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
+ ('antes [url="la direccion"]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
+ ('[url="la direccion"]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
+ ('antes [url="la direccion"]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
+
+ ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
+
+ ('[url=la direccion]el texto[/url]', '<a href="la direccion">el texto</a>'),
+ ('antes [url=la direccion]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
+ ('[url =la direccion]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
+ ('antes [url=la direccion]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
+
+ ('[quote=paco]Hola[/quote]', '<blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
+ ('antes [quote=paco]Hola[/quote]', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
+ ('[quote=paco]Hola[/quote] despues', '<blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
+ ('antes [quote=paco]Hola[/quote] despues', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
+
+ ('[quote]Hola[/quote]', '<blockquote>Hola</blockquote>'),
+ ('antes [quote]Hola[/quote]', 'antes <blockquote>Hola</blockquote>'),
+ ('[quote]Hola[/quote] despues', '<blockquote>Hola</blockquote> despues'),
+ ('antes [quote]Hola[/quote] despues', 'antes <blockquote>Hola</blockquote> despues'),
+
+ ('[u]hola[/u]', '<span style="text-decoration:underline">hola</span>'),
+
+ ('[size=20px]hola[/size]', '<span style="font-size:20px">hola</span>'),
+
+ ('[n]hola[/n]', '[n]hola[/n]'),
+
+ ("[img]laimagen1[/img]", '<img src="laimagen1" />'),
+ ("antes [img]laimagen2[/img]", 'antes <img src="laimagen2" />'),
+ ("[img]laimagen3[/img] despues", '<img src="laimagen3" /> despues'),
+ ("antes [img]laimagen4[/img] despues", 'antes <img src="laimagen4" /> despues')
+ )
+
+ incorrect_bbcode = (
+ '[i]bla bla bla [b]hola[/n][/i]',
+ '[b]Ay[/ b ]',
+ '[b]Ay[ / b ]',
+ '[b]Ay[/]',
+ '[b]Ay, se me ha olvidado cerrar',
+ '[url][/url]',
+ '[b][/url]hola[/url]',
+ '[b]caca[url]hola[/b][/url]',
+ """Un texto [b]mas[/b] largo, con [b]negritas y [n]cursivas[/i]
+ [/b] e incluso [url="laurl"]enlaces [/b]con negritas[/b] dentro[/url]""")
+
+ xss = (
+ """[url="hola'"]adios[/url]""",
+ """[url="hola''"'"'"]adios[/url]""",
+)
+
+
+ def testCorrectBBCode(self):
+ """Test if the translation goes well."""
+ bbcode_list = []
+ html_list = []
+ global allow_errors
+ allow_errors = False
+# print
+ for bbcode, html in self.correct_bbcode:
+ bbcode_list.append(bbcode)
+ html_list.append(html)
+ retval, result = parse(bbcode)
+# print "-->", bbcode
+# print "<--", html
+# print "<--", result
+# print
+ assert not retval
+ self.assertEqual(html, result)
+ retval, result = parse(bbcode)
+ assert not retval
+ self.assertEqual(html, result)
+ retval, result = parse("".join(bbcode_list))
+ assert not retval
+ self.assertEqual("".join(html_list), result)
+
+ def test_NoBBCode(self):
+ """Test if the translation goes well in the case that we do not allow BBCode."""
+ list = []
+ global allow_errors
+ allow_errors = False
+ import globales
+ globales.allow_bbcode = False
+ globales.allow_html = True
+ for code, dummy in self.correct_bbcode:
+ list.append(code)
+ retval, result = parse(code, False)
+ assert not retval
+ self.assertEqual(escape(code), result)
+ retval, result = parse("".join(list), False)
+ assert not retval
+ self.assertEqual(escape("".join(list)), result)
+
+ def testIncorrectBBCode(self):
+ """Test if the translation fails using bad formed BBCode."""
+ global allow_errors
+ allow_errors = False
+ for bbcode in self.incorrect_bbcode:
+ retval, result = parse(bbcode)
+# print "-->", bbcode
+# print "<--", retval, result
+ assert retval
+ retval, result = parse(bbcode)
+ assert retval
+
+ def testIncorrectXSS(self):
+ """Test if the code is XSS safe. Uncomment the print statents"""
+ global allow_errors
+ allow_errors = False
+ for xss in self.xss:
+ retval, result = parse(xss)
+# print "-->", xss
+# print "<--", retval, result
+# assert retval
+
+if __name__ == "__main__":
+ unittest.main()
+
Modified: trunk/frontend-web/parser_html.py
==============================================================================
--- trunk/frontend-web/parser_html.py (original)
+++ trunk/frontend-web/parser_html.py Sat Jun 30 00:09:19 2007
@@ -3,29 +3,45 @@
# $Id$
allowed_html = {
- "a": ["href", "title"],
- "p": [],
- "b": [],
- "s": [],
- "i": [],
- "em": [],
- "strong": [],
+ "a": ["href", "title"],
+ "p": [],
+ "b": [],
+ "s": [],
+ "i": [],
+ "em": [],
+ "strong": [],
"img": ["src", "alt", "title"]
}
def escape(text):
- entities = {'"': '"', "'": "'"}
+ entities = {
+ '<':'<',
+ '>':'>',
+ '"': '"',
+ "'": "'",
+ "&": "&"
+ }
+ import re
+ pat = "(%s)" % "|".join( map(re.escape, entities.keys()) )
+ return re.sub( pat, lambda m:entities[m.group()], text )
import xml.sax.saxutils
return xml.sax.saxutils.escape(text, entities)
-
-from sgmllib import SGMLParser,SGMLParseError
+
+from sgmllib import SGMLParser, SGMLParseError
class HTMLParser(SGMLParser):
+ def __init__(self, allow=True):
+ self.allow = allow
+ SGMLParser.__init__(self)
def reset(self):
self.text = []
self._stack = []
SGMLParser.reset(self)
def do_img(self, attrs):
+ if not self.allow:
+ strattrs = "".join([' %s="%s"' % (a, v) for a, v in attrs])
+ self.text.append(escape("<img%(strattrs)s />" % locals()))
+ return
if not attrs: raise SGMLParseError, "Error, argumentos para <img> no validos"
attrs_list = []
for a, v in attrs:
@@ -40,16 +56,19 @@
pass
def do_br(self):
+ if not self.allow:
+ self.text.append(escape("<br />" % locals()))
+ return
self.text.append("<br />")
def end_br(self):
pass
def unknown_starttag(self, tag, attrs):
- if tag in allowed_html.keys():
+ if tag in allowed_html.keys() and self.allow:
attrs_list = []
for a, v in attrs:
- if a in allowed_html[tag] and v:
+ if a in allowed_html[tag] and v and self.allow:
attrs_list.append(' %s="%s"' % (a, escape(v)))
else:
raise SGMLParseError, "Error, argumentos para <%s> no validos" % tag
@@ -61,7 +80,7 @@
self.text.append(escape("<%(tag)s%(strattrs)s>" % locals()))
def unknown_endtag(self, tag):
- if tag in allowed_html.keys():
+ if tag in allowed_html.keys() and self.allow:
self.text.append("</%(tag)s>" % locals())
if self._stack and tag == self._stack[-1]:
self._stack.pop(-1)
@@ -88,3 +107,120 @@
if self._stack:
raise SGMLParseError, "Error, tag <%s> sin cerrar" % self._stack.pop(-1)
return "".join(self.text)
+
+def parse(text, allow = True):
+ parser = HTMLParser(allow)
+ parser.reset()
+ try:
+ parser.feed(text)
+ text = parser.get_text()
+ if not text: raise SGMLParseError, "HTML no valido"
+ except SGMLParseError, e:
+ return (e or "HTML no valido", None)
+ return False, text
+
+import unittest
+class TestHTML(unittest.TestCase):
+ correct_html = (
+ ('&', '&amp;'),
+ ('<b>caca</b>', '<b>caca</b>'),
+ ('b>', 'b>'),
+ ('<script>', '<script>'),
+ ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>'),
+ ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>'),
+ ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />'),
+ ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />'),
+ ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />')
+ )
+
+ correct_html2 = (
+ '&',
+ '<b>caca</b>',
+ 'b>',
+ '<script>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />',
+ )
+
+ incorrect_html = (
+ '<b>',
+ '<b',
+ '</b>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
+ '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
+ )
+
+ xss = (
+ """'';!--"<XSS>=&{()}""",
+ """<?pi ?>""",
+ """<?php ?>""",
+ """';alert(String.fromCharCode(88, 83, 83))//\';alert(String.fromCharCode(88, 83, 83))//";alert(String.fromCharCode(88, 83, 83))//\";alert(String.fromCharCode(88, 83, 83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88, 83, 83))</SCRIPT>""",
+ "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
+ """<IMG SRC="javascript:alert('XSS');">""",
+ """<IMG SRC=javascript:alert('XSS')>""",
+ """<IMG SRC=JaVaScRiPt:alert('XSS')>""",
+ """<IMG SRC=javascript:alert("XSS")>""",
+ """<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>""",
+ '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">',
+)
+
+ def test_NoBBHTML(self):
+ """Checks if the HTML is escaped when it is not allowed."""
+ html_list = []
+ import globales
+ for code in self.correct_html2:
+ html_list.append(code)
+ retval, result = parse(code, False)
+ assert not retval
+ self.assertEqual(escape(code), result)
+ retval, result = parse("".join(html_list), False)
+ assert not retval
+ self.assertEqual(escape("".join(html_list)), result)
+
+ def testCorrectHTML(self):
+ """Checks if the conversion goes well."""
+ html_list = []
+ parsed_html_list = []
+ for html, parsed_html in self.correct_html:
+ retval, result = parse(html)
+ html_list.append(html)
+ parsed_html_list.append(parsed_html)
+ assert not retval
+ self.assertEqual(parsed_html, result)
+ retval, result = parse(html)
+ assert not retval
+ self.assertEqual(parsed_html, result)
+ retval, result = parse("".join(html_list))
+ assert not retval
+ self.assertEqual("".join(parsed_html_list), result)
+
+ def testIncorrectHTML(self):
+ """Checks if the conversion fails w/ bad formed HTML."""
+ for html in self.incorrect_html:
+ retval, result = parse(html)
+ assert retval
+ retval, result = parse(html)
+ assert retval
+
+ def testIncorrectXSS(self):
+ """Checks if the generated HTML is XSS safe. Uncomment the print statements."""
+ for xss in self.xss:
+ retval, result = parse(xss)
+# print "-->", xss
+# print "<--", retval, result
+# assert retval
+
+if __name__ == "__main__":
+ unittest.main()
Modified: trunk/frontend-web/parsers.py
==============================================================================
--- trunk/frontend-web/parsers.py (original)
+++ trunk/frontend-web/parsers.py Sat Jun 30 00:09:19 2007
@@ -2,319 +2,19 @@
# $Id$
-import unittest
import parser_bbcode
import parser_html
-from parser_html import escape
def convert_to_html(text):
+ """Converts the text into valid HTML"""
from globales import allow_bbcode, allow_html
- if allow_html:
- parser = parser_html.HTMLParser()
- parser.reset()
- try:
- parser.feed(text)
- text = parser.get_text()
- if not text: raise parser_html.SGMLParseError, "HTML no valido"
- except parser_html.SGMLParseError, e:
- return e or "HTML no valido", None
- else:
- text = escape(text)
-
- if allow_bbcode:
- parser = parser_bbcode.BBCodeParser()
- parser.feed(text)
- return parser.parse()
-
- return False, text
-
-class TestHTMLParser(unittest.TestCase):
- correct_html = (
- ('&', '&amp;'),
- ('<b>caca</b>', '<b>caca</b>'),
- ('b>', 'b>'),
- ('<script>', '<script>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />')
- )
-
- incorrect_html = (
- '<b>',
- '<b',
- '</b>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
- '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
- )
-
- def testCorrectHTML(self):
- parser = parser_html.HTMLParser()
- html_list = []
- parsed_html_list = []
- global allow_errors
- allow_errors = False
- print
- for html, parsed_html in self.correct_html:
- parser.reset()
- parser.feed(html)
- html_list.append(html)
- parsed_html_list.append(parsed_html)
- result = parser.get_text()
-# print "-->", html
-# print "<--", parsed_html
-# print "<--", result
-# print
- assert result
- self.assertEqual(parsed_html, result)
- parser.reset()
- parser.feed("".join(html_list))
- result = parser.get_text()
- assert result
- self.assertEqual("".join(parsed_html_list), result)
-
-class TestBBCode(unittest.TestCase):
- correct_bbcode = (
- ('[ b]hola[/b]', '<strong>hola</strong>'),
- ('[b ]hola[/b]', '<strong>hola</strong>'),
- ('[ b ]hola[/b]', '<strong>hola</strong>'),
-
- ('[b]hola[ /b]', '<strong>hola</strong>'),
- ('[b]hola[/b ]', '<strong>hola</strong>'),
- ('[b]hola[ /b ]', '<strong>hola</strong>'),
-
-
- ('[b]hola[/b]', '<strong>hola</strong>'),
- ('antes [b]hola[/b]', 'antes <strong>hola</strong>'),
- ('[b]hola[/b] despues', '<strong>hola</strong> despues'),
- ('antes [b]hola[/b] despues', 'antes <strong>hola</strong> despues'),
-
- ('[url]la direccion[/url]', '<a href="la direccion">la direccion</a>'),
- ('antes [url]la direccion[/url]', 'antes <a href="la direccion">la direccion</a>'),
- ('[url]la direccion[/url] despues', '<a href="la direccion">la direccion</a> despues'),
- ('antes [url]la direccion[/url] despues', 'antes <a href="la direccion">la direccion</a> despues'),
-
- ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
- ('antes [url="la direccion"]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
- ('[url="la direccion"]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
- ('antes [url="la direccion"]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
- ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
-
- ('[url=la direccion]el texto[/url]', '<a href="la direccion">el texto</a>'),
- ('antes [url=la direccion]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
- ('[url =la direccion]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
- ('antes [url=la direccion]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
- ('[quote=paco]Hola[/quote]', '<blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
- ('antes [quote=paco]Hola[/quote]', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
- ('[quote=paco]Hola[/quote] despues', '<blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
- ('antes [quote=paco]Hola[/quote] despues', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
-
- ('[quote]Hola[/quote]', '<blockquote>Hola</blockquote>'),
- ('antes [quote]Hola[/quote]', 'antes <blockquote>Hola</blockquote>'),
- ('[quote]Hola[/quote] despues', '<blockquote>Hola</blockquote> despues'),
- ('antes [quote]Hola[/quote] despues', 'antes <blockquote>Hola</blockquote> despues'),
-
- ('[u]hola[/u]', '<span style="text-decoration:underline">hola</span>'),
-
- ('[size=20px]hola[/size]', '<span style="font-size:20px">hola</span>'),
-
- ('[n]hola[/n]', '[n]hola[/n]'),
-
- ("[img]laimagen1[/img]", '<img src="laimagen1" />'),
- ("antes [img]laimagen2[/img]", 'antes <img src="laimagen2" />'),
- ("[img]laimagen3[/img] despues", '<img src="laimagen3" /> despues'),
- ("antes [img]laimagen4[/img] despues", 'antes <img src="laimagen4" /> despues')
- )
-
- correct_html = (
- ('&', '&amp;'),
- ('<b>caca</b>', '<b>caca</b>'),
- ('b>', 'b>'),
- ('<script>', '<script>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />')
- )
-
- incorrect_html = (
- '<b>',
- '<b',
- '</b>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
- '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
- )
-
- incorrect_bbcode = (
- '[i]bla bla bla [b]hola[/n][/i]',
- '[b]Ay[/ b ]',
- '[b]Ay[ / b ]',
- '[b]Ay[/]',
- '[b]Ay, se me ha olvidado cerrar',
- '[url][/url]',
- '[b][/url]hola[/url]',
- '[b]caca[url]hola[/b][/url]',
- """Un texto [b]mas[/b] largo, con [b]negritas y [n]cursivas[/i]
- [/b] e incluso [url="laurl"]enlaces [/b]con negritas[/b] dentro[/url]""")
-
- xss = (
- """[url="hola'"]adios[/url]""",
- """[url="hola''"'"'"]adios[/url]""",
- """'';!--"<XSS>=&{()}""",
- """<?pi ?>""",
- """<?php ?>""",
- """';alert(String.fromCharCode(88,83,83))//\';alert(String.fromCharCode(88,83,83))//";alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>""",
- "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
- """<IMG SRC="javascript:alert('XSS');">""",
- """<IMG SRC=javascript:alert('XSS')>""",
- """<IMG SRC=JaVaScRiPt:alert('XSS')>""",
- """<IMG SRC=javascript:alert("XSS")>""",
- """<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>""",
- '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">',
-)
-
-
- def testCorrectBBCode(self):
- bbcode_list = []
- html_list = []
- global allow_errors
- allow_errors = False
- print
- for bbcode, html in self.correct_bbcode:
- bbcode_list.append(bbcode)
- html_list.append(html)
- retval, result = convert_to_html(bbcode)
-# print "-->", bbcode
-# print "<--", html
-# print "<--", result
-# print
- assert not retval
- self.assertEqual(html, result)
- retval, result = convert_to_html(bbcode)
- assert not retval
- self.assertEqual(html, result)
- retval, result = convert_to_html("".join(bbcode_list))
- assert not retval
- self.assertEqual("".join(html_list), result)
-
- def test_NoBBCodeYesHTML(self):
- list = []
- global allow_errors
- allow_errors = False
- import globales
- globales.allow_bbcode = False
- globales.allow_html = True
- for code, dummy in self.correct_bbcode:
- list.append(code)
- retval, result = convert_to_html(code)
- assert not retval
- self.assertEqual(code,result)
- retval, result = convert_to_html("".join(list))
- assert not retval
- self.assertEqual("".join(list),result)
-
- def test_NoBBCodeNoHTML(self):
- list = []
- global allow_errors
- allow_errors = False
- import globales
- globales.allow_bbcode = False
- globales.allow_html = False
- for code, dummy in self.correct_bbcode:
- list.append(code)
- retval, result = convert_to_html(code)
- assert not retval
- self.assertEqual(escape(code),result)
- retval, result = convert_to_html("".join(list))
- assert not retval
- self.assertEqual(escape("".join(list)),result)
- for code, dummy in self.correct_html:
- list.append(code)
- retval, result = convert_to_html(code)
- assert not retval
- self.assertEqual(escape(code),result)
- retval, result = convert_to_html("".join(list))
- assert not retval
- self.assertEqual(escape("".join(list)),result)
-
- def testCorrectHTML(self):
- html_list = []
- parsed_html_list = []
- global allow_errors
- allow_errors = False
- print
- for html, parsed_html in self.correct_html:
- retval, result = convert_to_html(html)
- html_list.append(html)
- parsed_html_list.append(parsed_html)
-# print "-->", html
-# print "<--", parsed_html
-# print "<--", result
-# print
- assert not retval
- self.assertEqual(parsed_html, result)
- retval, result = convert_to_html(html)
- assert not retval
- self.assertEqual(parsed_html, result)
- retval, result = convert_to_html("".join(html_list))
- assert not retval
- self.assertEqual("".join(parsed_html_list), result)
-
- def testIncorrectBBCode(self):
- global allow_errors
- allow_errors = False
- print
- for bbcode in self.incorrect_bbcode:
- retval, result = convert_to_html(bbcode)
-# print "-->", bbcode
-# print "<--", retval, result
- assert retval
- retval, result = convert_to_html(bbcode)
- assert retval
-
- def testIncorrectHTML(self):
- global allow_errors
- allow_errors = False
- print
- for html in self.incorrect_html:
- retval, result = convert_to_html(html)
- print "-->", html
- print "<--", retval, result
- assert retval
- retval, result = convert_to_html(html)
- assert retval
-
- def testIncorrectXSS(self):
- global allow_errors
- allow_errors = False
- print
- for xss in self.xss:
- retval, result = convert_to_html(xss)
- print "-->", xss
- print "<--", retval, result
-# assert retval
+ retval, text = parser_html.parse(text, allow_html)
+ if retval:
+ return retval, None
+ return parser_bbcode.parse(text, allow_bbcode)
if __name__ == "__main__":
+ test1 = parser_html.TestHTML
+ test2 = parser_bbcode.TestBBCode
+ import unittest
unittest.main()
More information about the cpif
mailing list