[cpif] r198 - in trunk/frontend-web: . skins/default
svn at argo.es
svn at argo.es
Fri Jun 29 19:50:33 CEST 2007
Author: alvaro
Date: Fri Jun 29 19:50:31 2007
New Revision: 198
Log:
Separados los parsers.
Aniadidos nuevos tests para el parser HTML
Aniadidos flags en globales.py
Added:
trunk/frontend-web/parser_html.py
- copied, changed from r197, /trunk/frontend-web/parser_bbcode.py
trunk/frontend-web/parsers.py
- copied, changed from r197, /trunk/frontend-web/parser_bbcode.py
Modified:
trunk/frontend-web/globales.py
trunk/frontend-web/parser_bbcode.py
trunk/frontend-web/skins/default/body_threadlist_thread.html
trunk/frontend-web/skins/default/body_threadlist_thread_unread.html
Modified: trunk/frontend-web/globales.py
==============================================================================
--- trunk/frontend-web/globales.py (original)
+++ trunk/frontend-web/globales.py Fri Jun 29 19:50:31 2007
@@ -5,17 +5,17 @@
limitarse al minimo posible.
"""
-# 'monitor' sera un decorador utilizado para
-# sincronizar el acceso al storage.
+# 'monitor': decorator used to synchronize the
+# storage access
monitor=None
-# Version global de todo el programa
+# Global version of the program
version=None
-# Numero de mensajes por pagina de hilo
+# Number of messages per page on a thread
thread_len=10
-# Permitimos el acceso anonimo al foro
+# Allow anonymous access?
allow_anonymous=True
# Simultaneous HTTP connections
@@ -24,3 +24,8 @@
# OpenID Support
openid_support=True
+# Allow BBcode?
+bbcode_support=True
+
+# Allow HTML?
+html_support=True
Modified: trunk/frontend-web/parser_bbcode.py
==============================================================================
--- trunk/frontend-web/parser_bbcode.py (original)
+++ trunk/frontend-web/parser_bbcode.py Fri Jun 29 19:50:31 2007
@@ -29,11 +29,7 @@
"img": ('<img src="%(arg)s" />', None, True)
}
-def escape(text):
- entities = {'"': '"', "'": "'"}
- import xml.sax.saxutils
- return xml.sax.saxutils.escape(text, entities)
-
+from parser_html import escape
class BBCodeParser:
"""A BBCode to HTML parser"""
@@ -93,84 +89,16 @@
"""Preparse the text, just leaving the allowed html tags. Escape dangerous sequences.
Should be called before _parse"""
- import sgmllib
- class PreParseHTML(sgmllib.SGMLParser):
- self._stack = []
- def reset(self):
- self.text = []
- self._stack = []
- sgmllib.SGMLParser.reset(self)
-
- def do_img(self, attrs):
- if not attrs: raise sgmllib.SGMLParseError, "Error, argumentos para <img> no validos"
- attrs_list = []
- for a, v in attrs:
- if a in allowed_html["img"] and v:
- attrs_list.append(' %s="%s"' % (a, escape(v)))
- else:
- raise sgmllib.SGMLParseError, "Error, argumentos para <img> no validos"
- strattrs = "".join(attrs_list)
- self.text.append("<img%(strattrs)s />" % locals())
-
- def end_img(self):
- pass
-
- def do_br(self):
- self.text.append("<br />")
-
- def end_br(self):
- pass
-
- def unknown_starttag(self, tag, attrs):
- if tag in allowed_html.keys():
- attrs_list = []
- for a, v in attrs:
- if a in allowed_html[tag] and v:
- attrs_list.append(' %s="%s"' % (a, escape(v)))
- else:
- raise sgmllib.SGMLParseError, "Error, argumentos para <%s> no validos" % tag
- strattrs = "".join(attrs_list)
- self.text.append("<%(tag)s%(strattrs)s>" % locals())
- self._stack.append(tag)
- else:
- strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
- self.text.append(escape("<%(tag)s%(strattrs)s>" % locals()))
-
- def unknown_endtag(self, tag):
- if tag in allowed_html.keys():
- self.text.append("</%(tag)s>" % locals())
- if self._stack and tag == self._stack[-1]:
- self._stack.pop(-1)
- else:
- raise sgmllib.SGMLParseError, "Error, tag <%s> sin abrir" % tag
- else:
- self.text.append(escape("</%(tag)s>" % locals()))
-
- def handle_charref(self, ref):
- self.text.append("&#%(ref)s;" % locals())
-
- def handle_entityref(self, ref):
- self.text.append("&%(ref)s" % locals())
- import htmlentitydefs
- if htmlentitydefs.entitydefs.has_key(ref):
- self.text.append(";")
-
- def handle_data(self, text):
- import xml.sax.saxutils
- self.text.append(xml.sax.saxutils.escape(text))
-
- def get_text(self):
- """Return processed HTML as a single string"""
- if self._stack:
- raise sgmllib.SGMLParseError, "Error, tag <%s> sin cerrar" % self._stack.pop(-1)
- return "".join(self.text)
-
+ import parser_html
+ from globales import html_support
+ if not html_support:
+ return "Error, HTML no permitido"
self._text = self._text.replace("javascript","javascropt") # FIXME: necesario?
- html_parser = PreParseHTML()
+ html_parser = parser_html.HTMLParser()
try:
html_parser.feed(self._text)
self._preparsed = html_parser.get_text()
- except sgmllib.SGMLParseError, e:
+ except parser_html.SGMLParseError, e:
return e
def _parse(self):
@@ -225,219 +153,3 @@
if not e:
return e, "".join(self._parsed)
return e or "Error, HTML no valido", None
-
-
-import unittest
-class TestBBCode(unittest.TestCase):
- correct_bbcode = (
- ('[ b]hola[/b]', '<strong>hola</strong>'),
- ('[b ]hola[/b]', '<strong>hola</strong>'),
- ('[ b ]hola[/b]', '<strong>hola</strong>'),
-
- ('[b]hola[ /b]', '<strong>hola</strong>'),
- ('[b]hola[/b ]', '<strong>hola</strong>'),
- ('[b]hola[ /b ]', '<strong>hola</strong>'),
-
-
- ('[b]hola[/b]', '<strong>hola</strong>'),
- ('antes [b]hola[/b]', 'antes <strong>hola</strong>'),
- ('[b]hola[/b] despues', '<strong>hola</strong> despues'),
- ('antes [b]hola[/b] despues', 'antes <strong>hola</strong> despues'),
-
- ('[url]la direccion[/url]', '<a href="la direccion">la direccion</a>'),
- ('antes [url]la direccion[/url]', 'antes <a href="la direccion">la direccion</a>'),
- ('[url]la direccion[/url] despues', '<a href="la direccion">la direccion</a> despues'),
- ('antes [url]la direccion[/url] despues', 'antes <a href="la direccion">la direccion</a> despues'),
-
- ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
- ('antes [url="la direccion"]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
- ('[url="la direccion"]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
- ('antes [url="la direccion"]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
- ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
-
- ('[url=la direccion]el texto[/url]', '<a href="la direccion">el texto</a>'),
- ('antes [url=la direccion]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
- ('[url =la direccion]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
- ('antes [url=la direccion]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
- ('[quote=paco]Hola[/quote]', '<blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
- ('antes [quote=paco]Hola[/quote]', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
- ('[quote=paco]Hola[/quote] despues', '<blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
- ('antes [quote=paco]Hola[/quote] despues', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
-
- ('[quote]Hola[/quote]', '<blockquote>Hola</blockquote>'),
- ('antes [quote]Hola[/quote]', 'antes <blockquote>Hola</blockquote>'),
- ('[quote]Hola[/quote] despues', '<blockquote>Hola</blockquote> despues'),
- ('antes [quote]Hola[/quote] despues', 'antes <blockquote>Hola</blockquote> despues'),
-
- ('[u]hola[/u]', '<span style="text-decoration:underline">hola</span>'),
-
- ('[size=20px]hola[/size]', '<span style="font-size:20px">hola</span>'),
-
- ('[n]hola[/n]', '[n]hola[/n]'),
-
- ("[img]laimagen1[/img]", '<img src="laimagen1" />'),
- ("antes [img]laimagen2[/img]", 'antes <img src="laimagen2" />'),
- ("[img]laimagen3[/img] despues", '<img src="laimagen3" /> despues'),
- ("antes [img]laimagen4[/img] despues", 'antes <img src="laimagen4" /> despues')
- )
-
- correct_html = (
- ('&', '&amp;'),
- ('<b>caca</b>', '<b>caca</b>'),
- ('b>', 'b>'),
- ('<script>', '<script>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />')
- )
-
- incorrect_html = (
- '<b>',
- '<b',
- '</b>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
- '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
- )
-
- incorrect_bbcode = (
- '[i]bla bla bla [b]hola[/n][/i]',
- '[b]Ay[/ b ]',
- '[b]Ay[ / b ]',
- '[b]Ay[/]',
- '[b]Ay, se me ha olvidado cerrar',
- '[url][/url]',
- '[b][/url]hola[/url]',
- '[b]caca[url]hola[/b][/url]',
- """Un texto [b]mas[/b] largo, con [b]negritas y [n]cursivas[/i]
- [/b] e incluso [url="laurl"]enlaces [/b]con negritas[/b] dentro[/url]""")
-
- xss = (
- """[url="hola'"]adios[/url]""",
- """[url="hola''"'"'"]adios[/url]""",
- """'';!--"<XSS>=&{()}""",
- """<?pi ?>""",
- """<?php ?>""",
- """';alert(String.fromCharCode(88,83,83))//\';alert(String.fromCharCode(88,83,83))//";alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>""",
- "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
- """<IMG SRC="javascript:alert('XSS');">""",
- """<IMG SRC=javascript:alert('XSS')>""",
- """<IMG SRC=JaVaScRiPt:alert('XSS')>""",
- """<IMG SRC=javascript:alert("XSS")>""",
- """<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>""",
- '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">',
-)
-
-
- def testCorrectBBCode(self):
- parser = BBCodeParser()
- bbcode_list = []
- html_list = []
- global allow_errors
- allow_errors = False
- print
- for bbcode, html in self.correct_bbcode:
- parser.feed(bbcode)
- bbcode_list.append(bbcode)
- html_list.append(html)
- retval, result = parser.parse()
-# print "-->", bbcode
-# print "<--", html
-# print "<--", result
-# print
- assert not retval
- self.assertEqual(html, result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual(html, result)
- parser.feed("".join(bbcode_list))
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(html_list), result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(html_list), result)
-
- def testCorrectHTML(self):
- parser = BBCodeParser()
- html_list = []
- parsed_html_list = []
- global allow_errors
- allow_errors = False
- print
- for html, parsed_html in self.correct_html:
- parser.feed(html)
- html_list.append(html)
- parsed_html_list.append(parsed_html)
- retval, result = parser.parse()
-# print "-->", html
-# print "<--", parsed_html
-# print "<--", result
-# print
- assert not retval
- self.assertEqual(parsed_html, result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual(parsed_html, result)
- parser.feed("".join(html_list))
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(parsed_html_list), result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(parsed_html_list), result)
-
- def testIncorrectBBCode(self):
- parser = BBCodeParser()
- global allow_errors
- allow_errors = False
- print
- for bbcode in self.incorrect_bbcode:
- parser.feed(bbcode)
- retval, result = parser.parse()
-# print "-->", bbcode
-# print "<--", retval, result
- assert retval
- retval, result = parser.parse()
- assert retval
-
- def testIncorrectHTML(self):
- parser = BBCodeParser()
- global allow_errors
- allow_errors = False
- print
- for html in self.incorrect_html:
- parser.feed(html)
- retval, result = parser.parse()
-# print "-->", html
-# print "<--", retval, result
- assert retval
- retval, result = parser.parse()
- assert retval
-
- def testIncorrectXSS(self):
- parser = BBCodeParser()
- global allow_errors
- allow_errors = False
- print
- for xss in self.xss:
- parser.feed(xss)
- retval, result = parser.parse()
- print "-->", xss
- print "<--", retval, result
-# assert retval
-
-if __name__ == "__main__":
- unittest.main()
Copied: trunk/frontend-web/parser_html.py (from r197, /trunk/frontend-web/parser_bbcode.py)
==============================================================================
--- /trunk/frontend-web/parser_bbcode.py (original)
+++ trunk/frontend-web/parser_html.py Fri Jun 29 19:50:31 2007
@@ -2,10 +2,6 @@
# $Id$
-# Allow BBCode syntax errors?
-allow_errors = True
-allow_errors = False
-
allowed_html = {
"a": ["href", "title"],
"p": [],
@@ -17,427 +13,78 @@
"img": ["src", "alt", "title"]
}
-allowed_tags = {
- "b": ('<strong>', '</strong>', False),
- "u": ('<span style="text-decoration:underline">', '</span>', False),
- "s": ('<strike>', '</strike>', False),
- "size": ('<span style="font-size:%(arg)s">', '</span>', True),
- "i": ('<em>', '</em>', False),
- "url": ('<a href="%(arg)s">', '</a>', True),
- "quote*": ('<blockquote>', '</blockquote>', False),
- "quote": ('<blockquote><h4>%(arg)s escribió:</h4>', '</blockquote>', True),
- "img": ('<img src="%(arg)s" />', None, True)
-}
-
def escape(text):
entities = {'"': '"', "'": "'"}
import xml.sax.saxutils
return xml.sax.saxutils.escape(text, entities)
-
-
-class BBCodeParser:
- """A BBCode to HTML parser"""
-
- _open = "["
- _close = "]"
-
- def __init__(self):
- self._text = ""
- self._preparsed = ""
- self._tokens = []
- self._tags = []
- self._parsed = []
-
- def _reset(self):
- """Resets the internal arrays."""
- self._preparsed = ""
- self._tokens = []
- self._tags = []
- self._parsed = []
-
- def feed(self, text):
- """Sets the text to parse"""
- self._text = text
-
- def _tokenize(self):
- """Tokenize the string and the tags in two separates lists
- Should be called after _preparse but before _parse."""
- import re
- self._tokens = re.split("\%s.*?\%s" % (self._open, self._close), self._preparsed)
- self._tags = re.findall("\%s(.*?)\%s" % (self._open, self._close), self._preparsed)
-
- def _close_tag_to_html(self, tag):
- """Translate the BBCode tag to HTML anc close it."""
- return allowed_tags[tag][1] or ""
-
- def _tag_to_html(self, tag):
- """Translates the BBCode tag to HTML"""
- aux = allowed_tags[tag[0]]
- if not tag[1]:
- aux = allowed_tags.get(tag[0] + "*", aux) # Permitimos tag* que no lleva argumentos y puede ser distinto
- return aux[0] % {"arg": escape(tag[1].strip('"')) if tag[1] else '' }
-
- def _tag_closes(self, tag):
- """Returns true if the tags needs to be closed"""
- if tag[0].startswith("/"):
- return allowed_tags[tag[0][1:]][1]
- return allowed_tags[tag[0]][1]
-
- def _tag_needs_args(self, tag):
- """Returns true if the tag needs arguments"""
- if allowed_tags.get(tag + "*", False):
- return False
- return allowed_tags[tag][2]
-
- def _preparse(self):
- """Preparse the text, just leaving the allowed html tags. Escape dangerous sequences.
- Should be called before _parse"""
-
- import sgmllib
- class PreParseHTML(sgmllib.SGMLParser):
- self._stack = []
- def reset(self):
- self.text = []
- self._stack = []
- sgmllib.SGMLParser.reset(self)
-
- def do_img(self, attrs):
- if not attrs: raise sgmllib.SGMLParseError, "Error, argumentos para <img> no validos"
- attrs_list = []
- for a, v in attrs:
- if a in allowed_html["img"] and v:
- attrs_list.append(' %s="%s"' % (a, escape(v)))
- else:
- raise sgmllib.SGMLParseError, "Error, argumentos para <img> no validos"
- strattrs = "".join(attrs_list)
- self.text.append("<img%(strattrs)s />" % locals())
-
- def end_img(self):
- pass
-
- def do_br(self):
- self.text.append("<br />")
-
- def end_br(self):
- pass
-
- def unknown_starttag(self, tag, attrs):
- if tag in allowed_html.keys():
- attrs_list = []
- for a, v in attrs:
- if a in allowed_html[tag] and v:
- attrs_list.append(' %s="%s"' % (a, escape(v)))
- else:
- raise sgmllib.SGMLParseError, "Error, argumentos para <%s> no validos" % tag
- strattrs = "".join(attrs_list)
- self.text.append("<%(tag)s%(strattrs)s>" % locals())
- self._stack.append(tag)
- else:
- strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
- self.text.append(escape("<%(tag)s%(strattrs)s>" % locals()))
-
- def unknown_endtag(self, tag):
- if tag in allowed_html.keys():
- self.text.append("</%(tag)s>" % locals())
- if self._stack and tag == self._stack[-1]:
- self._stack.pop(-1)
- else:
- raise sgmllib.SGMLParseError, "Error, tag <%s> sin abrir" % tag
- else:
- self.text.append(escape("</%(tag)s>" % locals()))
-
- def handle_charref(self, ref):
- self.text.append("&#%(ref)s;" % locals())
-
- def handle_entityref(self, ref):
- self.text.append("&%(ref)s" % locals())
- import htmlentitydefs
- if htmlentitydefs.entitydefs.has_key(ref):
- self.text.append(";")
-
- def handle_data(self, text):
- import xml.sax.saxutils
- self.text.append(xml.sax.saxutils.escape(text))
-
- def get_text(self):
- """Return processed HTML as a single string"""
- if self._stack:
- raise sgmllib.SGMLParseError, "Error, tag <%s> sin cerrar" % self._stack.pop(-1)
- return "".join(self.text)
-
- self._text = self._text.replace("javascript","javascropt") # FIXME: necesario?
- html_parser = PreParseHTML()
- try:
- html_parser.feed(self._text)
- self._preparsed = html_parser.get_text()
- except sgmllib.SGMLParseError, e:
- return e
- def _parse(self):
- """Do the parsing.
- Should be called after _reset, _preparse and _tokenize."""
- stack = []
- self._parsed.append(self._tokens.pop(0))
- while self._tags:
- tag = self._tags.pop(0)
- tag = tag.split("=", 1)
- tag[0] = tag[0].strip()
- if tag[0].startswith("/") and tag[0][1:] in allowed_tags.keys() and stack:
- if tag[0][1:] == stack[-1]:
- self._parsed.append(self._close_tag_to_html(stack.pop(-1)))
- elif not allow_errors:
- return "Error, tratando de cerrar %s%s%s no abierto antes" % (self._open, tag[0], self._close)
- else: # Si alguien activa allow_errors mereceria la mas lenta y dolorosa muerte
- # Esto es una chapuza para usuarios estupidos y chapuceros
- self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
- if self._tokens and self._tag_closes(tag): self._parsed.append(self._tokens.pop(0))
-
- elif tag[0] in allowed_tags.keys():
- tag.append(None)
- stack.append(tag[0])
- if self._tag_needs_args(tag[0]) and tag[1] == None:
- tag[1] = self._tokens[0]
- if not allow_errors and not tag[1]:
- return "Error, no se ha pasado el argumento a %s%s%s" % (self._open, tag[0], self._close)
- self._parsed.append(self._tag_to_html(tag))
- if not self._tag_closes(tag) and self._tokens:
- self._tokens.pop(0)
- if self._tokens: self._parsed.append(self._tokens.pop(0))
-
- else: # Si no conozco el tag se lo paso tal cual
- self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
- if self._tokens: self._parsed.append(self._tokens.pop(0))
-
- if stack and allow_errors:
- while stack: # No me responsabilizo de lo que salga aqui...
- self._parsed.append(self._close_tag_to_html(stack.pop(-1)))
- elif stack:
- return "Error, falta por cerrar el tag %s%s%s" % (self._open, stack[-1], self._close)
- return False
-
- def parse(self):
- """Parses the string"""
- self._reset()
- e = self._preparse()
- if not e and self._preparsed:
- self._tokenize()
- e = self._parse()
- if not e:
- return e, "".join(self._parsed)
- return e or "Error, HTML no valido", None
-
-
-import unittest
-class TestBBCode(unittest.TestCase):
- correct_bbcode = (
- ('[ b]hola[/b]', '<strong>hola</strong>'),
- ('[b ]hola[/b]', '<strong>hola</strong>'),
- ('[ b ]hola[/b]', '<strong>hola</strong>'),
-
- ('[b]hola[ /b]', '<strong>hola</strong>'),
- ('[b]hola[/b ]', '<strong>hola</strong>'),
- ('[b]hola[ /b ]', '<strong>hola</strong>'),
-
-
- ('[b]hola[/b]', '<strong>hola</strong>'),
- ('antes [b]hola[/b]', 'antes <strong>hola</strong>'),
- ('[b]hola[/b] despues', '<strong>hola</strong> despues'),
- ('antes [b]hola[/b] despues', 'antes <strong>hola</strong> despues'),
-
- ('[url]la direccion[/url]', '<a href="la direccion">la direccion</a>'),
- ('antes [url]la direccion[/url]', 'antes <a href="la direccion">la direccion</a>'),
- ('[url]la direccion[/url] despues', '<a href="la direccion">la direccion</a> despues'),
- ('antes [url]la direccion[/url] despues', 'antes <a href="la direccion">la direccion</a> despues'),
-
- ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
- ('antes [url="la direccion"]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
- ('[url="la direccion"]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
- ('antes [url="la direccion"]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
- ('[url="la direccion"]el texto[/url]', '<a href="la direccion">el texto</a>'),
-
- ('[url=la direccion]el texto[/url]', '<a href="la direccion">el texto</a>'),
- ('antes [url=la direccion]el texto[/url]', 'antes <a href="la direccion">el texto</a>'),
- ('[url =la direccion]el texto[/url] despues', '<a href="la direccion">el texto</a> despues'),
- ('antes [url=la direccion]el texto[/url] despues', 'antes <a href="la direccion">el texto</a> despues'),
-
- ('[quote=paco]Hola[/quote]', '<blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
- ('antes [quote=paco]Hola[/quote]', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote>'),
- ('[quote=paco]Hola[/quote] despues', '<blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
- ('antes [quote=paco]Hola[/quote] despues', 'antes <blockquote><h4>paco escribió:</h4>Hola</blockquote> despues'),
-
- ('[quote]Hola[/quote]', '<blockquote>Hola</blockquote>'),
- ('antes [quote]Hola[/quote]', 'antes <blockquote>Hola</blockquote>'),
- ('[quote]Hola[/quote] despues', '<blockquote>Hola</blockquote> despues'),
- ('antes [quote]Hola[/quote] despues', 'antes <blockquote>Hola</blockquote> despues'),
-
- ('[u]hola[/u]', '<span style="text-decoration:underline">hola</span>'),
-
- ('[size=20px]hola[/size]', '<span style="font-size:20px">hola</span>'),
-
- ('[n]hola[/n]', '[n]hola[/n]'),
-
- ("[img]laimagen1[/img]", '<img src="laimagen1" />'),
- ("antes [img]laimagen2[/img]", 'antes <img src="laimagen2" />'),
- ("[img]laimagen3[/img] despues", '<img src="laimagen3" /> despues'),
- ("antes [img]laimagen4[/img] despues", 'antes <img src="laimagen4" /> despues')
- )
-
- correct_html = (
- ('&', '&amp;'),
- ('<b>caca</b>', '<b>caca</b>'),
- ('b>', 'b>'),
- ('<script>', '<script>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>'),
- ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />'),
- ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />')
- )
-
- incorrect_html = (
- '<b>',
- '<b',
- '</b>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
- '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
- '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
- '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
- )
-
- incorrect_bbcode = (
- '[i]bla bla bla [b]hola[/n][/i]',
- '[b]Ay[/ b ]',
- '[b]Ay[ / b ]',
- '[b]Ay[/]',
- '[b]Ay, se me ha olvidado cerrar',
- '[url][/url]',
- '[b][/url]hola[/url]',
- '[b]caca[url]hola[/b][/url]',
- """Un texto [b]mas[/b] largo, con [b]negritas y [n]cursivas[/i]
- [/b] e incluso [url="laurl"]enlaces [/b]con negritas[/b] dentro[/url]""")
-
- xss = (
- """[url="hola'"]adios[/url]""",
- """[url="hola''"'"'"]adios[/url]""",
- """'';!--"<XSS>=&{()}""",
- """<?pi ?>""",
- """<?php ?>""",
- """';alert(String.fromCharCode(88,83,83))//\';alert(String.fromCharCode(88,83,83))//";alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>""",
- "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
- """<IMG SRC="javascript:alert('XSS');">""",
- """<IMG SRC=javascript:alert('XSS')>""",
- """<IMG SRC=JaVaScRiPt:alert('XSS')>""",
- """<IMG SRC=javascript:alert("XSS")>""",
- """<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>""",
- '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">',
-)
-
-
- def testCorrectBBCode(self):
- parser = BBCodeParser()
- bbcode_list = []
- html_list = []
- global allow_errors
- allow_errors = False
- print
- for bbcode, html in self.correct_bbcode:
- parser.feed(bbcode)
- bbcode_list.append(bbcode)
- html_list.append(html)
- retval, result = parser.parse()
-# print "-->", bbcode
-# print "<--", html
-# print "<--", result
-# print
- assert not retval
- self.assertEqual(html, result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual(html, result)
- parser.feed("".join(bbcode_list))
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(html_list), result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(html_list), result)
-
- def testCorrectHTML(self):
- parser = BBCodeParser()
- html_list = []
- parsed_html_list = []
- global allow_errors
- allow_errors = False
- print
- for html, parsed_html in self.correct_html:
- parser.feed(html)
- html_list.append(html)
- parsed_html_list.append(parsed_html)
- retval, result = parser.parse()
-# print "-->", html
-# print "<--", parsed_html
-# print "<--", result
-# print
- assert not retval
- self.assertEqual(parsed_html, result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual(parsed_html, result)
- parser.feed("".join(html_list))
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(parsed_html_list), result)
- retval, result = parser.parse()
- assert not retval
- self.assertEqual("".join(parsed_html_list), result)
-
- def testIncorrectBBCode(self):
- parser = BBCodeParser()
- global allow_errors
- allow_errors = False
- print
- for bbcode in self.incorrect_bbcode:
- parser.feed(bbcode)
- retval, result = parser.parse()
-# print "-->", bbcode
-# print "<--", retval, result
- assert retval
- retval, result = parser.parse()
- assert retval
-
- def testIncorrectHTML(self):
- parser = BBCodeParser()
- global allow_errors
- allow_errors = False
- print
- for html in self.incorrect_html:
- parser.feed(html)
- retval, result = parser.parse()
-# print "-->", html
-# print "<--", retval, result
- assert retval
- retval, result = parser.parse()
- assert retval
-
- def testIncorrectXSS(self):
- parser = BBCodeParser()
- global allow_errors
- allow_errors = False
- print
- for xss in self.xss:
- parser.feed(xss)
- retval, result = parser.parse()
- print "-->", xss
- print "<--", retval, result
-# assert retval
-
-if __name__ == "__main__":
- unittest.main()
+from sgmllib import SGMLParser,SGMLParseError
+class HTMLParser(SGMLParser):
+ def reset(self):
+ self.text = []
+ self._stack = []
+ SGMLParser.reset(self)
+
+ def do_img(self, attrs):
+ if not attrs: raise SGMLParseError, "Error, argumentos para <img> no validos"
+ attrs_list = []
+ for a, v in attrs:
+ if a in allowed_html["img"] and v:
+ attrs_list.append(' %s="%s"' % (a, escape(v)))
+ else:
+ raise SGMLParseError, "Error, argumentos para <img> no validos"
+ strattrs = "".join(attrs_list)
+ self.text.append("<img%(strattrs)s />" % locals())
+
+ def end_img(self):
+ pass
+
+ def do_br(self):
+ self.text.append("<br />")
+
+ def end_br(self):
+ pass
+
+ def unknown_starttag(self, tag, attrs):
+ if tag in allowed_html.keys():
+ attrs_list = []
+ for a, v in attrs:
+ if a in allowed_html[tag] and v:
+ attrs_list.append(' %s="%s"' % (a, escape(v)))
+ else:
+ raise SGMLParseError, "Error, argumentos para <%s> no validos" % tag
+ strattrs = "".join(attrs_list)
+ self.text.append("<%(tag)s%(strattrs)s>" % locals())
+ self._stack.append(tag)
+ else:
+ strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
+ self.text.append(escape("<%(tag)s%(strattrs)s>" % locals()))
+
+ def unknown_endtag(self, tag):
+ if tag in allowed_html.keys():
+ self.text.append("</%(tag)s>" % locals())
+ if self._stack and tag == self._stack[-1]:
+ self._stack.pop(-1)
+ else:
+ raise SGMLParseError, "Error, tag <%s> sin abrir" % tag
+ else:
+ self.text.append(escape("</%(tag)s>" % locals()))
+
+ def handle_charref(self, ref):
+ self.text.append("&#%(ref)s;" % locals())
+
+ def handle_entityref(self, ref):
+ self.text.append("&%(ref)s" % locals())
+ import htmlentitydefs
+ if htmlentitydefs.entitydefs.has_key(ref):
+ self.text.append(";")
+
+ def handle_data(self, text):
+ import xml.sax.saxutils
+ self.text.append(xml.sax.saxutils.escape(text))
+
+ def get_text(self):
+ """Return processed HTML as a single string"""
+ if self._stack:
+ raise SGMLParseError, "Error, tag <%s> sin cerrar" % self._stack.pop(-1)
+ return "".join(self.text)
Copied: trunk/frontend-web/parsers.py (from r197, /trunk/frontend-web/parser_bbcode.py)
==============================================================================
--- /trunk/frontend-web/parser_bbcode.py (original)
+++ trunk/frontend-web/parsers.py Fri Jun 29 19:50:31 2007
@@ -2,232 +2,65 @@
# $Id$
-# Allow BBCode syntax errors?
-allow_errors = True
-allow_errors = False
-
-allowed_html = {
- "a": ["href", "title"],
- "p": [],
- "b": [],
- "s": [],
- "i": [],
- "em": [],
- "strong": [],
- "img": ["src", "alt", "title"]
-}
-
-allowed_tags = {
- "b": ('<strong>', '</strong>', False),
- "u": ('<span style="text-decoration:underline">', '</span>', False),
- "s": ('<strike>', '</strike>', False),
- "size": ('<span style="font-size:%(arg)s">', '</span>', True),
- "i": ('<em>', '</em>', False),
- "url": ('<a href="%(arg)s">', '</a>', True),
- "quote*": ('<blockquote>', '</blockquote>', False),
- "quote": ('<blockquote><h4>%(arg)s escribió:</h4>', '</blockquote>', True),
- "img": ('<img src="%(arg)s" />', None, True)
-}
-
-def escape(text):
- entities = {'"': '"', "'": "'"}
- import xml.sax.saxutils
- return xml.sax.saxutils.escape(text, entities)
-
-
-class BBCodeParser:
- """A BBCode to HTML parser"""
-
- _open = "["
- _close = "]"
-
- def __init__(self):
- self._text = ""
- self._preparsed = ""
- self._tokens = []
- self._tags = []
- self._parsed = []
-
- def _reset(self):
- """Resets the internal arrays."""
- self._preparsed = ""
- self._tokens = []
- self._tags = []
- self._parsed = []
-
- def feed(self, text):
- """Sets the text to parse"""
- self._text = text
-
- def _tokenize(self):
- """Tokenize the string and the tags in two separates lists
- Should be called after _preparse but before _parse."""
- import re
- self._tokens = re.split("\%s.*?\%s" % (self._open, self._close), self._preparsed)
- self._tags = re.findall("\%s(.*?)\%s" % (self._open, self._close), self._preparsed)
-
- def _close_tag_to_html(self, tag):
- """Translate the BBCode tag to HTML anc close it."""
- return allowed_tags[tag][1] or ""
-
- def _tag_to_html(self, tag):
- """Translates the BBCode tag to HTML"""
- aux = allowed_tags[tag[0]]
- if not tag[1]:
- aux = allowed_tags.get(tag[0] + "*", aux) # Permitimos tag* que no lleva argumentos y puede ser distinto
- return aux[0] % {"arg": escape(tag[1].strip('"')) if tag[1] else '' }
-
- def _tag_closes(self, tag):
- """Returns true if the tags needs to be closed"""
- if tag[0].startswith("/"):
- return allowed_tags[tag[0][1:]][1]
- return allowed_tags[tag[0]][1]
-
- def _tag_needs_args(self, tag):
- """Returns true if the tag needs arguments"""
- if allowed_tags.get(tag + "*", False):
- return False
- return allowed_tags[tag][2]
-
- def _preparse(self):
- """Preparse the text, just leaving the allowed html tags. Escape dangerous sequences.
- Should be called before _parse"""
-
- import sgmllib
- class PreParseHTML(sgmllib.SGMLParser):
- self._stack = []
- def reset(self):
- self.text = []
- self._stack = []
- sgmllib.SGMLParser.reset(self)
-
- def do_img(self, attrs):
- if not attrs: raise sgmllib.SGMLParseError, "Error, argumentos para <img> no validos"
- attrs_list = []
- for a, v in attrs:
- if a in allowed_html["img"] and v:
- attrs_list.append(' %s="%s"' % (a, escape(v)))
- else:
- raise sgmllib.SGMLParseError, "Error, argumentos para <img> no validos"
- strattrs = "".join(attrs_list)
- self.text.append("<img%(strattrs)s />" % locals())
-
- def end_img(self):
- pass
-
- def do_br(self):
- self.text.append("<br />")
-
- def end_br(self):
- pass
-
- def unknown_starttag(self, tag, attrs):
- if tag in allowed_html.keys():
- attrs_list = []
- for a, v in attrs:
- if a in allowed_html[tag] and v:
- attrs_list.append(' %s="%s"' % (a, escape(v)))
- else:
- raise sgmllib.SGMLParseError, "Error, argumentos para <%s> no validos" % tag
- strattrs = "".join(attrs_list)
- self.text.append("<%(tag)s%(strattrs)s>" % locals())
- self._stack.append(tag)
- else:
- strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
- self.text.append(escape("<%(tag)s%(strattrs)s>" % locals()))
-
- def unknown_endtag(self, tag):
- if tag in allowed_html.keys():
- self.text.append("</%(tag)s>" % locals())
- if self._stack and tag == self._stack[-1]:
- self._stack.pop(-1)
- else:
- raise sgmllib.SGMLParseError, "Error, tag <%s> sin abrir" % tag
- else:
- self.text.append(escape("</%(tag)s>" % locals()))
-
- def handle_charref(self, ref):
- self.text.append("&#%(ref)s;" % locals())
-
- def handle_entityref(self, ref):
- self.text.append("&%(ref)s" % locals())
- import htmlentitydefs
- if htmlentitydefs.entitydefs.has_key(ref):
- self.text.append(";")
-
- def handle_data(self, text):
- import xml.sax.saxutils
- self.text.append(xml.sax.saxutils.escape(text))
-
- def get_text(self):
- """Return processed HTML as a single string"""
- if self._stack:
- raise sgmllib.SGMLParseError, "Error, tag <%s> sin cerrar" % self._stack.pop(-1)
- return "".join(self.text)
-
- self._text = self._text.replace("javascript","javascropt") # FIXME: necesario?
- html_parser = PreParseHTML()
- try:
- html_parser.feed(self._text)
- self._preparsed = html_parser.get_text()
- except sgmllib.SGMLParseError, e:
- return e
-
- def _parse(self):
- """Do the parsing.
- Should be called after _reset, _preparse and _tokenize."""
- stack = []
- self._parsed.append(self._tokens.pop(0))
- while self._tags:
- tag = self._tags.pop(0)
- tag = tag.split("=", 1)
- tag[0] = tag[0].strip()
- if tag[0].startswith("/") and tag[0][1:] in allowed_tags.keys() and stack:
- if tag[0][1:] == stack[-1]:
- self._parsed.append(self._close_tag_to_html(stack.pop(-1)))
- elif not allow_errors:
- return "Error, tratando de cerrar %s%s%s no abierto antes" % (self._open, tag[0], self._close)
- else: # Si alguien activa allow_errors mereceria la mas lenta y dolorosa muerte
- # Esto es una chapuza para usuarios estupidos y chapuceros
- self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
- if self._tokens and self._tag_closes(tag): self._parsed.append(self._tokens.pop(0))
-
- elif tag[0] in allowed_tags.keys():
- tag.append(None)
- stack.append(tag[0])
- if self._tag_needs_args(tag[0]) and tag[1] == None:
- tag[1] = self._tokens[0]
- if not allow_errors and not tag[1]:
- return "Error, no se ha pasado el argumento a %s%s%s" % (self._open, tag[0], self._close)
- self._parsed.append(self._tag_to_html(tag))
- if not self._tag_closes(tag) and self._tokens:
- self._tokens.pop(0)
- if self._tokens: self._parsed.append(self._tokens.pop(0))
-
- else: # Si no conozco el tag se lo paso tal cual
- self._parsed.append("%s%s%s" % (self._open, tag[0], self._close))
- if self._tokens: self._parsed.append(self._tokens.pop(0))
-
- if stack and allow_errors:
- while stack: # No me responsabilizo de lo que salga aqui...
- self._parsed.append(self._close_tag_to_html(stack.pop(-1)))
- elif stack:
- return "Error, falta por cerrar el tag %s%s%s" % (self._open, stack[-1], self._close)
- return False
-
- def parse(self):
- """Parses the string"""
- self._reset()
- e = self._preparse()
- if not e and self._preparsed:
- self._tokenize()
- e = self._parse()
- if not e:
- return e, "".join(self._parsed)
- return e or "Error, HTML no valido", None
+import unittest
+from parser_bbcode import BBCodeParser
+from parser_html import HTMLParser, SGMLParseError
+class TestHTMLParser(unittest.TestCase):
+ correct_html = (
+ ('&', '&amp;'),
+ ('<b>caca</b>', '<b>caca</b>'),
+ ('b>', 'b>'),
+ ('<script>', '<script>'),
+ ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>'),
+ ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>'),
+ ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />'),
+ ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />'),
+ ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />')
+ )
+
+ incorrect_html = (
+ '<b>',
+ '<b',
+ '</b>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>',
+ '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>',
+ '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />',
+ '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">',
+ )
+
+ def testCorrectHTML(self):
+ parser = HTMLParser()
+ html_list = []
+ parsed_html_list = []
+ global allow_errors
+ allow_errors = False
+ print
+ for html, parsed_html in self.correct_html:
+ parser.reset()
+ parser.feed(html)
+ html_list.append(html)
+ parsed_html_list.append(parsed_html)
+ result = parser.get_text()
+# print "-->", html
+# print "<--", parsed_html
+# print "<--", result
+# print
+ assert result
+ self.assertEqual(parsed_html, result)
+ parser.reset()
+ parser.feed("".join(html_list))
+ result = parser.get_text()
+ assert result
+ self.assertEqual("".join(parsed_html_list), result)
-import unittest
class TestBBCode(unittest.TestCase):
correct_bbcode = (
('[ b]hola[/b]', '<strong>hola</strong>'),
Modified: trunk/frontend-web/skins/default/body_threadlist_thread.html
==============================================================================
--- trunk/frontend-web/skins/default/body_threadlist_thread.html (original)
+++ trunk/frontend-web/skins/default/body_threadlist_thread.html Fri Jun 29 19:50:31 2007
@@ -1,3 +1,3 @@
-<div class="alt %(format)s" onkeypress="window.location='%(link)s';" onclick="window.location='%(link)s';" >
+<div class="alt %(format)s">
<a href="%(link)s">%(title)s</a> (%(t_date)s) - Respuestas: %(t_replies)s
</div>
Modified: trunk/frontend-web/skins/default/body_threadlist_thread_unread.html
==============================================================================
--- trunk/frontend-web/skins/default/body_threadlist_thread_unread.html (original)
+++ trunk/frontend-web/skins/default/body_threadlist_thread_unread.html Fri Jun 29 19:50:31 2007
@@ -1,3 +1,3 @@
-<div class="alt %(format)s" onkeypress="window.location='%(link)s';" onclick="window.location='%(link)s';" >
+<div class="alt %(format)s">
<a href="%(link)s">%(title)s</a> (%(t_date)s) - <a href="%(link_notread)s">No leído</a> - Respuestas: %(t_replies)s
</div>
More information about the cpif
mailing list