[cpif] r199 - in trunk: . frontend-web

svn at argo.es svn at argo.es
Fri Jun 29 20:50:21 CEST 2007


Author: alvaro
Date: Fri Jun 29 20:50:19 2007
New Revision: 199

Log:
Mayor separación. El parseo se hace con parsers.convert_to_html(text)


Modified:
   trunk/TODO
   trunk/frontend-web/globales.py
   trunk/frontend-web/parser_bbcode.py
   trunk/frontend-web/parsers.py

Modified: trunk/TODO
==============================================================================
--- trunk/TODO	(original)
+++ trunk/TODO	Fri Jun 29 20:50:19 2007
@@ -166,8 +166,4 @@
   van bien como estan). Estudiar si devolver un codigo de error
   aparte del mensaje.
 
-  Escribir mas casos de prueba para la unittest.
-  
-  Escapar comillas simples y dobles.
-
   Definir que tags se quieren permitir (HTML y BBCode).

Modified: trunk/frontend-web/globales.py
==============================================================================
--- trunk/frontend-web/globales.py	(original)
+++ trunk/frontend-web/globales.py	Fri Jun 29 20:50:19 2007
@@ -25,7 +25,7 @@
 openid_support=True
 
 # Allow BBcode?
-bbcode_support=True
+allow_bbcode=True
 
 # Allow HTML?
-html_support=True
+allow_html=True

Modified: trunk/frontend-web/parser_bbcode.py
==============================================================================
--- trunk/frontend-web/parser_bbcode.py	(original)
+++ trunk/frontend-web/parser_bbcode.py	Fri Jun 29 20:50:19 2007
@@ -39,14 +39,12 @@
 
   def __init__(self):
     self._text = ""
-    self._preparsed = ""
     self._tokens = []
     self._tags = []
     self._parsed = []
 
   def _reset(self):
     """Resets the internal arrays."""
-    self._preparsed = ""
     self._tokens = []
     self._tags = []
     self._parsed = []
@@ -57,10 +55,10 @@
 
   def _tokenize(self):
     """Tokenize the string and the tags in two separates lists
-    Should be called after _preparse but before _parse."""
+    Should be called before _parse."""
     import re
-    self._tokens = re.split("\%s.*?\%s" % (self._open, self._close), self._preparsed)
-    self._tags = re.findall("\%s(.*?)\%s" % (self._open, self._close), self._preparsed)
+    self._tokens = re.split("\%s.*?\%s" % (self._open, self._close), self._text)
+    self._tags = re.findall("\%s(.*?)\%s" % (self._open, self._close), self._text)
 
   def _close_tag_to_html(self, tag):
     """Translate the BBCode tag to HTML anc close it."""
@@ -85,25 +83,9 @@
       return False
     return allowed_tags[tag][2]
   
-  def _preparse(self):
-    """Preparse the text, just leaving the allowed html tags. Escape dangerous sequences.
-    Should be called before _parse"""
-
-    import parser_html
-    from globales import html_support
-    if not html_support:
-      return "Error, HTML no permitido"
-    self._text = self._text.replace("javascript","javascropt") # FIXME: necesario?
-    html_parser = parser_html.HTMLParser()
-    try:
-      html_parser.feed(self._text)
-      self._preparsed = html_parser.get_text()
-    except parser_html.SGMLParseError, e:
-      return e
-    
   def _parse(self):
     """Do the parsing.
-    Should be called after _reset, _preparse and _tokenize."""
+    Should be called after _reset and _tokenize."""
     stack = []
     self._parsed.append(self._tokens.pop(0))
     while self._tags:
@@ -146,10 +128,8 @@
   def parse(self):
     """Parses the string"""
     self._reset()
-    e = self._preparse()
-    if not e and self._preparsed:
-      self._tokenize()
-      e =  self._parse()
-      if not e:
-        return e, "".join(self._parsed)
+    self._tokenize()
+    e =  self._parse()
+    if not e:
+      return e, "".join(self._parsed)
     return e or "Error, HTML no valido", None

Modified: trunk/frontend-web/parsers.py
==============================================================================
--- trunk/frontend-web/parsers.py	(original)
+++ trunk/frontend-web/parsers.py	Fri Jun 29 20:50:19 2007
@@ -4,7 +4,31 @@
 
 import unittest
 from parser_bbcode import BBCodeParser
-from parser_html import HTMLParser, SGMLParseError
+from parser_html import *
+
+def convert_to_html(text):
+  from globales import allow_bbcode,allow_html
+  if not allow_html and not allow_bbcode: 
+    return False, escape(text)
+  if allow_html:
+    parser = HTMLParser()
+    parser.reset()
+    try:
+      parser.feed(text)
+      text = parser.get_text()
+      if not text: raise SGMLParseError, "HTML no valido"
+    except SGMLParseError, e:
+      return e or "HTML no valido", None
+  else:
+    text = escape(text)
+
+  if allow_bbcode:
+    parser = BBCodeParser()
+    parser.feed(text)
+    return parser.parse()
+  else:
+    return False, escape(text)
+
 
 class TestHTMLParser(unittest.TestCase):
   correct_html = (
@@ -175,89 +199,113 @@
 
 
   def testCorrectBBCode(self):
-    parser = BBCodeParser()
     bbcode_list = []
     html_list = []
     global allow_errors
     allow_errors = False
     print
     for bbcode, html in self.correct_bbcode:
-      parser.feed(bbcode)
       bbcode_list.append(bbcode)
       html_list.append(html)
-      retval, result = parser.parse()
+      retval, result = convert_to_html(bbcode)
 #      print "-->", bbcode
 #      print "<--", html
 #      print "<--", result
 #      print
       assert not retval
       self.assertEqual(html, result)
-      retval, result = parser.parse()
+      retval, result = convert_to_html(bbcode)
       assert not retval
       self.assertEqual(html, result)
-    parser.feed("".join(bbcode_list))
-    retval, result = parser.parse()
-    assert not retval
-    self.assertEqual("".join(html_list), result)
-    retval, result = parser.parse()
+    retval, result = convert_to_html("".join(bbcode_list))
     assert not retval
     self.assertEqual("".join(html_list), result)
   
+  def test_NoBBCodeYesHTML(self):
+    list = []
+    global allow_errors
+    allow_errors = False
+    import globales
+    globales.allow_bbcode = False
+    globales.allow_html = True
+    for code, dummy in self.correct_bbcode:
+      list.append(code)
+      retval, result = convert_to_html(code)
+      assert not retval
+      self.assertEqual(escape(code),result)
+    retval, result = convert_to_html("".join(list))
+    assert not retval
+    self.assertEqual(escape("".join(list)),result)
+
+  def test_NoBBCodeNoHTML(self):
+    list = []
+    global allow_errors
+    allow_errors = False
+    import globales
+    globales.allow_bbcode = False
+    globales.allow_html = False
+    for code, dummy in self.correct_bbcode:
+      list.append(code)
+      retval, result = convert_to_html(code)
+      assert not retval
+      self.assertEqual(escape(code),result)
+    retval, result = convert_to_html("".join(list))
+    assert not retval
+    self.assertEqual(escape("".join(list)),result)
+    for code, dummy in self.correct_html:
+      list.append(code)
+      retval, result = convert_to_html(code)
+      assert not retval
+      self.assertEqual(escape(code),result)
+    retval, result = convert_to_html("".join(list))
+    assert not retval
+    self.assertEqual(escape("".join(list)),result)
+
   def testCorrectHTML(self):
-    parser = BBCodeParser()
     html_list = []
     parsed_html_list = []
     global allow_errors
     allow_errors = False
     print
     for html, parsed_html in self.correct_html:
-      parser.feed(html)
+      retval, result = convert_to_html(html)
       html_list.append(html)
       parsed_html_list.append(parsed_html)
-      retval, result = parser.parse()
 #      print "-->", html
 #      print "<--", parsed_html
 #      print "<--", result
 #      print
       assert not retval
       self.assertEqual(parsed_html, result)
-      retval, result = parser.parse()
+      retval, result = convert_to_html(html)
       assert not retval
       self.assertEqual(parsed_html, result)
-    parser.feed("".join(html_list))
-    retval, result = parser.parse()
-    assert not retval
-    self.assertEqual("".join(parsed_html_list), result)
-    retval, result = parser.parse()
+    retval, result = convert_to_html("".join(html_list))
     assert not retval
     self.assertEqual("".join(parsed_html_list), result)
 
   def testIncorrectBBCode(self):
-    parser = BBCodeParser()
     global allow_errors
     allow_errors = False
     print
     for bbcode in self.incorrect_bbcode:
-      parser.feed(bbcode)
-      retval, result = parser.parse()
+      retval, result = convert_to_html(bbcode)
 #      print "-->", bbcode
 #      print "<--", retval, result
       assert retval
-      retval, result = parser.parse()
+      retval, result = convert_to_html(bbcode)
       assert retval
 
   def testIncorrectHTML(self):
-    parser = BBCodeParser()
     global allow_errors
     allow_errors = False
     print
     for html in self.incorrect_html:
-      parser.feed(html)
-      retval, result = parser.parse()
-#      print "-->", html
-#      print "<--", retval, result
+      retval, result = convert_to_html(html)
+      print "-->", html
+      print "<--", retval, result
       assert retval
-      retval, result = parser.parse()
+      retval, result = convert_to_html(html)
       assert retval
 
   def testIncorrectXSS(self):
@@ -266,8 +314,7 @@
     allow_errors = False
     print
     for xss in self.xss:
-      parser.feed(xss)
-      retval, result = parser.parse()
+      retval, result = convert_to_html(xss)
       print "-->", xss
       print "<--", retval, result
 #      assert retval



More information about the cpif mailing list