[cpif] r204 - in trunk: . backend frontend-web

Sat Jun 30 14:57:11 CEST 2007

Author: jcea
Date: Sat Jun 30 14:57:09 2007
New Revision: 204

Log:
La primera linea de los ficheros "*.py" debe ser
el "id" del svn, para controlar correctamente
la version del programa.

Integramos el soporte BBCode y HTML.



Added:
   trunk/frontend-web/parser_eol.py
      - copied, changed from r203, /trunk/frontend-web/parser_html.py
Modified:
   trunk/TODO
   trunk/backend/database.py
   trunk/backend/upgrade.py
   trunk/frontend-web/init.py
   trunk/frontend-web/parser_bbcode.py
   trunk/frontend-web/parser_html.py
   trunk/frontend-web/parsers.py
   trunk/frontend-web/url_nuevo_hilo_POST.py
   trunk/frontend-web/url_nuevo_post_POST.py

Modified: trunk/TODO
==============================================================================

--- trunk/TODO	(original)
+++ trunk/TODO	Sat Jun 30 14:57:09 2007
@@ -167,3 +167,29 @@
   aparte del mensaje.
 
   Definir que tags se quieren permitir (HTML y BBCode).
+
+- 20070630: Ahora mismo guardamos juntos el texto en bruto
+  tal cual lo introduce el usuario, y el texto procesado
+  que se va a mostrar en HTML. El argumento para esto es
+  la sencillez y que en el disco duro, al grabarse juntos,
+  la compresion sera muy alta (basicamente los dos textos
+  juntos ocupan en disco lo mismo que si solo se guardase
+  uno).
+
+  El problema de este esquema es que cuando cargamos mensajes
+  en memoria, la mayor parte del tiempo ocupamos el doble
+  de RAM de la necesaria, ya que cargamos tambien la version
+  en bruto que, basicamente, solo necesitamos cuando se
+  reedita el texto, cuando se manda por email, etc.
+
+  En el futuro podria haber mas casos, como acceso via news.
+
+  No esta claro el punto de corte, usamos la opcion mas
+  simple (guardar las dos versiones juntas), pudiendo reevaluar
+  la cuestion en el futuro.
+
+- 20070630: FRONTAL WEB: Habria que liberar el bloqueo al
+  sistema durus mientras estamos procesando el texto que un
+  usuario acaba de enviar, ya que puede ser una operacion
+  potencialmente lenta.
+

Modified: trunk/backend/database.py
==============================================================================
--- trunk/backend/database.py	(original)
+++ trunk/backend/database.py	Sat Jun 30 14:57:09 2007
@@ -1,7 +1,7 @@
 # $Id$
 
 
-VERSION_DB="2007062003"
+VERSION_DB="2007063001"
 
 from globales import thread_len
 
@@ -169,7 +169,7 @@
 
   return num_mt
   
-def mensaje_add(conn,texto,nick,hilo=None,titulo=None,metatag=None) :
+def mensaje_add(conn,texto_bruto,texto_procesado,nick,hilo=None,titulo=None,metatag=None) :
   from durus.btree import BTree
   from durus.persistent_dict import PersistentDict
   import time
@@ -184,7 +184,7 @@
 
   ts=time.time()
   mensaje=PersistentDict(
-             {"texto":texto,
+             {"texto":[texto_bruto,texto_procesado],
               "TS":ts,
               "autor":nick})
 
@@ -301,7 +301,7 @@
   iterador_mensajes=hilo["mensajes"].items_from(pr)
   for msg_num,msg in iterador_mensajes :
     posicion,msg=msg
-    mensajes.append((msg_num,msg["autor"],msg["TS"],msg["texto"]))
+    mensajes.append((msg_num,msg["autor"],msg["TS"],msg["texto"][1]))
     i-=1
     if not i : break
 

Modified: trunk/backend/upgrade.py
==============================================================================
--- trunk/backend/upgrade.py	(original)
+++ trunk/backend/upgrade.py	Sat Jun 30 14:57:09 2007
@@ -100,3 +100,13 @@
       root["usuarios"]["usuarios"]["alvaro"]["OpenID"].add("http://perseverantia.com/")
     conn.commit()
 
+  if root["version del foro"]=="2007062003" :
+    print "Actualizando la base de datos: 2007062003 -> 2007063001"
+    root["version del foro"]="2007063001"
+    for mensaje in root["mensajes"]["mensajes"].values() :
+      texto_procesado=mensaje["texto"]
+      # Ojo, no hacemos "unescape" de los caracteres especiales como el "<" o el "&".
+      texto_bruto=texto_procesado.replace("<br/>","")
+      mensaje["texto"]=[texto_bruto,texto_procesado]
+    conn.commit()
+

Modified: trunk/frontend-web/init.py
==============================================================================
--- trunk/frontend-web/init.py	(original)
+++ trunk/frontend-web/init.py	Sat Jun 30 14:57:09 2007
@@ -75,7 +75,7 @@
       assert v==1
 
     if not conn.get_root()["metatags"]["metatags"][1]["TS2hilo"] :
-      database.mensaje_add(conn,".","master",titulo="No puede haber metatags vacios",metatag=1)
+      database.mensaje_add(conn,".",".","master",titulo="No puede haber metatags vacios",metatag=1)
 
     num_hilos=conn.get_root()["hilos"]["num_hilos"]
     if not num_hilos :

Modified: trunk/frontend-web/parser_bbcode.py
==============================================================================
--- trunk/frontend-web/parser_bbcode.py	(original)
+++ trunk/frontend-web/parser_bbcode.py	Sat Jun 30 14:57:09 2007
@@ -1,6 +1,6 @@
-#!/bin/python2.5
-"""BBCode to HTML parser"""
 # $Id$
+"""BBCode to HTML parser"""
+
 
 # Allow BBCode syntax errors?
 allow_errors = True
@@ -132,7 +132,7 @@
       while stack: # No me responsabilizo de lo que salga aqui...
         self._parsed.append(self._close_tag_to_html(stack.pop(-1)))
     elif stack:
-      return "Error, falta por cerrar el tag %s%s%s" % (self._open, stack[-1], self._close)
+      return "Error, falta cerrar el tag %s%s%s" % (self._open, stack[-1], self._close)
     return False
 
   def parse(self):

Copied: trunk/frontend-web/parser_eol.py (from r203, /trunk/frontend-web/parser_html.py)
==============================================================================
--- /trunk/frontend-web/parser_html.py	(original)
+++ trunk/frontend-web/parser_eol.py	Sat Jun 30 14:57:09 2007
@@ -1,226 +1,5 @@
-#!/bin/python2.5
-
 # $Id$
 
-allowed_html = {
-    "a": ["href", "title"], 
-    "p": [], 
-    "b": [], 
-    "s": [], 
-    "i": [], 
-    "em": [], 
-    "strong": [], 
-    "img": ["src", "alt", "title"]
-}
-
-def escape(text):
-  entities = {
-      '<':'&lt;', 
-      '>':'&gt;', 
-      '"': '&quot;', 
-      "'": "&apos;", 
-      "&": "&amp;"
-      }
-  import re
-  pat = "(%s)" % "|".join( map(re.escape, entities.keys())  )
-  return re.sub( pat, lambda m:entities[m.group()], text )
-  import xml.sax.saxutils
-  return xml.sax.saxutils.escape(text, entities)
-
-from sgmllib import SGMLParser, SGMLParseError
-class HTMLParser(SGMLParser):
-  def __init__(self, allow=True):
-    self.allow = allow
-    SGMLParser.__init__(self)
-  def reset(self):                       
-    self.text = []
-    self._stack = []
-    SGMLParser.reset(self)
-
-  def do_img(self, attrs):
-    if not self.allow:
-      strattrs = "".join([' %s="%s"' % (a, v) for a, v in attrs])
-      self.text.append(escape("<img%(strattrs)s />" % locals()))
-      return
-    if not attrs: raise SGMLParseError, "Error, argumentos para <img> no validos"
-    attrs_list = []
-    for a, v in attrs:
-      if a in allowed_html["img"] and v:
-        attrs_list.append(' %s="%s"' % (a, escape(v)))
-      else:
-        raise SGMLParseError, "Error, argumentos para <img> no validos"
-    strattrs = "".join(attrs_list)
-    self.text.append("<img%(strattrs)s />" % locals())
-
-  def end_img(self):
-    pass
-
-  def do_br(self):
-    if not self.allow:
-      self.text.append(escape("<br />" % locals()))
-      return
-    self.text.append("<br />")
-
-  def end_br(self):
-    pass
-
-  def unknown_starttag(self, tag, attrs):
-    if tag in allowed_html.keys() and self.allow:
-      attrs_list = []
-      for a, v in attrs:
-        if a in allowed_html[tag] and v and self.allow:
-          attrs_list.append(' %s="%s"' % (a, escape(v)))
-        else:
-          raise SGMLParseError, "Error, argumentos para <%s> no validos" % tag
-      strattrs = "".join(attrs_list)
-      self.text.append("<%(tag)s%(strattrs)s>" % locals())
-      self._stack.append(tag)
-    else:
-      strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
-      self.text.append(escape("<%(tag)s%(strattrs)s>" % locals()))
-
-  def unknown_endtag(self, tag):         
-    if tag in allowed_html.keys() and self.allow:
-      self.text.append("</%(tag)s>" % locals())
-      if self._stack and tag == self._stack[-1]:
-        self._stack.pop(-1)
-      else:
-        raise SGMLParseError, "Error, tag <%s> sin abrir" % tag
-    else:
-      self.text.append(escape("</%(tag)s>" % locals()))
-
-  def handle_charref(self, ref):         
-    self.text.append("&amp;#%(ref)s;" % locals())
-
-  def handle_entityref(self, ref):      
-    self.text.append("&amp;%(ref)s" % locals())
-    import htmlentitydefs
-    if htmlentitydefs.entitydefs.has_key(ref):
-      self.text.append(";")
-
-  def handle_data(self, text):           
-    import xml.sax.saxutils
-    self.text.append(xml.sax.saxutils.escape(text))
-
-  def get_text(self):              
-    """Return processed HTML as a single string"""
-    if self._stack: 
-      raise SGMLParseError, "Error, tag <%s> sin cerrar" % self._stack.pop(-1)
-    return "".join(self.text)
-
-def parse(text, allow = True):
-  parser = HTMLParser(allow)
-  parser.reset()
-  try:
-    parser.feed(text)
-    text = parser.get_text()
-    if not text: raise SGMLParseError, "HTML no valido"
-  except SGMLParseError, e:
-    return (e or "HTML no valido", None)
-  return False, text
-
-import unittest
-class TestHTML(unittest.TestCase):
-  correct_html = (
-      ('&amp;', '&amp;amp;'), 
-      ('<b>caca</b>', '<b>caca</b>'), 
-      ('b>', 'b&gt;'), 
-      ('<script>', '&lt;script&gt;'), 
-      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>', 
-       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2">bla bla</a>'), 
-      ('<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>', 
-       '<a href="http://blablabla.com/index.php?caca=1&amp;mierda=2" title="caca">bla bla </a>'), 
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" />', 
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" />'), 
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca">', 
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />'), 
-      ('<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca"></img>', 
-       '<img src="http://blablabla.com/index.php?caca=1&amp;mierda=2" alt="caca" />')
-  )
-  
-  correct_html2 = (
-      '&amp;', 
-      '<b>caca</b>', 
-      'b>', 
-      '<script>', 
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla</a>', 
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca">bla bla </a>', 
-      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" />', 
-      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />', 
-      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" alt="caca" />', 
-  )
-
-  incorrect_html = (
-      '<b>', 
-      '<b', 
-      '</b>', 
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla', 
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2">bla bla </as>', 
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" taitle="caca">bla bla </a>', 
-      '<a href="http://blablabla.com/index.php?caca=1&mierda=2" title="caca" de la vaca>bla bla <a>', 
-      '<img sorcerer="http://blablabla.com/index.php?caca=1&mierda=2" />', 
-      '<img src="http://blablabla.com/index.php?caca=1&mierda=2" altibajo="caca">', 
-  )
-
-  xss = (
-      """'';!--"<XSS>=&{()}""", 
-      """<?pi ?>""", 
-      """<?php ?>""", 
-      """';alert(String.fromCharCode(88, 83, 83))//\';alert(String.fromCharCode(88, 83, 83))//";alert(String.fromCharCode(88, 83, 83))//\";alert(String.fromCharCode(88, 83, 83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88, 83, 83))</SCRIPT>""", 
-      "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", 
-      """<IMG SRC="javascript:alert('XSS');">""", 
-      """<IMG SRC=javascript:alert('XSS')>""", 
-      """<IMG SRC=JaVaScRiPt:alert('XSS')>""", 
-      """<IMG SRC=javascript:alert(&quot;XSS&quot;)>""", 
-      """<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>""", 
-      '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">', 
-)
-
-  def test_NoBBHTML(self):
-    """Checks if the HTML is escaped when it is not allowed."""
-    html_list = []
-    import globales
-    for code in self.correct_html2:
-      html_list.append(code)
-      retval, result = parse(code, False)
-      assert not retval
-      self.assertEqual(escape(code), result)
-    retval, result = parse("".join(html_list), False)
-    assert not retval
-    self.assertEqual(escape("".join(html_list)), result)
-
-  def testCorrectHTML(self):
-    """Checks if the conversion goes well."""
-    html_list = []
-    parsed_html_list = []
-    for html, parsed_html in self.correct_html:
-      retval, result = parse(html)
-      html_list.append(html)
-      parsed_html_list.append(parsed_html)
-      assert not retval
-      self.assertEqual(parsed_html, result)
-      retval, result = parse(html)
-      assert not retval
-      self.assertEqual(parsed_html, result)
-    retval, result = parse("".join(html_list))
-    assert not retval
-    self.assertEqual("".join(parsed_html_list), result)
-
-  def testIncorrectHTML(self):
-    """Checks if the conversion fails w/ bad formed HTML."""
-    for html in self.incorrect_html:
-      retval, result = parse(html)
-      assert retval
-      retval, result = parse(html)
-      assert retval
-
-  def testIncorrectXSS(self):
-    """Checks if the generated HTML is XSS safe. Uncomment the print statements."""
-    for xss in self.xss:
-      retval, result = parse(xss)
-#      print "-->", xss
-#      print "<--", retval, result
-#      assert retval
+def parse(text):
+  return False,text.replace("\r","").replace("\n","<br>\r\n")
 
-if __name__ == "__main__":
-  unittest.main()

Modified: trunk/frontend-web/parser_html.py
==============================================================================
--- trunk/frontend-web/parser_html.py	(original)
+++ trunk/frontend-web/parser_html.py	Sat Jun 30 14:57:09 2007
@@ -1,5 +1,3 @@
-#!/bin/python2.5
-
 # $Id$
 
 allowed_html = {

Modified: trunk/frontend-web/parsers.py
==============================================================================
--- trunk/frontend-web/parsers.py	(original)
+++ trunk/frontend-web/parsers.py	Sat Jun 30 14:57:09 2007
@@ -1,17 +1,23 @@
-#!/bin/python2.5
-
 # $Id$
 
 import parser_bbcode
 import parser_html
+import parser_eol
 
 def convert_to_html(text):
   """Converts the text into valid HTML"""
   from globales import allow_bbcode, allow_html
+
   retval, text = parser_html.parse(text, allow_html)
   if retval:
     return retval, None
-  return parser_bbcode.parse(text, allow_bbcode)
+
+  retval,text=parser_bbcode.parse(text, allow_bbcode)
+  if retval :
+    return retval,None
+
+  return parser_eol.parse(text)
+
 
 if __name__ == "__main__":
   test1 = parser_html.TestHTML

Modified: trunk/frontend-web/url_nuevo_hilo_POST.py
==============================================================================
--- trunk/frontend-web/url_nuevo_hilo_POST.py	(original)
+++ trunk/frontend-web/url_nuevo_hilo_POST.py	Sat Jun 30 14:57:09 2007
@@ -31,9 +31,13 @@
   texto=cuerpo.getfirst("texto")
   if not (titulo and texto)  : # Incluye tanto el caso de vacios como de inexistentes
     return pagina_error("El T&iacute;tulo y el Texto no pueden estar vacios")
+
   titulo=cgi.escape(titulo)
-  texto=cgi.escape(texto)
-  texto=texto.replace("\r","").replace("\n","<br/>\r\n")
-  database.mensaje_add(conn,texto,usuario,titulo=titulo,metatag=metatag)
+
+  import parsers
+  error,texto2=parsers.convert_to_html(texto)
+  if error :
+    return (200,{"Content-Type":"text/plain; charset=utf-8"},error)
+  database.mensaje_add(conn,texto,texto2,usuario,titulo=titulo,metatag=metatag)
   return (302,{"Location":"/indice/%d" %metatag},"")
 

Modified: trunk/frontend-web/url_nuevo_post_POST.py
==============================================================================
--- trunk/frontend-web/url_nuevo_post_POST.py	(original)
+++ trunk/frontend-web/url_nuevo_post_POST.py	Sat Jun 30 14:57:09 2007
@@ -31,8 +31,11 @@
   texto=cuerpo.getfirst("texto")
   if not texto : # Incluye tanto el caso de vacio como de inexistente
     return pagina_error("El Texto no puede estar vacio")
-  texto=cgi.escape(texto)
-  texto=texto.replace("\r","").replace("\n","<br/>\r\n")
-  database.mensaje_add(conn,texto,usuario,hilo=hilo)
+
+  import parsers
+  error,texto2=parsers.convert_to_html(texto)
+  if error :
+    return (200,{"Content-Type":"text/plain; charset=utf-8"},error)
+  database.mensaje_add(conn,texto,texto2,usuario,hilo=hilo)
   return (302,{"Location":"/indice/%d" %metatag}, "")