[cpif] r265 - in trunk/varios/parser_stack: . grammars

svn at argo.es svn at argo.es
Wed Jul 11 23:15:49 CEST 2007


Author: heimy
Date: Wed Jul 11 23:15:36 2007
New Revision: 265

Log:
Prueba de concepto de una 'pila' de gramáticas para el parser de texto de entrada

Added:
   trunk/varios/parser_stack/
   trunk/varios/parser_stack/grammars/
   trunk/varios/parser_stack/grammars/__init__.py   (contents, props changed)
   trunk/varios/parser_stack/grammars/bbcode.py   (contents, props changed)
   trunk/varios/parser_stack/grammars/nodes.py   (contents, props changed)
   trunk/varios/parser_stack/grammars/smileys.py   (contents, props changed)
   trunk/varios/parser_stack/parser.py   (contents, props changed)

Added: trunk/varios/parser_stack/grammars/__init__.py
==============================================================================
--- (empty file)
+++ trunk/varios/parser_stack/grammars/__init__.py	Wed Jul 11 23:15:36 2007
@@ -0,0 +1,39 @@
+$Id$
+
+import os
+from nodes import TextNode, NodeList, MatchObject
+
+skip_list = ['__init__.py', 'token.py']
+grammar_list = []
+
+class DummyGrammar(object):
+    def search(self, strng):
+        return MatchObject(self, strng, 0, len(strng) + 1)
+    def get_nodes(self, parser, strng):
+        return TextNode(strng)
+
+dummy_grammar = DummyGrammar()
+
+def get_matches(strng):
+    global grammar_list, dummy_grammar
+
+    matches = [x for x in (y.search(strng) for y in grammar_list) if not x.is_nil()]
+    if not matches:
+        matches = [dummy_grammar.search(strng)]
+
+    return matches
+
+valid_grammar_mod = lambda x: x.endswith('.py') and x not in skip_list
+this_dir = os.path.dirname(__file__)
+
+for grmr in (x for x in os.listdir(this_dir) if valid_grammar_mod(x)):
+    try:
+        name = grmr[:-3]
+        mdl = __import__(name, globals(), (), name)
+        cls = getattr(mdl, 'Grammar', None)
+        if cls:
+            grammar_list.append(cls())
+    except ImportError:
+        pass
+
+__all__ = ["get_matches"]

Added: trunk/varios/parser_stack/grammars/bbcode.py
==============================================================================
--- (empty file)
+++ trunk/varios/parser_stack/grammars/bbcode.py	Wed Jul 11 23:15:36 2007
@@ -0,0 +1,37 @@
+$Id$
+
+import re
+from nodes import NodeList, MatchObject
+
+class AnchorNode(NodeList):
+    def __init__(self, parser, uri, text):
+        self.uri = uri
+        self.extend(parser.parse(text))
+    def render_html(self):
+        return "<a href='%s'>%s</a>" % (self.uri, super(AnchorNode, self).render_html())
+
+class BoldNode(NodeList):
+    def __init__(self, parser, text):
+        self.extend(parser.parse(text))
+    def render_html(self):
+        return "<b>%s</b>" % super(BoldNode, self).render_html()
+
+express = {
+    'url':  (AnchorNode, re.compile('''\[url="([^"]*?)"\](.*?)\[/url\]''')),
+    'bold': (BoldNode, re.compile('''\[b\](.*?)\[/b\]''')),
+    }
+
+class Grammar(object):
+    def search(self, strng):
+        lst = []
+        for (key, (nodecls, regexp)) in express.items():
+            ret = regexp.search(strng)
+            if ret:
+                lst.append(MatchObject(self, (nodecls, ret), *ret.span()))
+        if not lst:
+            lst = [MatchObject(self, None, 0, 0)]
+
+        return sorted(lst)[0]
+    def get_nodes(self, parser, info):
+        nodecls, ret = info
+        return nodecls(parser, *ret.groups())

Added: trunk/varios/parser_stack/grammars/nodes.py
==============================================================================
--- (empty file)
+++ trunk/varios/parser_stack/grammars/nodes.py	Wed Jul 11 23:15:36 2007
@@ -0,0 +1,30 @@
+$Id$
+
+class Node(object):
+    def render_html(self):
+        return ""
+
+class TextNode(Node):
+    def __init__(self, content):
+        self.content = content
+
+    def render_html(self):
+        return self.content
+
+class MatchObject(object):
+    def __init__(self, grammar, info, beg, end):
+        self.grammar = grammar
+        self.info = info
+        self.beg, self.end = beg, end
+    def __cmp__(self, other):
+        return cmp( (self.beg, self.end), (other.beg, other.end) )
+    def is_nil(self):
+        if self.beg == self.end:
+            return True
+        return False
+    def get_nodes(self, parser):
+        return self.grammar.get_nodes(parser, self.info)
+
+class NodeList(list, Node):
+    def render_html(self):
+        return ''.join([x.render_html() for x in self])

Added: trunk/varios/parser_stack/grammars/smileys.py
==============================================================================
--- (empty file)
+++ trunk/varios/parser_stack/grammars/smileys.py	Wed Jul 11 23:15:36 2007
@@ -0,0 +1,23 @@
+$Id$
+
+import re
+from nodes import TextNode, MatchObject
+
+class SmileyNode(TextNode):
+    pass
+
+risitas = {
+    ':)': "<img src='/smileys/happy.png' />",
+    ';)': "<img src='/smileys/wink.png' />",
+    ':(': "<img src='/smileys/sad.png' />",
+    }
+
+class Grammar(object):
+    def search(self, strng):
+        matches = sorted( (strng.find(x), x) for x in risitas if x in strng )
+        if matches:
+            pos, key = matches[0]
+            return MatchObject(self, key, pos, pos + len(key))
+        return MatchObject(self, None, 0, 0)
+    def get_nodes(self, parser, key):
+        return SmileyNode(risitas[key])

Added: trunk/varios/parser_stack/parser.py
==============================================================================
--- (empty file)
+++ trunk/varios/parser_stack/parser.py	Wed Jul 11 23:15:36 2007
@@ -0,0 +1,29 @@
+$Id: parser.py 16 2007-04-19 21:57:52Z heimy $
+
+import re
+
+class Parser(object):
+    def __init__(self):
+        self.grammars = []
+    def parse(self, strng):
+        import grammars
+        from grammars.nodes import NodeList, TextNode
+
+        lst = NodeList()
+        while strng:
+            matches = sorted(grammars.get_matches(strng))
+            if not matches:
+                strng = ''
+                continue
+            match = matches[0]
+            prev = strng[:match.beg]
+            if prev:
+                lst.append(TextNode(prev))
+            lst.append(match.get_nodes(self))
+            strng = strng[match.end:]
+        return lst
+
+if __name__ == '__main__':
+    parser = Parser()
+    nodelist = parser.parse('foobar! :) And this is [url="http://www.bold.com"][b]bold[/b][/url] foobar ;)')
+    print nodelist.render_html()



More information about the cpif mailing list