update postmarkup version to 1.1.4
This commit is contained in:
parent 3b86f217ff
commit b8e72275b2

1 changed file with 385 additions and 109 deletions
@@ -5,10 +5,10 @@ Post Markup
 Author: Will McGugan (http://www.willmcgugan.com)
 """

-__version__ = "1.1.3"
+__version__ = "1.1.4"

 import re
-from urllib import quote, unquote, quote_plus
+from urllib import quote, unquote, quote_plus, urlencode
 from urlparse import urlparse, urlunparse

 pygments_available = True
@@ -21,7 +21,6 @@ except ImportError:
     pygments_available = False

-

 def annotate_link(domain):
     """This function is called by the url tag. Override to disable or change behaviour.
@@ -31,36 +30,34 @@ def annotate_link(domain):
     return u" [%s]"%_escape(domain)


-re_url = re.compile(r"((https?):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)", re.MULTILINE| re.UNICODE)
+_re_url = re.compile(r"((https?):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)", re.MULTILINE|re.UNICODE)


-re_html=re.compile('<.*?>|\&.*?\;')
+_re_html=re.compile('<.*?>|\&.*?\;', re.UNICODE)
 def textilize(s):
     """Remove markup from html"""
-    return re_html.sub("", s)
+    return _re_html.sub("", s)

-re_excerpt = re.compile(r'\[".*?\]+?.*?\[/".*?\]+?', re.DOTALL)
-re_remove_markup = re.compile(r'\[.*?\]', re.DOTALL)
+_re_excerpt = re.compile(r'\[".*?\]+?.*?\[/".*?\]+?', re.DOTALL|re.UNICODE)
+_re_remove_markup = re.compile(r'\[.*?\]', re.DOTALL|re.UNICODE)

-def remove_markup(post):
-    """Removes html tags from a string."""
-    return re_remove_markup.sub("", post)
+_re_break_groups = re.compile(r'\n+', re.DOTALL|re.UNICODE)

 def get_excerpt(post):
     """Returns an excerpt between ["] and [/"]

     post -- BBCode string"""

-    match = re_excerpt.search(post)
+    match = _re_excerpt.search(post)
     if match is None:
         return ""
     excerpt = match.group(0)
     excerpt = excerpt.replace(u'\n', u"<br/>")
-    return remove_markup(excerpt)
+    return _re_remove_markup.sub("", excerpt)

 def strip_bbcode(bbcode):

-    """ Strips bbcode tags from a string.
+    """Strips bbcode tags from a string.

     bbcode -- A string to remove tags from

@@ -71,7 +68,10 @@ def strip_bbcode(bbcode):

 def create(include=None, exclude=None, use_pygments=True, **kwargs):

-    """Create a postmarkup object that converts bbcode to XML snippets.
+    """Create a postmarkup object that converts bbcode to XML snippets. Note
+    that creating postmarkup objects is _not_ threadsafe, but rendering the
+    html _is_ threadsafe. So typically you will need just one postmarkup instance
+    to render the bbcode across threads.

     include -- List or similar iterable containing the names of the tags to use
                If omitted, all tags will be used
@@ -79,6 +79,8 @@ def create(include=None, exclude=None, use_pygments=True, **kwargs):
                If omitted, no tags will be excluded
     use_pygments -- If True, Pygments (http://pygments.org/) will be used for the code tag,
                     otherwise it will use <pre>code</pre>
+    kwargs -- Remaining keyword arguments are passed to tag constructors.
+
     """

     postmarkup = PostMarkup()
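The thread-safety note added to the docstring above implies the intended usage pattern: build a renderer once, then share it between threads, since only rendering is threadsafe. A minimal sketch (assuming this module is importable as postmarkup):

    import threading
    import postmarkup

    # Created once, at import time -- creation is not threadsafe.
    markup = postmarkup.create(exclude=['img'])

    def worker(source):
        # Rendering is threadsafe, so worker threads may share the instance.
        print markup(source)

    threads = [threading.Thread(target=worker, args=(u'[b]post %i[/b]' % i,))
               for i in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()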
@@ -125,30 +127,10 @@ def create(include=None, exclude=None, use_pygments=True, **kwargs):
     else:
         add_tag(CodeTag, u'code', **kwargs)

+    add_tag(ParagraphTag, u"p")
+
     return postmarkup


-_postmarkup = None
-def render_bbcode(bbcode, encoding="ascii", exclude_tags=None, auto_urls=True):
-
-    """Renders a bbcode string in to XHTML. This is a shortcut if you don't
-    need to customize any tags.
-
-    bbcode -- A string containing the bbcode
-    encoding -- If bbcode is not unicode, then then it will be encoded with
-    this encoding (defaults to 'ascii'). Ignore the encoding if you already have
-    a unicode string
-
-    """
-
-    global _postmarkup
-    if _postmarkup is None:
-        _postmarkup = create(use_pygments=pygments_available)
-
-    return _postmarkup(bbcode, encoding, exclude_tags=exclude_tags, auto_urls=auto_urls)
-
-
 class TagBase(object):

     def __init__(self, name, enclosed=False, auto_close=False, inline=False, strip_first_newline=False, **kwargs):
@@ -219,6 +201,7 @@ class SimpleTag(TagBase):
     def render_open(self, parser, node_index):
         return u"<%s>"%self.html_name

+
     def render_close(self, parser, node_index):
         return u"</%s>"%self.html_name

@@ -241,6 +224,13 @@ class DivStyleTag(TagBase):

 class LinkTag(TagBase):

+    _safe_chars = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                            'abcdefghijklmnopqrstuvwxyz'
+                            '0123456789'
+                            '_.-=/&?:%&')
+
+    _re_domain = re.compile(r"//([a-z0-9-\.]*)", re.UNICODE)
+
     def __init__(self, name, annotate_links=True, **kwargs):
         TagBase.__init__(self, name, inline=True)

@@ -260,35 +250,41 @@ class LinkTag(TagBase):
             url = self.params.strip()
         else:
             url = self.get_contents_text(parser).strip()
+            url = _unescape(url)

         self.domain = ""
-        #Unquote the url
-        self.url = unquote(url)

         #Disallow javascript links
-        if u"javascript:" in self.url.lower():
+        if u"javascript:" in url.lower():
             return ""

         #Disallow non http: links
-        url_parsed = urlparse(self.url)
-        if url_parsed[0] and not url_parsed[0].lower().startswith(u'http'):
-            return ""
+        if ':' not in url:
+            url = 'http://' + url

-        #Prepend http: if it is not present
-        if not url_parsed[0]:
-            self.url="http://"+self.url
-            url_parsed = urlparse(self.url)
+        scheme, uri = url.split(':', 1)

-        #Get domain
-        self.domain = url_parsed[1].lower()
+        if scheme not in ['http', 'https']:
+            return u''

-        #Remove www for brevity
-        if self.domain.startswith(u'www.'):
-            self.domain = self.domain[4:]
+        try:
+            domain = self._re_domain.search(uri.lower()).group(1)
+        except IndexError:
+            return u''

-        #Quote the url
-        #self.url="http:"+urlunparse( map(quote, (u"",)+url_parsed[1:]) )
-        self.url= unicode( urlunparse([quote(component.encode("utf-8"), safe='/=&?:+') for component in url_parsed]) )
+        domain = domain.lower()
+        if domain.startswith('www.'):
+            domain = domain[4:]
+
+        def percent_encode(s):
+            safe_chars = self._safe_chars
+            def replace(c):
+                if c not in safe_chars:
+                    return "%%%02X"%ord(c)
+                else:
+                    return c
+            return "".join([replace(c) for c in s])
+
+        self.url = percent_encode(url.encode('utf-8', 'replace'))
+        self.domain = domain

         if not self.url:
             return u""
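The rewritten render_open drops the unquote/urlparse/urlunparse round-trip: it whitelists the http and https schemes, pulls the domain out with _re_domain, and percent-encodes every byte outside _safe_chars. A standalone sketch with equivalent logic to the percent_encode closure above, shown outside the class:

    _safe_chars = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                            'abcdefghijklmnopqrstuvwxyz'
                            '0123456789'
                            '_.-=/&?:%&')

    def percent_encode(s):
        # Unsafe bytes become %XX escapes; safe bytes pass through untouched.
        return "".join([c if c in _safe_chars else "%%%02X" % ord(c) for c in s])

    print percent_encode(u'http://example.org/a b'.encode('utf-8'))
    # -> http://example.org/a%20b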
@@ -298,6 +294,7 @@ class LinkTag(TagBase):
         else:
             return u""

+
     def render_close(self, parser, node_index):

         tag_data = parser.tag_data
@@ -364,10 +361,10 @@ class SearchTag(TagBase):
     def render_close(self, parser, node_index):

         if self.label:
+            ret = u'</a>'
             if self.annotate_links:
-                return u'</a>'+ annotate_link(self.label)
-            else:
-                return u'</a>'
+                ret += annotate_link(self.label)
+            return ret
         else:
             return u''

@@ -406,6 +403,7 @@ class CodeTag(TagBase):
         return '<div class="code"><pre>%s</pre></div>' % contents

+

 class ImgTag(TagBase):

     def __init__(self, name, **kwargs):
@@ -550,14 +548,70 @@ class ColorTag(TagBase):
 class CenterTag(TagBase):

     def render_open(self, parser, node_index, **kwargs):

-        return u'<div style="text-align:center">'
+        return u'<div style="text-align:center;">'


     def render_close(self, parser, node_index):

         return u'</div>'


+class ParagraphTag(TagBase):
+
+    def __init__(self, name, **kwargs):
+        TagBase.__init__(self, name)
+
+    def render_open(self, parser, node_index, **kwargs):
+
+        tag_data = parser.tag_data
+        level = tag_data.setdefault('ParagraphTag.level', 0)
+
+        ret = []
+        if level > 0:
+            ret.append(u'</p>')
+            tag_data['ParagraphTag.level'] -= 1;
+
+        ret.append(u'<p>')
+        tag_data['ParagraphTag.level'] += 1;
+        return u''.join(ret)
+
+    def render_close(self, parser, node_index):
+
+        tag_data = parser.tag_data
+        level = tag_data.setdefault('ParagraphTag.level', 0)
+
+        if not level:
+            return u''
+
+        tag_data['ParagraphTag.level'] -= 1;
+
+        return u'</p>'
+
+class SectionTag(TagBase):
+
+    """A specialised tag that stores its contents in a dictionary. Can be
+    used to define extra content areas.
+
+    """
+
+    def __init__(self, name, **kwargs):
+        TagBase.__init__(self, name, enclosed=True)
+
+    def render_open(self, parser, node_index):
+
+        self.section_name = self.params.strip().lower().replace(u' ', u'_')
+
+        contents = self.get_contents(parser)
+        self.skip_contents(parser)
+
+        tag_data = parser.tag_data
+        sections = tag_data.setdefault('sections', {})
+
+        sections.setdefault(self.section_name, []).append(contents)
+
+        return u''
+
+
 # http://effbot.org/zone/python-replace.htm
 class MultiReplace:
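ParagraphTag keeps a counter under the 'ParagraphTag.level' key in parser.tag_data, so an opening [p] first closes any paragraph that is still open: paragraphs never nest, and render_close is a no-op when no paragraph is open. A quick check of the intended behaviour (output is indicative only):

    markup = create()
    print markup(u'First paragraph.\n\n\nSecond paragraph.', paragraphs=True)
    # The run of newlines collapses to a single paragraph break,
    # giving roughly: <p>First paragraph.</p><p>Second paragraph.</p>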
@@ -565,8 +619,7 @@ class MultiReplace:

         # string to string mapping; use a regular expression
         keys = repl_dict.keys()
-        keys.sort() # lexical order
-        keys.reverse() # use longest match first
+        keys.sort(reverse=True) # reverse lexical order, so longest match first
         pattern = u"|".join([re.escape(key) for key in keys])
         self.pattern = re.compile(pattern)
         self.dict = repl_dict
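Replacing the sort/reverse pair with a single reverse sort keeps the behaviour: the alternation built from the keys must try longer keys first, otherwise a key that is a prefix of another key would always win. A standalone illustration of the technique:

    import re

    repl = {u'&': u'&amp;', u'&amp;': u'&amp;amp;'}
    keys = repl.keys()
    keys.sort(reverse=True)  # u'&amp;' now precedes u'&' in the pattern
    pattern = re.compile(u"|".join([re.escape(k) for k in keys]))
    print pattern.sub(lambda m: repl[m.group(0)], u'fish &amp; chips & peas')
    # -> fish &amp;amp; chips &amp; peas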
@@ -588,6 +641,9 @@ def _escape(s):
 def _escape_no_breaks(s):
     return PostMarkup.standard_replace_no_break(s.rstrip('\n'))

+def _unescape(s):
+    return PostMarkup.standard_unreplace(s)
+
 class TagFactory(object):

     def __init__(self):
@@ -629,10 +685,13 @@ class _Parser(object):

     """ This is an interface to the parser, used by Tag classes. """

-    def __init__(self, post_markup):
+    def __init__(self, post_markup, tag_data=None):

         self.pm = post_markup
-        self.tag_data = {}
+        if tag_data is None:
+            self.tag_data = {}
+        else:
+            self.tag_data = tag_data
         self.render_node_index = 0

     def skip_to_node(self, node_index):
@@ -673,8 +732,11 @@ class PostMarkup(object):
     standard_replace = MultiReplace({   u'<':u'&lt;',
                                         u'>':u'&gt;',
                                         u'&':u'&amp;',
-                                        u'\n':u'<br/>',
-                                        })
+                                        u'\n':u'<br/>'})
+
+    standard_unreplace = MultiReplace({ u'&lt;':u'<',
+                                        u'&gt;':u'>',
+                                        u'&amp;':u'&'})

     standard_replace_no_break = MultiReplace({  u'<':u'&lt;',
                                                 u'>':u'&gt;',
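standard_unreplace is the inverse of the escaping table, mapping the three character entities back to plain characters; LinkTag's new _unescape call uses it so that already-escaped tag contents are unescaped before being percent-encoded. Expected behaviour, roughly:

    print PostMarkup.standard_replace(u'a < b & c')
    # -> a &lt; b &amp; c   (a newline would become <br/>)
    print PostMarkup.standard_unreplace(u'a &lt; b &amp; c')
    # -> a < b & c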
@@ -682,50 +744,55 @@ class PostMarkup(object):

     TOKEN_TAG, TOKEN_PTAG, TOKEN_TEXT = range(3)

+    _re_end_eq = re.compile(u"\]|\=", re.UNICODE)
+    _re_quote_end = re.compile(u'\"|\]', re.UNICODE)
+
     # I tried to use RE's. Really I did.
     @classmethod
     def tokenize(cls, post):

+        re_end_eq = cls._re_end_eq
+        re_quote_end = cls._re_quote_end
+
         text = True
         pos = 0

-        def find_first(post, pos, c):
-            f1 = post.find(c[0], pos)
-            f2 = post.find(c[1], pos)
-            if f1 == -1:
-                return f2
-            if f2 == -1:
-                return f1
-            return min(f1, f2)
+        def find_first(post, pos, re_ff):
+            try:
+                return re_ff.search(post, pos).start()
+            except AttributeError:
+                return -1

+        TOKEN_TAG, TOKEN_PTAG, TOKEN_TEXT = range(3)
+
+        post_find = post.find
         while True:

-            brace_pos = post.find(u'[', pos)
+            brace_pos = post_find(u'[', pos)
             if brace_pos == -1:
                 if pos<len(post):
-                    yield PostMarkup.TOKEN_TEXT, post[pos:], pos, len(post)
+                    yield TOKEN_TEXT, post[pos:], pos, len(post)
                 return
             if brace_pos - pos > 0:
-                yield PostMarkup.TOKEN_TEXT, post[pos:brace_pos], pos, brace_pos
+                yield TOKEN_TEXT, post[pos:brace_pos], pos, brace_pos

             pos = brace_pos
             end_pos = pos+1

-            open_tag_pos = post.find(u'[', end_pos)
-            end_pos = find_first(post, end_pos, u']=')
+            open_tag_pos = post_find(u'[', end_pos)
+            end_pos = find_first(post, end_pos, re_end_eq)
             if end_pos == -1:
-                yield PostMarkup.TOKEN_TEXT, post[pos:], pos, len(post)
+                yield TOKEN_TEXT, post[pos:], pos, len(post)
                 return

             if open_tag_pos != -1 and open_tag_pos < end_pos:
-                yield PostMarkup.TOKEN_TEXT, post[pos:open_tag_pos], pos, open_tag_pos
+                yield TOKEN_TEXT, post[pos:open_tag_pos], pos, open_tag_pos
                 end_pos = open_tag_pos
                 pos = end_pos
                 continue

             if post[end_pos] == ']':
-                yield PostMarkup.TOKEN_TAG, post[pos:end_pos+1], pos, end_pos+1
+                yield TOKEN_TAG, post[pos:end_pos+1], pos, end_pos+1
                 pos = end_pos+1
                 continue
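tokenize yields (token_type, token_text, start_pos, end_pos) tuples; the change above only hoists the class attributes into locals and swaps the two-character find_first for a precompiled regex, so the token stream itself should be unchanged. For example:

    for token in PostMarkup.tokenize(u'Hello [b]World[/b]'):
        print token
    # (2, u'Hello ', 0, 6)    TOKEN_TEXT
    # (0, u'[b]', 6, 9)       TOKEN_TAG
    # (2, u'World', 9, 14)    TOKEN_TEXT
    # (0, u'[/b]', 14, 18)    TOKEN_TAG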
@@ -735,29 +802,32 @@ class PostMarkup(object):
                 while post[end_pos] == ' ':
                     end_pos += 1
                 if post[end_pos] != '"':
-                    end_pos = post.find(u']', end_pos+1)
+                    end_pos = post_find(u']', end_pos+1)
                     if end_pos == -1:
                         return
-                    yield PostMarkup.TOKEN_TAG, post[pos:end_pos+1], pos, end_pos+1
+                    yield TOKEN_TAG, post[pos:end_pos+1], pos, end_pos+1
                 else:
-                    end_pos = find_first(post, end_pos, u'"]')
+                    end_pos = find_first(post, end_pos, re_quote_end)
                     if end_pos==-1:
                         return
                     if post[end_pos] == '"':
-                        end_pos = post.find(u'"', end_pos+1)
+                        end_pos = post_find(u'"', end_pos+1)
                         if end_pos == -1:
                             return
-                        end_pos = post.find(u']', end_pos+1)
+                        end_pos = post_find(u']', end_pos+1)
                         if end_pos == -1:
                             return
-                        yield PostMarkup.TOKEN_PTAG, post[pos:end_pos+1], pos, end_pos+1
+                        yield TOKEN_PTAG, post[pos:end_pos+1], pos, end_pos+1
                     else:
-                        yield PostMarkup.TOKEN_TAG, post[pos:end_pos+1], pos, end_pos
+                        yield TOKEN_TAG, post[pos:end_pos+1], pos, end_pos
                     pos = end_pos+1
             except IndexError:
                 return


     def add_tag(self, cls, name, *args, **kwargs):
         return self.tag_factory.add_tag(cls, name, *args, **kwargs)

     def tagify_urls(self, postmarkup ):

         """ Surrounds urls with url bbcode tags. """
@@ -766,10 +836,11 @@ class PostMarkup(object):
             return u'[url]%s[/url]' % match.group(0)

         text_tokens = []
+        TOKEN_TEXT = PostMarkup.TOKEN_TEXT
         for tag_type, tag_token, start_pos, end_pos in self.tokenize(postmarkup):

-            if tag_type == PostMarkup.TOKEN_TEXT:
-                text_tokens.append(re_url.sub(repl, tag_token))
+            if tag_type == TOKEN_TEXT:
+                text_tokens.append(_re_url.sub(repl, tag_token))
             else:
                 text_tokens.append(tag_token)

@@ -800,19 +871,112 @@ class PostMarkup(object):
         return sorted(self.tag_factory.tags.keys())


+    def insert_paragraphs(self, post_markup):
+
+        """Inserts paragraph tags in place of newlines. A more complex task than
+        it may seem -- multiple newlines result in just one paragraph tag, and
+        paragraph tags aren't inserted inside certain other tags (such as the
+        code tag). Returns a postmarkup string.
+
+        post_markup -- A string containing the raw postmarkup
+
+        """
+
+        parts = [u'[p]']
+        tag_factory = self.tag_factory
+        enclosed_count = 0
+
+        TOKEN_TEXT = PostMarkup.TOKEN_TEXT
+        TOKEN_TAG = PostMarkup.TOKEN_TAG
+
+        for tag_type, tag_token, start_pos, end_pos in self.tokenize(post_markup):
+
+            if tag_type == TOKEN_TEXT:
+                if enclosed_count:
+                    parts.append(post_markup[start_pos:end_pos])
+                else:
+                    txt = post_markup[start_pos:end_pos]
+                    txt = _re_break_groups.sub(u'[p]', txt)
+                    parts.append(txt)
+                continue
+
+            elif tag_type == TOKEN_TAG:
+                tag_token = tag_token[1:-1].lstrip()
+                if ' ' in tag_token:
+                    tag_name = tag_token.split(u' ', 1)[0]
+                else:
+                    if '=' in tag_token:
+                        tag_name = tag_token.split(u'=', 1)[0]
+                    else:
+                        tag_name = tag_token
+            else:
+                tag_token = tag_token[1:-1].lstrip()
+                tag_name = tag_token.split(u'=', 1)[0]
+
+            tag_name = tag_name.strip().lower()
+
+            end_tag = False
+            if tag_name.startswith(u'/'):
+                end_tag = True
+                tag_name = tag_name[1:]
+
+            tag = tag_factory.get(tag_name, None)
+            if tag is not None and tag.enclosed:
+                if end_tag:
+                    enclosed_count -= 1
+                else:
+                    enclosed_count += 1
+
+            parts.append(post_markup[start_pos:end_pos])
+
+        new_markup = u"".join(parts)
+        return new_markup
+
+    # Matches simple blank tags containing only whitespace
+    _re_blank_tags = re.compile(r"\<(\w+?)\>\s*\</\1\>")
+
+    @classmethod
+    def cleanup_html(cls, html):
+        """Cleans up html. Currently only removes blank tags, i.e. tags containing only
+        whitespace. Only applies to tags without attributes. Tag removal is done
+        recursively until there are no more blank tags. So <strong><em></em></strong>
+        would be completely removed.
+
+        html -- A string containing (X)HTML
+
+        """
+
+        original_html = ''
+        while original_html != html:
+            original_html = html
+            html = cls._re_blank_tags.sub(u"", html)
+        return html
+
+
     def render_to_html(self,
                        post_markup,
                        encoding="ascii",
                        exclude_tags=None,
-                       auto_urls=True):
+                       auto_urls=True,
+                       paragraphs=False,
+                       clean=True,
+                       tag_data=None):

-        """Converts Post Markup to XHTML.
+        """Converts post markup (i.e. bbcode) to XHTML. This method is threadsafe,
+        by virtue that the state is entirely stored on the stack.

         post_markup -- String containing bbcode.
-        encoding -- Encoding of string, defaults to "ascii".
+        encoding -- Encoding of string, defaults to "ascii" if the string is not
+        already unicode.
         exclude_tags -- A collection of tag names to ignore.
         auto_urls -- If True, then urls will be wrapped with url bbcode tags.
+        paragraphs -- If True then line breaks will be replaced with paragraph
+        tags, rather than break tags.
+        clean -- If True, html will be run through the cleanup_html method.
+        tag_data -- An optional dictionary to store tag data in. The default of
+        None will create a dictionary internally. Set this to your own dictionary
+        if you want to retrieve information from the Tag Classes.

         """

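cleanup_html substitutes blank attribute-less tags with nothing and loops until the string stops changing, so nested blanks collapse from the inside out:

    print PostMarkup.cleanup_html(u'<strong><em>  </em></strong>ok')
    # Pass 1 removes <em>  </em>, pass 2 the now-empty <strong></strong>:
    # -> ok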
@@ -822,7 +986,10 @@ class PostMarkup(object):
         if auto_urls:
             post_markup = self.tagify_urls(post_markup)

-        parser = _Parser(self)
+        if paragraphs:
+            post_markup = self.insert_paragraphs(post_markup)
+
+        parser = _Parser(self, tag_data=tag_data)
         parser.markup = post_markup

         if exclude_tags is None:
@@ -858,6 +1025,7 @@ class PostMarkup(object):

         def break_inline_tags():

+
             while tag_stack:
                 if tag_stack[-1].inline:
                     tag = tag_stack.pop()
@@ -876,12 +1044,15 @@ class PostMarkup(object):
                     return tag.render_close(parser, node_index)
                 nodes.append(call)

+        TOKEN_TEXT = PostMarkup.TOKEN_TEXT
+        TOKEN_TAG = PostMarkup.TOKEN_TAG
+
         # Pass 1
         for tag_type, tag_token, start_pos, end_pos in self.tokenize(post_markup):

             raw_tag_token = tag_token

-            if tag_type == PostMarkup.TOKEN_TEXT:
+            if tag_type == TOKEN_TEXT:
                 if parser.no_breaks_count:
                     tag_token = tag_token.strip()
                     if not tag_token:
@@ -907,7 +1078,7 @@ class PostMarkup(object):
                 nodes.append(self.standard_replace(tag_token))
                 continue

-            elif tag_type == PostMarkup.TOKEN_TAG:
+            elif tag_type == TOKEN_TAG:
                 tag_token = tag_token[1:-1].lstrip()
                 if ' ' in tag_token:
                     tag_name, tag_attribs = tag_token.split(u' ', 1)
@@ -1006,11 +1177,46 @@ class PostMarkup(object):
             text.append(node_text)
             parser.render_node_index += 1

-        return u"".join(text)
+        html = u"".join(text)
+        if clean:
+            html = self.cleanup_html(html)
+        return html

     # A shortcut for render_to_html
     __call__ = render_to_html


+_postmarkup = create(use_pygments=pygments_available)
+def render_bbcode(bbcode,
+                  encoding="ascii",
+                  exclude_tags=None,
+                  auto_urls=True,
+                  paragraphs=False,
+                  clean=True,
+                  tag_data=None):
+
+    """ Renders a bbcode string into XHTML. This is a shortcut if you don't
+    need to customize any tags.
+
+    post_markup -- String containing bbcode.
+    encoding -- Encoding of string, defaults to "ascii" if the string is not
+    already unicode.
+    exclude_tags -- A collection of tag names to ignore.
+    auto_urls -- If True, then urls will be wrapped with url bbcode tags.
+    paragraphs -- If True then line breaks will be replaced with paragraph
+    tags, rather than break tags.
+    clean -- If True, html will be run through the cleanup_html method.
+    tag_data -- An optional dictionary to store tag data in. The default of
+    None will create a dictionary internally.
+
+    """
+    return _postmarkup(bbcode,
+                       encoding,
+                       exclude_tags=exclude_tags,
+                       auto_urls=auto_urls,
+                       paragraphs=paragraphs,
+                       clean=clean,
+                       tag_data=tag_data)

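With the module-level instance now created eagerly, render_bbcode is a plain pass-through and the old lazy global initialisation is gone. Typical use:

    import postmarkup
    html = postmarkup.render_bbcode(u'[b]Hello, World[/b]', paragraphs=True)
    # -> something like <p><strong>Hello, World</strong></p>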
@@ -1021,6 +1227,7 @@ def _tests():

     post_markup = create(use_pygments=True)

+
     tests = []
     print """<link rel="stylesheet" href="code.css" type="text/css" />\n"""

@@ -1033,8 +1240,8 @@ def _tests():
     tests.append("[link http://www.willmcgugan.com]My homepage[/link]")
     tests.append("[link]http://www.willmcgugan.com[/link]")

-    tests.append(u"[b]Hello AndrУЉ[/b]")
-    tests.append(u"[google]AndrУЉ[/google]")
+    tests.append(u"[b]Hello André[/b]")
+    tests.append(u"[google]André[/google]")
     tests.append("[s]Strike through[/s]")
     tests.append("[b]bold [i]bold and italic[/b] italic[/i]")
     tests.append("[google]Will McGugan[/google]")
@@ -1091,7 +1298,7 @@ New lines characters are converted to breaks."""\

     tests.append('Nested urls, i.e. [url][url]www.becontrary.com[/url][/url], are condensed in to a single tag.')

-    tests.append(u'[google]ЩИЮВfvЮИУАsz[/google]')
+    tests.append(u'[google]ɸβfvθðsz[/google]')

     tests.append(u'[size 30]Hello, World![/size]')

@@ -1132,6 +1339,11 @@ asdasdasdasdqweqwe
 [/list]""")


+    #tests = []
+    tests.append("[b][p]Hello, [p]World")
+    tests.append("[p][p][p]")
+
+    tests.append("http://www.google.com/search?as_q=bbcode&btnG=%D0%9F%D0%BE%D0%B8%D1%81%D0%BA")

     #tests=["""[b]b[i]i[/b][/i]"""]

@@ -1141,15 +1353,29 @@ asdasdasdasdqweqwe
         print u"<hr/>"
         print

-    print repr(post_markup('[url=<script>Attack</script>]Attack[/url]'))
+    #print repr(post_markup('[url=<script>Attack</script>]Attack[/url]'))

-    print repr(post_markup('http://www.google.com/search?as_q=bbcode&btnG=%D0%9F%D0%BE%D0%B8%D1%81%D0%BA'))
+    #print repr(post_markup('http://www.google.com/search?as_q=%D0%9F%D0%BE%D0%B8%D1%81%D0%BA&test=hai'))

-    p = create(use_pygments=False)
-    print (p('[code]foo\nbar[/code]'))
+    #p = create(use_pygments=False)
+    #print (p('[code]foo\nbar[/code]'))

     #print render_bbcode("[b]For the lazy, use the http://www.willmcgugan.com render_bbcode function.[/b]")

+    smarkup = create()
+    smarkup.add_tag(SectionTag, 'section')
+
+    test = """Hello, World.[b][i]This in italics
+[section sidebar]This is the [b]sidebar[/b][/section]
+[section footer]
+This is the footer
+[/section]
+More text"""
+
+    print smarkup(test, paragraphs=True, clean=False)
+    tag_data = {}
+    print smarkup(test, tag_data=tag_data, paragraphs=True, clean=True)
+    print tag_data
+
 def _run_unittests():
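If the demo above works as the SectionTag docstring describes, the [section ...] contents never appear in the returned html; they end up in the caller-supplied tag_data dictionary instead, so the final print tag_data should show something along the lines of:

    # {'ParagraphTag.level': ...,
    #  'sections': {u'sidebar': [u'This is the [b]sidebar[/b]'],
    #               u'footer': [u'\nThis is the footer\n']}}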
@@ -1159,6 +1385,18 @@ def _run_unittests():

     class TestPostmarkup(unittest.TestCase):

+        def testcleanuphtml(self):
+
+            postmarkup = create()
+
+            tests = [("""\n<p>\n </p>\n""", ""),
+                     ("""<b>\n\n<i>   </i>\n</b>Test""", "Test"),
+                     ("""<p id="test">Test</p>""", """<p id="test">Test</p>"""),]
+
+            for test, result in tests:
+                self.assertEqual(PostMarkup.cleanup_html(test).strip(), result)
+
+
         def testsimpletag(self):

             postmarkup = create()
@@ -1194,6 +1432,7 @@ def _run_unittests():
                      ('[link]http://www.willmcgugan.com[/link]', '<a href="http://www.willmcgugan.com">http://www.willmcgugan.com</a>')
                      ]

+
             for test, result in tests:
                 self.assertEqual(postmarkup(test), result)

@@ -1202,6 +1441,43 @@ def _run_unittests():
     unittest.TextTestRunner(verbosity=2).run(suite)


+def _ff_test():
+
+    def ff1(post, pos, c1, c2):
+        f1 = post.find(c1, pos)
+        f2 = post.find(c2, pos)
+        if f1 == -1:
+            return f2
+        if f2 == -1:
+            return f1
+        return min(f1, f2)
+
+    re_ff=re.compile('a|b', re.UNICODE)
+
+    def ff2(post, pos, c1, c2):
+        try:
+            return re_ff.search(post).group(0)
+        except AttributeError:
+            return -1
+
+    text = u"sdl;fk;sdlfks;dflksd;flksdfsdfwerwerwgwegwegwegwegwegegwweggewwegwegwegwettttttttttttttttttttttttttttttttttgggggggggg;slbdfkwelrkwelrkjal;sdfksdl;fksdf;lb"
+
+    REPEAT = 100000
+
+    from time import time
+
+    start = time()
+    for n in xrange(REPEAT):
+        ff1(text, 0, "a", "b")
+    end = time()
+    print end - start
+
+    start = time()
+    for n in xrange(REPEAT):
+        ff2(text, 0, "a", "b")
+    end = time()
+    print end - start
+
+
 if __name__ == "__main__":
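The hand-rolled timing loops in _ff_test can also be expressed with the standard timeit module, which handles the clock and repetition; an equivalent sketch (the test string here is a placeholder):

    from timeit import Timer

    setup = "import re; re_ff = re.compile(u'a|b', re.UNICODE); text = u'xyz' * 200 + u'ab'"
    print Timer("re_ff.search(text)", setup).timeit(100000)
    print Timer("min(text.find(u'a'), text.find(u'b'))", setup).timeit(100000)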