# -*- coding: UTF-8 -*-

"""
Post Markup
Author: Will McGugan (http://www.willmcgugan.com)
"""

__version__ = "1.1.4"

import re
from urllib import quote, unquote, quote_plus, urlencode
from urlparse import urlparse, urlunparse

pygments_available = True
try:
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name, ClassNotFound
    from pygments.formatters import HtmlFormatter
except ImportError:
    # Make Pygments optional
    pygments_available = False


def annotate_link(domain):
    """This function is called by the url tag. Override to disable or change behaviour.

    domain -- Domain parsed from url

    """
    return u" [%s]" % _escape(domain)
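

# Override sketch (an assumed usage pattern, not documented API; the module
# name `postmarkup` is an assumption): rebind the module-level hook before
# rendering, e.g.
#
#   import postmarkup
#   postmarkup.annotate_link = lambda domain: u""  # disable annotations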


_re_url = re.compile(r"((https?):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)", re.MULTILINE|re.UNICODE)

_re_html = re.compile(r'<.*?>|&.*?;', re.UNICODE)

def textilize(s):
    """Remove HTML tags and entities from a string."""
    return _re_html.sub("", s)

_re_excerpt = re.compile(r'\[".*?\]+?.*?\[/".*?\]+?', re.DOTALL|re.UNICODE)
_re_remove_markup = re.compile(r'\[.*?\]', re.DOTALL|re.UNICODE)

_re_break_groups = re.compile(r'\n+', re.DOTALL|re.UNICODE)


def get_excerpt(post):
    """Returns an excerpt between ["] and [/"]

    post -- BBCode string"""

    match = _re_excerpt.search(post)
    if match is None:
        return ""
    excerpt = match.group(0)
    excerpt = excerpt.replace(u'\n', u"<br/>")
    return _re_remove_markup.sub("", excerpt)


def strip_bbcode(bbcode):
    """Strips bbcode tags from a string.

    bbcode -- A string to remove tags from

    """
    return u"".join([t[1] for t in PostMarkup.tokenize(bbcode)
                     if t[0] == PostMarkup.TOKEN_TEXT])
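

# Usage sketch:
#
#   strip_bbcode(u"[b]Hello, [i]World[/i]![/b]")  # -> u"Hello, World!"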


def create(include=None, exclude=None, use_pygments=True, **kwargs):
    """Create a postmarkup object that converts bbcode to XML snippets. Note
    that creating postmarkup objects is _not_ threadsafe, but rendering the
    html _is_ threadsafe. So typically you will need just one postmarkup
    instance to render the bbcode across threads.

    include -- List or similar iterable containing the names of the tags to use
               If omitted, all tags will be used
    exclude -- List or similar iterable containing the names of the tags to exclude.
               If omitted, no tags will be excluded
    use_pygments -- If True, Pygments (http://pygments.org/) will be used for the code tag,
                    otherwise it will use <pre>code</pre>
    kwargs -- Remaining keyword arguments are passed to tag constructors.

    """

    postmarkup = PostMarkup()
    postmarkup_add_tag = postmarkup.tag_factory.add_tag

    def add_tag(tag_class, name, *args, **kwargs):
        if include is None or name in include:
            if exclude is not None and name in exclude:
                return
            postmarkup_add_tag(tag_class, name, *args, **kwargs)

    add_tag(SimpleTag, 'b', 'strong')
    add_tag(SimpleTag, 'i', 'em')
    add_tag(SimpleTag, 'u', 'u')
    add_tag(SimpleTag, 's', 'strike')

    add_tag(LinkTag, 'link', **kwargs)
    add_tag(LinkTag, 'url', **kwargs)

    add_tag(QuoteTag, 'quote')

    add_tag(SearchTag, u'wiki',
            u"http://en.wikipedia.org/wiki/Special:Search?search=%s", u'wikipedia.com', **kwargs)
    add_tag(SearchTag, u'google',
            u"http://www.google.com/search?hl=en&q=%s&btnG=Google+Search", u'google.com', **kwargs)
    add_tag(SearchTag, u'dictionary',
            u"http://dictionary.reference.com/browse/%s", u'dictionary.com', **kwargs)
    add_tag(SearchTag, u'dict',
            u"http://dictionary.reference.com/browse/%s", u'dictionary.com', **kwargs)

    add_tag(ImgTag, u'img')
    add_tag(ListTag, u'list')
    add_tag(ListItemTag, u'*')

    add_tag(SizeTag, u"size")
    add_tag(ColorTag, u"color")
    add_tag(CenterTag, u"center")

    if use_pygments:
        assert pygments_available, "Install Pygments (http://pygments.org/) or call create with use_pygments=False"
        add_tag(PygmentsCodeTag, u'code', **kwargs)
    else:
        add_tag(CodeTag, u'code', **kwargs)

    add_tag(ParagraphTag, u"p")

    return postmarkup
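

# Usage sketch:
#
#   markup = create(exclude=['img'], use_pygments=False, annotate_links=False)
#   markup(u'[b]Hello[/b]')  # -> u'<strong>Hello</strong>'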


class TagBase(object):

    def __init__(self, name, enclosed=False, auto_close=False, inline=False, strip_first_newline=False, **kwargs):
        """Base class for all tags.

        name -- The name of the bbcode tag
        enclosed -- True if the contents of the tag should not be bbcode processed.
        auto_close -- True if the tag is standalone and does not require a close tag.
        inline -- True if the tag generates an inline html tag.
        strip_first_newline -- True if the first newline of the contents should be stripped.

        """
        self.name = name
        self.enclosed = enclosed
        self.auto_close = auto_close
        self.inline = inline
        self.strip_first_newline = strip_first_newline

        self.open_pos = None
        self.close_pos = None
        self.open_node_index = None
        self.close_node_index = None

    def open(self, parser, params, open_pos, node_index):
        """ Called when the open tag is initially encountered. """
        self.params = params
        self.open_pos = open_pos
        self.open_node_index = node_index

    def close(self, parser, close_pos, node_index):
        """ Called when the close tag is initially encountered. """
        self.close_pos = close_pos
        self.close_node_index = node_index

    def render_open(self, parser, node_index):
        """ Called to render the open tag. """
        pass

    def render_close(self, parser, node_index):
        """ Called to render the close tag. """
        pass

    def get_contents(self, parser):
        """Returns the string between the open and close tag."""
        return parser.markup[self.open_pos:self.close_pos]

    def get_contents_text(self, parser):
        """Returns the string between the open and close tag, minus bbcode tags."""
        return u"".join(parser.get_text_nodes(self.open_node_index, self.close_node_index))

    def skip_contents(self, parser):
        """Skips the contents of a tag while rendering."""
        parser.skip_to_node(self.close_node_index)

    def __str__(self):
        return '[%s]' % self.name
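

# A minimal custom tag is a TagBase subclass that emits its own open/close
# html. A sketch (the [red] tag here is hypothetical, not part of the
# default set):
#
#   class RedTag(TagBase):
#       def __init__(self, name, **kwargs):
#           TagBase.__init__(self, name, inline=True)
#       def render_open(self, parser, node_index):
#           return u'<span style="color:red;">'
#       def render_close(self, parser, node_index):
#           return u'</span>'
#
#   markup = create()
#   markup.add_tag(RedTag, 'red')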


class SimpleTag(TagBase):

    """A tag that can be rendered with a simple substitution. """

    def __init__(self, name, html_name, **kwargs):
        """ html_name -- the html tag to substitute."""
        TagBase.__init__(self, name, inline=True)
        self.html_name = html_name

    def render_open(self, parser, node_index):
        return u"<%s>" % self.html_name

    def render_close(self, parser, node_index):
        return u"</%s>" % self.html_name


class DivStyleTag(TagBase):

    """A simple tag that is replaced with a div and a style."""

    def __init__(self, name, style, value, **kwargs):
        TagBase.__init__(self, name)
        self.style = style
        self.value = value

    def render_open(self, parser, node_index):
        return u'<div style="%s:%s;">' % (self.style, self.value)

    def render_close(self, parser, node_index):
        return u'</div>'


class LinkTag(TagBase):

    _safe_chars = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                            'abcdefghijklmnopqrstuvwxyz'
                            '0123456789'
                            '_.-=/&?:%&')

    _re_domain = re.compile(r"//([a-z0-9-\.]*)", re.UNICODE)

    def __init__(self, name, annotate_links=True, **kwargs):
        TagBase.__init__(self, name, inline=True)
        self.annotate_links = annotate_links

    def render_open(self, parser, node_index):

        self.domain = u''
        tag_data = parser.tag_data
        nest_level = tag_data['link_nest_level'] = tag_data.setdefault('link_nest_level', 0) + 1

        if nest_level > 1:
            return u""

        if self.params:
            url = self.params.strip()
        else:
            url = self.get_contents_text(parser).strip()
            url = _unescape(url)

        self.domain = ""

        if u"javascript:" in url.lower():
            return ""

        if ':' not in url:
            url = 'http://' + url

        scheme, uri = url.split(':', 1)

        if scheme not in ['http', 'https']:
            return u''

        try:
            domain = self._re_domain.search(uri.lower()).group(1)
        except AttributeError:
            # No domain found; re.search returned None
            return u''

        domain = domain.lower()
        if domain.startswith('www.'):
            domain = domain[4:]

        def percent_encode(s):
            safe_chars = self._safe_chars
            def replace(c):
                if c not in safe_chars:
                    return "%%%02X" % ord(c)
                else:
                    return c
            return "".join([replace(c) for c in s])

        self.url = percent_encode(url.encode('utf-8', 'replace'))
        self.domain = domain

        if not self.url:
            return u""

        if self.domain:
            return u'<a href="%s">' % self.url
        else:
            return u""

    def render_close(self, parser, node_index):

        tag_data = parser.tag_data
        tag_data['link_nest_level'] -= 1

        if tag_data['link_nest_level'] > 0:
            return u''

        if self.domain:
            return u'</a>' + self.annotate_link(self.domain)
        else:
            return u''

    def annotate_link(self, domain=None):
        if domain and self.annotate_links:
            return annotate_link(domain)
        else:
            return u""
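

# Rendering sketch for the tag above (with the default annotate_links=True):
#
#   u'[url]www.example.org[/url]' renders as
#   u'<a href="http://www.example.org">www.example.org</a> [example.org]'
#
# A missing scheme defaults to http, and a leading 'www.' is stripped from
# the domain used for the annotation.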


class QuoteTag(TagBase):

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name, strip_first_newline=True)

    def open(self, parser, *args):
        TagBase.open(self, parser, *args)

    def close(self, parser, *args):
        TagBase.close(self, parser, *args)

    def render_open(self, parser, node_index):
        if self.params:
            return u'<blockquote><em>%s</em><br/>' % (PostMarkup.standard_replace(self.params))
        else:
            return u'<blockquote>'

    def render_close(self, parser, node_index):
        return u"</blockquote>"


class SearchTag(TagBase):

    def __init__(self, name, url, label="", annotate_links=True, **kwargs):
        TagBase.__init__(self, name, inline=True)
        self.url = url
        self.label = label
        self.annotate_links = annotate_links

    def render_open(self, parser, node_index):

        if self.params:
            search = self.params
        else:
            search = self.get_contents(parser)
        link = u'<a href="%s">' % self.url
        if u'%' in link:
            return link % quote_plus(search.encode("UTF-8"))
        else:
            return link

    def render_close(self, parser, node_index):

        if self.label:
            if self.annotate_links:
                return u'</a>' + annotate_link(self.label)
            else:
                return u'</a>'
        else:
            return u''


class PygmentsCodeTag(TagBase):

    def __init__(self, name, pygments_line_numbers=False, **kwargs):
        TagBase.__init__(self, name, enclosed=True, strip_first_newline=True)
        self.line_numbers = pygments_line_numbers

    def render_open(self, parser, node_index):

        contents = self.get_contents(parser)
        self.skip_contents(parser)

        try:
            lexer = get_lexer_by_name(self.params, stripall=True)
        except ClassNotFound:
            contents = _escape(contents)
            return '<div class="code"><pre>%s</pre></div>' % contents

        formatter = HtmlFormatter(linenos=self.line_numbers, cssclass="code")
        return highlight(contents, lexer, formatter)


class CodeTag(TagBase):

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name, enclosed=True, strip_first_newline=True)

    def render_open(self, parser, node_index):

        contents = _escape_no_breaks(self.get_contents(parser))
        self.skip_contents(parser)
        return '<div class="code"><pre>%s</pre></div>' % contents
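

# Because code tags are 'enclosed', their contents are not bbcode-processed.
# Sketch (non-Pygments path):
#
#   markup = create(use_pygments=False)
#   markup(u'[code]x < 1[/code]')
#   # -> u'<div class="code"><pre>x &lt; 1</pre></div>'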


class ImgTag(TagBase):

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name, inline=True)

    def render_open(self, parser, node_index):

        contents = self.get_contents(parser)
        self.skip_contents(parser)

        contents = strip_bbcode(contents).replace(u'"', "%22")

        return u'<img src="%s"></img>' % contents


class ListTag(TagBase):

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name, strip_first_newline=True)

    def open(self, parser, params, open_pos, node_index):
        TagBase.open(self, parser, params, open_pos, node_index)

    def close(self, parser, close_pos, node_index):
        TagBase.close(self, parser, close_pos, node_index)

    def render_open(self, parser, node_index):

        self.close_tag = u""

        tag_data = parser.tag_data
        tag_data.setdefault("ListTag.count", 0)

        if tag_data["ListTag.count"]:
            return u""

        tag_data["ListTag.count"] += 1

        tag_data["ListItemTag.initial_item"] = True

        if self.params == "1":
            self.close_tag = u"</li></ol>"
            return u"<ol><li>"
        elif self.params == "a":
            self.close_tag = u"</li></ol>"
            return u'<ol style="list-style-type: lower-alpha;"><li>'
        elif self.params == "A":
            self.close_tag = u"</li></ol>"
            return u'<ol style="list-style-type: upper-alpha;"><li>'
        else:
            self.close_tag = u"</li></ul>"
            return u"<ul><li>"

    def render_close(self, parser, node_index):

        tag_data = parser.tag_data
        tag_data["ListTag.count"] -= 1

        return self.close_tag


class ListItemTag(TagBase):

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name)
        self.closed = False

    def render_open(self, parser, node_index):

        tag_data = parser.tag_data
        if not tag_data.setdefault("ListTag.count", 0):
            return u""

        if tag_data["ListItemTag.initial_item"]:
            tag_data["ListItemTag.initial_item"] = False
            return

        return u"</li><li>"
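

# List rendering sketch ([*] is the list item tag; nested lists are not
# supported, hence the ListTag.count guard above):
#
#   markup = create()
#   markup(u'[list][*]Apples[*]Oranges[/list]')
#   # -> u'<ul><li>Apples</li><li>Oranges</li></ul>'
#   markup(u'[list=1][*]One[*]Two[/list]')
#   # -> u'<ol><li>One</li><li>Two</li></ol>'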


class SizeTag(TagBase):

    valid_chars = frozenset("0123456789")

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name, inline=True)

    def render_open(self, parser, node_index):

        try:
            self.size = int("".join([c for c in self.params if c in self.valid_chars]))
        except ValueError:
            self.size = None

        if self.size is None:
            return u""

        self.size = self.validate_size(self.size)

        return u'<span style="font-size:%spx">' % self.size

    def render_close(self, parser, node_index):

        if self.size is None:
            return u""

        return u'</span>'

    def validate_size(self, size):
        size = min(64, size)
        size = max(4, size)
        return size


class ColorTag(TagBase):

    valid_chars = frozenset("#0123456789abcdefghijklmnopqrstuvwxyz")

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name, inline=True)

    def render_open(self, parser, node_index):

        valid_chars = self.valid_chars
        try:
            color = self.params.split()[0].lower()
        except IndexError:
            # No color parameter supplied, e.g. [color]text[/color]
            self.color = u""
            return u""
        self.color = "".join([c for c in color if c in valid_chars])

        if not self.color:
            return u""

        return u'<span style="color:%s">' % self.color

    def render_close(self, parser, node_index):

        if not self.color:
            return u''
        return u'</span>'


class CenterTag(TagBase):

    def render_open(self, parser, node_index, **kwargs):
        return u'<div style="text-align:center;">'

    def render_close(self, parser, node_index):
        return u'</div>'


class ParagraphTag(TagBase):

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name)

    def render_open(self, parser, node_index, **kwargs):

        tag_data = parser.tag_data
        level = tag_data.setdefault('ParagraphTag.level', 0)

        ret = []
        if level > 0:
            ret.append(u'</p>')
            tag_data['ParagraphTag.level'] -= 1

        ret.append(u'<p>')
        tag_data['ParagraphTag.level'] += 1
        return u''.join(ret)

    def render_close(self, parser, node_index):

        tag_data = parser.tag_data
        level = tag_data.setdefault('ParagraphTag.level', 0)

        if not level:
            return u''

        tag_data['ParagraphTag.level'] -= 1

        return u'</p>'


class SectionTag(TagBase):

    """A specialised tag that stores its contents in a dictionary. Can be
    used to define extra content areas.

    """

    def __init__(self, name, **kwargs):
        TagBase.__init__(self, name, enclosed=True)

    def render_open(self, parser, node_index):

        self.section_name = self.params.strip().lower().replace(u' ', u'_')

        contents = self.get_contents(parser)
        self.skip_contents(parser)

        tag_data = parser.tag_data
        sections = tag_data.setdefault('sections', {})

        sections.setdefault(self.section_name, []).append(contents)

        return u''
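

# Usage sketch (SectionTag is not registered by create(); add it yourself and
# pass your own tag_data dict to collect the sections, as _tests() below does):
#
#   markup = create()
#   markup.add_tag(SectionTag, 'section')
#   tag_data = {}
#   markup(u'[section sidebar]Hello[/section]', tag_data=tag_data)
#   tag_data['sections']  # -> {u'sidebar': [u'Hello']}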


# http://effbot.org/zone/python-replace.htm
class MultiReplace:

    def __init__(self, repl_dict):

        # string to string mapping; use a regular expression
        keys = repl_dict.keys()
        keys.sort(reverse=True)  # lexical order
        pattern = u"|".join([re.escape(key) for key in keys])
        self.pattern = re.compile(pattern)
        self.dict = repl_dict

    def replace(self, s):
        # apply replacement dictionary to string
        def repl(match, get=self.dict.get):
            item = match.group(0)
            return get(item, item)
        return self.pattern.sub(repl, s)

    __call__ = replace
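

# Usage sketch:
#
#   replace = MultiReplace({u'cat': u'dog', u'ca': u'xx'})
#   replace(u'cat car')  # -> u'dog xxr' (longer keys sort first, so they win)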


def _escape(s):
    return PostMarkup.standard_replace(s.rstrip('\n'))

def _escape_no_breaks(s):
    return PostMarkup.standard_replace_no_break(s.rstrip('\n'))

def _unescape(s):
    return PostMarkup.standard_unreplace(s)


class TagFactory(object):

    def __init__(self):
        self.tags = {}

    @classmethod
    def tag_factory_callable(cls, tag_class, name, *args, **kwargs):
        """
        Returns a callable that returns a new tag instance.
        """
        def make():
            return tag_class(name, *args, **kwargs)
        return make

    def add_tag(self, cls, name, *args, **kwargs):
        self.tags[name] = self.tag_factory_callable(cls, name, *args, **kwargs)

    def __getitem__(self, name):
        return self.tags[name]()

    def __contains__(self, name):
        return name in self.tags

    def get(self, name, default=None):
        if name in self.tags:
            return self.tags[name]()
        return default


class _Parser(object):

    """ This is an interface to the parser, used by Tag classes. """

    def __init__(self, post_markup, tag_data=None):

        self.pm = post_markup
        if tag_data is None:
            self.tag_data = {}
        else:
            self.tag_data = tag_data
        self.render_node_index = 0

    def skip_to_node(self, node_index):
        """ Skips to a node, ignoring intermediate nodes. """
        assert node_index is not None, "Node index must be non-None"
        self.render_node_index = node_index

    def get_text_nodes(self, node1, node2):
        """ Retrieves the text nodes between two node indices. """
        if node2 is None:
            node2 = node1 + 1
        return [node for node in self.nodes[node1:node2] if not callable(node)]

    def begin_no_breaks(self):
        """Disables replacing of newlines with break tags at the start and end of text nodes.
        Can only be called from a tag's 'open' method.

        """
        assert self.phase == 1, "Cannot be called from render_open or render_close"
        self.no_breaks_count += 1

    def end_no_breaks(self):
        """Re-enables auto-replacing of newlines with break tags (see begin_no_breaks)."""
        assert self.phase == 1, "Cannot be called from render_open or render_close"
        if self.no_breaks_count:
            self.no_breaks_count -= 1


class PostMarkup(object):

    standard_replace = MultiReplace({u'<': u'&lt;',
                                     u'>': u'&gt;',
                                     u'&': u'&amp;',
                                     u'\n': u'<br/>'})

    standard_unreplace = MultiReplace({u'&lt;': u'<',
                                       u'&gt;': u'>',
                                       u'&amp;': u'&'})

    standard_replace_no_break = MultiReplace({u'<': u'&lt;',
                                              u'>': u'&gt;',
                                              u'&': u'&amp;'})

    TOKEN_TAG, TOKEN_PTAG, TOKEN_TEXT = range(3)

    _re_end_eq = re.compile(u"\]|\=", re.UNICODE)
    _re_quote_end = re.compile(u'\"|\]', re.UNICODE)

    # I tried to use RE's. Really I did.
    @classmethod
    def tokenize(cls, post):

        re_end_eq = cls._re_end_eq
        re_quote_end = cls._re_quote_end

        text = True
        pos = 0

        def find_first(post, pos, re_ff):
            try:
                return re_ff.search(post, pos).start()
            except AttributeError:
                # No match; re.search returned None
                return -1

        TOKEN_TAG, TOKEN_PTAG, TOKEN_TEXT = range(3)

        post_find = post.find
        while True:

            brace_pos = post_find(u'[', pos)
            if brace_pos == -1:
                if pos < len(post):
                    yield TOKEN_TEXT, post[pos:], pos, len(post)
                return
            if brace_pos - pos > 0:
                yield TOKEN_TEXT, post[pos:brace_pos], pos, brace_pos

            pos = brace_pos
            end_pos = pos + 1

            open_tag_pos = post_find(u'[', end_pos)
            end_pos = find_first(post, end_pos, re_end_eq)
            if end_pos == -1:
                yield TOKEN_TEXT, post[pos:], pos, len(post)
                return

            if open_tag_pos != -1 and open_tag_pos < end_pos:
                yield TOKEN_TEXT, post[pos:open_tag_pos], pos, open_tag_pos
                end_pos = open_tag_pos
                pos = end_pos
                continue

            if post[end_pos] == ']':
                yield TOKEN_TAG, post[pos:end_pos+1], pos, end_pos+1
                pos = end_pos+1
                continue

            if post[end_pos] == '=':
                try:
                    end_pos += 1
                    while post[end_pos] == ' ':
                        end_pos += 1
                    if post[end_pos] != '"':
                        end_pos = post_find(u']', end_pos+1)
                        if end_pos == -1:
                            return
                        yield TOKEN_TAG, post[pos:end_pos+1], pos, end_pos+1
                    else:
                        end_pos = find_first(post, end_pos, re_quote_end)
                        if end_pos == -1:
                            return
                        if post[end_pos] == '"':
                            end_pos = post_find(u'"', end_pos+1)
                            if end_pos == -1:
                                return
                            end_pos = post_find(u']', end_pos+1)
                            if end_pos == -1:
                                return
                            yield TOKEN_PTAG, post[pos:end_pos+1], pos, end_pos+1
                        else:
                            yield TOKEN_TAG, post[pos:end_pos+1], pos, end_pos+1
                    pos = end_pos+1
                except IndexError:
                    return
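
    # Tokenize sketch: tokenize() yields (token_type, token_text, start, end)
    # tuples, e.g.
    #
    #   list(PostMarkup.tokenize(u'[b]Hi[/b]')) ->
    #       [(TOKEN_TAG, u'[b]', 0, 3),
    #        (TOKEN_TEXT, u'Hi', 3, 5),
    #        (TOKEN_TAG, u'[/b]', 5, 9)]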

    def add_tag(self, cls, name, *args, **kwargs):
        return self.tag_factory.add_tag(cls, name, *args, **kwargs)

    def tagify_urls(self, postmarkup):

        """ Surrounds urls with url bbcode tags. """

        def repl(match):
            return u'[url]%s[/url]' % match.group(0)

        text_tokens = []
        TOKEN_TEXT = PostMarkup.TOKEN_TEXT
        for tag_type, tag_token, start_pos, end_pos in self.tokenize(postmarkup):

            if tag_type == TOKEN_TEXT:
                text_tokens.append(_re_url.sub(repl, tag_token))
            else:
                text_tokens.append(tag_token)

        return u"".join(text_tokens)
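
    # e.g. tagify_urls(u'see http://example.org today')
    #      -> u'see [url]http://example.org[/url] today'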

    def __init__(self, tag_factory=None):
        self.tag_factory = tag_factory or TagFactory()

    def default_tags(self):
        """ Add some basic tags. """

        add_tag = self.tag_factory.add_tag

        add_tag(SimpleTag, u'b', u'strong')
        add_tag(SimpleTag, u'i', u'em')
        add_tag(SimpleTag, u'u', u'u')
        add_tag(SimpleTag, u's', u's')

    def get_supported_tags(self):
        """ Returns a list of the supported tags. """
        return sorted(self.tag_factory.tags.keys())

    def insert_paragraphs(self, post_markup):

        """Inserts paragraph tags in place of newlines. A more complex task than
        it may seem -- multiple newlines result in just one paragraph tag, and
        paragraph tags aren't inserted inside certain other tags (such as the
        code tag). Returns a postmarkup string.

        post_markup -- A string containing the raw postmarkup

        """

        parts = [u'[p]']
        tag_factory = self.tag_factory
        enclosed_count = 0

        TOKEN_TEXT = PostMarkup.TOKEN_TEXT
        TOKEN_TAG = PostMarkup.TOKEN_TAG

        for tag_type, tag_token, start_pos, end_pos in self.tokenize(post_markup):

            if tag_type == TOKEN_TEXT:
                if enclosed_count:
                    parts.append(post_markup[start_pos:end_pos])
                else:
                    txt = post_markup[start_pos:end_pos]
                    txt = _re_break_groups.sub(u'[p]', txt)
                    parts.append(txt)
                continue

            elif tag_type == TOKEN_TAG:
                tag_token = tag_token[1:-1].lstrip()
                if ' ' in tag_token:
                    tag_name = tag_token.split(u' ', 1)[0]
                else:
                    if '=' in tag_token:
                        tag_name = tag_token.split(u'=', 1)[0]
                    else:
                        tag_name = tag_token
            else:
                # TOKEN_PTAG, e.g. [tag="value"]
                tag_token = tag_token[1:-1].lstrip()
                tag_name = tag_token.split(u'=', 1)[0]

            tag_name = tag_name.strip().lower()

            end_tag = False
            if tag_name.startswith(u'/'):
                end_tag = True
                tag_name = tag_name[1:]

            tag = tag_factory.get(tag_name, None)
            if tag is not None and tag.enclosed:
                if end_tag:
                    enclosed_count -= 1
                else:
                    enclosed_count += 1

            parts.append(post_markup[start_pos:end_pos])

        new_markup = u"".join(parts)
        return new_markup
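
    # e.g. insert_paragraphs(u'one\n\ntwo') -> u'[p]one[p]two'
    # (runs of newlines collapse to a single [p])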

    # Matches simple blank tags containing only whitespace
    _re_blank_tags = re.compile(r"\<(\w+?)\>\s*\</\1\>")

    @classmethod
    def cleanup_html(cls, html):
        """Cleans up html. Currently only removes blank tags, i.e. tags containing only
        whitespace. Only applies to tags without attributes. Tag removal is done
        recursively until there are no more blank tags. So <strong><em></em></strong>
        would be completely removed.

        html -- A string containing (X)HTML

        """

        original_html = ''
        while original_html != html:
            original_html = html
            html = cls._re_blank_tags.sub(u"", html)
        return html
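
    # e.g. PostMarkup.cleanup_html(u'<strong><em></em></strong>ok') -> u'ok'
    # (<em></em> is removed first, which leaves <strong></strong> blank in turn)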

    def render_to_html(self,
                       post_markup,
                       encoding="ascii",
                       exclude_tags=None,
                       auto_urls=True,
                       paragraphs=False,
                       clean=True,
                       tag_data=None):

        """Converts post markup (ie. bbcode) to XHTML. This method is threadsafe,
        by virtue that the state is entirely stored on the stack.

        post_markup -- String containing bbcode.
        encoding -- Encoding of string, defaults to "ascii" if the string is not
                    already unicode.
        exclude_tags -- A collection of tag names to ignore.
        auto_urls -- If True, then urls will be wrapped with url bbcode tags.
        paragraphs -- If True then line breaks will be replaced with paragraph
                      tags, rather than break tags.
        clean -- If True, html will be run through the cleanup_html method.
        tag_data -- An optional dictionary to store tag data in. The default of
                    None will create a dictionary internally. Set this to your
                    own dictionary if you want to retrieve information from the
                    Tag Classes.

        """

        if not isinstance(post_markup, unicode):
            post_markup = unicode(post_markup, encoding, 'replace')

        if auto_urls:
            post_markup = self.tagify_urls(post_markup)

        if paragraphs:
            post_markup = self.insert_paragraphs(post_markup)

        parser = _Parser(self, tag_data=tag_data)
        parser.markup = post_markup

        if exclude_tags is None:
            exclude_tags = []

        tag_factory = self.tag_factory

        nodes = []
        parser.nodes = nodes

        parser.phase = 1
        parser.no_breaks_count = 0
        enclosed_count = 0
        open_stack = []
        tag_stack = []
        break_stack = []
        remove_next_newline = False

        def check_tag_stack(tag_name):
            for tag in reversed(tag_stack):
                if tag_name == tag.name:
                    return True
            return False

        def redo_break_stack():
            while break_stack:
                tag = break_stack.pop()
                open_tag(tag)
                tag_stack.append(tag)

        def break_inline_tags():
            while tag_stack:
                if tag_stack[-1].inline:
                    tag = tag_stack.pop()
                    close_tag(tag)
                    break_stack.append(tag)
                else:
                    break

        def open_tag(tag):
            def call(node_index):
                return tag.render_open(parser, node_index)
            nodes.append(call)

        def close_tag(tag):
            def call(node_index):
                return tag.render_close(parser, node_index)
            nodes.append(call)

        TOKEN_TEXT = PostMarkup.TOKEN_TEXT
        TOKEN_TAG = PostMarkup.TOKEN_TAG

        # Pass 1
        for tag_type, tag_token, start_pos, end_pos in self.tokenize(post_markup):

            raw_tag_token = tag_token

            if tag_type == TOKEN_TEXT:
                if parser.no_breaks_count:
                    tag_token = tag_token.strip()
                    if not tag_token:
                        continue
                if remove_next_newline:
                    tag_token = tag_token.lstrip(' ')
                    if tag_token.startswith('\n'):
                        tag_token = tag_token.lstrip(' ')[1:]
                        if not tag_token:
                            continue
                    remove_next_newline = False

                if tag_stack and tag_stack[-1].strip_first_newline:
                    tag_token = tag_token.lstrip()
                    tag_stack[-1].strip_first_newline = False
                    if not tag_token:
                        continue

                if not enclosed_count:
                    redo_break_stack()

                nodes.append(self.standard_replace(tag_token))
                continue

            elif tag_type == TOKEN_TAG:
                tag_token = tag_token[1:-1].lstrip()
                if ' ' in tag_token:
                    tag_name, tag_attribs = tag_token.split(u' ', 1)
                    tag_attribs = tag_attribs.strip()
                else:
                    if '=' in tag_token:
                        tag_name, tag_attribs = tag_token.split(u'=', 1)
                        tag_attribs = tag_attribs.strip()
                    else:
                        tag_name = tag_token
                        tag_attribs = u""
            else:
                # TOKEN_PTAG, e.g. [tag="value"]
                tag_token = tag_token[1:-1].lstrip()
                tag_name, tag_attribs = tag_token.split(u'=', 1)
                tag_attribs = tag_attribs.strip()[1:-1]

            tag_name = tag_name.strip().lower()

            end_tag = False
            if tag_name.startswith(u'/'):
                end_tag = True
                tag_name = tag_name[1:]

            if enclosed_count and tag_stack[-1].name != tag_name:
                continue

            if tag_name in exclude_tags:
                continue

            if not end_tag:

                tag = tag_factory.get(tag_name, None)
                if tag is None:
                    continue

                redo_break_stack()

                if not tag.inline:
                    break_inline_tags()

                tag.open(parser, tag_attribs, end_pos, len(nodes))
                if tag.enclosed:
                    enclosed_count += 1
                tag_stack.append(tag)

                open_tag(tag)

                if tag.auto_close:
                    tag = tag_stack.pop()
                    tag.close(parser, start_pos, len(nodes)-1)
                    close_tag(tag)

            else:

                if break_stack and break_stack[-1].name == tag_name:
                    tag = break_stack.pop()
                    tag.close(parser, start_pos, len(nodes))
                elif check_tag_stack(tag_name):
                    while tag_stack[-1].name != tag_name:
                        tag = tag_stack.pop()
                        break_stack.append(tag)
                        close_tag(tag)

                    tag = tag_stack.pop()
                    tag.close(parser, start_pos, len(nodes))
                    if tag.enclosed:
                        enclosed_count -= 1

                    close_tag(tag)

                    if not tag.inline:
                        remove_next_newline = True

        if tag_stack:
            redo_break_stack()
            while tag_stack:
                tag = tag_stack.pop()
                tag.close(parser, len(post_markup), len(nodes))
                if tag.enclosed:
                    enclosed_count -= 1
                close_tag(tag)

        parser.phase = 2
        # Pass 2
        parser.nodes = nodes

        text = []
        parser.render_node_index = 0
        while parser.render_node_index < len(parser.nodes):
            i = parser.render_node_index
            node_text = parser.nodes[i]
            if callable(node_text):
                node_text = node_text(i)
            if node_text is not None:
                text.append(node_text)
            parser.render_node_index += 1

        html = u"".join(text)
        if clean:
            html = self.cleanup_html(html)
        return html

    # A shortcut for render_to_html
    __call__ = render_to_html


_postmarkup = create(use_pygments=pygments_available)

def render_bbcode(bbcode,
                  encoding="ascii",
                  exclude_tags=None,
                  auto_urls=True,
                  paragraphs=False,
                  clean=True,
                  tag_data=None):

    """ Renders a bbcode string in to XHTML. This is a shortcut if you don't
    need to customize any tags.

    bbcode -- String containing bbcode.
    encoding -- Encoding of string, defaults to "ascii" if the string is not
                already unicode.
    exclude_tags -- A collection of tag names to ignore.
    auto_urls -- If True, then urls will be wrapped with url bbcode tags.
    paragraphs -- If True then line breaks will be replaced with paragraph
                  tags, rather than break tags.
    clean -- If True, html will be run through a cleanup_html method.
    tag_data -- An optional dictionary to store tag data in. The default of
                None will create a dictionary internally.

    """
    return _postmarkup(bbcode,
                       encoding,
                       exclude_tags=exclude_tags,
                       auto_urls=auto_urls,
                       paragraphs=paragraphs,
                       clean=clean,
                       tag_data=tag_data)
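

# e.g. render_bbcode(u'[i]Hello, World![/i]')  # -> u'<em>Hello, World!</em>'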


def _tests():

    import sys
    #sys.stdout=open('test.htm', 'w')

    post_markup = create(use_pygments=True)

    tests = []
    print """<link rel="stylesheet" href="code.css" type="text/css" />\n"""

    tests.append(']')
    tests.append('[')
    tests.append(':-[ Hello, [b]World[/b]')

    tests.append("[link=http://www.willmcgugan.com]My homepage[/link]")
    tests.append('[link="http://www.willmcgugan.com"]My homepage[/link]')
    tests.append("[link http://www.willmcgugan.com]My homepage[/link]")
    tests.append("[link]http://www.willmcgugan.com[/link]")

    tests.append(u"[b]Hello André[/b]")
    tests.append(u"[google]André[/google]")
    tests.append("[s]Strike through[/s]")
    tests.append("[b]bold [i]bold and italic[/b] italic[/i]")
    tests.append("[google]Will McGugan[/google]")
    tests.append("[wiki Will McGugan]Look up my name in Wikipedia[/wiki]")

    tests.append("[quote Will said...]BBCode is very cool[/quote]")

    tests.append("""[code python]
# A proxy object that calls a callback when converted to a string
class TagStringify(object):
    def __init__(self, callback, raw):
        self.callback = callback
        self.raw = raw
        r[b]=3
    def __str__(self):
        return self.callback()
    def __repr__(self):
        return self.__str__()
[/code]""")

    tests.append(u"[img]http://upload.wikimedia.org/wikipedia/commons"
                 "/6/61/Triops_longicaudatus.jpg[/img]")

    tests.append("[list][*]Apples[*]Oranges[*]Pears[/list]")
    tests.append("""[list=1]
[*]Apples
[*]Oranges
are not the only fruit
[*]Pears
[/list]""")
    tests.append("[list=a][*]Apples[*]Oranges[*]Pears[/list]")
    tests.append("[list=A][*]Apples[*]Oranges[*]Pears[/list]")

    long_test = """[b]Long test[/b]

Newline characters are converted to breaks."""\
"""Tags may be [b]ove[i]rl[/b]apped[/i].

[i]Open tags will be closed.
[b]Test[/b]"""

    tests.append(long_test)

    tests.append("[dict]Will[/dict]")

    tests.append("[code unknownlanguage]10 print 'In yr code'; 20 goto 10[/code]")

    tests.append("[url=http://www.google.com/coop/cse?cx=006850030468302103399%3Amqxv78bdfdo]CakePHP Google Groups[/url]")
    tests.append("[url=http://www.google.com/search?hl=en&safe=off&client=opera&rls=en&hs=pO1&q=python+bbcode&btnG=Search]Search for Python BBCode[/url]")
    #tests = []
    # Attempt to inject html in to unicode
    tests.append("[url=http://www.test.com/sfsdfsdf/ter?t=\"></a><h1>HACK</h1><a>\"]Test Hack[/url]")

    tests.append('Nested urls, i.e. [url][url]www.becontrary.com[/url][/url], are condensed in to a single tag.')

    tests.append(u'[google]ɸβfvθðsz[/google]')

    tests.append(u'[size 30]Hello, World![/size]')

    tests.append(u'[color red]This should be red[/color]')
    tests.append(u'[color #0f0]This should be green[/color]')
    tests.append(u"[center]This should be in the center!")

    tests.append('Nested urls, i.e. [url][url]www.becontrary.com[/url][/url], are condensed in to a single tag.')

    #tests = []
    tests.append('[b]Hello, [i]World[/b]! [/i]')

    tests.append('[b][center]This should be centered![/center][/b]')

    tests.append('[list][*]Hello[i][*]World![/i][/list]')

    tests.append("""[list=1]
[*]Apples
[*]Oranges
are not the only fruit
[*]Pears
[/list]""")

    tests.append("[b]urls such as http://www.willmcgugan.com are automatically converted to links[/b]")

    tests.append("""
[b]
[code python]
parser.markup[self.open_pos:self.close_pos]
[/code]
asdasdasdasdqweqwe
""")

    tests.append("""[list 1]
[*]Hello
[*]World
[/list]""")

    #tests = []
    tests.append("[b][p]Hello, [p]World")
    tests.append("[p][p][p]")

    tests.append("http://www.google.com/search?as_q=bbcode&btnG=%D0%9F%D0%BE%D0%B8%D1%81%D0%BA")

    #tests=["""[b]b[i]i[/b][/i]"""]

    for test in tests:
        print u"<pre>%s</pre>" % str(test.encode("ascii", "xmlcharrefreplace"))
        print u"<p>%s</p>" % str(post_markup(test).encode("ascii", "xmlcharrefreplace"))
        print u"<hr/>"
        print

    #print repr(post_markup('[url=<script>Attack</script>]Attack[/url]'))

    #print repr(post_markup('http://www.google.com/search?as_q=%D0%9F%D0%BE%D0%B8%D1%81%D0%BA&test=hai'))

    #p = create(use_pygments=False)
    #print (p('[code]foo\nbar[/code]'))

    #print render_bbcode("[b]For the lazy, use the http://www.willmcgugan.com render_bbcode function.[/b]")

    smarkup = create()
    smarkup.add_tag(SectionTag, 'section')

    test = """Hello, World.[b][i]This in italics
[section sidebar]This is the [b]sidebar[/b][/section]
[section footer]
This is the footer
[/section]
More text"""

    print smarkup(test, paragraphs=True, clean=False)
    tag_data = {}
    print smarkup(test, tag_data=tag_data, paragraphs=True, clean=True)
    print tag_data


def _run_unittests():

    # TODO: Expand tests for better coverage!

    import unittest

    class TestPostmarkup(unittest.TestCase):

        def testcleanuphtml(self):

            postmarkup = create()

            tests = [("""\n<p>\n </p>\n""", ""),
                     ("""<b>\n\n<i> </i>\n</b>Test""", "Test"),
                     ("""<p id="test">Test</p>""", """<p id="test">Test</p>"""),]

            for test, result in tests:
                self.assertEqual(PostMarkup.cleanup_html(test).strip(), result)

        def testsimpletag(self):

            postmarkup = create()

            tests = [('[b]Hello[/b]', "<strong>Hello</strong>"),
                     ('[i]Italic[/i]', "<em>Italic</em>"),
                     ('[s]Strike[/s]', "<strike>Strike</strike>"),
                     ('[u]underlined[/u]', "<u>underlined</u>"),
                     ]

            for test, result in tests:
                self.assertEqual(postmarkup(test), result)

        def testoverlap(self):

            postmarkup = create()

            tests = [('[i][b]Hello[/i][/b]', "<em><strong>Hello</strong></em>"),
                     ('[b]bold [u]both[/b] underline[/u]', '<strong>bold <u>both</u></strong><u> underline</u>')
                     ]

            for test, result in tests:
                self.assertEqual(postmarkup(test), result)

        def testlinks(self):

            postmarkup = create(annotate_links=False)

            tests = [('[link=http://www.willmcgugan.com]blog1[/link]', '<a href="http://www.willmcgugan.com">blog1</a>'),
                     ('[link="http://www.willmcgugan.com"]blog2[/link]', '<a href="http://www.willmcgugan.com">blog2</a>'),
                     ('[link http://www.willmcgugan.com]blog3[/link]', '<a href="http://www.willmcgugan.com">blog3</a>'),
                     ('[link]http://www.willmcgugan.com[/link]', '<a href="http://www.willmcgugan.com">http://www.willmcgugan.com</a>')
                     ]

            for test, result in tests:
                self.assertEqual(postmarkup(test), result)

    suite = unittest.TestLoader().loadTestsFromTestCase(TestPostmarkup)
    unittest.TextTestRunner(verbosity=2).run(suite)


def _ff_test():

    def ff1(post, pos, c1, c2):
        f1 = post.find(c1, pos)
        f2 = post.find(c2, pos)
        if f1 == -1:
            return f2
        if f2 == -1:
            return f1
        return min(f1, f2)

    re_ff = re.compile('a|b', re.UNICODE)

    def ff2(post, pos, c1, c2):
        try:
            return re_ff.search(post, pos).start()
        except AttributeError:
            # No match; re.search returned None
            return -1

    text = u"sdl;fk;sdlfks;dflksd;flksdfsdfwerwerwgwegwegwegwegwegegwweggewwegwegwegwettttttttttttttttttttttttttttttttttgggggggggg;slbdfkwelrkwelrkjal;sdfksdl;fksdf;lb"

    REPEAT = 100000

    from time import time

    start = time()
    for n in xrange(REPEAT):
        ff1(text, 0, "a", "b")
    end = time()
    print end - start

    start = time()
    for n in xrange(REPEAT):
        ff2(text, 0, "a", "b")
    end = time()
    print end - start


if __name__ == "__main__":

    _tests()
    _run_unittests()