2013-02-05 14:16:51 -05:00
( function ( ) {
// Pick the object the sanitizer API is attached to (`output`) and the
// Converter constructor it builds on, depending on the host environment.
var output, Converter;
if (typeof exports !== "object" || typeof require !== "function") {
    // No CommonJS loader available: read the shared Markdown object from
    // `window`. It must already exist and expose a `Converter` property.
    output = window.Markdown;
    Converter = output.Converter;
} else {
    // CommonJS (e.g. Node.js) module: export onto `exports` and load the
    // converter from the sibling module.
    output = exports;
    Converter = require("./Markdown.Converter").Converter;
}
2013-02-25 19:42:20 +03:00
2013-02-05 14:16:51 -05:00
// Create a new Converter with two "postConversion" hooks registered, in this
// order: sanitizeHtml, then balanceTags.
// NOTE(review): balanceTags documents that it assumes already-sanitized
// input, so this presumably relies on chained hooks running in registration
// order -- confirm against the Converter's hook implementation.
output.getSanitizingConverter = function () {
    var sanitizing = new Converter();
    var postConversionHooks = [sanitizeHtml, balanceTags];
    for (var i = 0; i < postConversionHooks.length; i++) {
        sanitizing.hooks.chain("postConversion", postConversionHooks[i]);
    }
    return sanitizing;
};
// Pass every tag-like fragment of `html` through sanitizeTag and substitute
// whatever it returns; text outside such fragments is left untouched.
// A fragment starts at "<" and runs up to and including the next ">", or to
// the end of the string when no ">" follows.
function sanitizeHtml(html) {
    var tagLike = /<[^>]*>?/gi;
    return html.replace(tagLike, function (fragment) {
        return sanitizeTag(fragment);
    });
}
// (tags that can be opened/closed) | (tags that stand alone)
var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;
// <a href="url..." optional title>|</a>
var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;
// <img src="url..." optional width optional height optional alt optional title
var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;

// Decide whether a single tag-like fragment may be kept.
//   tag     - the fragment to check, e.g. "<b>" or '<a href="/x">'
//   returns - `tag` unchanged when it matches one of the three whitelists
//             above (matching is anchored to the whole fragment and is
//             case-insensitive), otherwise the empty string.
function sanitizeTag(tag) {
    // None of the whitelist patterns is global, so test() carries no
    // lastIndex state between calls.
    if (basic_tag_whitelist.test(tag) || a_white.test(tag) || img_white.test(tag))
        return tag;
    return "";
}
/// <summary>
/// attempt to balance HTML tags in the html string
/// by removing any unmatched opening or closing tags
/// IMPORTANT: we *assume* HTML has *already* been
/// sanitized and is safe/sane before balancing!
///
/// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
/// </summary>
/// <param name="html">the HTML string to balance</param>
/// <returns>
/// html with every orphaned tag deleted; the input is returned unchanged
/// when it contains no tags or no orphans. Pairing only looks at tag names:
/// an opening tag is paired with the first not-yet-paired closing tag of the
/// same name that follows it, so incorrect nesting is not repaired.
/// </returns>
function balanceTags(html) {
    // loose equality kept on purpose so existing callers see no change
    if (html == "")
        return "";

    var re = /<\/?\w+[^>]*(\s|$|>)/g;

    // Collect tags from the string exactly as given: the Nth entry here must
    // be the Nth match seen by the replace() pass at the end, and matching a
    // lower-cased copy instead can yield a different set of matches for some
    // non-ASCII characters.
    var matches = html.match(re) || [];

    // no HTML tags present? nothing to do; exit now
    var tagcount = matches.length;
    if (tagcount === 0)
        return html;

    // lower-case each tag individually; this makes
    // our case insensitive comparisons easier
    var tags = [];
    for (var i = 0; i < tagcount; i++)
        tags[i] = matches[i].toLowerCase();

    var ignoredtags = "<p><img><br><li><hr>";
    var tagpaired = [];
    var tagremove = [];
    var needsRemoval = false;
    var tagname, tag, closing, match, ctag, ntag;

    // loop through matched tags in forward order
    for (ctag = 0; ctag < tagcount; ctag++) {
        // skip any already paired tags
        if (tagpaired[ctag])
            continue;

        tag = tags[ctag];

        // Read the name through a capture group. Every entry matched `re`,
        // so it starts with "<", an optional "/", and at least one word
        // character; unlike a `.*`-based replace this also works when the
        // tag contains a line break.
        tagname = /^<\/?(\w+)/.exec(tag)[1];

        // skip tags in our ignore list; assume they're self-closed
        // (plain substring lookup: nothing is compiled as a pattern)
        if (ignoredtags.indexOf("<" + tagname + ">") > -1)
            continue;

        match = -1;
        if (tag.charAt(1) !== "/") {
            // this is an opening tag
            // search forwards (next tags), look for closing tags
            closing = "</" + tagname + ">";
            for (ntag = ctag + 1; ntag < tagcount; ntag++) {
                if (!tagpaired[ntag] && tags[ntag] === closing) {
                    match = ntag;
                    break;
                }
            }
        }

        if (match === -1)
            needsRemoval = tagremove[ctag] = true; // mark for removal
        else
            tagpaired[match] = true; // mark paired
    }

    if (!needsRemoval)
        return html;

    // delete all orphaned tags from the string
    var seen = 0;
    return html.replace(re, function (tagText) {
        return tagremove[seen++] ? "" : tagText;
    });
}
} ) ( ) ;