/* LICENSE: http://code.google.com/p/pagedown/source/browse/LICENSE.txt */
// Namespace object for everything this file publishes. Inside a CommonJS
// module (e.g. Node.js) the module's own `exports` object is reused so the
// API is exported directly; anywhere else a fresh object is created.
var Markdown = (typeof exports === "object" && typeof require === "function")
    ? exports
    : {};
// The following text is included for historical reasons, but should
// be taken with a pinch of salt; it's not all true anymore.
//
// Wherever possible, Showdown is a straight, line-by-line port
// of the Perl version of Markdown.
//
// This is not a normal parser design; it's basically just a
// series of string substitutions. It's hard to read and
// maintain this way, but keeping Showdown close to the original
// design makes it easier to port new features.
//
// More importantly, Showdown behaves like markdown.pl in most
// edge cases. So web applications can do client-side preview
// in Javascript, and then build identical HTML on the server.
//
// This port needs the new RegExp functionality of ECMA 262,
// 3rd Edition (i.e. Javascript 1.5). Most modern web browsers
// should do fine. Even with the new regular expression features,
// We do a lot of work to emulate Perl's regex functionality.
// The tricky changes in this file mostly have the "attacklab:"
// label. Major or self-explanatory changes don't.
//
// Smart diff tools like Araxis Merge will be able to match up
// this file with markdown.pl in a useful way. A little tweaking
// helps: in a copy of markdown.pl, replace "#" with "//" and
// replace "$text" with "text". Be sure to ignore whitespace
// and line endings.
//
//
// Usage:
//
// var text = "Markdown *rocks*.";
//
// var converter = new Markdown.Converter();
// var html = converter.makeHtml(text);
//
// alert(html);
//
// Note: move the sample code to the bottom of this
// file before uncommenting it.
//
( function ( ) {
// Default hook implementation: hands its first argument back unchanged.
function identity(x) { return x; }

// Default implementation for predicate-style hooks: always answers false.
function returnFalse(x) { return false; }

//
// A bag of named plugin hooks. Each hook is stored as a function-valued
// property under its own name; a hook has to be registered with addNoop()
// or addFalse() before chain() or set() will accept it.
//
function HookCollection() { }

HookCollection.prototype = {

    // Appends `func` to the hook `hookname`. The chained hook first runs
    // the previously registered function, replaces the first argument with
    // its result, and then calls `func` with that argument list.
    // Throws an Error if no hook of that name has been registered.
    chain: function (hookname, func) {
        var original = this[hookname];
        if (!original)
            throw new Error("unknown hook " + hookname);

        if (original === identity) {
            // Nothing worth chaining onto: the no-op is simply replaced.
            this[hookname] = func;
        } else {
            this[hookname] = function (text) {
                var args = Array.prototype.slice.call(arguments, 0);
                args[0] = original.apply(null, args);
                return func.apply(null, args);
            };
        }
    },

    // Replaces the hook `hookname` with `func`, discarding whatever was
    // registered before. Throws an Error for an unregistered hook name.
    set: function (hookname, func) {
        if (!this[hookname])
            throw new Error("unknown hook " + hookname);
        this[hookname] = func;
    },

    // Registers `hookname` with the pass-through default.
    addNoop: function (hookname) {
        this[hookname] = identity;
    },

    // Registers `hookname` with the always-false default.
    addFalse: function (hookname) {
        this[hookname] = returnFalse;
    }
};
// Publish the HookCollection constructor on the Markdown namespace object.
Markdown . HookCollection = HookCollection ;
// g_urls and g_titles allow arbitrary user-entered strings as keys. This
// caused an exception (and hence stopped the rendering) when the user entered
// e.g. [push] or [__proto__]. Adding a prefix to the actual key prevents this
// (since no builtin property starts with "s_"). See
// http://meta.stackoverflow.com/questions/64655/strange-wmd-bug
// (granted, switching from Array() to Object() alone would have left only __proto__
// to be a problem)
//
// Minimal string-keyed store for user-supplied keys. Every key is stored
// under a property name carrying the "s_" prefix, so a key such as "push"
// or "__proto__" can never collide with a builtin property.
//
function SaveHash() { }

SaveHash.prototype = {

    // Stores `value` under the prefixed form of `key`.
    set: function (key, value) {
        var slot = "s_" + key;
        this[slot] = value;
    },

    // Returns the value stored for `key`, or undefined if there is none.
    get: function (key) {
        var slot = "s_" + key;
        return this[slot];
    }
};
Markdown . Converter = function ( ) {
// Hook registry of this converter; also published as `converter.hooks`.
var pluginHooks = this.hooks = new HookCollection();

// given a URL that was encountered by itself (without markup), should return the link text that's to be given to this link
pluginHooks.addNoop("plainLinkText");

// called with the original text as given to makeHtml. The result of this plugin hook is the actual markdown source that will be cooked
pluginHooks.addNoop("preConversion");

// called with the text once all normalizations have been completed (tabs to spaces, line endings, etc.), but before any conversion
// has taken place
pluginHooks.addNoop("postNormalization");

// Called with the text before / after creating block elements like code blocks and lists. Note that this is called recursively
// with inner content, e.g. it's called with the full text, and then only with the content of a blockquote. The inner
// call will receive outdented text.
pluginHooks.addNoop("preBlockGamut");
pluginHooks.addNoop("postBlockGamut");

// called with the text of a single block element before / after the span-level conversions (bold, code spans, etc.) have been made
pluginHooks.addNoop("preSpanGamut");
pluginHooks.addNoop("postSpanGamut");

// called with the final cooked HTML code. The result of this plugin hook is the actual output of makeHtml
pluginHooks.addNoop("postConversion");

//
// Private state of the converter instance:
//

// Global hashes, used by various utility routines
var g_urls;
var g_titles;
var g_html_blocks;

// Used to track when we're inside an ordered or unordered list
// (see _ProcessListItems() for details):
var g_list_level;
//
// Converts Markdown source to HTML.
//
//   text    - the Markdown source string
//   returns - the resulting HTML, after the postConversion hook has run
//   throws  - Error("Recursive call to converter.makeHtml") when invoked
//             while a conversion on this converter is still in progress
//
this.makeHtml = function (text) {

    //
    // Main function. The order in which other subs are called here is
    // essential. Link and image substitutions need to happen before
    // _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
    // and <img> tags get encoded.
    //

    // This will only happen if makeHtml on the same converter instance is called from a plugin hook.
    // Don't do that.
    if (g_urls)
        throw new Error("Recursive call to converter.makeHtml");

    // Create the private state objects.
    g_urls = new SaveHash();
    g_titles = new SaveHash();
    g_html_blocks = [];
    g_list_level = 0;

    try {
        text = pluginHooks.preConversion(text);

        // attacklab: Replace ~ with ~T
        // This lets us use tilde as an escape char to avoid md5 hashes
        // The choice of character is arbitrary; anything that isn't
        // magic in Markdown will work.
        text = text.replace(/~/g, "~T");

        // attacklab: Replace $ with ~D
        // RegExp interprets $ as a special character
        // when it's in a replacement string
        text = text.replace(/\$/g, "~D");

        // Standardize line endings
        text = text.replace(/\r\n/g, "\n"); // DOS to Unix
        text = text.replace(/\r/g, "\n"); // Mac to Unix

        // Make sure text begins and ends with a couple of newlines:
        text = "\n\n" + text + "\n\n";

        // Convert all tabs to spaces.
        text = _Detab(text);

        // Strip any lines consisting only of spaces and tabs.
        // This makes subsequent regexen easier to write, because we can
        // match consecutive blank lines with /\n+/ instead of something
        // contorted like /[ \t]*\n+/ .
        text = text.replace(/^[ \t]+$/mg, "");

        text = pluginHooks.postNormalization(text);

        // Turn block-level HTML blocks into hash entries
        text = _HashHTMLBlocks(text);

        // Strip link definitions, store in hashes.
        text = _StripLinkDefinitions(text);

        text = _RunBlockGamut(text);

        text = _UnescapeSpecialChars(text);

        // attacklab: Restore dollar signs
        // ("$$" in a replacement string inserts one literal "$")
        text = text.replace(/~D/g, "$$");

        // attacklab: Restore tildes
        text = text.replace(/~T/g, "~");

        text = pluginHooks.postConversion(text);
    } finally {
        // Drop the per-call state even when a hook or a conversion step
        // throws. Otherwise g_urls would stay set and every later call on
        // this converter would fail the recursion check above.
        g_html_blocks = g_titles = g_urls = null;
    }

    return text;
};
function _StripLinkDefinitions(text) {
    //
    // Strips link definitions from text, stores the URLs and titles in
    // hash references (g_urls / g_titles, keyed by the lower-cased id).
    // Returns the text with the definitions removed.
    //
    // Link defs are in the form: ^[id]: url "optional title"
    //
    // Pieces of the pattern, in order:
    //   ^[ ]{0,3}\[(.+)\]:                id = $1 (attacklab: g_tab_width - 1)
    //   [ \t]*\n?[ \t]*                   maybe *one* newline
    //   <?(\S+?)>?                        url = $2
    //   (?=\s|$)                          lookahead for whitespace (replaces a lookbehind)
    //   [ \t]*\n?[ \t]*                   maybe one newline
    //   ((\n*)["(](.+?)[")][ \t]*)?       optional title = $3; lines skipped = $4; title text = $5
    //   (?:\n+)
    //
    text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?(?=\s|$)[ \t]*\n?[ \t]*((\n*)["(](.+?)[")][ \t]*)?(?:\n+)/gm,
        function (wholeMatch, m1, m2, m3, m4, m5) {
            m1 = m1.toLowerCase(); // Link IDs are case-insensitive
            g_urls.set(m1, _EncodeAmpsAndAngles(m2));
            if (m4) {
                // Oops, found blank lines, so it's not a title.
                // Put back the parenthetical statement we stole.
                return m3;
            } else if (m5) {
                // The title ends up inside a double-quoted attribute.
                g_titles.set(m1, m5.replace(/"/g, "&quot;"));
            }

            // Completely remove the definition from the text
            return "";
        }
    );

    return text;
}
function _HashHTMLBlocks(text) {
    //
    // Replaces raw block-level HTML in `text` with placeholder markers by
    // running every match through hashElement(). Returns the new text.
    //
    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded in the two patterns below; the first list additionally
    // contains ins|del.

    // First, look for nested blocks, e.g.:
    //   <div>
    //     <div>
    //     tags for inner block must be indented.
    //     </div>
    //   </div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.

    // attacklab: This regex can be expensive when it fails.
    //
    //   ^(                  save in $1, start of line (with /m)
    //   <(tag list)\b       start tag = $2, word break
    //   [^\r]*?\n           any number of lines, minimally matching
    //                       (attacklab: hack around khtml/pcre bug)
    //   <\/\2>              the matching end tag
    //   [ \t]*(?=\n+))      trailing spaces/tabs, followed by a newline
    //                       (attacklab: there are sentinel newlines at end of document)
    text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm, hashElement);

    //
    // Now match more liberally, simply from `\n<tag>` to `</tag>\n`
    //
    //   ^(<(tag list)\b     start tag = $2
    //   [^\r]*?.*<\/\2>     anything up to the matching end tag
    //   [ \t]*(?=\n+)\n)    trailing spaces/tabs and the newline that follows
    text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm, hashElement);

    // Special case just for <hr />. It was easier to make a special case than
    // to make the other regex more complicated.
    //
    //   \n[ ]{0,3}                 starting after a newline, up to three spaces
    //   ((<(hr)\b([^<>])*?\/?>)    the tag itself
    //   [ \t]*(?=\n{2,}))          followed by a blank line
    text = text.replace(/\n[ ]{0,3}((<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g, hashElement);

    // Special case for standalone HTML comments, starting after a blank line
    // and followed by a blank line. For the comment grammar see
    // http://www.w3.org/TR/html-markup/syntax.html#comments and http://meta.stackoverflow.com/q/95256
    text = text.replace(/\n\n[ ]{0,3}(<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>[ \t]*(?=\n{2,}))/g, hashElement);

    // PHP and ASP-style processor instructions (<?...?> and <%...%>),
    // again starting after a blank line and followed by a blank line.
    text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g, hashElement);

    return text;
}
//
// Replacement callback used by _HashHTMLBlocks(): stores the captured
// block (m1) in g_html_blocks and returns the placeholder that stands in
// for it in the text.
//
function hashElement(wholeMatch, m1) {
    var blockText = m1;

    // Undo double lines
    blockText = blockText.replace(/^\n+/, "");

    // strip trailing blank lines
    blockText = blockText.replace(/\n+$/g, "");

    // Replace the element text with a marker ("~KxK" where x is its key);
    // push() returns the new length, so the key is the block's index.
    blockText = "\n\n~K" + (g_html_blocks.push(blockText) - 1) + "K\n\n";

    return blockText;
}
// Passed to the preBlockGamut / postBlockGamut hooks as their second
// argument. It forwards only the text, never a second argument, to
// _RunBlockGamut().
var blockGamutHookCallback = function (t) { return _RunBlockGamut(t); };
function _RunBlockGamut(text, doNotUnhash) {
    //
    // These are all the transformations that form block-level
    // tags like paragraphs, headers, and list items.
    //
    //   text        - the (possibly nested, outdented) text to convert
    //   doNotUnhash - passed through to _FormParagraphs()
    //

    text = pluginHooks.preBlockGamut(text, blockGamutHookCallback);

    text = _DoHeaders(text);

    // Do Horizontal Rules:
    var replacement = "<hr />\n";
    text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm, replacement);
    text = text.replace(/^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$/gm, replacement);
    text = text.replace(/^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$/gm, replacement);

    text = _DoLists(text);
    text = _DoCodeBlocks(text);
    text = _DoBlockQuotes(text);

    text = pluginHooks.postBlockGamut(text, blockGamutHookCallback);

    // We already ran _HashHTMLBlocks() before, in Markdown(), but that
    // was to escape raw HTML in the original Markdown source. This time,
    // we're escaping the markup we've just created, so that we don't wrap
    // <p> tags around block-level tags.
    text = _HashHTMLBlocks(text);
    text = _FormParagraphs(text, doNotUnhash);

    return text;
}
function _RunSpanGamut(text) {
    //
    // These are all the transformations that occur *within* block-level
    // tags like paragraphs, headers, and list items.
    //

    text = pluginHooks.preSpanGamut(text);

    text = _DoCodeSpans(text);
    text = _EscapeSpecialCharsWithinTagAttributes(text);
    text = _EncodeBackslashEscapes(text);

    // Process anchor and image tags. Images must come first,
    // because ![foo][f] looks like an anchor.
    text = _DoImages(text);
    text = _DoAnchors(text);

    // Make links out of things like `<http://example.com/>`
    // Must come after _DoAnchors(), because you can use < and >
    // delimiters in inline links like [this](<url>).
    text = _DoAutoLinks(text);

    text = text.replace(/~P/g, "://"); // put in place to prevent autolinking; reset now

    text = _EncodeAmpsAndAngles(text);
    text = _DoItalicsAndBold(text);

    // Do hard breaks: a line has to end in *two or more* spaces to get a
    // <br>; a single trailing space is left alone.
    text = text.replace(/  +\n/g, " <br>\n");

    text = pluginHooks.postSpanGamut(text);

    return text;
}
function _EscapeSpecialCharsWithinTagAttributes(text) {
    //
    // Inside HTML tags and comments (i.e. between < and >), encodes the
    // characters \ ` * _ so that they are not mistaken for Markdown code,
    // italics or strong markers later on.
    //

    // Finds HTML tags and comments. See Friedl's
    // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
    // SE: changed the comment part of the regex
    var tagOrComment = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>)/gi;

    function escapeWithinTag(match) {
        // Slashes are escaped as well when the second character is "!"
        // (comments), to prevent autolinking there --
        // http://meta.stackoverflow.com/questions/95987
        var startsWithBang = match.charAt(1) == "!";
        var charsToEscape = startsWithBang ? "\\`*_/" : "\\`*_";

        // Turn <code> / </code> that sit inside the tag back into backticks.
        var withBackticks = match.replace(/(.)<\/?code>(?=.)/g, "$1`");
        return escapeCharacters(withBackticks, charsToEscape);
    }

    return text.replace(tagOrComment, escapeWithinTag);
}
function _DoAnchors(text) {
    //
    // Turn Markdown link shortcuts into XHTML <a> tags.
    // Every match is handed to writeAnchorTag(); the empty groups in the
    // patterns only pad the capture list so that all three forms call it
    // with the same argument positions.
    //

    //
    // First, handle reference-style links: [link text] [id]
    //
    //   (\[                                 wrap whole match in $1
    //   ((?:\[[^\]]*\]|[^\[\]])*)           link text = $2; brackets may nest one level
    //   \][ ]?(?:\n[ ]*)?                   one optional space, one optional newline followed by spaces
    //   \[(.*?)\])                          id = $3
    //   ()()()()                            pad remaining backreferences
    text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g, writeAnchorTag);

    //
    // Next, inline-style links: [link text](url "optional title")
    //
    //   (\[((?:\[[^\]]*\]|[^\[\]])*)\]      whole match = $1, link text = $2
    //   \([ \t]*                            literal paren
    //   ()                                  no id, so leave $3 empty
    //   <?((?:\([^)]*\)|[^()\s])*?)>?       href = $4; allows one level of (correctly nested) parens (think MSDN)
    //   [ \t]*
    //   ((['"])(.*?)\6[ \t]*)?              optional title: quote char = $6, title = $7
    //   \))
    text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?((?:\([^)]*\)|[^()\s])*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g, writeAnchorTag);

    //
    // Last, handle reference-style shortcuts: [link text]
    // These must come last in case you've also got [link test][1]
    // or [link test](/foo)
    //
    //   (\[([^\[\]]+)\])                    link text = $2; can't contain '[' or ']'
    //   ()()()()()                          pad rest of backreferences
    text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag);

    return text;
}
//
// Replacement callback for _DoAnchors(). Builds the <a> tag for one link.
//
//   m1 - the whole matched link, returned untouched when the link cannot
//        be resolved
//   m2 - link text
//   m3 - reference id (empty for inline links and shortcuts)
//   m4 - url (empty for reference-style links)
//   m7 - title (may be undefined)
//
function writeAnchorTag(wholeMatch, m1, m2, m3, m4, m5, m6, m7) {
    if (m7 == undefined) m7 = "";
    var whole_match = m1;
    var link_text = m2.replace(/:\/\//g, "~P"); // to prevent auto-linking within the link. will be converted back after the auto-linker runs
    var link_id = m3.toLowerCase();
    var url = m4;
    var title = m7;

    if (url == "") {
        if (link_id == "") {
            // lower-case and turn embedded newlines into spaces
            link_id = link_text.toLowerCase().replace(/ ?\n/g, " ");
        }
        url = "#" + link_id;

        if (g_urls.get(link_id) != undefined) {
            url = g_urls.get(link_id);
            if (g_titles.get(link_id) != undefined) {
                title = g_titles.get(link_id);
            }
        }
        else {
            if (whole_match.search(/\(\s*\)$/m) > -1) {
                // Special case for explicit empty url
                url = "";
            } else {
                // Unknown reference: leave the source text as it was.
                return whole_match;
            }
        }
    }
    url = encodeProblemUrlChars(url);
    url = escapeCharacters(url, "*_");
    var result = "<a href=\"" + url + "\"";

    if (title != "") {
        title = attributeEncode(title);
        title = escapeCharacters(title, "*_");
        result += " title=\"" + title + "\"";
    }

    result += ">" + link_text + "</a>";

    return result;
}
function _DoImages(text) {
    //
    // Turn Markdown image shortcuts into <img> tags.
    // Every match is handed to writeImageTag(); the empty groups only pad
    // the capture list so both forms use the same argument positions.
    //

    //
    // First, handle reference-style labeled images: ![alt text][id]
    //
    //   (!\[(.*?)\]                  whole match = $1, alt text = $2
    //   [ ]?(?:\n[ ]*)?              one optional space, one optional newline followed by spaces
    //   \[(.*?)\])                   id = $3
    //   ()()()()                     pad rest of backreferences
    text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g, writeImageTag);

    //
    // Next, handle inline images:  ![alt text](url "optional title")
    // Don't forget: encode * and _
    //
    //   (!\[(.*?)\]                  whole match = $1, alt text = $2
    //   \s?\([ \t]*                  one optional whitespace character, literal paren
    //   ()                           no id, so leave $3 empty
    //   <?(\S+?)>?                   src url = $4
    //   [ \t]*
    //   ((['"])(.*?)\6[ \t]*)?       optional title: quote char = $6, title = $7
    //   \))
    text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g, writeImageTag);

    return text;
}
//
// Encodes `text` for use inside a double-quoted HTML attribute value:
// ">" becomes "&gt;", "<" becomes "&lt;" and '"' becomes "&quot;".
// Ampersands are deliberately left untouched here.
//
function attributeEncode(text) {
    // unconditionally replace angle brackets here -- what ends up in an attribute (e.g. alt or title)
    // never makes sense to have verbatim HTML in it (and the sanitizer would totally break it)
    return text.replace(/>/g, "&gt;").replace(/</g, "&lt;").replace(/"/g, "&quot;");
}
//
// Replacement callback for _DoImages(). Builds the <img> tag for one image.
//
//   m1 - the whole matched image, returned untouched when a reference
//        cannot be resolved
//   m2 - alt text
//   m3 - reference id (empty for inline images)
//   m4 - url (empty for reference-style images)
//   m7 - title (may be undefined)
//
function writeImageTag(wholeMatch, m1, m2, m3, m4, m5, m6, m7) {
    var whole_match = m1;
    var alt_text = m2;
    var link_id = m3.toLowerCase();
    var url = m4;
    var title = m7;

    if (!title) title = "";

    if (url == "") {
        if (link_id == "") {
            // lower-case and turn embedded newlines into spaces
            link_id = alt_text.toLowerCase().replace(/ ?\n/g, " ");
        }
        url = "#" + link_id;

        if (g_urls.get(link_id) != undefined) {
            url = g_urls.get(link_id);
            if (g_titles.get(link_id) != undefined) {
                title = g_titles.get(link_id);
            }
        }
        else {
            // Unknown reference: leave the source text as it was.
            return whole_match;
        }
    }

    alt_text = escapeCharacters(attributeEncode(alt_text), "*_[]()");
    url = escapeCharacters(url, "*_");
    var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";

    // attacklab: Markdown.pl adds empty title attributes to images.
    // Replicate this bug.

    //if (title != "") {
    title = attributeEncode(title);
    title = escapeCharacters(title, "*_");
    result += " title=\"" + title + "\"";
    //}

    result += " />";

    return result;
}
function _DoHeaders(text) {
    //
    // Converts Setext-style and atx-style headers into <h1>..<h6> tags.
    // The header text itself is run through _RunSpanGamut().
    //

    // Setext-style headers:
    //  Header 1
    //  ========
    //
    //  Header 2
    //  --------
    //
    text = text.replace(/^(.+)[ \t]*\n=+[ \t]*\n+/gm,
        function (wholeMatch, m1) { return "<h1>" + _RunSpanGamut(m1) + "</h1>\n\n"; }
    );

    text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm,
        function (matchFound, m1) { return "<h2>" + _RunSpanGamut(m1) + "</h2>\n\n"; }
    );

    // atx-style headers:
    //  # Header 1
    //  ## Header 2
    //  ## Header 2 with closing hashes ##
    //  ...
    //  ###### Header 6
    //
    //   ^(\#{1,6})      $1 = string of #'s
    //   [ \t]*
    //   (.+?)           $2 = Header text
    //   [ \t]*
    //   \#*             optional closing #'s (not counted)
    //   \n+
    text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,
        function (wholeMatch, m1, m2) {
            // The header level is the number of leading #'s.
            var h_level = m1.length;
            return "<h" + h_level + ">" + _RunSpanGamut(m2) + "</h" + h_level + ">\n\n";
        }
    );

    return text;
}
function _DoLists(text) {
    //
    // Form HTML ordered (numbered) and unordered (bulleted) lists.
    // The items of every list found are produced by _ProcessListItems().
    //

    // attacklab: add sentinel to hack around khtml/safari bug:
    // http://bugs.webkit.org/show_bug.cgi?id=11231
    text += "~0";

    // Pattern matching any entire ul or ol list:
    //
    //   (                                     whole list
    //     ([ ]{0,3}([*+-]|\d+[.])[ \t]+)      first list item marker (attacklab: g_tab_width - 1)
    //     [^\r]+?
    //     (~0                                 sentinel for workaround; should be $
    //      |\n{2,}(?=\S)
    //       (?![ \t]*(?:[*+-]|\d+[.])[ \t]+)  negative lookahead for another list item marker
    //     )
    //   )
    //
    // Inside a list it is anchored to the start of a line; at the top
    // level it additionally needs a preceding blank line (or the start of
    // the text), which is captured so it can be put back.
    var whole_list;

    if (g_list_level) {
        whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
        text = text.replace(whole_list, function (wholeMatch, m1, m2) {
            var list = m1;
            var list_type = (m2.search(/[*+-]/g) > -1) ? "ul" : "ol";

            var result = _ProcessListItems(list, list_type);

            // Trim any trailing whitespace, to put the closing `</$list_type>`
            // up on the preceding line, to get it past the current stupid
            // HTML block parser. This is a hack to work around the terrible
            // hack that is the HTML block parser.
            result = result.replace(/\s+$/, "");
            result = "<" + list_type + ">" + result + "</" + list_type + ">\n";
            return result;
        });
    } else {
        whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;
        text = text.replace(whole_list, function (wholeMatch, m1, m2, m3) {
            var runup = m1;
            var list = m2;

            var list_type = (m3.search(/[*+-]/g) > -1) ? "ul" : "ol";
            var result = _ProcessListItems(list, list_type);
            result = runup + "<" + list_type + ">\n" + result + "</" + list_type + ">\n";
            return result;
        });
    }

    // attacklab: strip sentinel
    text = text.replace(/~0/, "");

    return text;
}
// Regex source of the item marker for each list type.
var _listItemMarkers = { ol: "\\d+[.]", ul: "[*+-]" };

function _ProcessListItems(list_str, list_type) {
    //
    //  Process the contents of a single ordered or unordered list, splitting it
    //  into individual list items. Returns the <li> elements as a string.
    //
    //  list_type is either "ul" or "ol".

    // The $g_list_level global keeps track of when we're inside a list.
    // Each time we enter a list, we increment it; when we leave a list,
    // we decrement. If it's zero, we're not in a list anymore.
    //
    // We do this because when we're not inside a list, we want to treat
    // something like this:
    //
    //    I recommend upgrading to version
    //    8. Oops, now this line is treated
    //    as a sub-list.
    //
    // As a single paragraph, despite the fact that the second line starts
    // with a digit-period-space sequence.
    //
    // Whereas when we're inside a list (or sub-list), that line will be
    // treated as the start of a sub-list. What a kludge, huh? This is
    // an aspect of Markdown's syntax that's hard to parse perfectly
    // without resorting to mind-reading. Perhaps the solution is to
    // change the syntax rules such that sub-lists must start with a
    // starting cardinal number; e.g. "1." or "a.".

    g_list_level++;

    // trim trailing blank lines:
    list_str = list_str.replace(/\n{2,}$/, "\n");

    // attacklab: add sentinel to emulate \z
    list_str += "~0";

    // In the original attacklab showdown, list_type was not given to this function, and anything
    // that matched /[*+-]|\d+[.]/ would just create the next <li>, causing this mismatch:
    //
    //  Markdown          rendered by WMD        rendered by MarkdownSharp
    //  ------------------------------------------------------------------
    //  1. first          1. first               1. first
    //  2. second         2. second              2. second
    //  - third           3. third                   * third
    //
    // We changed this to behave identical to MarkdownSharp. This is the constructed RegEx,
    // with {MARKER} being one of \d+[.] or [*+-], depending on list_type:
    //
    //   (^[ \t]*)                         leading whitespace = $1
    //   ({MARKER})[ \t]+                  list marker = $2
    //   ([^\r]+?(\n+))                    list item text = $3
    //   (?=(~0|\1({MARKER})[ \t]+))       up to the sentinel or the next item at the same indent

    var marker = _listItemMarkers[list_type];
    var re = new RegExp("(^[ \\t]*)(" + marker + ")[ \\t]+([^\\r]+?(\\n+))(?=(~0|\\1(" + marker + ")[ \\t]+))", "gm");
    var last_item_had_a_double_newline = false;
    list_str = list_str.replace(re,
        function (wholeMatch, m1, m2, m3) {
            var item = m3;
            var leading_space = m1;
            var ends_with_double_newline = /\n\n$/.test(item);
            var contains_double_newline = ends_with_double_newline || item.search(/\n{2,}/) > -1;

            if (contains_double_newline || last_item_had_a_double_newline) {
                // Blank lines are involved: the item gets full block-level treatment.
                item = _RunBlockGamut(_Outdent(item), /* doNotUnhash = */ true);
            }
            else {
                // Recursion for sub-lists:
                item = _DoLists(_Outdent(item));
                item = item.replace(/\n$/, ""); // chomp(item)
                item = _RunSpanGamut(item);
            }
            last_item_had_a_double_newline = ends_with_double_newline;
            return "<li>" + item + "</li>\n";
        }
    );

    // attacklab: strip sentinel
    list_str = list_str.replace(/~0/g, "");

    g_list_level--;
    return list_str;
}
function _DoCodeBlocks(text) {
    //
    //  Process Markdown `<pre><code>` blocks.
    //
    //   (?:\n\n|^)
    //   (                             $1 = the code block -- one or more lines, starting with a space/tab
    //     (?:
    //       (?:[ ]{4}|\t)             Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
    //       .*\n+
    //     )+
    //   )
    //   (\n*[ ]{0,3}[^ \t\n]|(?=~0))  $2 = start of whatever follows - attacklab: g_tab_width
    //

    // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
    text += "~0";

    text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
        function (wholeMatch, m1, m2) {
            var codeblock = m1;
            var nextChar = m2;

            codeblock = _EncodeCode(_Outdent(codeblock));
            codeblock = _Detab(codeblock);
            codeblock = codeblock.replace(/^\n+/g, ""); // trim leading newlines
            codeblock = codeblock.replace(/\n+$/g, ""); // trim trailing newlines

            codeblock = "<pre><code>" + codeblock + "\n</code></pre>";

            // Put back the text that group 2 consumed after the block.
            return "\n\n" + codeblock + "\n\n" + nextChar;
        }
    );

    // attacklab: strip sentinel
    text = text.replace(/~0/, "");

    return text;
}
//
// Stores `text` (with leading and trailing newlines removed) in
// g_html_blocks and returns the "~KxK" placeholder for it, where x is the
// index of the stored block.
//
function hashBlock(text) {
    text = text.replace(/(^\n+|\n+$)/g, "");
    return "\n\n~K" + (g_html_blocks.push(text) - 1) + "K\n\n";
}
function _DoCodeSpans(text) {
    //
    // * Backtick quotes are used for <code></code> spans.
    //
    // * You can use multiple backticks as the delimiters if you want to
    //   include literal backticks in the code span. So, this input:
    //
    //      Just type ``foo `bar` baz`` at the prompt.
    //
    //   Will translate to:
    //
    //      <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    //
    //   There's no arbitrary limit to the number of backticks you
    //   can use as delimiters. If you need three consecutive backticks
    //   in your code, use four for delimiters, etc.
    //
    // * You can use spaces to get literal backticks at the edges:
    //
    //      ... type `` `bar` `` ...
    //
    //   Turns to:
    //
    //      ... type <code>`bar`</code> ...
    //
    // Pieces of the pattern:
    //   (^|[^\\])         $1: character before the opening ` can't be a backslash
    //   (`+)              $2 = opening run of `
    //   ([^\r]*?[^`])     $3 = the code (attacklab: work around lack of lookbehind)
    //   \2(?!`)           matching closer, not followed by another `
    //

    text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
        function (wholeMatch, m1, m2, m3) {
            var c = m3;
            c = c.replace(/^([ \t]*)/g, ""); // leading whitespace
            c = c.replace(/[ \t]*$/g, ""); // trailing whitespace
            c = _EncodeCode(c);
            c = c.replace(/:\/\//g, "~P"); // to prevent auto-linking. Not necessary in code *blocks*, but in code spans. Will be converted back after the auto-linker runs.
            return m1 + "<code>" + c + "</code>";
        }
    );

    return text;
}
function _EncodeCode(text) {
    //
    // Encode/escape certain characters inside Markdown code runs.
    // The point is that in code, these characters are literals,
    // and lose their special Markdown meanings.
    //
    // Encode all ampersands; HTML entities are not
    // entities within a Markdown code span.
    // (This has to happen first, so that the "&" of the entities written
    // below is not encoded a second time.)
    text = text.replace(/&/g, "&amp;");

    // Do the angle bracket song and dance:
    text = text.replace(/</g, "&lt;");
    text = text.replace(/>/g, "&gt;");

    // Now, escape characters that are magic in Markdown:
    text = escapeCharacters(text, "\*_{}[]\\", false);

    // jj the line above breaks this:
    //---

    //* Item

    //   1. Subitem

    //            special char: *
    //---

    return text;
}
function _DoItalicsAndBold(text) {
    //
    // Turns **text** / __text__ into <strong> and *text* / _text_ into <em>.
    // A marker is only recognised after a non-word character, an underscore
    // or the start of the text, and the closing marker must likewise be
    // followed by a non-word character, an underscore or the end of the text.
    //
    var strongPattern = /([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/g;
    var emPattern = /([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/g;

    // <strong> must go first:
    var withStrong = text.replace(strongPattern, "$1<strong>$3</strong>$4");
    return withStrong.replace(emPattern, "$1<em>$3</em>$4");
}
function _DoBlockQuotes(text) {
    //
    // Converts runs of lines quoted with ">" into <blockquote> elements.
    // The quoted content is converted recursively with _RunBlockGamut()
    // and the finished element is stored through hashBlock().
    //
    //   (                          Wrap whole match in $1
    //     (
    //       ^[ \t]*>[ \t]?         '>' at the start of a line
    //       .+\n                   rest of the first line
    //       (.+\n)*                subsequent consecutive lines
    //       \n*                    blanks
    //     )+
    //   )
    //

    text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
        function (wholeMatch, m1) {
            var bq = m1;

            // attacklab: hack around Konqueror 3.5.4 bug:
            // "----------bug".replace(/^-/g,"") == "bug"

            bq = bq.replace(/^[ \t]*>[ \t]?/gm, "~0"); // trim one level of quoting

            // attacklab: clean up hack
            bq = bq.replace(/~0/g, "");

            bq = bq.replace(/^[ \t]+$/gm, ""); // trim whitespace-only lines
            bq = _RunBlockGamut(bq); // recurse

            // Indent the converted content.
            bq = bq.replace(/(^|\n)/g, "$1 ");
            // These leading spaces screw with <pre> content, so we need to fix that:
            bq = bq.replace(
                /(\s*<pre>[^\r]+?<\/pre>)/gm,
                function (wholeMatch, m1) {
                    var pre = m1;
                    // attacklab: hack around Konqueror 3.5.4 bug:
                    pre = pre.replace(/^ /mg, "~0");
                    pre = pre.replace(/~0/g, "");
                    return pre;
                });

            return hashBlock("<blockquote>\n" + bq + "\n</blockquote>");
        }
    );
    return text;
}
function _FormParagraphs(text, doNotUnhash) {
    //
    //  Params:
    //    text        - string to process with html <p> tags
    //    doNotUnhash - when truthy, ~K<n>K block markers are left in place
    //                  instead of being swapped for g_html_blocks[n]
    //  Returns:
    //    the processed paragraphs joined by blank lines.
    //

    // Strip leading and trailing lines:
    text = text.replace(/^\n+/g, "");
    text = text.replace(/\n+$/g, "");

    var grafs = text.split(/\n{2,}/g);
    var grafsOut = [];
    var markerRe = /~K(\d+)K/;
    var i, end, str, foundAny;

    // Replacement callback for the unhash pass; records that a marker was seen
    // so the caller knows another pass is needed.
    function restoreBlock(wholeMatch, id) {
        foundAny = true;
        return g_html_blocks[id];
    }

    //
    // Wrap <p> tags.
    //
    end = grafs.length;
    for (i = 0; i < end; i++) {
        str = grafs[i];

        // if this is an HTML marker, copy it
        if (markerRe.test(str)) {
            grafsOut.push(str);
        }
        else if (/\S/.test(str)) {
            str = _RunSpanGamut(str);
            str = str.replace(/^([ \t]*)/g, "<p>");
            str += "</p>";
            grafsOut.push(str);
        }
        // whitespace-only chunks are dropped
    }

    //
    // Unhashify HTML blocks
    //
    if (!doNotUnhash) {
        end = grafsOut.length;
        for (i = 0; i < end; i++) {
            foundAny = true;
            while (foundAny) { // we may need several runs, since the data may be nested
                foundAny = false;
                grafsOut[i] = grafsOut[i].replace(/~K(\d+)K/g, restoreBlock);
            }
        }
    }

    return grafsOut.join("\n\n");
}
function _EncodeAmpsAndAngles(text) {
    //
    // Smart processing for ampersands and angle brackets that need to be encoded.
    //
    // Params:
    //   text - string to encode
    // Returns:
    //   the string with bare "&" and "<" replaced by "&amp;" and "&lt;".
    //

    // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    //   http://bumppo.net/projects/amputator/
    // An "&" that already starts something shaped like an entity
    // (&name; &#123; &#x1F;) is left alone.
    text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g, "&amp;");

    // Encode naked <'s, i.e. those not followed by a letter, "/", "?", "!"
    // or the "~D" placeholder.
    text = text.replace(/<(?![a-z\/?!]|~D)/gi, "&lt;");

    return text;
}
function _EncodeBackslashEscapes(text) {
    //
    //   Parameter:  String.
    //   Returns:    The string, after processing the following backslash
    //               escape sequences:  \\  \`  \*  \_  \{  \}  \[  \]  \(  \)
    //               \>  \#  \+  \-  \.  \!
    //               Each sequence is replaced by whatever
    //               escapeCharacters_callback returns for the escaped character.
    //

    // attacklab: The polite way to do this is with the new
    // escapeCharacters() function:
    //
    //     text = escapeCharacters(text,"\\",true);
    //     text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
    //
    // ...but we're sidestepping its use of the (slow) RegExp constructor
    // as an optimization for Firefox.  This function gets called a LOT.

    text = text.replace(/\\(\\)/g, escapeCharacters_callback);

    // The hyphen is escaped inside the class on purpose: a bare "+-." is a
    // range that also covers ",", which is not in the list above.
    text = text.replace(/\\([`*_{}\[\]()>#+\-.!])/g, escapeCharacters_callback);
    return text;
}
// 2013-02-10 23:37:42 -05:00
function handleTrailingParens(wholeMatch, lookbehind, protocol, link) {
    //
    // Replacement callback used by _DoAutoLinks for bare URLs.
    //
    // Params:
    //   wholeMatch - the complete matched text
    //   lookbehind - the optional prefix capture; when it is truthy the match
    //                is returned unchanged
    //   protocol   - captured scheme
    //   link       - the rest of the URL, starting at "://"
    // Returns:
    //   "<" + protocol + link + ">", with surplus closing parentheses moved
    //   from the end of the link to after the ">".
    //
    if (lookbehind) {
        return wholeMatch;
    }
    if (link.charAt(link.length - 1) !== ")") {
        return "<" + protocol + link + ">";
    }

    // Walk the parentheses in order; an opener seen while the depth is
    // zero or negative restarts the count at 1.
    var found = link.match(/[()]/g);
    var depth = 0;
    for (var k = 0; k < found.length; k++) {
        depth = (found[k] === "(") ? Math.max(depth, 0) + 1 : depth - 1;
    }

    // A negative depth is the number of unmatched closers; peel at most that
    // many ")" off the very end of the link.
    var cut = link.length;
    var surplus = -depth;
    while (surplus > 0 && cut > 0 && link.charAt(cut - 1) === ")") {
        cut--;
        surplus--;
    }

    return "<" + protocol + link.substring(0, cut) + ">" + link.substring(cut);
}
// 2013-02-05 14:16:51 -05:00
function _DoAutoLinks(text) {
    //
    // Turns raw URLs and <http://...>-style URLs into anchors.
    //
    // Params:
    //   text - string to process
    // Returns:
    //   the string with http/https/ftp URLs replaced by <a href="...">...</a>;
    //   the link text comes from pluginHooks.plainLinkText.
    //

    // note that at this point, all other URL in the text are already hyperlinked as <a href=""></a>
    // *except* for the <http://www.foo.com> case

    // automatically add < and > around unadorned raw hyperlinks
    // must be preceded by a non-word character (and not by =" or <) and followed by non-word/EOF character
    // simulating the lookbehind in a consuming way is okay here, since a URL can neither end with a " nor
    // with a <, so there is no risk of overlapping matches.
    text = text.replace(/(="|<)?\b(https?|ftp)(:\/\/[-A-Z0-9+&@#\/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#\/%=~_|\[\])])(?=$|\W)/gi, handleTrailingParens);

    //  autolink anything like <http://example.com>
    var replacer = function (wholematch, m1) {
        // Percent-encode every double underscore in the href.
        // The variable is declared locally so it no longer leaks into the
        // global scope (and no longer throws in strict mode).
        var m1encoded = m1.replace(/__/g, "%5F%5F");
        return "<a href=\"" + m1encoded + "\">" + pluginHooks.plainLinkText(m1) + "</a>";
    };
    text = text.replace(/<((https?|ftp):[^'">\s]+)>/gi, replacer);

    // Email addresses: <address@domain.foo>
    //
    //     text = text.replace(/
    //         <
    //         (?:mailto:)?
    //         (
    //             [-.\w]+
    //             \@
    //             [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
    //         )
    //         >
    //     /gi, _DoAutoLinks_callback());
    //
    // disabling email autolinking, since we don't do that on the server, either
    //
    //     text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,
    //         function(wholeMatch,m1) {
    //             return _EncodeEmailAddress( _UnescapeSpecialChars(m1) );
    //         }
    //     );

    return text;
}
function _UnescapeSpecialChars(text) {
    //
    // Swap back in all the special characters we've hidden.
    //
    // Params:
    //   text - string that may contain ~E<decimal char code>E placeholders
    // Returns:
    //   the string with every placeholder replaced by the character whose
    //   code it carries.
    //
    text = text.replace(/~E(\d+)E/g,
        function (wholeMatch, m1) {
            // Explicit radix: the digits are always a decimal char code.
            var charCodeToReplace = parseInt(m1, 10);
            return String.fromCharCode(charCodeToReplace);
        }
    );
    return text;
}
function _Outdent(text) {
    //
    // Remove one level of line-leading tabs or spaces: a single tab, or
    // between one and four spaces, at the start of each line.
    //
    // attacklab: the removal is done in two steps (mark with "~0", then
    // drop the marks) to work around a Konqueror 3.5.4 bug:
    // "----------bug".replace(/^-/g,"") == "bug"
    //
    var marked = text.replace(/^(\t|[ ]{1,4})/gm, "~0"); // attacklab: g_tab_width

    // attacklab: clean up hack
    return marked.split("~0").join("");
}
function _Detab(text) {
    //
    // Expands every tab to the number of spaces needed to reach the next
    // multiple-of-four column.
    //
    // Params:
    //   text - string that may contain tabs
    // Returns:
    //   the string with tabs replaced by 1-4 spaces; returned untouched when
    //   it contains no tab at all.
    //
    if (!/\t/.test(text))
        return text;

    // spaces[v] is the padding for a tab found v columns past a tab stop:
    // 4, 3, 2 and 1 spaces respectively.
    var spaces = ["    ", "   ", "  ", " "],
        skew = 0,   // offset (in the original string) of the last tab stop / line start seen
        v;

    return text.replace(/[\n\t]/g, function (match, offset) {
        if (match === "\n") {
            // a new line starts right after this character
            skew = offset + 1;
            return match;
        }
        v = (offset - skew) % 4;
        // the expanded tab ends exactly on a tab stop, so measure from just past it
        skew = offset + 1;
        return spaces[v];
    });
}
//
// attacklab: Utility functions
//
var _problemUrlChars = /(?:["'*()[\]:]|~D)/g;

// Hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems.
//
// Params:
//   url - the URL string; a falsy value yields ""
// Returns:
//   the URL with " ' * ( ) [ ] percent-encoded and "~D" (the escape for a
//   dollar sign) turned into "%24". A ":" is kept when it is the last
//   character or is followed by a digit or "/"; any other ":" is encoded.
function encodeProblemUrlChars(url) {
    if (!url) {
        return "";
    }

    var lastPos = url.length - 1;

    return url.replace(_problemUrlChars, function (hit, pos) {
        if (hit === "~D") { // escape for dollar
            return "%24";
        }
        var keepColon = hit === ":" &&
            (pos === lastPos || /[0-9\/]/.test(url.charAt(pos + 1)));
        return keepColon ? ":" : "%" + hit.charCodeAt(0).toString(16);
    });
}
function escapeCharacters(text, charsToEscape, afterBackslash) {
    //
    // Replaces every occurrence of one of the given characters with whatever
    // escapeCharacters_callback returns for it.
    //
    // Params:
    //   text           - string to process
    //   charsToEscape  - string listing the characters to escape
    //   afterBackslash - when truthy, only characters preceded by a backslash
    //                    are replaced, and the backslash is consumed too
    // Returns:
    //   the processed string.
    //

    // First we have to escape the escape characters so that
    // we can build a character class out of them.
    // "-" and "^" are escaped as well: left bare, "-" forms a range
    // (e.g. "+-." also matches ",") and a leading "^" negates the class.
    var regexString = "([" + charsToEscape.replace(/([\[\]\\^\-])/g, "\\$1") + "])";

    if (afterBackslash) {
        regexString = "\\\\" + regexString;
    }

    var regex = new RegExp(regexString, "g");
    text = text.replace(regex, escapeCharacters_callback);

    return text;
}
function escapeCharacters_callback(wholeMatch, m1) {
    //
    // Replacement callback: turns the captured character into the placeholder
    // "~E<decimal char code>E".
    //
    // Params:
    //   wholeMatch - the complete matched text (unused)
    //   m1         - the first capture; only its first character is encoded
    //
    var charCodeToEscape = m1.charCodeAt(0);
    return "~E" + charCodeToEscape + "E";
}
} ; // end of the Markdown.Converter constructor
} ) ( ) ;