/* This is a fork of markdown-js with a few changes to support discourse: * We have replaced the strong/em handlers because we prefer them only to work on word boundaries. * [MOD]: non-url is fixed // Fix code within attrs if (prev && (typeof prev[0] === "string") && prev[0].match(/<[^>]+$/)) { return; } // __RAW // if ( next_block.match(is_list_re) || (next_block.match(/^ /) && (!next_block.match(/^ *\>/))) ) { */ // Released under MIT license // Copyright (c) 2009-2010 Dominic Baggott // Copyright (c) 2009-2010 Ash Berlin // Copyright (c) 2011 Christoph Dorn <christoph@christophdorn.com> (http://www.christophdorn.com) /*jshint browser:true, devel:true */ (function(expose) { var MarkdownHelpers = {}; // For Spidermonkey based engines function mk_block_toSource() { return "Markdown.mk_block( " + uneval(this.toString()) + ", " + uneval(this.trailing) + ", " + uneval(this.lineNumber) + " )"; } // node function mk_block_inspect() { var util = require("util"); return "Markdown.mk_block( " + util.inspect(this.toString()) + ", " + util.inspect(this.trailing) + ", " + util.inspect(this.lineNumber) + " )"; } MarkdownHelpers.mk_block = function(block, trail, line) { // Be helpful for default case in tests. if ( arguments.length === 1 ) trail = "\n\n"; // We actually need a String object, not a string primitive /* jshint -W053 */ var s = new String(block); s.trailing = trail; // To make it clear its not just a string s.inspect = mk_block_inspect; s.toSource = mk_block_toSource; if ( line !== undefined ) s.lineNumber = line; return s; }; var isArray = MarkdownHelpers.isArray = Array.isArray || function(obj) { return Object.prototype.toString.call(obj) === "[object Array]"; }; // Don't mess with Array.prototype. Its not friendly if ( Array.prototype.forEach ) { MarkdownHelpers.forEach = function forEach( arr, cb, thisp ) { return arr.forEach( cb, thisp ); }; } else { MarkdownHelpers.forEach = function forEach(arr, cb, thisp) { for (var i = 0; i < arr.length; i++) cb.call(thisp || arr, arr[i], i, arr); }; } MarkdownHelpers.isEmpty = function isEmpty( obj ) { for ( var key in obj ) { if ( hasOwnProperty.call( obj, key ) ) return false; } return true; }; MarkdownHelpers.extract_attr = function extract_attr( jsonml ) { return isArray(jsonml) && jsonml.length > 1 && typeof jsonml[ 1 ] === "object" && !( isArray(jsonml[ 1 ]) ) ? jsonml[ 1 ] : undefined; }; /** * class Markdown * * Markdown processing in Javascript done right. We have very particular views * on what constitutes 'right' which include: * * - produces well-formed HTML (this means that em and strong nesting is * important) * * - has an intermediate representation to allow processing of parsed data (We * in fact have two, both as [JsonML]: a markdown tree and an HTML tree). * * - is easily extensible to add new dialects without having to rewrite the * entire parsing mechanics * * - has a good test suite * * This implementation fulfills all of these (except that the test suite could * do with expanding to automatically run all the fixtures from other Markdown * implementations.) * * ##### Intermediate Representation * * *TODO* Talk about this :) Its JsonML, but document the node names we use. * * [JsonML]: http://jsonml.org/ "JSON Markup Language" **/ var Markdown = function(dialect) { switch (typeof dialect) { case "undefined": this.dialect = Markdown.dialects.Gruber; break; case "object": this.dialect = dialect; break; default: if ( dialect in Markdown.dialects ) this.dialect = Markdown.dialects[dialect]; else throw new Error("Unknown Markdown dialect '" + String(dialect) + "'"); break; } this.em_state = []; this.strong_state = []; this.debug_indent = ""; }; /** * Markdown.dialects * * Namespace of built-in dialects. **/ Markdown.dialects = {}; // Imported functions var mk_block = Markdown.mk_block = MarkdownHelpers.mk_block, isArray = MarkdownHelpers.isArray; /** * parse( markdown, [dialect] ) -> JsonML * - markdown (String): markdown string to parse * - dialect (String | Dialect): the dialect to use, defaults to gruber * * Parse `markdown` and return a markdown document as a Markdown.JsonML tree. **/ Markdown.parse = function( source, dialect ) { // dialect will default if undefined var md = new Markdown( dialect ); return md.toTree( source ); }; /** * count_lines( str ) -> count * - str (String): String whose lines we want to count * * Counts the number of linebreaks in `str` **/ function count_lines( str ) { return str.split("\n").length - 1; } // Internal - split source into rough blocks Markdown.prototype.split_blocks = function splitBlocks( input ) { input = input.replace(/(\r\n|\n|\r)/g, "\n"); // [\s\S] matches _anything_ (newline or space) // [^] is equivalent but doesn't work in IEs. var re = /([\s\S]+?)($|\n#|\n(?:\s*\n|$)+)/g, blocks = [], m; var line_no = 1; if ( ( m = /^(\s*\n)/.exec(input) ) !== null ) { // skip (but count) leading blank lines line_no += count_lines( m[0] ); re.lastIndex = m[0].length; } while ( ( m = re.exec(input) ) !== null ) { if (m[2] === "\n#") { m[2] = "\n"; re.lastIndex--; } blocks.push( mk_block( m[1], m[2], line_no ) ); line_no += count_lines( m[0] ); } return blocks; }; /** * Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ] * - block (String): the block to process * - next (Array): the following blocks * * Process `block` and return an array of JsonML nodes representing `block`. * * It does this by asking each block level function in the dialect to process * the block until one can. Succesful handling is indicated by returning an * array (with zero or more JsonML nodes), failure by a false value. * * Blocks handlers are responsible for calling [[Markdown#processInline]] * themselves as appropriate. * * If the blocks were split incorrectly or adjacent blocks need collapsing you * can adjust `next` in place using shift/splice etc. * * If any of this default behaviour is not right for the dialect, you can * define a `__call__` method on the dialect that will get invoked to handle * the block processing. */ Markdown.prototype.processBlock = function processBlock( block, next ) { var cbs = this.dialect.block, ord = cbs.__order__; if ( "__call__" in cbs ) return cbs.__call__.call(this, block, next); for ( var i = 0; i < ord.length; i++ ) { //D:this.debug( "Testing", ord[i] ); var res = cbs[ ord[i] ].call( this, block, next ); if ( res ) { //D:this.debug(" matched"); if ( !isArray(res) || ( res.length > 0 && !( isArray(res[0]) ) && ( typeof res[0] !== "string")) ) this.debug(ord[i], "didn't return a proper array"); //D:this.debug( "" ); return res; } } // Uhoh! no match! Should we throw an error? return []; }; Markdown.prototype.processInline = function processInline( block ) { return this.dialect.inline.__call__.call( this, String( block ) ); }; /** * Markdown#toTree( source ) -> JsonML * - source (String): markdown source to parse * * Parse `source` into a JsonML tree representing the markdown document. **/ // custom_tree means set this.tree to `custom_tree` and restore old value on return Markdown.prototype.toTree = function toTree( source, custom_root ) { var blocks = source instanceof Array ? source : this.split_blocks( source ); // Make tree a member variable so its easier to mess with in extensions var old_tree = this.tree; try { this.tree = custom_root || this.tree || [ "markdown" ]; blocks_loop: while ( blocks.length ) { var b = this.processBlock( blocks.shift(), blocks ); // Reference blocks and the like won't return any content if ( !b.length ) continue blocks_loop; this.tree.push.apply( this.tree, b ); } return this.tree; } finally { if ( custom_root ) this.tree = old_tree; } }; // Noop by default Markdown.prototype.debug = function () { var args = Array.prototype.slice.call( arguments); args.unshift(this.debug_indent); if ( typeof print !== "undefined" ) print.apply( print, args ); if ( typeof console !== "undefined" && typeof console.log !== "undefined" ) console.log.apply( null, args ); }; Markdown.prototype.loop_re_over_block = function( re, block, cb ) { // Dont use /g regexps with this var m, b = block.valueOf(); while ( b.length && (m = re.exec(b) ) !== null ) { b = b.substr( m[0].length ); cb.call(this, m); } return b; }; // Build default order from insertion order. Markdown.buildBlockOrder = function(d) { var ord = []; for ( var i in d ) { if ( i === "__order__" || i === "__call__" ) continue; ord.push( i ); } d.__order__ = ord; }; // Build patterns for inline matcher Markdown.buildInlinePatterns = function(d) { var patterns = []; for ( var i in d ) { // __foo__ is reserved and not a pattern if ( i.match( /^__.*__$/) ) continue; var l = i.replace( /([\\.*+?^$|()\[\]{}])/g, "\\$1" ) .replace( /\n/, "\\n" ); patterns.push( i.length === 1 ? l : "(?:" + l + ")" ); } patterns = patterns.join("|"); d.__patterns__ = patterns; //print("patterns:", uneval( patterns ) ); var fn = d.__call__; d.__call__ = function(text, pattern) { if ( pattern !== undefined ) return fn.call(this, text, pattern); else return fn.call(this, text, patterns); }; }; var extract_attr = MarkdownHelpers.extract_attr; /** * renderJsonML( jsonml[, options] ) -> String * - jsonml (Array): JsonML array to render to XML * - options (Object): options * * Converts the given JsonML into well-formed XML. * * The options currently understood are: * * - root (Boolean): wether or not the root node should be included in the * output, or just its children. The default `false` is to not include the * root itself. */ Markdown.renderJsonML = function( jsonml, options ) { options = options || {}; // include the root element in the rendered output? options.root = options.root || false; var content = []; if ( options.root ) { content.push( render_tree( jsonml ) ); } else { jsonml.shift(); // get rid of the tag if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) jsonml.shift(); // get rid of the attributes while ( jsonml.length ) content.push( render_tree( jsonml.shift() ) ); } return content.join( "\n\n" ); }; /** * toHTMLTree( markdown, [dialect] ) -> JsonML * toHTMLTree( md_tree ) -> JsonML * - markdown (String): markdown string to parse * - dialect (String | Dialect): the dialect to use, defaults to gruber * - md_tree (Markdown.JsonML): parsed markdown tree * * Turn markdown into HTML, represented as a JsonML tree. If a string is given * to this function, it is first parsed into a markdown tree by calling * [[parse]]. **/ Markdown.toHTMLTree = function toHTMLTree( input, dialect , options ) { // convert string input to an MD tree if ( typeof input === "string" ) input = this.parse( input, dialect ); // Now convert the MD tree to an HTML tree // remove references from the tree var attrs = extract_attr( input ), refs = {}; if ( attrs && attrs.references ) refs = attrs.references; var html = convert_tree_to_html( input, refs , options ); merge_text_nodes( html ); return html; }; /** * toHTML( markdown, [dialect] ) -> String * toHTML( md_tree ) -> String * - markdown (String): markdown string to parse * - md_tree (Markdown.JsonML): parsed markdown tree * * Take markdown (either as a string or as a JsonML tree) and run it through * [[toHTMLTree]] then turn it into a well-formated HTML fragment. **/ Markdown.toHTML = function toHTML( source , dialect , options ) { var input = this.toHTMLTree( source , dialect , options ); return this.renderJsonML( input ); }; function escapeHTML( text ) { if (text && text.length > 0) { return text.replace( /&/g, "&" ) .replace( /</g, "<" ) .replace( />/g, ">" ) .replace( /"/g, """ ) .replace( /'/g, "'" ); } else { return ""; } } function render_tree( jsonml ) { // basic case if ( typeof jsonml === "string" ) return jsonml; if ( jsonml[0] === "__RAW" ) { return jsonml[1]; } var tag = jsonml.shift(), attributes = {}, content = []; if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) attributes = jsonml.shift(); while ( jsonml.length ) content.push( render_tree( jsonml.shift() ) ); var tag_attrs = ""; if (typeof attributes.src !== 'undefined') { tag_attrs += ' src="' + escapeHTML( attributes.src ) + '"'; delete attributes.src; } for ( var a in attributes ) { var escaped = escapeHTML( attributes[ a ]); if (escaped && escaped.length) { tag_attrs += " " + a + '="' + escaped + '"'; } } // be careful about adding whitespace here for inline elements if ( tag === "img" || tag === "br" || tag === "hr" ) return "<"+ tag + tag_attrs + "/>"; else return "<"+ tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">"; } function convert_tree_to_html( tree, references, options ) { var i; options = options || {}; // shallow clone var jsonml = tree.slice( 0 ); if ( typeof options.preprocessTreeNode === "function" ) jsonml = options.preprocessTreeNode(jsonml, references); // Clone attributes if they exist var attrs = extract_attr( jsonml ); if ( attrs ) { jsonml[ 1 ] = {}; for ( i in attrs ) { jsonml[ 1 ][ i ] = attrs[ i ]; } attrs = jsonml[ 1 ]; } // basic case if ( typeof jsonml === "string" ) return jsonml; // convert this node switch ( jsonml[ 0 ] ) { case "header": jsonml[ 0 ] = "h" + jsonml[ 1 ].level; delete jsonml[ 1 ].level; break; case "bulletlist": jsonml[ 0 ] = "ul"; break; case "numberlist": jsonml[ 0 ] = "ol"; break; case "listitem": jsonml[ 0 ] = "li"; break; case "para": jsonml[ 0 ] = "p"; break; case "markdown": jsonml[ 0 ] = "html"; if ( attrs ) delete attrs.references; break; case "code_block": jsonml[ 0 ] = "pre"; i = attrs ? 2 : 1; var code = [ "code" ]; code.push.apply( code, jsonml.splice( i, jsonml.length - i ) ); jsonml[ i ] = code; break; case "inlinecode": jsonml[ 0 ] = "code"; break; case "img": jsonml[ 1 ].src = jsonml[ 1 ].href; delete jsonml[ 1 ].href; break; case "linebreak": jsonml[ 0 ] = "br"; break; case "link": jsonml[ 0 ] = "a"; break; case "link_ref": jsonml[ 0 ] = "a"; // grab this ref and clean up the attribute node var ref = references[ attrs.ref ]; // if the reference exists, make the link if ( ref ) { delete attrs.ref; // add in the href and title, if present attrs.href = ref.href; if ( ref.title ) attrs.title = ref.title; // get rid of the unneeded original text delete attrs.original; } // the reference doesn't exist, so revert to plain text else { return attrs.original; } break; case "img_ref": jsonml[ 0 ] = "img"; // grab this ref and clean up the attribute node var ref = references[ attrs.ref ]; // if the reference exists, make the link if ( ref ) { delete attrs.ref; // add in the href and title, if present attrs.src = ref.href; if ( ref.title ) attrs.title = ref.title; // get rid of the unneeded original text delete attrs.original; } // the reference doesn't exist, so revert to plain text else { return attrs.original; } break; } // convert all the children i = 1; // deal with the attribute node, if it exists if ( attrs ) { // if there are keys, skip over it for ( var key in jsonml[ 1 ] ) { i = 2; break; } // if there aren't, remove it if ( i === 1 ) jsonml.splice( i, 1 ); } for ( ; i < jsonml.length; ++i ) { jsonml[ i ] = convert_tree_to_html( jsonml[ i ], references, options ); } return jsonml; } // merges adjacent text nodes into a single node function merge_text_nodes( jsonml ) { // skip the tag name and attribute hash var i = extract_attr( jsonml ) ? 2 : 1; while ( i < jsonml.length ) { // if it's a string check the next item too if ( typeof jsonml[ i ] === "string" ) { if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) { // merge the second string into the first and remove it jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ]; } else { ++i; } } // if it's not a string recurse else { merge_text_nodes( jsonml[ i ] ); ++i; } } } var DialectHelpers = {}; DialectHelpers.inline_until_char = function( text, want ) { var consumed = 0, nodes = [], patterns = this.dialect.inline.__patterns__.replace('|_|', '|'); while ( true ) { if ( text.charAt( consumed ) === want ) { // Found the character we were looking for consumed++; return [ consumed, nodes ]; } if ( consumed >= text.length ) { // No closing char found. Abort. return [consumed, null, nodes]; } var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ), patterns, [text.substr(0, consumed)]); consumed += res[ 0 ]; // Add any returned nodes. nodes.push.apply( nodes, res.slice( 1 ) ); } }; // Helper function to make sub-classing a dialect easier DialectHelpers.subclassDialect = function( d ) { function Block() {} Block.prototype = d.block; function Inline() {} Inline.prototype = d.inline; return { block: new Block(), inline: new Inline() }; }; var forEach = MarkdownHelpers.forEach, extract_attr = MarkdownHelpers.extract_attr, mk_block = MarkdownHelpers.mk_block, isEmpty = MarkdownHelpers.isEmpty, inline_until_char = DialectHelpers.inline_until_char; // A robust regexp for matching URLs. Thakns: https://gist.github.com/dperini/729294 var urlRegexp = /(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:\/[^\s]*)?/i.source; /** * Gruber dialect * * The default dialect that follows the rules set out by John Gruber's * markdown.pl as closely as possible. Well actually we follow the behaviour of * that script which in some places is not exactly what the syntax web page * says. **/ var Gruber = { block: { atxHeader: function atxHeader( block, next ) { var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ ); if ( !m ) return undefined; var header = [ "header", { level: m[ 1 ].length } ]; Array.prototype.push.apply(header, this.processInline(m[ 2 ])); if ( m[0].length < block.length ) next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); return [ header ]; }, setextHeader: function setextHeader( block, next ) { var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ ); if ( !m ) return undefined; var level = ( m[ 2 ] === "=" ) ? 1 : 2, header = [ "header", { level : level } ].concat( this.processInline(m[ 1 ]) ); if ( m[0].length < block.length ) next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); return [ header ]; }, code: function code( block, next ) { // | Foo // |bar // should be a code block followed by a paragraph. Fun // // There might also be adjacent code block to merge. var ret = [], re = /^(?: {0,3}\t| {4})(.*)\n?/; // 4 spaces + content if ( !block.match( re ) ) return undefined; block_search: do { // Now pull out the rest of the lines var b = this.loop_re_over_block( re, block.valueOf(), function( m ) { ret.push( m[1] ); } ); if ( b.length ) { // Case alluded to in first comment. push it back on as a new block next.unshift( mk_block(b, block.trailing) ); break block_search; } else if ( next.length ) { // Check the next block - it might be code too if ( !next[0].match( re ) ) break block_search; // Pull how how many blanks lines follow - minus two to account for .join ret.push ( block.trailing.replace(/[^\n]/g, "").substring(2) ); block = next.shift(); } else { break block_search; } } while ( true ); return [ [ "code_block", ret.join("\n") ] ]; }, horizRule: function horizRule( block, next ) { // this needs to find any hr in the block to handle abutting blocks var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ ); if ( !m ) return undefined; var jsonml = [ [ "hr" ] ]; // if there's a leading abutting block, process it if ( m[ 1 ] ) { var contained = mk_block( m[ 1 ], "", block.lineNumber ); jsonml.unshift.apply( jsonml, this.toTree( contained, [] ) ); } // if there's a trailing abutting block, stick it into next if ( m[ 3 ] ) next.unshift( mk_block( m[ 3 ], block.trailing, block.lineNumber + 1 ) ); return jsonml; }, // There are two types of lists. Tight and loose. Tight lists have no whitespace // between the items (and result in text just in the <li>) and loose lists, // which have an empty line between list items, resulting in (one or more) // paragraphs inside the <li>. // // There are all sorts weird edge cases about the original markdown.pl's // handling of lists: // // * Nested lists are supposed to be indented by four chars per level. But // if they aren't, you can get a nested list by indenting by less than // four so long as the indent doesn't match an indent of an existing list // item in the 'nest stack'. // // * The type of the list (bullet or number) is controlled just by the // first item at the indent. Subsequent changes are ignored unless they // are for nested lists // lists: (function( ) { // Use a closure to hide a few variables. var any_list = "[*+-]|\\d+\\.", bullet_list = /[*+-]/, // Capture leading indent as it matters for determining nested lists. is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ), indent_re = "(?: {0,3}\\t| {4})"; // TODO: Cache this regexp for certain depths. // Create a regexp suitable for matching an li for a given stack depth function regex_for_depth( depth ) { return new RegExp( // m[1] = indent, m[2] = list_type "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" + // m[3] = cont "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})" ); } function expand_tab( input ) { return input.replace( / {0,3}\t/g, " " ); } // Add inline content `inline` to `li`. inline comes from processInline // so is an array of content function add(li, loose, inline, nl) { if ( loose ) { li.push( [ "para" ].concat(inline) ); return; } // Hmmm, should this be any block level element or just paras? var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] === "para" ? li[li.length -1] : li; // If there is already some content in this list, add the new line in if ( nl && li.length > 1 ) inline.unshift(nl); for ( var i = 0; i < inline.length; i++ ) { var what = inline[i], is_str = typeof what === "string"; if ( is_str && add_to.length > 1 && typeof add_to[add_to.length-1] === "string" ) add_to[ add_to.length-1 ] += what; else add_to.push( what ); } } // contained means have an indent greater than the current one. On // *every* line in the block function get_contained_blocks( depth, blocks ) { var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ), replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"), ret = []; while ( blocks.length > 0 ) { // HACK: Fixes a v8 issue test = blocks[0].replace(/^ {8,}/, ' '); if ( re.exec( test ) ) { var b = blocks.shift(), // Now remove that indent x = b.replace( replace, ""); ret.push( mk_block( x, b.trailing, b.lineNumber ) ); } else break; } return ret; } // passed to stack.forEach to turn list items up the stack into paras function paragraphify(s, i, stack) { var list = s.list; var last_li = list[list.length-1]; if ( last_li[1] instanceof Array && last_li[1][0] === "para" ) return; if ( i + 1 === stack.length ) { // Last stack frame // Keep the same array, but replace the contents last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ) ); } else { var sublist = last_li.pop(); last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ), sublist ); } } // The matcher function return function( block, next ) { var m = block.match( is_list_re ); if ( !m ) return undefined; function make_list( m ) { var list = bullet_list.exec( m[2] ) ? ["bulletlist"] : ["numberlist"]; stack.push( { list: list, indent: m[1] } ); return list; } var stack = [], // Stack of lists for nesting. list = make_list( m ), last_li, loose = false, ret = [ stack[0].list ], i; // Loop to search over block looking for inner block elements and loose lists loose_search: while ( true ) { // Split into lines preserving new lines at end of line var lines = block.split( /(?=\n)/ ); // We have to grab all lines for a li and call processInline on them // once as there are some inline things that can span lines. var li_accumulate = "", nl = ""; // Loop over the lines in this block looking for tight lists. tight_search: for ( var line_no = 0; line_no < lines.length; line_no++ ) { nl = ""; var l = lines[line_no].replace(/^\n/, function(n) { nl = n; return ""; }); // TODO: really should cache this var line_re = regex_for_depth( stack.length ); m = l.match( line_re ); //print( "line:", uneval(l), "\nline match:", uneval(m) ); // We have a list item if ( m[1] !== undefined ) { // Process the previous list item, if any if ( li_accumulate.length ) { add( last_li, loose, this.processInline( li_accumulate ), nl ); // Loose mode will have been dealt with. Reset it loose = false; li_accumulate = ""; } m[1] = expand_tab( m[1] ); var wanted_depth = Math.floor(m[1].length/4)+1; //print( "want:", wanted_depth, "stack:", stack.length); if ( wanted_depth > stack.length ) { // Deep enough for a nested list outright //print ( "new nested list" ); list = make_list( m ); last_li.push( list ); last_li = list[1] = [ "listitem" ]; } else { // We aren't deep enough to be strictly a new level. This is // where Md.pl goes nuts. If the indent matches a level in the // stack, put it there, else put it one deeper then the // wanted_depth deserves. var found = false; for ( i = 0; i < stack.length; i++ ) { if ( stack[ i ].indent !== m[1] ) continue; list = stack[ i ].list; stack.splice( i+1, stack.length - (i+1) ); found = true; break; } if (!found) { //print("not found. l:", uneval(l)); wanted_depth++; if ( wanted_depth <= stack.length ) { stack.splice(wanted_depth, stack.length - wanted_depth); //print("Desired depth now", wanted_depth, "stack:", stack.length); list = stack[wanted_depth-1].list; //print("list:", uneval(list) ); } else { //print ("made new stack for messy indent"); list = make_list(m); last_li.push(list); } } //print( uneval(list), "last", list === stack[stack.length-1].list ); last_li = [ "listitem" ]; list.push(last_li); } // end depth of shenegains nl = ""; } // Add content if ( l.length > m[0].length ) li_accumulate += nl + l.substr( m[0].length ); } // tight_search if ( li_accumulate.length ) { var contents = this.processBlock(li_accumulate, []), firstBlock = contents[0]; if (firstBlock) { firstBlock.shift(); contents.splice.apply(contents, [0, 1].concat(firstBlock)); add( last_li, loose, contents, nl ); // Let's not creating a trailing \n after content in the li if(last_li[last_li.length-1] === "\n") { last_li.pop(); } // Loose mode will have been dealt with. Reset it loose = false; li_accumulate = ""; } } // Look at the next block - we might have a loose list. Or an extra // paragraph for the current li var contained = get_contained_blocks( stack.length, next ); // Deal with code blocks or properly nested lists if ( contained.length > 0 ) { // Make sure all listitems up the stack are paragraphs forEach( stack, paragraphify, this); last_li.push.apply( last_li, this.toTree( contained, [] ) ); } var next_block = next[0] && next[0].valueOf() || ""; if ( next_block.match(is_list_re) ) { block = next.shift(); // Check for an HR following a list: features/lists/hr_abutting var hr = this.dialect.block.horizRule.call( this, block, next ); if ( hr ) { ret.push.apply(ret, hr); break; } // Add paragraphs if the indentation level stays the same if (stack[stack.length-1].indent === block.match(/^\s*/)[0]) { forEach( stack, paragraphify, this); } loose = true; continue loose_search; } break; } // loose_search return ret; }; })(), blockquote: function blockquote( block, next ) { // Handle quotes that have spaces before them var m = /(^|\n) +(\>[\s\S]*)/.exec(block); if (m && m[2] && m[2].length) { var blockContents = block.replace(/(^|\n) +\>/, "$1>"); next.unshift(blockContents); return []; } if ( !block.match( /^>/m ) ) return undefined; var jsonml = []; // separate out the leading abutting block, if any. I.e. in this case: // // a // > b // if ( block[ 0 ] !== ">" ) { var lines = block.split( /\n/ ), prev = [], line_no = block.lineNumber; // keep shifting lines until you find a crotchet while ( lines.length && lines[ 0 ][ 0 ] !== ">" ) { prev.push( lines.shift() ); line_no++; } var abutting = mk_block( prev.join( "\n" ), "\n", block.lineNumber ); jsonml.push.apply( jsonml, this.processBlock( abutting, [] ) ); // reassemble new block of just block quotes! block = mk_block( lines.join( "\n" ), block.trailing, line_no ); } // if the next block is also a blockquote merge it in while ( next.length && next[ 0 ][ 0 ] === ">" ) { var b = next.shift(); block = mk_block( block + block.trailing + b, b.trailing, block.lineNumber ); } // Strip off the leading "> " and re-process as a block. var input = block.replace( /^> ?/gm, "" ), old_tree = this.tree, processedBlock = this.toTree( input, [ "blockquote" ] ), attr = extract_attr( processedBlock ); // If any link references were found get rid of them if ( attr && attr.references ) { delete attr.references; // And then remove the attribute object if it's empty if ( isEmpty( attr ) ) processedBlock.splice( 1, 1 ); } jsonml.push( processedBlock ); return jsonml; }, referenceDefn: function referenceDefn( block, next) { var re = /^\s*\[([^\[\]]+)\]:\s*(\S+)(?:\s+(?:(['"])(.*)\3|\((.*?)\)))?\n?/; // interesting matches are [ , ref_id, url, , title, title ] if ( !block.match(re) ) return undefined; var attrs = create_attrs.call( this ); var b = this.loop_re_over_block(re, block, function( m ) { create_reference(attrs, m); } ); if ( b.length ) next.unshift( mk_block( b, block.trailing ) ); return []; }, para: function para( block ) { // everything's a para! return [ ["para"].concat( this.processInline( block ) ) ]; } }, inline: { __oneElement__: function oneElement( text, patterns_or_re, previous_nodes ) { // PERF NOTE: rewritten to avoid greedy match regex \([\s\S]*?)(...)\ // greedy match performs horribly with large inline blocks, it can be so // slow it will crash chrome patterns_or_re = patterns_or_re || this.dialect.inline.__patterns__; var search_re = new RegExp(patterns_or_re.source || patterns_or_re); var pos = text.search(search_re); if (pos === -1) { return [ text.length, text ]; } else if (pos !== 0) { // Some un-interesting text matched. Return that first return [pos, text.substring(0,pos)]; } var match_re = new RegExp( "^(" + (patterns_or_re.source || patterns_or_re) + ")" ); var m = match_re.exec( text ); var res; if ( m[1] in this.dialect.inline ) { res = this.dialect.inline[ m[1] ].call( this, text.substr( m.index ), m, previous_nodes || [] ); // If no inline code executed, fallback if (!res) { var fn = this.dialect.inline[m[1][0]]; if (fn) { res = fn.call( this, text.substr( m.index ), m, previous_nodes || [] ); } } } // Default for now to make dev easier. just slurp special and output it. res = res || [ m[1].length, m[1] ]; return res; }, __call__: function inline( text, patterns ) { var out = [], res; function add(x) { //D:self.debug(" adding output", uneval(x)); if ( typeof x === "string" && typeof out[out.length-1] === "string" ) out[ out.length-1 ] += x; else out.push(x); } while ( text.length > 0 ) { res = this.dialect.inline.__oneElement__.call(this, text, patterns, out ); text = text.substr( res.shift() ); forEach(res, add ); } return out; }, // These characters are interesting elsewhere, so have rules for them so that // chunks of plain text blocks don't include them "]": function () {}, "}": function () {}, __escape__ : /^\\[\\`\*_{}<>\[\]()#\+.!\-]/, "\\": function escaped( text ) { // [ length of input processed, node/children to add... ] // Only esacape: \ ` * _ { } [ ] ( ) # * + - . ! if ( this.dialect.inline.__escape__.exec( text ) ) return [ 2, text.charAt( 1 ) ]; else // Not an esacpe return [ 1, "\\" ]; }, "![": function image( text ) { // Without this guard V8 crashes hard on the RegExp if (text.indexOf('(') >= 0 && text.indexOf(')') === -1) { return; } // Unlike images, alt text is plain text only. no other elements are // allowed in there // ![Alt text](/path/to/img.jpg "Optional title") // 1 2 3 4 <--- captures // // First attempt to use a strong URL regexp to catch things like parentheses. If it misses, use the // old one. var origMatcher = /^!\[(.*?)\][ \t]*\([ \t]*([^")]*?)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/; m = text.match(new RegExp("^!\\[(.*?)][ \\t]*\\((" + urlRegexp + ")\\)([ \\t])*([\"'].*[\"'])?")) || text.match(origMatcher); if (m && m[2].indexOf(")]") !== -1) { m = text.match(origMatcher); } if ( m ) { if ( m[2] && m[2][0] === "<" && m[2][m[2].length-1] === ">" ) m[2] = m[2].substring( 1, m[2].length - 1 ); m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0]; var attrs = { alt: m[1], href: m[2] || "" }; if ( m[4] !== undefined) attrs.title = m[4]; return [ m[0].length, [ "img", attrs ] ]; } // ![Alt text][id] m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ ); if ( m ) { // We can't check if the reference is known here as it likely wont be // found till after. Check it in md tree->hmtl tree conversion return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), original: m[0] } ] ]; } // Just consume the '![' return [ 2, "![" ]; }, "[": function link( text ) { var open = 1; for (var i=0; i<text.length; i++) { var c = text.charAt(i); if (c === '[') { open++; } if (c === ']') { open--; } if (open > 3) { return [1, "["]; } } var orig = String(text); // Inline content is possible inside `link text` var res = inline_until_char.call( this, text.substr(1), "]" ); // No closing ']' found. Just consume the [ if ( !res[1] ) { return [ res[0] + 1, text.charAt(0) ].concat(res[2]); } if ( res[0] == 1 ) { return [ 2, "[]" ]; } // empty link found. var consumed = 1 + res[ 0 ], children = res[ 1 ], link, attrs; // At this point the first [...] has been parsed. See what follows to find // out which kind of link we are (reference or direct url) text = text.substr( consumed ); // [link text](/path/to/img.jpg "Optional title") // 1 2 3 <--- captures // This will capture up to the last paren in the block. We then pull // back based on if there a matching ones in the url // ([here](/url/(test)) // The parens have to be balanced var m = text.match( /^\s*\([ \t]*([^"'\s]*)(?:[ \t]+(["'])(.*?)\2)?[ \t]*\)/ ); if ( m ) { var url = m[1].replace(/\s+$/, ''); consumed += m[0].length; if ( url && url[0] === "<" && url[url.length-1] === ">" ) url = url.substring( 1, url.length - 1 ); // If there is a title we don't have to worry about parens in the url if ( !m[3] ) { var open_parens = 1; // One open that isn't in the capture for ( var len = 0; len < url.length; len++ ) { switch ( url[len] ) { case "(": open_parens++; break; case ")": if ( --open_parens === 0) { consumed -= url.length - len; url = url.substring(0, len); } break; } } } // Process escapes only url = this.dialect.inline.__call__.call( this, url, /\\/ )[0]; attrs = { href: url || "" }; if ( m[3] !== undefined) attrs.title = m[3]; link = [ "link", attrs ].concat( children ); return [ consumed, link ]; } if (text.indexOf('(') === 0 && text.indexOf(')') !== -1) { m = text.match(new RegExp("^\\((" + urlRegexp + ")\\)")); if (m && m[1]) { consumed += m[0].length; link = ["link", {href: m[1]}].concat(children); return [consumed, link]; } } // [Alt text][id] // [Alt text] [id] m = text.match( /^\s*\[(.*?)\]/ ); if ( m ) { consumed += m[ 0 ].length; // [links][] uses links as its reference attrs = { ref: ( m[ 1 ] || String(children) ).toLowerCase(), original: orig.substr( 0, consumed ) }; if (children && children.length > 0) { link = [ "link_ref", attrs ].concat( children ); // We can't check if the reference is known here as it likely wont be // found till after. Check it in md tree->hmtl tree conversion. // Store the original so that conversion can revert if the ref isn't found. return [ consumed, link ]; } } // Another check for references m = orig.match(/^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/); if (m && (/^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:\/[^\s]*)?$/i.test(m[2]) || /(\/[\w~,;\-\./?%&+#=]*)/.test(m[2]))) { attrs = create_attrs.call(this); create_reference(attrs, m); return [ m[0].length ]; } // [id] // Only if id is plain (no formatting.) if ( children.length === 1 && typeof children[0] === "string" ) { var normalized = children[0].toLowerCase().replace(/\s+/, ' '); attrs = { ref: normalized, original: orig.substr( 0, consumed ) }; link = [ "link_ref", attrs, children[0] ]; return [ consumed, link ]; } // Just consume the "[" return [ 1, "[" ]; }, "<": function autoLink( text ) { var m; if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) !== null ) { if ( m[3] ) return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ]; else if ( m[2] === "mailto" ) return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ]; else return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ]; } return [ 1, "<" ]; }, "`": function inlineCode( text, match, prev ) { // If we're in a tag, don't do it. if (prev && (typeof prev[0] === "string") && prev[0].match(/<[^>]+$/)) { return; } // Inline code block. as many backticks as you like to start it // Always skip over the opening ticks. var m = text.match( /(`+)(([\s\S]*?)\1)/ ); if ( m && m[2] ) return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ]; else { // TODO: No matching end code found - warn! return [ 1, "`" ]; } }, " \n": function lineBreak() { return [ 3, [ "linebreak" ] ]; } } }; // A helper function to create attributes function create_attrs() { if ( !extract_attr( this.tree ) ) { this.tree.splice( 1, 0, {} ); } var attrs = extract_attr( this.tree ); // make a references hash if it doesn't exist if ( attrs.references === undefined ) { attrs.references = {}; } return attrs; } // Create references for attributes function create_reference(attrs, m) { if ( m[2] && m[2][0] === "<" && m[2][m[2].length-1] === ">" ) m[2] = m[2].substring( 1, m[2].length - 1 ); var ref = attrs.references[ m[1].toLowerCase() ] = { href: m[2] }; if ( m[4] !== undefined ) ref.title = m[4]; else if ( m[5] !== undefined ) ref.title = m[5]; } Markdown.dialects.Gruber = Gruber; Markdown.buildBlockOrder ( Markdown.dialects.Gruber.block ); Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline ); // Include all our dependencies and return the resulting library. expose.Markdown = Markdown; expose.parse = Markdown.parse; expose.toHTML = Markdown.toHTML; expose.toHTMLTree = Markdown.toHTMLTree; expose.renderJsonML = Markdown.renderJsonML; expose.DialectHelpers = DialectHelpers; })(function() { window.BetterMarkdown = {}; return window.BetterMarkdown; }());