Merge pull request #2592 from Elberet/fix-parser

Fixes for quirky markdown parser behaviours
2024-11-30 10:58:31 -05:00 · 2014-07-28 13:12:22 -04:00 · 2014-07-28 13:12:22 -04:00 · 8866141ba2
commit 8866141ba2
parent b942436d7b 90d14d9ffc
6 changed files with 128 additions and 97 deletions
--- a/app/assets/javascripts/discourse/dialects/bbcode_dialect.js
+++ b/app/assets/javascripts/discourse/dialects/bbcode_dialect.js
@ -55,6 +55,20 @@ function replaceBBCodeParamsRaw(tag, emitter) {
  });
 }
 /**
  Returns a copy of a list of JSON-ML nodes without the nodes that are exactly
  the newline string "\n". All other nodes are kept, in their original order.
  A missing node list is treated as an empty one.
  @method removeEmptyLines
  @param {Array} [contents] Array of JSON-ML nodes
  @returns {Array} a new array; `contents` itself is never modified
 **/
 function removeEmptyLines(contents) {
  var result = [];
  // `contents` is documented as optional, so a missing list must not throw
  // when its length is read; other falsy values already produced [] before.
  if (!contents) { return result; }
  for (var i = 0; i < contents.length; i++) {
    // strict comparison: only the bare "\n" node is dropped, "a\n" is kept
    if (contents[i] !== "\n") { result.push(contents[i]); }
  }
  return result;
 }
 /**
  Creates a BBCode handler that accepts parameters. Passes them to the emitter.
  Processes the inside recursively so it can be nested.
@ -75,9 +89,9 @@ replaceBBCode('u', function(contents) { return ['span', {'class': 'bbcode-u'}].c
 replaceBBCode('s', function(contents) { return ['span', {'class': 'bbcode-s'}].concat(contents); });
 Discourse.Markdown.whiteListTag('span', 'class', /^bbcode-[bius]$/);
-replaceBBCode('ul', function(contents) { return ['ul'].concat(contents); });
+replaceBBCode('ul', function(contents) { return ['ul'].concat(removeEmptyLines(contents)); });
-replaceBBCode('ol', function(contents) { return ['ol'].concat(contents); });
+replaceBBCode('ol', function(contents) { return ['ol'].concat(removeEmptyLines(contents)); });
-replaceBBCode('li', function(contents) { return ['li'].concat(contents); });
+replaceBBCode('li', function(contents) { return ['li'].concat(removeEmptyLines(contents)); });
 rawBBCode('img', function(contents) { return ['img', {href: contents}]; });
 rawBBCode('email', function(contents) { return ['a', {href: "mailto:" + contents, 'data-bbcode': true}, contents]; });
--- a/app/assets/javascripts/discourse/dialects/code_dialect.js
+++ b/app/assets/javascripts/discourse/dialects/code_dialect.js
@ -10,6 +10,15 @@ var acceptableCodeClasses =
   "profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql", "tex", "text",
   "vala", "vbscript", "vhdl"];
 /**
  Concatenates a list of blocks into one string. Each block is appended in
  order, followed by its `trailing` property whenever that property is truthy.
  @method flattenBlocks
  @param {Array} blocks the blocks to join (presumably strings created by MD.mk_block -- confirm against callers)
  @returns {String} the text of all blocks including their trailing text
 **/
 function flattenBlocks(blocks) {
  return blocks.reduce(function(text, block) {
    text += block;
    return block.trailing ? text + block.trailing : text;
  }, "");
 }
 Discourse.Dialect.replaceBlock({
  start: /^`{3}([^\n\[\]]+)?\n?([\s\S]*)?/gm,
  stop: '```',
@ -19,7 +28,7 @@ Discourse.Dialect.replaceBlock({
    if (matches[1] && acceptableCodeClasses.indexOf(matches[1]) !== -1) {
      klass = matches[1];
    }
-    return ['p', ['pre', ['code', {'class': klass}, blockContents.join("\n") ]]];
+    return ['p', ['pre', ['code', {'class': klass}, flattenBlocks(blockContents) ]]];
  }
 });
@ -50,6 +59,6 @@ Discourse.Dialect.replaceBlock({
  skipIfTradtionalLinebreaks: true,
  emitter: function(blockContents) {
-    return ['p', ['pre', blockContents.join("\n")]];
+    return ['p', ['pre', flattenBlocks(blockContents)]];
  }
 });
--- a/app/assets/javascripts/discourse/dialects/dialect.js
+++ b/app/assets/javascripts/discourse/dialects/dialect.js
@ -133,6 +133,19 @@ function invalidBoundary(args, prev) {
  if (args.spaceOrTagBoundary && (!last.match(/(\s|\>)$/))) { return true; }
 }
 /**
  Counts the newline characters ("\n") in a string, i.e. how many lines in it
  are terminated. Text after the last newline is not counted as a line.
  @method countLines
  @param {string} str the string to inspect
  @returns {Integer} the number of "\n" occurrences in str
 **/
 function countLines(str) {
  var total = 0;
  // hop from one newline to the next until indexOf reports no further match
  for (var at = str.indexOf("\n", 0); at !== -1; at = str.indexOf("\n", at + 1)) {
    total++;
  }
  return total;
 }
 /**
  An object used for rendering our dialects.
@ -288,7 +301,7 @@ Discourse.Dialect = {
    this.registerInline(start, function(text, match, prev) {
      if (invalidBoundary(args, prev)) { return; }
-      var endPos = self.findEndPos(text, stop, args, startLength);
+      var endPos = self.findEndPos(text, start, stop, args, startLength);
      if (endPos === -1) { return; }
      var between = text.slice(startLength, endPos);
@ -304,13 +317,14 @@ Discourse.Dialect = {
    });
  },
-  findEndPos: function(text, stop, args, start) {
+  findEndPos: function(text, start, stop, args, offset) {
-    var endPos = text.indexOf(stop, start);
+    var endPos, nextStart;
-    if (endPos === -1) { return -1; }
+    do {
-    var after = text.charAt(endPos + stop.length);
+      endPos = text.indexOf(stop, offset);
-    if (after && after.indexOf(stop) === 0) {
+      if (endPos === -1) { return -1; }
-      return this.findEndPos(text, stop, args, endPos + stop.length + 1);
+      nextStart = text.indexOf(start, offset);
-    }
+      offset = endPos + stop.length;
    } while (nextStart !== -1 && nextStart < endPos);
    return endPos;
  },
@ -358,102 +372,83 @@ Discourse.Dialect = {
      var linebreaks = dialect.options.traditional_markdown_linebreaks ||
          Discourse.SiteSettings.traditional_markdown_linebreaks;
      // Some replacers should not be run with traditional linebreaks
      if (linebreaks && args.skipIfTradtionalLinebreaks) { return; }
      args.start.lastIndex = 0;
-      var m = (args.start).exec(block);
+      var result = [], match = (args.start).exec(block);
      if (!match) { return; }
-      if (!m) { return; }
+      var lastChance = function() {
        return !next.some(function(e) { return e.indexOf(args.stop) !== -1; });
      };
-      var startPos = args.start.lastIndex - m[0].length,
+      // shave off start tag and leading text, if any.
-          leading,
+      var pos = args.start.lastIndex - match[0].length,
-          blockContents = [],
+          leading = block.slice(0, pos),
-          result = [],
+          trailing = match[2] ? match[2].replace(/^\n*/, "") : "";
-          lineNumber = block.lineNumber;
+      // just give up if there's no stop tag in this or any next block
-
+      if (block.indexOf(args.stop, pos + args.stop.length) === -1 && lastChance()) { return; }
-      if (startPos > 0) {
+      if (leading.length > 0) { result.push(['p'].concat(this.processInline(leading))); }
-        leading = block.slice(0, startPos);
+      if (trailing.length > 0) {
-        lineNumber += (leading.split("\n").length - 1);
+        next.unshift(MD.mk_block(trailing, block.trailing,
-
+          block.lineNumber + countLines(leading) + (match[2] ? match[2].length : 0) - trailing.length));
        var para = ['p'];
        this.processInline(leading).forEach(function (l) {
          para.push(l);
        });
        result.push(para);
      }
-      if (m[2]) {
+      // go through the available blocks to find the matching stop tag.
-        next.unshift(MD.mk_block(m[2], null, lineNumber + 1));
+      var contentBlocks = [], nesting = 0, actualEndPos = -1, currentBlock;
-      }
+      blockloop:
      while (currentBlock = next.shift()) {
        // collect all the start and stop tags in the current block
        args.start.lastIndex = 0;
        var startPos = [], m;
        while (m = (args.start).exec(currentBlock)) {
          startPos.push(args.start.lastIndex - m[0].length);
          args.start.lastIndex = args.start.lastIndex - (m[2] ? m[2].length : 0);
        }
        var endPos = [], offset = 0;
        while ((pos = currentBlock.indexOf(args.stop, offset)) !== -1) {
          endPos.push(pos);
          offset += (pos + args.stop.length);
        }
-      lineNumber++;
+        // go through the available end tags:
        var ep = 0, sp = 0; // array indices
        while (ep < endPos.length) {
          if (sp < startPos.length && startPos[sp] < endPos[ep]) {
            // there's an end tag, but there's also another start tag first. we need to go deeper.
            sp++; nesting++;
          } else if (nesting > 0) {
            // found an end tag, but we must go up a level first.
            ep++; nesting--;
          } else {
            // found an end tag and we're at the top: done!
            actualEndPos = endPos[ep];
            break blockloop;
          }
        }
-      var blockClosed = false;
+        if (lastChance()) {
-      for (var i=0; i<next.length; i++) {
+          // when lastChance() becomes true the first time, currentBlock contains the last
-        if (next[i].indexOf(args.stop) >= 0) {
+          // end tag available in the input blocks but it's not on the right nesting level
-          blockClosed = true;
+          // or we would have terminated the loop already. the only thing we can do is to
          // treat the last available end tag as though it were matched with our start tag
          // and let the emitter figure out how to render the garbage inside.
          actualEndPos = endPos[endPos.length - 1];
          break;
        }
        // any left-over start tags still increase the nesting level
        nesting += startPos.length - sp;
        contentBlocks.push(currentBlock);
      }
-      if (!blockClosed) {
+      var before = currentBlock.slice(0, actualEndPos).replace(/\n*$/, ""),
-        if (m[2]) { next.shift(); }
+          after = currentBlock.slice(actualEndPos + args.stop.length).replace(/^\n*/, "");
-        return;
+      if (before.length > 0) contentBlocks.push(MD.mk_block(before, "", currentBlock.lineNumber));
-      }
+      if (after.length > 0) next.unshift(MD.mk_block(after, "", currentBlock.lineNumber + countLines(before)));
-      var numOpen = 1;
+      var emitterResult = args.emitter.call(this, contentBlocks, match, dialect.options);
-      while (next.length > 0) {
+      if (emitterResult) { result.push(emitterResult); }
        var b = next.shift(),
            blockLine = b.lineNumber,
            diff = ((typeof blockLine === "undefined") ? lineNumber : blockLine) - lineNumber,
            endFound = b.indexOf(args.stop),
            leadingContents = b.slice(0, endFound),
            trailingContents = b.slice(endFound+args.stop.length),
            m2;
        if (endFound === -1) {
          leadingContents = b;
        }
        args.start.lastIndex = 0;
        if (m2 = (args.start).exec(leadingContents)) {
          numOpen++;
          args.start.lastIndex -= m2[0].length - 1;
          while (m2 = (args.start).exec(leadingContents)) {
            numOpen++;
            args.start.lastIndex -= m2[0].length - 1;
          }
        }
        if (endFound >= 0) { numOpen--; }
        for (var j=1; j<diff; j++) {
          blockContents.push("");
        }
        lineNumber = blockLine + b.split("\n").length - 1;
        if (endFound >= 0) {
          if (trailingContents) {
            next.unshift(MD.mk_block(trailingContents.replace(/^\s+/, "")));
          }
          blockContents.push(leadingContents.replace(/\s+$/, ""));
          if (numOpen === 0) {
            break;
          }
          blockContents.push(args.stop);
        } else {
          blockContents.push(b);
        }
      }
      var emitterResult = args.emitter.call(this, blockContents, m, dialect.options);
      if (emitterResult) {
        result.push(emitterResult);
      }
      return result;
    });
  },
--- a/test/javascripts/lib/bbcode_test.js
+++ b/test/javascripts/lib/bbcode_test.js
@ -22,6 +22,7 @@ test('basic bbcode', function() {
         "<span class=\"bbcode-b\">evil <span class=\"bbcode-i\">trout</span></span>",
         "allows embedding of tags");
  format("[EMAIL]eviltrout@mailinator.com[/EMAIL]", "<a href=\"mailto:eviltrout@mailinator.com\">eviltrout@mailinator.com</a>", "supports upper case bbcode");
  format("[b]strong [b]stronger[/b][/b]", "<span class=\"bbcode-b\">strong <span class=\"bbcode-b\">stronger</span></span>", "accepts nested bbcode tags");
 });
 test('invalid bbcode', function() {
@ -44,6 +45,7 @@ test('spoiler', function() {
 // Checks the BBCode list tags: [ul] and [ol] containing [li] items are expected
 // to render as <ul>/<ol> with <li> children, and newlines placed between the
 // list tags are expected not to show up in the generated markup.
 test('lists', function() {
  format("[ul][li]option one[/li][/ul]", "<ul><li>option one</li></ul>", "creates an ul");
  format("[ol][li]option one[/li][/ol]", "<ol><li>option one</li></ol>", "creates an ol");
  format("[ul]\n[li]option one[/li]\n[li]option two[/li]\n[/ul]", "<ul><li>option one</li><li>option two</li></ul>", "suppresses empty lines in lists");
 });
 test('tags with arguments', function() {
@ -127,6 +129,16 @@ test("quote formatting", function() {
         "</div><blockquote><p>abc</p></blockquote></aside>\n\n<p>hello</p>",
         "handles new lines properly");
  formatQ("[quote=\"Alice, post:1, topic:1\"]\n[quote=\"Bob, post:2, topic:1\"]\n[/quote]\n[/quote]",
         "<aside class=\"quote\" data-post=\"1\" data-topic=\"1\"><div class=\"title\"><div class=\"quote-controls\"></div>Alice said:" +
         "</div><blockquote><aside class=\"quote\" data-post=\"2\" data-topic=\"1\"><div class=\"title\"><div class=\"quote-controls\"></div>Bob said:" +
         "</div><blockquote></blockquote></aside></blockquote></aside>",
         "quotes can be nested");
  formatQ("[quote=\"Alice, post:1, topic:1\"]\n[quote=\"Bob, post:2, topic:1\"]\n[/quote]",
         "<aside class=\"quote\" data-post=\"1\" data-topic=\"1\"><div class=\"title\"><div class=\"quote-controls\"></div>Alice said:" +
         "</div><blockquote><p>[quote=\"Bob, post:2, topic:1\"]</p></blockquote></aside>",
         "handles mismatched nested quote tags");
 });
 test("quotes with trailing formatting", function() {
--- a/test/javascripts/lib/markdown_test.js
+++ b/test/javascripts/lib/markdown_test.js
@ -36,6 +36,7 @@ test("Auto quoting", function() {
         "it converts single line quotes to blockquotes");
  cooked('"hello\nworld"', "<p>\"hello<br/>world\"</p>", "It doesn't convert multi line quotes");
  cooked('"hello "evil" trout"', '<p>"hello "evil" trout"</p>', "it doesn't format quotes in the middle of a line");
  cooked('["text"', '<p>["text"</p>', "it recognizes leading tag-like text");
 });
 test("Traditional Line Breaks", function() {
@ -315,7 +316,7 @@ test("links with full urls", function() {
 test("Code Blocks", function() {
  cooked("<pre>\nhello\n</pre>\n",
-         "<p><pre>\nhello</pre></p>",
+         "<p><pre>hello</pre></p>",
         "pre blocks don't include extra lines");
  cooked("```\na\nb\nc\n\nd\n```",
--- a/vendor/assets/javascripts/better_markdown.js
+++ b/vendor/assets/javascripts/better_markdown.js
@ -662,7 +662,7 @@
        return [consumed, null, nodes];
      }
-      var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ), patterns );
+      var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ), patterns, [text.substr(0, consumed)]);
      consumed += res[ 0 ];
      // Add any returned nodes.
      nodes.push.apply( nodes, res.slice( 1 ) );