From f95c86ac723f39341cb68b579b6c26a7b0ecdbf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9gis=20Hanol?= <regis@hanol.fr>
Date: Sat, 7 Mar 2015 02:16:27 +0100
Subject: [PATCH] FIX: hoist code blocks content before doing any kind of
 processing

---
 .../discourse/dialects/code_dialect.js        | 25 +++++-----
 .../javascripts/discourse/dialects/dialect.js | 46 +++++++++++++++++--
 test/javascripts/lib/markdown-test.js.es6     |  8 ++--
 3 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/app/assets/javascripts/discourse/dialects/code_dialect.js b/app/assets/javascripts/discourse/dialects/code_dialect.js
index 0eb5c738a..9131775a3 100644
--- a/app/assets/javascripts/discourse/dialects/code_dialect.js
+++ b/app/assets/javascripts/discourse/dialects/code_dialect.js
@@ -10,7 +10,7 @@ var acceptableCodeClasses =
    "perl", "php", "profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql",
    "tex", "text", "vala", "vbscript", "vhdl"];
 
-var textCodeClasses = ["text", "pre"];
+var textCodeClasses = ["text", "pre", "plain"];
 
 function flattenBlocks(blocks) {
   var result = "";
@@ -39,6 +39,17 @@ Discourse.Dialect.replaceBlock({
   }
 });
 
+Discourse.Dialect.replaceBlock({ // block rule for raw <pre>...</pre> HTML
+  start: /(<pre[^\>]*\>)([\s\S]*)/igm, // an opening <pre ...> tag (attributes allowed) plus whatever follows it
+  stop: /<\/pre>/igm, // the closing </pre> tag
+  rawContents: true,
+  skipIfTradtionalLinebreaks: true,
+
+  emitter: function(blockContents) {
+    return ['p', ['pre', flattenBlocks(blockContents)]]; // a 'pre' node holding the flattened contents, wrapped in a 'p' node
+  }
+});
+
 // Ensure that content in a code block is fully escaped. This way it's not white listed
 // and we can use HTML and Javascript examples.
 Discourse.Dialect.on('parseNode', function (event) {
@@ -51,7 +62,6 @@ Discourse.Dialect.on('parseNode', function (event) {
 
     if (path && path[path.length-1] && path[path.length-1][0] && path[path.length-1][0] === "pre") {
       regexp = / +$/g;
-
     } else {
       regexp = /^ +| +$/g;
     }
@@ -59,17 +69,6 @@ Discourse.Dialect.on('parseNode', function (event) {
   }
 });
 
-Discourse.Dialect.replaceBlock({
-  start: /(<pre[^\>]*\>)([\s\S]*)/igm,
-  stop: /<\/pre>/igm,
-  rawContents: true,
-  skipIfTradtionalLinebreaks: true,
-
-  emitter: function(blockContents) {
-    return ['p', ['pre', flattenBlocks(blockContents)]];
-  }
-});
-
 // Whitelist the language classes
 var regexpSource = "^lang-(" + acceptableCodeClasses.join('|') + ")$";
 Discourse.Markdown.whiteListTag('code', 'class', new RegExp(regexpSource, "i"));
diff --git a/app/assets/javascripts/discourse/dialects/dialect.js b/app/assets/javascripts/discourse/dialects/dialect.js
index 66b72994b..245e52391 100644
--- a/app/assets/javascripts/discourse/dialects/dialect.js
+++ b/app/assets/javascripts/discourse/dialects/dialect.js
@@ -12,7 +12,8 @@ var parser = window.BetterMarkdown,
     initialized = false,
     emitters = [],
     hoisted,
-    preProcessors = [];
+    preProcessors = [],
+    escape = Handlebars.Utils.escapeExpression;
 
 /**
   Initialize our dialects for processing.
@@ -162,6 +163,10 @@ function hoister(t, target, replacement) {
   return t;
 }
 
+function outdent(t) { // removes one level (four spaces) of indentation from the text `t`
+  return t.replace(/^[ ]{4}/gm, ""); // "m" flag: applies at the start of every line that begins with four spaces
+}
+
 
 /**
   An object used for rendering our dialects.
@@ -183,14 +188,46 @@ Discourse.Dialect = {
   cook: function(text, opts) {
     if (!initialized) { initializeDialects(); }
 
+    dialect.options = opts;
+
     // Helps us hoist out HTML
     hoisted = {};
 
+    // pre-hoist all code-blocks
+
+    // <pre>...</pre> blocks (only a bare <pre> tag, without attributes, matches this pattern)
+    text = text.replace(/(\n*)<pre>([\s\S]*?)<\/pre>/ig, function(_, before, m) {
+      var hash = md5(m); // placeholder left in the text; replaced by hoisted[hash] once rendering is done
+      hoisted[hash] = escape(m.trim()); // stored trimmed and escaped
+      return before + "<pre>" + hash + "</pre>";
+    });
+
+    // fenced blocks (optional language of a-z, 0-9 or "-" after the opening fence; the language is kept, the body is replaced by its hash)
+    text = text.replace(/(\n*)```([a-z0-9\-]*)\n([\s\S]*?)\n```/g, function(_, before, language, m) {
+      var hash = md5(m); // placeholder left in the text; replaced by hoisted[hash] once rendering is done
+      hoisted[hash] = escape(m.trim()); // stored trimmed and escaped
+      return before + "```" + language + "\n" + hash + "\n```";
+    });
+
+    // inline (the char after the closing backtick is only peeked at, never consumed, so spans one char apart like "`a` `b`" are both hoisted)
+    text = text.replace(/(^|[^`])`([^`]*?)`(?=[^`]|$)/g, function(_, before, m) {
+      var hash = md5(m); // placeholder left in the text; replaced by hoisted[hash] once rendering is done
+      hoisted[hash] = escape(m); // inline content is escaped as-is (not trimmed)
+      return before + "`" + hash + "`";
+    });
+
+    // markdown blocks (runs of 4-space-indented lines). NOTE(review): pattern is not anchored to a line start, so 4 spaces mid-line match too — confirm intended
+    text = text.replace(/(\n*)((?:(?:[ ]{4}).*\n+)+)/g, function(_, before, m) {
+      var hash = md5(m); // placeholder left in the text; replaced by hoisted[hash] once rendering is done
+      hoisted[hash] = escape(outdent(m).trim()); // indentation is stripped before escaping
+      return before + "    " + hash + "\n"; // placeholder stays indented by four spaces
+    });
+
+    // pre-processors
     preProcessors.forEach(function(p) {
       text = p(text, hoister);
     });
 
-    dialect.options = opts;
     var tree = parser.toHTMLTree(text, 'Discourse'),
         result = parser.renderJsonML(parseTree(tree));
 
@@ -203,12 +240,11 @@ Discourse.Dialect = {
     // If we hoisted out anything, put it back
     var keys = Object.keys(hoisted);
     if (keys.length) {
-      keys.forEach(function(k) {
-        result = result.replace(new RegExp(k,"g"), hoisted[k]);
+      keys.forEach(function(key) { // swap every placeholder hash back for its hoisted content
+        result = result.replace(new RegExp(key, "g"), function() { return hoisted[key]; }); // function form: "$$", "$&" etc. in hoisted code stay literal
+      });
     }
 
-    hoisted = {};
     return result.trim();
   },
 
diff --git a/test/javascripts/lib/markdown-test.js.es6 b/test/javascripts/lib/markdown-test.js.es6
index 7c6cf4e40..79dd7f31d 100644
--- a/test/javascripts/lib/markdown-test.js.es6
+++ b/test/javascripts/lib/markdown-test.js.es6
@@ -345,12 +345,12 @@ test("Code Blocks", function() {
          "<p><pre><code class=\"lang-json\">{hello: &#x27;world&#x27;}</code></pre></p>\n\n<p>trailing</p>",
          "It does not truncate text after a code block.");
 
-  cooked("```json\nline 1\n\nline 2\n\n\nline3\n```",
-         "<p><pre><code class=\"lang-json\">line 1\n\nline 2\n\n\nline3</code></pre></p>",
+  cooked("```json\nline 1\n\nline 2\n\n\nline 3\n```",
+         "<p><pre><code class=\"lang-json\">line 1\n\nline 2\n\n\nline 3</code></pre></p>",
          "it maintains new lines inside a code block.");
 
-  cooked("hello\nworld\n```json\nline 1\n\nline 2\n\n\nline3\n```",
-         "<p>hello<br/>world<br/></p>\n\n<p><pre><code class=\"lang-json\">line 1\n\nline 2\n\n\nline3</code></pre></p>",
+  cooked("hello\nworld\n```json\nline 1\n\nline 2\n\n\nline 3\n```",
+         "<p>hello<br/>world<br/></p>\n\n<p><pre><code class=\"lang-json\">line 1\n\nline 2\n\n\nline 3</code></pre></p>",
          "it maintains new lines inside a code block with leading content.");
 
   cooked("```ruby\n<header>hello</header>\n```",