diff --git a/app/assets/javascripts/defer/html-sanitizer-bundle.js b/app/assets/javascripts/defer/html-sanitizer-bundle.js index 12b212e53..adcde0e4e 100644 --- a/app/assets/javascripts/defer/html-sanitizer-bundle.js +++ b/app/assets/javascripts/defer/html-sanitizer-bundle.js @@ -2060,6 +2060,10 @@ var html = (function(html4) { html4.ATTRIBS.hasOwnProperty(attribKey))) { atype = html4.ATTRIBS[attribKey]; } + + // Discourse modification: give us more flexibility with whitelists + if (opt_nmTokenPolicy && opt_nmTokenPolicy(tagName, attribName, value)) { continue; } + if (atype !== null) { switch (atype) { case html4.atype['NONE']: break; @@ -2108,17 +2112,6 @@ var html = (function(html4) { log(opt_logger, tagName, attribName, oldValue, value); } break; - case html4.atype['ID']: - case html4.atype['IDREF']: - case html4.atype['IDREFS']: - case html4.atype['GLOBAL_NAME']: - case html4.atype['LOCAL_NAME']: - case html4.atype['CLASSES']: - value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; case html4.atype['URI']: value = safeUri(value, getUriEffect(tagName, attribName), @@ -2135,7 +2128,6 @@ var html = (function(html4) { case html4.atype['URI_FRAGMENT']: if (value && '#' === value.charAt(0)) { value = value.substring(1); // remove the leading '#' - value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; if (value !== null && value !== void 0) { value = '#' + value; // restore the leading '#' } diff --git a/app/assets/javascripts/discourse/dialects/autolink_dialect.js b/app/assets/javascripts/discourse/dialects/autolink_dialect.js index d231089cd..57130511b 100644 --- a/app/assets/javascripts/discourse/dialects/autolink_dialect.js +++ b/app/assets/javascripts/discourse/dialects/autolink_dialect.js @@ -4,7 +4,7 @@ **/ var urlReplacerArgs = { matcher: /^((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/gm, - spaceBoundary: true, + spaceOrTagBoundary: true, emitter: function(matches) { var url = matches[1], diff --git a/app/assets/javascripts/discourse/dialects/dialect.js b/app/assets/javascripts/discourse/dialects/dialect.js index 040e11028..86e1adf8e 100644 --- a/app/assets/javascripts/discourse/dialects/dialect.js +++ b/app/assets/javascripts/discourse/dialects/dialect.js @@ -42,10 +42,6 @@ function processTextNodes(node, event, emitter) { for (var j=1; j<node.length; j++) { var textContent = node[j]; if (typeof textContent === "string") { - if (dialect.options.sanitize && !skipSanitize[textContent]) { - textContent = Discourse.Markdown.sanitize(textContent); - } - var result = emitter(textContent, event); if (result) { if (result instanceof Array) { @@ -118,13 +114,14 @@ function parseTree(tree, path, insideCounts) { @returns {Boolean} whether there is an invalid word boundary **/ function invalidBoundary(args, prev) { - if (!args.wordBoundary && !args.spaceBoundary) { return false; } + if (!(args.wordBoundary || args.spaceBoundary || args.spaceOrTagBoundary)) { return false; } var last = prev[prev.length - 1]; if (typeof last !== "string") { return false; } if (args.wordBoundary && (last.match(/(\w|\/)$/))) { return true; } if (args.spaceBoundary && (!last.match(/\s$/))) { return true; } + if (args.spaceOrTagBoundary && (!last.match(/(\s|\>)$/))) { return true; } } /** @@ -147,9 +144,19 @@ Discourse.Dialect = { cook: function(text, opts) { if (!initialized) { initializeDialects(); } dialect.options = opts; - var tree = parser.toHTMLTree(text, 'Discourse'); + var tree = parser.toHTMLTree(text, 'Discourse'), + result = parser.renderJsonML(parseTree(tree)); - return parser.renderJsonML(parseTree(tree)); + // This feature is largely for MDTest. We prefer to strip comments + // in Discourse + + if (opts.sanitize) { + result = Discourse.Markdown.sanitize(result); + } else if (opts.sanitizerFunction) { + result = opts.sanitizerFunction(result); + } + + return result.trim(); }, /** diff --git a/app/assets/javascripts/discourse/lib/markdown.js b/app/assets/javascripts/discourse/lib/markdown.js index e3a629314..5212dbc5a 100644 --- a/app/assets/javascripts/discourse/lib/markdown.js +++ b/app/assets/javascripts/discourse/lib/markdown.js @@ -7,10 +7,51 @@ @namespace Discourse @module Discourse **/ +var _validClasses = {}, + _validIframes = [], + _validTags = {}; + +function validateAttribute(tagName, attribName, value) { + var tag = _validTags[tagName]; + + // Handle classes + if (attribName === "class") { + if (_validClasses[value]) { return value; } + + if (tag) { + var classes = tag['class']; + if (classes && (classes.indexOf(value) !== -1 || classes.indexOf('*') !== -1)) { + return value; + } + } + } else if (attribName.indexOf('data-') === 0) { + // data-* attributes + if (tag) { + var allowed = tag[attribName] || tag['data-*']; + if (allowed === value || allowed.indexOf('*') !== -1) { return value; } + } + } + + if (tag) { + var attrs = tag[attribName]; + if (attrs && (attrs.indexOf(value) !== -1 || attrs.indexOf('*') !== -1)) { return value; } + } +} + Discourse.Markdown = { - validClasses: {}, - validIframes: [], + /** + Whitelist class for only a certain tag + + @param {String} tagName to whitelist + @param {String} attribName to whitelist + @param {String} value to whitelist + **/ + whiteListTag: function(tagName, attribName, value) { + _validTags[tagName] = _validTags[tagName] || {}; + _validTags[tagName][attribName] = _validTags[tagName][attribName] || []; + _validTags[tagName][attribName].push(value || '*'); + }, /** Whitelists more classes for sanitization. @@ -19,10 +60,8 @@ Discourse.Markdown = { @method whiteListClass **/ whiteListClass: function() { - var args = Array.prototype.slice.call(arguments), - validClasses = Discourse.Markdown.validClasses; - - args.forEach(function (a) { validClasses[a] = true; }); + var args = Array.prototype.slice.call(arguments); + args.forEach(function (a) { _validClasses[a] = true; }); }, /** @@ -32,7 +71,7 @@ Discourse.Markdown = { @param {Regexp} regexp The regexp to whitelist. **/ whiteListIframe: function(regexp) { - Discourse.Markdown.validIframes.push(regexp); + _validIframes.push(regexp); }, /** @@ -124,8 +163,8 @@ Discourse.Markdown = { // whitelist some iframe only if (hints && hints.XML_TAG === "iframe" && hints.XML_ATTR === "src") { - for (var i = 0, length = Discourse.Markdown.validIframes.length; i < length; i++) { - if(Discourse.Markdown.validIframes[i].test(url)) { return url; } + for (var i = 0, length = _validIframes.length; i < length; i++) { + if(_validIframes[i].test(url)) { return url; } } return; } @@ -144,12 +183,10 @@ Discourse.Markdown = { Checks to see if a name, class or id is allowed in the cooked content @method nameIdClassAllowed - @param {String} val The name, class or id to check - @return {String} val the transformed name class or id + @param {String} tagName to check + @param {String} attribName to check + @param {String} value to check **/ - nameIdClassAllowed: function(val) { - if (Discourse.Markdown.validClasses[val]) { return val; } - }, /** @@ -161,8 +198,11 @@ Discourse.Markdown = { **/ sanitize: function(text) { if (!window.html_sanitize || !text) return ""; - text = text.replace(/<([^A-Za-z\/]|$)/g, "<$1"); - return window.html_sanitize(text, Discourse.Markdown.urlAllowed, Discourse.Markdown.nameIdClassAllowed); + + // Allow things like <3 and <_< + text = text.replace(/<([^A-Za-z\/\!]|$)/g, "<$1"); + + return window.html_sanitize(text, Discourse.Markdown.urlAllowed, validateAttribute); }, /** @@ -185,5 +225,14 @@ Discourse.Markdown = { }; RSVP.EventTarget.mixin(Discourse.Markdown); -Discourse.Markdown.whiteListClass("attachment"); +Discourse.Markdown.whiteListTag('a', 'class', 'attachment'); +Discourse.Markdown.whiteListTag('a', 'target', '_blank'); +Discourse.Markdown.whiteListTag('a', 'class', 'onebox'); +Discourse.Markdown.whiteListTag('a', 'class', 'mention'); +Discourse.Markdown.whiteListTag('div', 'class', 'title'); +Discourse.Markdown.whiteListTag('div', 'class', 'quote-controls'); +Discourse.Markdown.whiteListTag('code', 'class', '*'); +Discourse.Markdown.whiteListTag('span', 'class', 'mention'); +Discourse.Markdown.whiteListTag('aside', 'class', 'quote'); +Discourse.Markdown.whiteListTag('aside', 'data-*'); Discourse.Markdown.whiteListIframe(/^(https?:)?\/\/www\.google\.com\/maps\/embed\?.+/i); diff --git a/plugins/emoji/assets/javascripts/emoji.js.erb b/plugins/emoji/assets/javascripts/emoji.js.erb index ccfbcfdff..2eeef638a 100644 --- a/plugins/emoji/assets/javascripts/emoji.js.erb +++ b/plugins/emoji/assets/javascripts/emoji.js.erb @@ -156,5 +156,5 @@ }); } - Discourse.Markdown.whiteListClass("emoji"); + Discourse.Markdown.whiteListTag('img', 'class', 'emoji'); }).call(this); diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index 0a078e619..df9e2e15d 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -258,20 +258,11 @@ describe PrettyText do PrettyText.cook("**a*_b**").should match_html "<p><strong>a*_b</strong></p>" end - pending "small links" do - PrettyText.cook("<small>\nhttp://a.com\n<small>").should match_html "<p><small><br><a href=\"http://a.com\" rel=\"nofollow\">http://a.com</a><br></small></p>" - end - - it "does not apply italics when there is a space inside" do PrettyText.cook("** hello**").should match_html "<p>** hello**</p>" PrettyText.cook("**hello **").should match_html "<p>**hello **</p>" end - pending "allows comments through" do - PrettyText.cook("boom <!--comment-->").should match_html "<p>boom <!--comment--></p>" - end - it "allows does not bold chinese intra word" do PrettyText.cook("你**hello**").should match_html "<p>你**hello**</p>" end @@ -279,11 +270,6 @@ describe PrettyText do it "allows bold chinese" do PrettyText.cook("**你hello**").should match_html "<p><strong>你hello</strong></p>" end - - pending "does not break a streak for mentions" do - Fabricate(:user, username: 'sam') - PrettyText.cook("<small>a @sam c</small>").should match_html "<p><small>a <a class='mention' href='/users/sam'>@sam</a> c</small></p>" - end end end diff --git a/test/javascripts/lib/markdown_test.js b/test/javascripts/lib/markdown_test.js index 34d4e9f27..b153c7175 100644 --- a/test/javascripts/lib/markdown_test.js +++ b/test/javascripts/lib/markdown_test.js @@ -6,6 +6,8 @@ module("Discourse.Markdown", { var cooked = function(input, expected, text) { var result = Discourse.Markdown.cook(input, {sanitize: true}); + expected = expected.replace(/\/>/g, ">"); + // result = result.replace("/>", ">"); equal(result, expected, text); }; @@ -138,6 +140,8 @@ test("Links", function() { cooked("User [MOD]: Hello!", "<p>User [MOD]: Hello!</p>", "It does not consider references that are obviously not URLs"); + + cooked("<small>http://eviltrout.com</small>", "<p><small><a href=\"http://eviltrout.com\">http://eviltrout.com</a></small></p>", "Links within HTML tags"); }); test("simple quotes", function() { @@ -240,6 +244,9 @@ test("Mentions", function() { "<p><a class=\"mention\" href=\"/users/eviltrout\">@eviltrout</a></p>", "it doesn't onebox mentions"); + cookedOptions("<small>a @sam c</small>", alwaysTrue, + "<p><small>a <a class=\"mention\" href=\"/users/sam\">@sam</a> c</small></p>", + "it allows mentions within HTML tags"); }); @@ -370,7 +377,7 @@ test("sanitize", function() { cooked("[the answer](javascript:alert(42))", "<p><a>the answer</a></p>", "it prevents XSS"); - cooked("<i class=\"fa fa-bug fa-spin\" style=\"font-size:600%\"></i>\n<!-- -->", "<p><i></i><br/><!-- --></p>", "it doesn't circumvent XSS with comments"); + cooked("<i class=\"fa fa-bug fa-spin\" style=\"font-size:600%\"></i>\n<!-- -->", "<p><i></i><br/></p>", "it doesn't circumvent XSS with comments"); }); test("URLs in BBCode tags", function() { diff --git a/test/javascripts/mdtest/fixtures/Images.xhtml b/test/javascripts/mdtest/fixtures/Images.xhtml index 925bc1476..fdec8ebcb 100755 --- a/test/javascripts/mdtest/fixtures/Images.xhtml +++ b/test/javascripts/mdtest/fixtures/Images.xhtml @@ -12,7 +12,7 @@ <p><img src="/url/" alt="alt text" title="with a title" />.</p> -<p><img src="" alt="Empty" /></p> +<p><img alt="Empty" /></p> <p><img src="http://example.com/(parens).jpg" alt="this is a stupid URL" /></p> diff --git a/test/javascripts/mdtest/fixtures/Links, inline style.xhtml b/test/javascripts/mdtest/fixtures/Links, inline style.xhtml index 2d5a29ff2..70d02398e 100755 --- a/test/javascripts/mdtest/fixtures/Links, inline style.xhtml +++ b/test/javascripts/mdtest/fixtures/Links, inline style.xhtml @@ -10,9 +10,9 @@ <p><a href="/url/">URL wrapped in angle brackets</a>.</p> -<p><a href="/url/" title="Here's the title">URL w/ angle brackets + title</a>.</p> +<p><a href="/url/" title="Here's the title">URL w/ angle brackets + title</a>.</p> -<p><a href="">Empty</a>.</p> +<p><a>Empty</a>.</p> <p><a href="http://en.wikipedia.org/wiki/WIMP_(computing)">With parens in the URL</a></p> diff --git a/test/javascripts/mdtest/mdtest.js.erb b/test/javascripts/mdtest/mdtest.js.erb index 5bf4c22fe..f51a94ab2 100644 --- a/test/javascripts/mdtest/mdtest.js.erb +++ b/test/javascripts/mdtest/mdtest.js.erb @@ -8,13 +8,42 @@ module("MDTest", { // do not affect formatting. function normalize(str) { return str.replace(/\n\s*/g, ''). - replace(/ \/\>/g, '/>'). + replace(/ \/\>/g, '>'). replace(/ ?/g, "\t"). replace(/"/g, '"'); } +// We use a custom sanitizer for MD test that hoists out comments. In Discourse +// they are stripped, but to be compliant with the spec they should not be. +function hoistingSanitizer(result) { + var hoisted, + m = result.match(/<!--[\s\S]*?-->/g); + if (m && m.length) { + hoisted = []; + for (var i=0; i<m.length; i++) { + var c = m[i], + id = "discourse:hoisted-comment:" + i; + result = result.replace(c, id); + hoisted.push([c, id]); + } + } + + result = Discourse.Markdown.sanitize(result); + + if (hoisted) { + hoisted.forEach(function(tuple) { + result = result.replace(tuple[1], tuple[0]); + }); + } + + return result; +} + var md = function(input, expected, text) { - var result = Discourse.Markdown.cook(input, {sanitize: true, traditional_markdown_linebreaks: true}), + var result = Discourse.Markdown.cook(input, { + sanitizerFunction: hoistingSanitizer, + traditional_markdown_linebreaks: true + }), resultNorm = normalize(result), expectedNorm = normalize(expected), same = (result === expected) || (resultNorm === expectedNorm);