From 5281b7f80c43efd39488c7c34dd19b291fee7933 Mon Sep 17 00:00:00 2001 From: Robin Ward Date: Fri, 11 Oct 2013 16:24:27 -0400 Subject: [PATCH] Upgraded and refactored Sanitizing. Much less crap should get through now! Conflicts: app/assets/javascripts/discourse/components/syntax_highlighting.js --- .../defer/html-sanitizer-bundle.js | 95 +++++++++------ .../discourse/components/markdown.js | 57 ++++++++- .../components/syntax_highlighting.js | 4 +- .../discourse/dialects/bbcode_dialect.js | 14 +-- .../discourse/dialects/github_code_dialect.js | 9 ++ .../discourse/dialects/mention_dialect.js | 4 +- .../discourse/dialects/onebox_dialect.js | 4 +- .../discourse/dialects/quote_dialect.js | 4 +- app/assets/javascripts/main_include.js | 2 +- lib/pretty_text.rb | 15 ++- spec/components/pretty_text_spec.rb | 2 +- test/javascripts/components/bbcode_test.js | 8 -- test/javascripts/components/markdown_test.js | 20 +++- test/javascripts/test_helper.js | 1 - .../assets/javascripts/Markdown.Sanitizer.js | 108 ------------------ .../assets/javascripts/discourse_emoji.js | 2 + 16 files changed, 175 insertions(+), 174 deletions(-) delete mode 100644 vendor/assets/javascripts/Markdown.Sanitizer.js diff --git a/app/assets/javascripts/defer/html-sanitizer-bundle.js b/app/assets/javascripts/defer/html-sanitizer-bundle.js index 363a5ce40..ccdebf3a9 100644 --- a/app/assets/javascripts/defer/html-sanitizer-bundle.js +++ b/app/assets/javascripts/defer/html-sanitizer-bundle.js @@ -11,11 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// -// Sam: made some modifications to pass jshint and protect against global namespace pollution - - -window.sanitizeHtml = (function() { /** * @fileoverview @@ -88,7 +83,7 @@ function encodeIfExists(unescapedPart) { return encodeURIComponent(unescapedPart); } return null; -} +}; /** * if unescapedPart is non null, then escapes any characters in it that aren't * valid characters in a url and also escapes any special characters that @@ -159,7 +154,7 @@ var EXTRA_PARENT_PATHS_RE = /^(?:\.\.\/)*(?:\.\.$)?/; * } */ function collapse_dots(path) { - if (path == null) { return null; } + if (path === null) { return null; } var p = normPath(path); // Only /../ left to flatten var r = PARENT_DIRECTORY_HANDLER_RE; @@ -743,9 +738,14 @@ URI.utils = { return URI; })(); +// Exports for closure compiler. +if (typeof window !== 'undefined') { + window['URI'] = URI; +} +; // Copyright Google Inc. // Licensed under the Apache Licence Version 2.0 -// Autogenerated at Wed Feb 20 13:32:22 EST 2013 +// Autogenerated at Fri Oct 11 16:16:32 EDT 2013 // @overrides window // @provides html4 var html4 = {}; @@ -765,6 +765,7 @@ html4.atype = { 'FRAME_TARGET': 10, 'MEDIA_QUERY': 13 }; +html4[ 'atype' ] = html4.atype; html4.ATTRIBS = { '*::class': 9, '*::dir': 0, @@ -780,6 +781,7 @@ html4.ATTRIBS = { '*::onchange': 2, '*::onclick': 2, '*::ondblclick': 2, + '*::onerror': 2, '*::onfocus': 2, '*::onkeydown': 2, '*::onkeypress': 2, @@ -825,6 +827,7 @@ html4.ATTRIBS = { 'audio::mediagroup': 5, 'audio::muted': 0, 'audio::preload': 0, + 'audio::src': 1, 'bdo::dir': 0, 'blockquote::cite': 1, 'br::clear': 0, @@ -1066,8 +1069,10 @@ html4.ATTRIBS = { 'video::muted': 0, 'video::poster': 1, 'video::preload': 0, + 'video::src': 1, 'video::width': 0 }; +html4[ 'ATTRIBS' ] = html4.ATTRIBS; html4.eflags = { 'OPTIONAL_ENDTAG': 1, 'EMPTY': 2, @@ -1079,6 +1084,7 @@ html4.eflags = { 'STYLE': 128, 'VIRTUALIZED': 256 }; +html4[ 'eflags' ] = html4.eflags; html4.ELEMENTS = { 'a': 0, 'abbr': 0, @@ -1202,6 +1208,7 @@ html4.ELEMENTS = { 'video': 0, 'wbr': 2 }; +html4[ 'ELEMENTS' ] = html4.ELEMENTS; html4.ELEMENT_DOM_INTERFACES = { 'a': 'HTMLAnchorElement', 'abbr': 'HTMLElement', @@ -1325,14 +1332,17 @@ html4.ELEMENT_DOM_INTERFACES = { 'video': 'HTMLVideoElement', 'wbr': 'HTMLElement' }; +html4[ 'ELEMENT_DOM_INTERFACES' ] = html4.ELEMENT_DOM_INTERFACES; html4.ueffects = { 'NOT_LOADED': 0, 'SAME_DOCUMENT': 1, 'NEW_DOCUMENT': 2 }; +html4[ 'ueffects' ] = html4.ueffects; html4.URIEFFECTS = { 'a::href': 2, 'area::href': 2, + 'audio::src': 1, 'blockquote::cite': 0, 'command::icon': 1, 'del::cite': 0, @@ -1341,16 +1351,20 @@ html4.URIEFFECTS = { 'input::src': 1, 'ins::cite': 0, 'q::cite': 0, - 'video::poster': 1 + 'video::poster': 1, + 'video::src': 1 }; +html4[ 'URIEFFECTS' ] = html4.URIEFFECTS; html4.ltypes = { 'UNSANDBOXED': 2, 'SANDBOXED': 1, 'DATA': 0 }; +html4[ 'ltypes' ] = html4.ltypes; html4.LOADERTYPES = { 'a::href': 2, 'area::href': 2, + 'audio::src': 2, 'blockquote::cite': 2, 'command::icon': 1, 'del::cite': 2, @@ -1359,8 +1373,15 @@ html4.LOADERTYPES = { 'input::src': 1, 'ins::cite': 2, 'q::cite': 2, - 'video::poster': 1 + 'video::poster': 1, + 'video::src': 2 }; +html4[ 'LOADERTYPES' ] = html4.LOADERTYPES; +// export for Closure Compiler +if (typeof window !== 'undefined') { + window['html4'] = html4; +} +; // Copyright (C) 2006 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -1397,7 +1418,7 @@ html4.LOADERTYPES = { */ // The Turkish i seems to be a non-issue, but abort in case it is. -// if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; } # Sam ... screwing up in turkish browsers seems a silly idea +if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; } /** * \@namespace @@ -1407,9 +1428,9 @@ var html = (function(html4) { // For closure compiler var parseCssDeclarations, sanitizeCssProperty, cssSchema; if ('undefined' !== typeof window) { - parseCssDeclarations = window.parseCssDeclarations; - sanitizeCssProperty = window.sanitizeCssProperty; - cssSchema = window.cssSchema; + parseCssDeclarations = window['parseCssDeclarations']; + sanitizeCssProperty = window['sanitizeCssProperty']; + cssSchema = window['cssSchema']; } // The keys of this object must be 'quoted' or JSCompiler will mangle them! @@ -1439,7 +1460,8 @@ var html = (function(html4) { // TODO(kpreid): This retrieval is a kludge and leads to silent loss of // functionality if the document isn't available. var entityLookupElement = - ('undefined' !== typeof window && window.document) ? window.document.createElement('textarea') : null; + ('undefined' !== typeof window && window['document']) + ? window['document'].createElement('textarea') : null; /** * Decodes an HTML entity. * @@ -1608,7 +1630,7 @@ var html = (function(html4) { var splitWillCapture = ('a,b'.split(/(,)/).length === 3); // bitmask for tags with special parsing, like ").should match_html "alert(42)" + PrettyText.cook("").should match_html "

" end it 'should allow for @mentions to have punctuation' do diff --git a/test/javascripts/components/bbcode_test.js b/test/javascripts/components/bbcode_test.js index d0b4c118f..1fb59dca1 100644 --- a/test/javascripts/components/bbcode_test.js +++ b/test/javascripts/components/bbcode_test.js @@ -27,13 +27,6 @@ test('lists', function() { format("[ol][li]option one[/li][/ol]", "
  1. option one
", "creates an ol"); }); -test('color', function() { - format("[color=#00f]blue[/color]", "blue", "supports [color=] with a short hex value"); - format("[color=#ffff00]yellow[/color]", "yellow", "supports [color=] with a long hex value"); - format("[color=red]red[/color]", "red", "supports [color=] with an html color"); - format("[color=javascript:alert('wat')]noop[/color]", "noop", "it performs a noop on invalid input"); -}); - test('tags with arguments', function() { format("[size=35]BIG [b]whoop[/b][/size]", "BIG whoop", "supports [size=]"); format("[url=http://bettercallsaul.com]better call![/url]", "better call!", "supports [url] with a title"); @@ -42,7 +35,6 @@ test('tags with arguments', function() { format("[b]first[/b] [b]second[/b]", "first second", "can bold two things on the same line"); }); - test("quotes", function() { var post = Discourse.Post.create({ diff --git a/test/javascripts/components/markdown_test.js b/test/javascripts/components/markdown_test.js index daa48b2b5..4010b05fd 100644 --- a/test/javascripts/components/markdown_test.js +++ b/test/javascripts/components/markdown_test.js @@ -278,10 +278,12 @@ test("Code Blocks", function() { }); -test("SanitizeHTML", function() { +test("sanitize", function() { + var sanitize = Discourse.Markdown.sanitize; - equal(sanitizeHtml("
"), "
"); - equal(sanitizeHtml("

hello

"), "

hello

"); + equal(sanitize("bug"), "bug"); + equal(sanitize("
"), "
"); + equal(sanitize("

hello

"), "

hello

"); cooked("hello", "

hello

", "it sanitizes while cooking"); cooked("disney reddit", @@ -305,3 +307,15 @@ test("URLs in BBCode tags", function() { "named links are properly parsed"); }); + +test("urlAllowed", function() { + var allowed = function(url, msg) { + equal(Discourse.Markdown.urlAllowed(url), url, msg); + }; + + allowed("/foo/bar.html", "allows relative urls"); + allowed("http://eviltrout.com/evil/trout", "allows full urls"); + allowed("https://eviltrout.com/evil/trout", "allows https urls"); + allowed("//eviltrout.com/evil/trout", "allows protocol relative urls"); + +}); diff --git a/test/javascripts/test_helper.js b/test/javascripts/test_helper.js index 94a3597f7..37cf2bb55 100644 --- a/test/javascripts/test_helper.js +++ b/test/javascripts/test_helper.js @@ -25,7 +25,6 @@ //= require LAB.js //= require Markdown.Converter.js //= require Markdown.Editor.js -//= require Markdown.Sanitizer.js //= require better_markdown.js //= require bootbox.js //= require bootstrap-alert.js diff --git a/vendor/assets/javascripts/Markdown.Sanitizer.js b/vendor/assets/javascripts/Markdown.Sanitizer.js deleted file mode 100644 index c3283e590..000000000 --- a/vendor/assets/javascripts/Markdown.Sanitizer.js +++ /dev/null @@ -1,108 +0,0 @@ -(function () { - var output, Converter; - if (typeof exports === "object" && typeof require === "function") { // we're in a CommonJS (e.g. Node.js) module - output = exports; - Converter = require("./Markdown.Converter").Converter; - } else { - output = window.Markdown; - Converter = output.Converter; - } - - output.getSanitizingConverter = function () { - var converter = new Converter(); - converter.hooks.chain("postConversion", sanitizeHtml); - converter.hooks.chain("postConversion", balanceTags); - return converter; - } - - function sanitizeHtml(html) { - return html.replace(/<[^>]*>?/gi, sanitizeTag); - } - - // (tags that can be opened/closed) | (tags that stand alone) - var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i; - // | - var a_white = /^(]+")?\s?>|<\/a>)$/i; - - // ]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i; - - function sanitizeTag(tag) { - if (tag.match(basic_tag_whitelist) || tag.match(a_white) || tag.match(img_white)) - return tag; - else - return ""; - } - - /// - /// attempt to balance HTML tags in the html string - /// by removing any unmatched opening or closing tags - /// IMPORTANT: we *assume* HTML has *already* been - /// sanitized and is safe/sane before balancing! - /// - /// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593 - /// - function balanceTags(html) { - - if (html == "") - return ""; - - var re = /<\/?\w+[^>]*(\s|$|>)/g; - // convert everything to lower case; this makes - // our case insensitive comparisons easier - var tags = html.toLowerCase().match(re); - - // no HTML tags present? nothing to do; exit now - var tagcount = (tags || []).length; - if (tagcount == 0) - return html; - - var tagname, tag; - var ignoredtags = "



  • "; - var match; - var tagpaired = []; - var tagremove = []; - var needsRemoval = false; - - // loop through matched tags in forward order - for (var ctag = 0; ctag < tagcount; ctag++) { - tagname = tags[ctag].replace(/<\/?(\w+).*/, "$1"); - // skip any already paired tags - // and skip tags in our ignore list; assume they're self-closed - if (tagpaired[ctag] || ignoredtags.search("<" + tagname + ">") > -1) - continue; - - tag = tags[ctag]; - match = -1; - - if (!/^<\//.test(tag)) { - // this is an opening tag - // search forwards (next tags), look for closing tags - for (var ntag = ctag + 1; ntag < tagcount; ntag++) { - if (!tagpaired[ntag] && tags[ntag] == "") { - match = ntag; - break; - } - } - } - - if (match == -1) - needsRemoval = tagremove[ctag] = true; // mark for removal - else - tagpaired[match] = true; // mark paired - } - - if (!needsRemoval) - return html; - - // delete all orphaned tags from the string - - var ctag = 0; - html = html.replace(re, function (match) { - var res = tagremove[ctag] ? "" : match; - ctag++; - return res; - }); - return html; - } -})(); diff --git a/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js b/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js index e5fd911f2..394138c5c 100644 --- a/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js +++ b/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js @@ -108,4 +108,6 @@ }); }); } + + Discourse.Markdown.whiteListClass("emoji"); }).call(this);