From b7803fc68eef04b8ab7a820d99d860776410f88b Mon Sep 17 00:00:00 2001 From: kerryliu Date: Thu, 24 Sep 2015 15:20:59 -0700 Subject: [PATCH] FIX: allow emoji class when crawling embedded content, add rspc-html-matchers --- Gemfile | 1 + Gemfile.lock | 4 + .../javascripts/admin/templates/embedding.hbs | 4 + app/models/embedding.rb | 1 + app/models/topic_embed.rb | 13 +++- config/site_settings.yml | 3 + spec/models/topic_embed_spec.rb | 78 ++++++++++++++++++- spec/spec_helper.rb | 1 + 8 files changed, 103 insertions(+), 2 deletions(-) diff --git a/Gemfile b/Gemfile index 92c81e10b..f7eca2464 100644 --- a/Gemfile +++ b/Gemfile @@ -120,6 +120,7 @@ group :test, :development do gem 'simplecov', require: false gem 'timecop' gem 'rspec-given' + gem 'rspec-html-matchers' gem 'pry-nav' gem 'spork-rails' gem 'byebug', require: ENV['RM_INFO'].nil? diff --git a/Gemfile.lock b/Gemfile.lock index 8142f566d..5da7f0bc5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -337,6 +337,9 @@ GEM rspec-given (3.5.4) given_core (= 3.5.4) rspec (>= 2.12) + rspec-html-matchers (0.7.0) + nokogiri (~> 1) + rspec (~> 3) rspec-logsplit (0.1.3) rspec-mocks (3.2.1) diff-lcs (>= 1.2.0, < 2.0) @@ -511,6 +514,7 @@ DEPENDENCIES rmmseg-cpp rspec (~> 3.2.0) rspec-given + rspec-html-matchers rspec-rails rtlit ruby-readability diff --git a/app/assets/javascripts/admin/templates/embedding.hbs b/app/assets/javascripts/admin/templates/embedding.hbs index 30f279105..6fbe31c12 100644 --- a/app/assets/javascripts/admin/templates/embedding.hbs +++ b/app/assets/javascripts/admin/templates/embedding.hbs @@ -53,6 +53,10 @@ {{embedding-setting field="embed_blacklist_selector" value=embedding.embed_blacklist_selector placeholder=".ad-unit, header"}} + + {{embedding-setting field="embed_classname_whitelist" + value=embedding.embed_classname_whitelist + placeholder="emoji, classname"}}
diff --git a/app/models/embedding.rb b/app/models/embedding.rb index c6081b189..796f80671 100644 --- a/app/models/embedding.rb +++ b/app/models/embedding.rb @@ -9,6 +9,7 @@ class Embedding < OpenStruct embed_truncate embed_whitelist_selector embed_blacklist_selector + embed_classname_whitelist feed_polling_enabled feed_polling_url embed_username_key_from_feed) diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb index 8d8485232..317fb230b 100644 --- a/app/models/topic_embed.rb +++ b/app/models/topic_embed.rb @@ -69,12 +69,13 @@ class TopicEmbed < ActiveRecord::Base original_uri = URI.parse(url) opts = { tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote], - attributes: %w[href src], + attributes: %w[href src class], remove_empty_nodes: false } opts[:whitelist] = SiteSetting.embed_whitelist_selector if SiteSetting.embed_whitelist_selector.present? opts[:blacklist] = SiteSetting.embed_blacklist_selector if SiteSetting.embed_blacklist_selector.present? + embed_classname_whitelist = SiteSetting.embed_classname_whitelist if SiteSetting.embed_classname_whitelist.present? doc = Readability::Document.new(open(url).read, opts) @@ -96,6 +97,16 @@ class TopicEmbed < ActiveRecord::Base # If there is a mistyped URL, just do nothing end end + # only allow classes in the whitelist + allowed_classes = if embed_classname_whitelist.blank? then [] else embed_classname_whitelist.split(/[ ,]+/i) end + doc.search('[class]:not([class=""])').each do |node| + classes = node[:class].split(' ').select{ |classname| allowed_classes.include?(classname) } + if classes.length === 0 + node.delete('class') + else + node[:class] = classes.join(' ') + end + end end [title, doc.to_html] diff --git a/config/site_settings.yml b/config/site_settings.yml index 149713872..a9e80a93a 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -812,6 +812,9 @@ embedding: embed_blacklist_selector: default: '' hidden: true + embed_classname_whitelist: + default: 'emoji' + hidden: true legal: tos_url: diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb index 8c9b3409d..73ff1490b 100644 --- a/spec/models/topic_embed_spec.rb +++ b/spec/models/topic_embed_spec.rb @@ -1,4 +1,5 @@ require 'spec_helper' +require 'stringio' describe TopicEmbed do @@ -30,7 +31,8 @@ describe TopicEmbed do expect(post.cooked).to eq(post.raw) # It converts relative URLs to absolute - expect(post.cooked.start_with?("hello world new post hello ")).to eq(true) + expect(post.cooked).to have_tag('a', with: { href: 'http://eviltrout.com/hello' }) + expect(post.cooked).to have_tag('img', with: { src: 'http://eviltrout.com/images/wat.jpg' }) expect(post.topic.has_topic_embed?).to eq(true) expect(TopicEmbed.where(topic_id: post.topic_id)).to be_present @@ -58,4 +60,78 @@ describe TopicEmbed do end + describe '.find_remote' do + + context 'post with allowed classes "foo" and "emoji"' do + + let(:user) { Fabricate(:user) } + let(:url) { 'http://eviltrout.com/123' } + let(:contents) { "my normal size emoji

Hi

" } + let!(:embeddable_host) { Fabricate(:embeddable_host) } + let!(:file) { StringIO.new } + + content = '' + + before(:each) do + SiteSetting.stubs(:embed_classname_whitelist).returns 'emoji , foo' + file.stubs(:read).returns contents + TopicEmbed.stubs(:open).returns file + title, content = TopicEmbed.find_remote(url) + end + + it 'img node has emoji class' do + expect(content).to have_tag('img', with: { class: 'emoji' }) + end + + it 'img node has foo class' do + expect(content).to have_tag('img', with: { class: 'foo' }) + end + + it 'p node has foo class' do + expect(content).to have_tag('p', with: { class: 'foo' }) + end + + it 'nodes removes classes other than emoji' do + expect(content).to have_tag('img', without: { class: 'other' }) + end + + end + + context 'post with no allowed classes' do + + let(:user) { Fabricate(:user) } + let(:url) { 'http://eviltrout.com/123' } + let(:contents) { "my normal size emoji

Hi

" } + let!(:embeddable_host) { Fabricate(:embeddable_host) } + let!(:file) { StringIO.new } + + content = '' + + before(:each) do + SiteSetting.stubs(:embed_classname_whitelist).returns ' ' + file.stubs(:read).returns contents + TopicEmbed.stubs(:open).returns file + title, content = TopicEmbed.find_remote(url) + end + + it 'img node doesn\'t have emoji class' do + expect(content).to have_tag('img', without: { class: 'emoji' }) + end + + it 'img node doesn\'t have foo class' do + expect(content).to have_tag('img', without: { class: 'foo' }) + end + + it 'p node doesn\'t foo class' do + expect(content).to have_tag('p', without: { class: 'foo' }) + end + + it 'img node doesn\'t have other class' do + expect(content).to have_tag('img', without: { class: 'other' }) + end + + end + + end + end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 701c0de01..30921a6e3 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -42,6 +42,7 @@ Spork.prefork do config.fail_fast = ENV['RSPEC_FAIL_FAST'] == "1" config.include Helpers config.include MessageBus + config.include RSpecHtmlMatchers config.mock_framework = :mocha config.order = 'random' config.infer_spec_type_from_file_location!