From 6995e75d410ebe4b03d31d1b0fbc1b17fa2d7eba Mon Sep 17 00:00:00 2001 From: Jaime Iniesta Date: Tue, 12 Feb 2013 09:46:45 -0500 Subject: [PATCH] Replace Hpricot with Nokogiri --- Gemfile | 1 - Gemfile.lock | 2 -- app/models/post.rb | 1 - docs/SOFTWARE.md | 2 +- lib/cooked_post_processor.rb | 2 +- lib/oneboxer.rb | 4 ++-- lib/oneboxer/amazon_onebox.rb | 10 +++++----- lib/oneboxer/android_app_store_onebox.rb | 8 ++++---- lib/oneboxer/apple_app_onebox.rb | 8 ++++---- lib/oneboxer/flickr_onebox.rb | 2 +- lib/oneboxer/wikipedia_onebox.rb | 10 +++++----- spec/components/cooked_post_processor_spec.rb | 5 ++++- spec/components/oneboxer/amazon_onebox_spec.rb | 2 +- .../oneboxer/android_app_store_onebox_spec.rb | 5 ++++- spec/components/oneboxer_spec.rb | 8 ++++---- 15 files changed, 36 insertions(+), 34 deletions(-) diff --git a/Gemfile b/Gemfile index b30566010..fdc1ebb61 100644 --- a/Gemfile +++ b/Gemfile @@ -20,7 +20,6 @@ gem 'fastimage' gem 'fog', require: false gem 'has_ip_address' gem 'hiredis' -gem 'hpricot' gem 'i18n-js' gem 'jquery-rails' gem 'multi_json' diff --git a/Gemfile.lock b/Gemfile.lock index 5aafa109d..d7c972ebe 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -192,7 +192,6 @@ GEM highline (1.6.15) hike (1.2.1) hiredis (0.4.5) - hpricot (0.8.6) httpauth (0.2.0) i18n (0.6.1) i18n-js (2.1.2) @@ -464,7 +463,6 @@ DEPENDENCIES guard-spork has_ip_address hiredis - hpricot i18n-js image_optim jasminerice diff --git a/app/models/post.rb b/app/models/post.rb index b97aa40af..520d2ce74 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -4,7 +4,6 @@ require_dependency 'rate_limiter' require_dependency 'post_revisor' require 'archetype' -require 'hpricot' require 'digest/sha1' class Post < ActiveRecord::Base diff --git a/docs/SOFTWARE.md b/docs/SOFTWARE.md index d59991fb1..00ca6e00a 100644 --- a/docs/SOFTWARE.md +++ b/docs/SOFTWARE.md @@ -31,7 +31,7 @@ The following Ruby Gems are used in Discourse: * [vestal_versions](https://rubygems.org/gems/vestal_versions) * [coffee-rails](https://rubygems.org/gems/coffee-rails) * [uglifier](https://rubygems.org/gems/uglifier) -* [hpricot](https://rubygems.org/gems/hpricot) +* [nokogiri](https://rubygems.org/gems/nokogiri) * [uuidtools](https://rubygems.org/gems/uuidtools) * [rinku](https://rubygems.org/gems/rinku) * [ruby-openid](https://rubygems.org/gems/ruby-openid) diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb index 0a2027eba..25bba7176 100644 --- a/lib/cooked_post_processor.rb +++ b/lib/cooked_post_processor.rb @@ -9,7 +9,7 @@ class CookedPostProcessor @dirty = false @opts = opts @post = post - @doc = Hpricot(post.cooked) + @doc = Nokogiri::HTML(post.cooked) end def dirty? diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb index 4775e13be..d8542e079 100644 --- a/lib/oneboxer.rb +++ b/lib/oneboxer.rb @@ -34,7 +34,7 @@ module Oneboxer if Whitelist.allowed?(url) page_html = open(url).read if page_html.present? - doc = Hpricot(page_html) + doc = Nokogiri::HTML(page_html) # See if if it has an oembed thing we can use (doc/"link[@type='application/json+oembed']").each do |oembed| @@ -56,7 +56,7 @@ module Oneboxer # Parse URLs out of HTML, returning the document when finished. def self.each_onebox_link(string_or_doc) doc = string_or_doc - doc = Hpricot(doc) if doc.is_a?(String) + doc = Nokogiri::HTML(doc) if doc.is_a?(String) onebox_links = doc.search("a.onebox") if onebox_links.present? diff --git a/lib/oneboxer/amazon_onebox.rb b/lib/oneboxer/amazon_onebox.rb index abab32cbe..d3e26aab7 100644 --- a/lib/oneboxer/amazon_onebox.rb +++ b/lib/oneboxer/amazon_onebox.rb @@ -22,19 +22,19 @@ module Oneboxer end def parse(data) - hp = Hpricot(data) + html_doc = Nokogiri::HTML(data) result = {} - result[:title] = hp.at("h1") + result[:title] = html_doc.at("h1") result[:title] = result[:title].inner_html if result[:title].present? - image = hp.at(".main-image img") + image = html_doc.at(".main-image img") result[:image] = image['src'] if image - result[:by_info] = hp.at("#by-line") + result[:by_info] = html_doc.at("#by-line") result[:by_info] = BaseOnebox.remove_whitespace(result[:by_info].inner_html) if result[:by_info].present? - summary = hp.at("#description-and-details-content") + summary = html_doc.at("#description-and-details-content") result[:text] = summary.inner_html if summary.present? result diff --git a/lib/oneboxer/android_app_store_onebox.rb b/lib/oneboxer/android_app_store_onebox.rb index 63afa6321..cb190e5c7 100644 --- a/lib/oneboxer/android_app_store_onebox.rb +++ b/lib/oneboxer/android_app_store_onebox.rb @@ -12,20 +12,20 @@ module Oneboxer def parse(data) - hp = Hpricot(data) + html_doc = Nokogiri::HTML(data) result = {} - m = hp.at("h1.doc-banner-title") + m = html_doc.at("h1.doc-banner-title") result[:title] = m.inner_text if m - m = hp.at("div#doc-original-text") + m = html_doc.at("div#doc-original-text") if m result[:text] = BaseOnebox.replace_tags_with_spaces(m.inner_html) result[:text] = result[:text][0..MAX_TEXT] end - m = hp.at("div.doc-banner-icon img") + m = html_doc.at("div.doc-banner-icon img") result[:image] = m['src'] if m result diff --git a/lib/oneboxer/apple_app_onebox.rb b/lib/oneboxer/apple_app_onebox.rb index dec524bb8..3dce646c9 100644 --- a/lib/oneboxer/apple_app_onebox.rb +++ b/lib/oneboxer/apple_app_onebox.rb @@ -17,17 +17,17 @@ module Oneboxer def parse(data) - hp = Hpricot(data) + html_doc = Nokogiri::HTML(data) result = {} - m = hp.at("h1") + m = html_doc.at("h1") result[:title] = m.inner_text if m - m = hp.at("h4 ~ p") + m = html_doc.at("h4 ~ p") result[:text] = m.inner_text[0..MAX_TEXT] if m - m = hp.at(".product img.artwork") + m = html_doc.at(".product img.artwork") result[:image] = m['src'] if m result diff --git a/lib/oneboxer/flickr_onebox.rb b/lib/oneboxer/flickr_onebox.rb index 8b0f84517..58e7ff8dd 100644 --- a/lib/oneboxer/flickr_onebox.rb +++ b/lib/oneboxer/flickr_onebox.rb @@ -9,7 +9,7 @@ module Oneboxer page_html = open(@url).read return nil if page_html.blank? - doc = Hpricot(page_html) + doc = Nokogiri::HTML(page_html) # Flikrs oembed just stopped returning images for no reason. Let's use opengraph instead. open_graph = Oneboxer.parse_open_graph(doc) diff --git a/lib/oneboxer/wikipedia_onebox.rb b/lib/oneboxer/wikipedia_onebox.rb index 694d39515..98e8a0161 100644 --- a/lib/oneboxer/wikipedia_onebox.rb +++ b/lib/oneboxer/wikipedia_onebox.rb @@ -20,23 +20,23 @@ module Oneboxer def parse(data) - hp = Hpricot(data) + html_doc = Nokogiri::HTML(data) result = {} - title = hp.at('title').inner_html + title = html_doc.at('title').inner_html result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present? # get the first image > 150 pix high - images = hp.search("img").select { |img| img['height'].to_i > 150 } + images = html_doc.search("img").select { |img| img['height'].to_i > 150 } result[:image] = "http:#{images[0]["src"]}" unless images.empty? # remove the table from mobile layout, as it can contain paras in some rare cases - hp.search("table").remove + html_doc.search("table").remove # get all the paras - paras = hp.search("p") + paras = html_doc.search("p") text = "" unless paras.empty? diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb index 01ca295ba..96904db65 100644 --- a/spec/components/cooked_post_processor_spec.rb +++ b/spec/components/cooked_post_processor_spec.rb @@ -19,7 +19,10 @@ describe CookedPostProcessor do end it 'inserts the onebox' do - @cpp.html.should == "GANGNAM STYLE" + @cpp.html.should == < +GANGNAM STYLE +EXPECTED end end diff --git a/spec/components/oneboxer/amazon_onebox_spec.rb b/spec/components/oneboxer/amazon_onebox_spec.rb index 513a98a41..35ef5219b 100644 --- a/spec/components/oneboxer/amazon_onebox_spec.rb +++ b/spec/components/oneboxer/amazon_onebox_spec.rb @@ -26,7 +26,7 @@ private

The Ruby Programming Language (Paperback)

David Flanagan, Yukihiro Matsumoto

-The Ruby Programming Language is the authoritative guide to Ruby ... +The Ruby Programming Language is the authoritative guide to Ruby ...
diff --git a/spec/components/oneboxer/android_app_store_onebox_spec.rb b/spec/components/oneboxer/android_app_store_onebox_spec.rb index 6e84351d0..a4d729102 100644 --- a/spec/components/oneboxer/android_app_store_onebox_spec.rb +++ b/spec/components/oneboxer/android_app_store_onebox_spec.rb @@ -25,7 +25,10 @@ private

Talking Parrot

- Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording. Press the MENU button to access the settings where you can change the record time and repeat count. This app uses anonymous usage stats to understand and improve performance. Comments and feedback welcome. + Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording. + Press the MENU button to access the settings where you can change the record time and repeat count. + This app uses anonymous usage stats to understand and improve performance. + Comments and feedback welcome.
diff --git a/spec/components/oneboxer_spec.rb b/spec/components/oneboxer_spec.rb index 51a3815cf..295b71c7e 100644 --- a/spec/components/oneboxer_spec.rb +++ b/spec/components/oneboxer_spec.rb @@ -145,16 +145,16 @@ describe Oneboxer do it 'yields each url and element when given a string' do result = Oneboxer.each_onebox_link(@html) do |url, element| - element.is_a?(Hpricot::Elem).should be_true + element.is_a?(Nokogiri::XML::Element).should be_true url.should == 'http://discourse.org' end - result.kind_of?(Hpricot::Doc).should be_true + result.kind_of?(Nokogiri::HTML::Document).should be_true end it 'yields each url and element when given a doc' do - doc = Hpricot(@html) + doc = Nokogiri::HTML(@html) Oneboxer.each_onebox_link(doc) do |url, element| - element.is_a?(Hpricot::Elem).should be_true + element.is_a?(Nokogiri::XML::Element).should be_true url.should == 'http://discourse.org' end end