mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-30 10:58:31 -05:00
Replace Hpricot with Nokogiri
This commit is contained in:
parent
84a167725d
commit
6995e75d41
15 changed files with 36 additions and 34 deletions
1
Gemfile
1
Gemfile
|
@ -20,7 +20,6 @@ gem 'fastimage'
|
||||||
gem 'fog', require: false
|
gem 'fog', require: false
|
||||||
gem 'has_ip_address'
|
gem 'has_ip_address'
|
||||||
gem 'hiredis'
|
gem 'hiredis'
|
||||||
gem 'hpricot'
|
|
||||||
gem 'i18n-js'
|
gem 'i18n-js'
|
||||||
gem 'jquery-rails'
|
gem 'jquery-rails'
|
||||||
gem 'multi_json'
|
gem 'multi_json'
|
||||||
|
|
|
@ -192,7 +192,6 @@ GEM
|
||||||
highline (1.6.15)
|
highline (1.6.15)
|
||||||
hike (1.2.1)
|
hike (1.2.1)
|
||||||
hiredis (0.4.5)
|
hiredis (0.4.5)
|
||||||
hpricot (0.8.6)
|
|
||||||
httpauth (0.2.0)
|
httpauth (0.2.0)
|
||||||
i18n (0.6.1)
|
i18n (0.6.1)
|
||||||
i18n-js (2.1.2)
|
i18n-js (2.1.2)
|
||||||
|
@ -464,7 +463,6 @@ DEPENDENCIES
|
||||||
guard-spork
|
guard-spork
|
||||||
has_ip_address
|
has_ip_address
|
||||||
hiredis
|
hiredis
|
||||||
hpricot
|
|
||||||
i18n-js
|
i18n-js
|
||||||
image_optim
|
image_optim
|
||||||
jasminerice
|
jasminerice
|
||||||
|
|
|
@ -4,7 +4,6 @@ require_dependency 'rate_limiter'
|
||||||
require_dependency 'post_revisor'
|
require_dependency 'post_revisor'
|
||||||
|
|
||||||
require 'archetype'
|
require 'archetype'
|
||||||
require 'hpricot'
|
|
||||||
require 'digest/sha1'
|
require 'digest/sha1'
|
||||||
|
|
||||||
class Post < ActiveRecord::Base
|
class Post < ActiveRecord::Base
|
||||||
|
|
|
@ -31,7 +31,7 @@ The following Ruby Gems are used in Discourse:
|
||||||
* [vestal_versions](https://rubygems.org/gems/vestal_versions)
|
* [vestal_versions](https://rubygems.org/gems/vestal_versions)
|
||||||
* [coffee-rails](https://rubygems.org/gems/coffee-rails)
|
* [coffee-rails](https://rubygems.org/gems/coffee-rails)
|
||||||
* [uglifier](https://rubygems.org/gems/uglifier)
|
* [uglifier](https://rubygems.org/gems/uglifier)
|
||||||
* [hpricot](https://rubygems.org/gems/hpricot)
|
* [nokogiri](https://rubygems.org/gems/nokogiri)
|
||||||
* [uuidtools](https://rubygems.org/gems/uuidtools)
|
* [uuidtools](https://rubygems.org/gems/uuidtools)
|
||||||
* [rinku](https://rubygems.org/gems/rinku)
|
* [rinku](https://rubygems.org/gems/rinku)
|
||||||
* [ruby-openid](https://rubygems.org/gems/ruby-openid)
|
* [ruby-openid](https://rubygems.org/gems/ruby-openid)
|
||||||
|
|
|
@ -9,7 +9,7 @@ class CookedPostProcessor
|
||||||
@dirty = false
|
@dirty = false
|
||||||
@opts = opts
|
@opts = opts
|
||||||
@post = post
|
@post = post
|
||||||
@doc = Hpricot(post.cooked)
|
@doc = Nokogiri::HTML(post.cooked)
|
||||||
end
|
end
|
||||||
|
|
||||||
def dirty?
|
def dirty?
|
||||||
|
|
|
@ -34,7 +34,7 @@ module Oneboxer
|
||||||
if Whitelist.allowed?(url)
|
if Whitelist.allowed?(url)
|
||||||
page_html = open(url).read
|
page_html = open(url).read
|
||||||
if page_html.present?
|
if page_html.present?
|
||||||
doc = Hpricot(page_html)
|
doc = Nokogiri::HTML(page_html)
|
||||||
|
|
||||||
# See if it has an oembed thing we can use
|
# See if it has an oembed thing we can use
|
||||||
(doc/"link[@type='application/json+oembed']").each do |oembed|
|
(doc/"link[@type='application/json+oembed']").each do |oembed|
|
||||||
|
@ -56,7 +56,7 @@ module Oneboxer
|
||||||
# Parse URLs out of HTML, returning the document when finished.
|
# Parse URLs out of HTML, returning the document when finished.
|
||||||
def self.each_onebox_link(string_or_doc)
|
def self.each_onebox_link(string_or_doc)
|
||||||
doc = string_or_doc
|
doc = string_or_doc
|
||||||
doc = Hpricot(doc) if doc.is_a?(String)
|
doc = Nokogiri::HTML(doc) if doc.is_a?(String)
|
||||||
|
|
||||||
onebox_links = doc.search("a.onebox")
|
onebox_links = doc.search("a.onebox")
|
||||||
if onebox_links.present?
|
if onebox_links.present?
|
||||||
|
|
|
@ -22,19 +22,19 @@ module Oneboxer
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse(data)
|
def parse(data)
|
||||||
hp = Hpricot(data)
|
html_doc = Nokogiri::HTML(data)
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
result[:title] = hp.at("h1")
|
result[:title] = html_doc.at("h1")
|
||||||
result[:title] = result[:title].inner_html if result[:title].present?
|
result[:title] = result[:title].inner_html if result[:title].present?
|
||||||
|
|
||||||
image = hp.at(".main-image img")
|
image = html_doc.at(".main-image img")
|
||||||
result[:image] = image['src'] if image
|
result[:image] = image['src'] if image
|
||||||
|
|
||||||
result[:by_info] = hp.at("#by-line")
|
result[:by_info] = html_doc.at("#by-line")
|
||||||
result[:by_info] = BaseOnebox.remove_whitespace(result[:by_info].inner_html) if result[:by_info].present?
|
result[:by_info] = BaseOnebox.remove_whitespace(result[:by_info].inner_html) if result[:by_info].present?
|
||||||
|
|
||||||
summary = hp.at("#description-and-details-content")
|
summary = html_doc.at("#description-and-details-content")
|
||||||
result[:text] = summary.inner_html if summary.present?
|
result[:text] = summary.inner_html if summary.present?
|
||||||
|
|
||||||
result
|
result
|
||||||
|
|
|
@ -12,20 +12,20 @@ module Oneboxer
|
||||||
|
|
||||||
def parse(data)
|
def parse(data)
|
||||||
|
|
||||||
hp = Hpricot(data)
|
html_doc = Nokogiri::HTML(data)
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
|
|
||||||
m = hp.at("h1.doc-banner-title")
|
m = html_doc.at("h1.doc-banner-title")
|
||||||
result[:title] = m.inner_text if m
|
result[:title] = m.inner_text if m
|
||||||
|
|
||||||
m = hp.at("div#doc-original-text")
|
m = html_doc.at("div#doc-original-text")
|
||||||
if m
|
if m
|
||||||
result[:text] = BaseOnebox.replace_tags_with_spaces(m.inner_html)
|
result[:text] = BaseOnebox.replace_tags_with_spaces(m.inner_html)
|
||||||
result[:text] = result[:text][0..MAX_TEXT]
|
result[:text] = result[:text][0..MAX_TEXT]
|
||||||
end
|
end
|
||||||
|
|
||||||
m = hp.at("div.doc-banner-icon img")
|
m = html_doc.at("div.doc-banner-icon img")
|
||||||
result[:image] = m['src'] if m
|
result[:image] = m['src'] if m
|
||||||
|
|
||||||
result
|
result
|
||||||
|
|
|
@ -17,17 +17,17 @@ module Oneboxer
|
||||||
|
|
||||||
def parse(data)
|
def parse(data)
|
||||||
|
|
||||||
hp = Hpricot(data)
|
html_doc = Nokogiri::HTML(data)
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
|
|
||||||
m = hp.at("h1")
|
m = html_doc.at("h1")
|
||||||
result[:title] = m.inner_text if m
|
result[:title] = m.inner_text if m
|
||||||
|
|
||||||
m = hp.at("h4 ~ p")
|
m = html_doc.at("h4 ~ p")
|
||||||
result[:text] = m.inner_text[0..MAX_TEXT] if m
|
result[:text] = m.inner_text[0..MAX_TEXT] if m
|
||||||
|
|
||||||
m = hp.at(".product img.artwork")
|
m = html_doc.at(".product img.artwork")
|
||||||
result[:image] = m['src'] if m
|
result[:image] = m['src'] if m
|
||||||
|
|
||||||
result
|
result
|
||||||
|
|
|
@ -9,7 +9,7 @@ module Oneboxer
|
||||||
|
|
||||||
page_html = open(@url).read
|
page_html = open(@url).read
|
||||||
return nil if page_html.blank?
|
return nil if page_html.blank?
|
||||||
doc = Hpricot(page_html)
|
doc = Nokogiri::HTML(page_html)
|
||||||
|
|
||||||
# Flickr's oembed just stopped returning images for no reason. Let's use opengraph instead.
|
# Flickr's oembed just stopped returning images for no reason. Let's use opengraph instead.
|
||||||
open_graph = Oneboxer.parse_open_graph(doc)
|
open_graph = Oneboxer.parse_open_graph(doc)
|
||||||
|
|
|
@ -20,23 +20,23 @@ module Oneboxer
|
||||||
|
|
||||||
def parse(data)
|
def parse(data)
|
||||||
|
|
||||||
hp = Hpricot(data)
|
html_doc = Nokogiri::HTML(data)
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
|
|
||||||
title = hp.at('title').inner_html
|
title = html_doc.at('title').inner_html
|
||||||
result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present?
|
result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present?
|
||||||
|
|
||||||
# get the first image > 150 pix high
|
# get the first image > 150 pix high
|
||||||
images = hp.search("img").select { |img| img['height'].to_i > 150 }
|
images = html_doc.search("img").select { |img| img['height'].to_i > 150 }
|
||||||
|
|
||||||
result[:image] = "http:#{images[0]["src"]}" unless images.empty?
|
result[:image] = "http:#{images[0]["src"]}" unless images.empty?
|
||||||
|
|
||||||
# remove the table from mobile layout, as it can contain paras in some rare cases
|
# remove the table from mobile layout, as it can contain paras in some rare cases
|
||||||
hp.search("table").remove
|
html_doc.search("table").remove
|
||||||
|
|
||||||
# get all the paras
|
# get all the paras
|
||||||
paras = hp.search("p")
|
paras = html_doc.search("p")
|
||||||
text = ""
|
text = ""
|
||||||
|
|
||||||
unless paras.empty?
|
unless paras.empty?
|
||||||
|
|
|
@ -19,7 +19,10 @@ describe CookedPostProcessor do
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'inserts the onebox' do
|
it 'inserts the onebox' do
|
||||||
@cpp.html.should == "GANGNAM STYLE"
|
@cpp.html.should == <<EXPECTED
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<html><body>GANGNAM STYLE</body></html>
|
||||||
|
EXPECTED
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -26,7 +26,7 @@ private
|
||||||
<h3><a href="http://www.amazon.com/Ruby-Programming-Language-David-Flanagan/dp/0596516177" target="_blank">The Ruby Programming Language (Paperback)</a></h3>
|
<h3><a href="http://www.amazon.com/Ruby-Programming-Language-David-Flanagan/dp/0596516177" target="_blank">The Ruby Programming Language (Paperback)</a></h3>
|
||||||
<h4>David Flanagan, Yukihiro Matsumoto</h4>
|
<h4>David Flanagan, Yukihiro Matsumoto</h4>
|
||||||
|
|
||||||
The Ruby Programming Language is the authoritative guide to Ruby ...
|
The Ruby Programming Language is the authoritative guide to Ruby ...
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
<div class='clearfix'></div>
|
<div class='clearfix'></div>
|
||||||
|
|
|
@ -25,7 +25,10 @@ private
|
||||||
<img src="https://lh5.ggpht.com/wrYYVu74XNUu2WHk0aSZEqgdCDCNti9Fl0_dJnhgR6jY04ajQgVg5ABMatfcTDsB810=w124" class="thumbnail">
|
<img src="https://lh5.ggpht.com/wrYYVu74XNUu2WHk0aSZEqgdCDCNti9Fl0_dJnhgR6jY04ajQgVg5ABMatfcTDsB810=w124" class="thumbnail">
|
||||||
<h3><a href="https://play.google.com/store/apps/details?id=com.moosoft.parrot" target="_blank">Talking Parrot</a></h3>
|
<h3><a href="https://play.google.com/store/apps/details?id=com.moosoft.parrot" target="_blank">Talking Parrot</a></h3>
|
||||||
|
|
||||||
Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording. Press the MENU button to access the settings where you can change the record time and repeat count. This app uses anonymous usage stats to understand and improve performance. Comments and feedback welcome.
|
Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording.
|
||||||
|
Press the MENU button to access the settings where you can change the record time and repeat count.
|
||||||
|
This app uses anonymous usage stats to understand and improve performance.
|
||||||
|
Comments and feedback welcome.
|
||||||
</div>
|
</div>
|
||||||
<div class='clearfix'></div>
|
<div class='clearfix'></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -145,16 +145,16 @@ describe Oneboxer do
|
||||||
|
|
||||||
it 'yields each url and element when given a string' do
|
it 'yields each url and element when given a string' do
|
||||||
result = Oneboxer.each_onebox_link(@html) do |url, element|
|
result = Oneboxer.each_onebox_link(@html) do |url, element|
|
||||||
element.is_a?(Hpricot::Elem).should be_true
|
element.is_a?(Nokogiri::XML::Element).should be_true
|
||||||
url.should == 'http://discourse.org'
|
url.should == 'http://discourse.org'
|
||||||
end
|
end
|
||||||
result.kind_of?(Hpricot::Doc).should be_true
|
result.kind_of?(Nokogiri::HTML::Document).should be_true
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'yields each url and element when given a doc' do
|
it 'yields each url and element when given a doc' do
|
||||||
doc = Hpricot(@html)
|
doc = Nokogiri::HTML(@html)
|
||||||
Oneboxer.each_onebox_link(doc) do |url, element|
|
Oneboxer.each_onebox_link(doc) do |url, element|
|
||||||
element.is_a?(Hpricot::Elem).should be_true
|
element.is_a?(Nokogiri::XML::Element).should be_true
|
||||||
url.should == 'http://discourse.org'
|
url.should == 'http://discourse.org'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue