FIX: properly unescape HTML entities in excerpts

This commit is contained in:
Régis Hanol 2014-12-10 12:52:51 +01:00
parent 7e609e1834
commit 6027073547
2 changed files with 8 additions and 6 deletions

View file

@@ -17,16 +17,15 @@ class ExcerptParser < Nokogiri::XML::SAX::Document
# Builds an excerpt of `html` by feeding it to a SAX parser driven by a new
# instance of this class, then returns the excerpt that instance collected.
#
# html    - markup to excerpt; nil is replaced by an empty string.
# length  - handed to the constructor; overridden with html.length when html
#           contains 'excerpt' and matches SPAN_REGEX.
# options - handed to the constructor; when options[:text_entities] == true the
#           result is additionally passed through CGI.unescapeHTML.
#
# Returns the stripped excerpt string.
#
# NOTE(review): this listing looks like a diff rendered without +/- markers, so
# the two length overrides and the two strip forms below are presumably the
# before/after versions of the same statements -- confirm against the repository.
def self.get_excerpt(html, length, options)
html ||= ''
# Multi-line form of the length override.
if (html.include? 'excerpt') && (SPAN_REGEX === html)
length = html.length
end
# One-line form of the same override.
length = html.length if html.include?('excerpt') && SPAN_REGEX === html
me = self.new(length, options)
parser = Nokogiri::HTML::SAX::Parser.new(me)
# Parsing runs inside catch(:done), so a `throw :done` reached during the parse
# ends it early (the throw site is not shown in this excerpt).
catch(:done) do
parser.parse(html)
end
# In-place strip followed by a bare read of the excerpt.
me.excerpt.strip!
me.excerpt
# Non-mutating strip; entities are unescaped only when text_entities is exactly true.
excerpt = me.excerpt.strip
excerpt = CGI.unescapeHTML(excerpt) if options[:text_entities] == true
excerpt
end
def escape_attribute(v)

View file

@@ -84,7 +84,6 @@ describe PrettyText do
describe "Excerpt" do
it "sanitizes attempts to inject invalid attributes" do
spinner = "<a href=\"http://thedailywtf.com/\" data-bbcode=\"' class='fa fa-spin\">WTF</a>"
PrettyText.excerpt(spinner, 20).should match_html spinner
@@ -216,6 +215,10 @@ describe PrettyText do
post.excerpt.should == two_hundred
end
# "&#39;" is the numeric character reference for an apostrophe; with
# text_entities: true the excerpt is expected to be the literal character.
it "unescapes html entities when we want text entities" do
PrettyText.excerpt("&#39;", 500, text_entities: true).should == "'"
end
end
describe "strip links" do