2013-05-28 09:48:47 +10:00
# SAX handler that reduces an HTML document to a short excerpt.
#
# Text is HTML-escaped and appended to {#excerpt} until +length+ counted
# characters have been emitted; at that point an ellipsis is appended and
# parsing is aborted with `throw :done`. Links are kept (unless
# `:strip_links` is set), images are replaced by a textual placeholder and
# everything inside an `<aside>` element is skipped.
#
# Use {ExcerptParser.get_excerpt} rather than driving the handler directly:
# it installs the `catch(:done)` that the truncation path relies on.
class ExcerptParser < Nokogiri::XML::SAX::Document
  # @return [String] the excerpt accumulated so far
  attr_reader :excerpt

  # @param length [Integer] maximum number of counted (text) characters
  # @param options [Hash, nil] optional flags; each is enabled only when the
  #   value is exactly +true+
  # @option options [Boolean] :strip_links drop `<a>` tags, keeping their text
  # @option options [Boolean] :text_entities use "..." instead of "…" as the
  #   truncation marker
  # @option options [Boolean] :markdown_images emit images as
  #   `![label](src)` instead of just `[label]`
  def initialize(length, options = nil)
    # `options` defaults to nil, so it must be replaced before it is indexed.
    options ||= {}

    @length = length
    @excerpt = ""
    @current_length = 0
    @in_a = false
    @in_quote = false

    @strip_links = options[:strip_links] == true
    @text_entities = options[:text_entities] == true
    @markdown_images = options[:markdown_images] == true
  end

  # Parses +html+ and returns its excerpt.
  #
  # @param html [String, nil] HTML to summarise; nil yields an empty excerpt
  # @param length [Integer] maximum number of counted characters
  # @param options [Hash, nil] see {#initialize}
  # @return [String] the excerpt with surrounding whitespace stripped
  def self.get_excerpt(html, length, options = {})
    me = new(length, options)
    parser = Nokogiri::HTML::SAX::Parser.new(me)

    # #characters throws :done once the length limit has been reached.
    catch(:done) do
      parser.parse(html) unless html.nil?
    end

    me.excerpt.strip!
    me.excerpt
  end

  # Appends an opening tag to the excerpt without counting it towards the
  # length limit.
  #
  # @param name [String] element name
  # @param attributes [Array<Array(String, String)>] name/value pairs
  # @return [void]
  def include_tag(name, attributes)
    # Values come from the parsed document and end up inside single quotes,
    # so they are escaped to stop a quote in the value from closing the
    # attribute early.
    rendered = attributes.map { |key, value| "#{key}='#{ERB::Util.html_escape(value)}'" }.join(' ')
    characters("<#{name} #{rendered}>", false, false, false)
  end

  # SAX callback: an element was opened.
  #
  # @param name [String] element name
  # @param attributes [Array] attribute name/value pairs
  # @return [void]
  def start_element(name, attributes = [])
    case name
    when "img"
      image = Hash[*attributes.flatten]
      label = image["alt"] || image["title"]

      # With :markdown_images the placeholder becomes `![label](src)`.
      characters("!") if @markdown_images
      characters(label ? "[#{label}]" : "[image]")
      characters("(#{image['src']})") if @markdown_images
    when "a"
      unless @strip_links
        include_tag(name, attributes)
        @in_a = true
      end
    when "aside"
      # Text is ignored until the matching end tag.
      @in_quote = true
    end
  end

  # SAX callback: an element was closed.
  #
  # @param name [String] element name
  # @return [void]
  def end_element(name)
    case name
    when "a"
      unless @strip_links
        characters("</a>", false, false, false)
        @in_a = false
      end
    when "p", "br"
      # Keep the text of adjacent paragraphs/lines from running together.
      characters(" ")
    when "aside"
      @in_quote = false
    end
  end

  # SAX callback for text; also used internally to append generated markup.
  #
  # @param string [String] text to append
  # @param truncate [Boolean] when the limit is hit, still append the part of
  #   +string+ that fits
  # @param count_it [Boolean] whether +string+ counts towards the limit
  # @param encode [Boolean] whether +string+ is HTML-escaped before appending
  # @return [void]
  # @note Throws :done (after appending the ellipsis and closing any open
  #   link) when +string+ would exceed the limit.
  def characters(string, truncate = true, count_it = true, encode = true)
    return if @in_quote

    if count_it && @current_length + string.length > @length
      # Emit exactly the characters that still fit - possibly none.
      remaining = [0, @length - @current_length].max
      @excerpt << escape_text(string[0, remaining], encode) if truncate
      @excerpt << (@text_entities ? "..." : "…")
      @excerpt << "</a>" if @in_a
      throw :done
    end

    @excerpt << escape_text(string, encode)
    @current_length += string.length if count_it
  end

  private

  # Returns +text+ HTML-escaped when +encode+ is true, unchanged otherwise.
  def escape_text(text, encode)
    encode ? ERB::Util.html_escape(text) : text
  end
end