2013-07-06 19:10:53 +02:00
# Post processing that we can do after a post has already been cooked.
2013-07-08 01:39:08 +02:00
# For example, inserting the onebox content, or image sizes/thumbnails.
2013-02-05 14:16:51 -05:00
2013-11-20 13:10:08 +01:00
require_dependency 'url_helper'
2013-02-05 14:16:51 -05:00
class CookedPostProcessor
2013-06-21 18:29:40 +02:00
include ActionView :: Helpers :: NumberHelper
2013-02-19 17:57:14 +11:00
2013-02-05 14:16:51 -05:00
# Wraps a post and parses its cooked HTML so it can be post-processed.
#
# post - the post whose `cooked` HTML is processed
# opts - options hash; this class reads :image_sizes and :invalidate_oneboxes
def initialize(post, opts = {})
  @post = post
  @opts = opts
  @dirty = false
  # image dimensions already looked up by get_size, keyed by url
  @size_cache = {}
  # snapshot of the cooked HTML; dirty? compares the processed output to it
  @previous_cooked = (@post.cooked || " ").dup
  @doc = Nokogiri::HTML::fragment(post.cooked)
end
2013-11-22 01:52:26 +01:00
# Runs every post-processing step, in order, while holding a distributed
# mutex whose name is derived from the post id.
#
# bypass_bump - forwarded untouched to pull_hotlinked_images
def post_process(bypass_bump = false)
  lock_name = " post_process_ #{ @post . id } "

  DistributedMutex.synchronize(lock_name) do
    keep_reverse_index_up_to_date
    post_process_images
    post_process_oneboxes
    optimize_urls
    pull_hotlinked_images(bypass_bump)
  end
end
2013-11-05 19:04:47 +01:00
# Rebuilds the post_uploads rows for this post from the uploads that the
# document's links and images currently point at.
def keep_reverse_index_up_to_date
  upload_ids = Set.new

  # links are scanned first, then images; either may reference an upload
  [[" a[href] ", " href "], [" img[src] ", " src "]].each do |selector, attribute|
    @doc.css(selector).each do |node|
      upload = Upload.get_from_url(node[attribute].to_s)
      upload_ids << upload.id if upload
    end
  end

  values = upload_ids.map { |u| " ( #{ @post . id } , #{ u } ) " }.join(" , ")

  # delete + re-insert inside one transaction
  PostUpload.transaction do
    PostUpload.delete_all(post_id: @post.id)
    unless upload_ids.empty?
      PostUpload.exec_sql(" INSERT INTO post_uploads (post_id, upload_id) VALUES #{ values } ")
    end
  end
end
2013-02-25 19:42:20 +03:00
# Sizes and lightboxes every eligible image, then refreshes the topic image.
# Does nothing when the post has no eligible image.
def post_process_images
  images = extract_images

  unless images.blank?
    images.each do |image|
      limit_size!(image)
      convert_to_link!(image)
    end

    update_topic_image(images)
  end
end
2013-04-13 16:31:20 +02:00
2013-07-08 01:39:08 +02:00
# Returns the images that should be post-processed: every image with a src
# attribute, minus the sets listed in `excluded`.
def extract_images
  # all images with a src attribute
  candidates = @doc.css(" img[src] ")

  excluded = [
    @doc.css(" img[src^='data'] "), # data images
    @doc.css(" img.emoji "),        # emojis
    oneboxed_images,                # images inside oneboxes
    @doc.css(" .quote img ")        # images inside quotes
  ]

  excluded.reduce(candidates) { |remaining, unwanted| remaining - unwanted }
end
2014-07-21 15:59:34 +02:00
# Returns the images that live inside a onebox.
def oneboxed_images
  selector = " .onebox-result img, .onebox img "
  @doc.css(selector)
end
2013-11-05 19:04:47 +01:00
# Writes width/height attributes on `img`, capped by ImageSizer.resize.
#
# The starting size is taken from the first source that yields one:
#   1) the width/height attributes
#   2) the dimension from the preview (image_sizes)
#   3) the dimension of the original image (HTTP request)
def limit_size!(img)
  src = img[" src "]
  dimensions = get_size_from_attributes(img) ||
               get_size_from_image_sizes(src, @opts[:image_sizes]) ||
               get_size(src)
  # NOTE(review): when no source yields a size, w and h are both nil here;
  # confirm ImageSizer.resize tolerates that.
  w, h = dimensions

  # limit the size of the thumbnail
  img[" width "], img[" height "] = ImageSizer.resize(w, h)
end
2013-11-25 18:36:13 +01:00
# Reads the size from the image's own width/height attributes.
#
# Returns [width, height] as integers when both are strictly positive,
# nil otherwise.
def get_size_from_attributes(img)
  width = img[" width "].to_i
  height = img[" height "].to_i
  return nil unless width > 0 && height > 0

  [width, height]
end
2013-11-05 19:04:47 +01:00
# Looks `src` up in the client-supplied preview sizes.
#
# src         - the image's src attribute
# image_sizes - enumerable of (url, size) pairs, where size is indexed by
#               width/height keys; may be nil or empty
#
# Returns [width, height] (values as stored in the matching size entry) for
# the first entry whose url contains `src` and whose dimensions are both
# positive, or nil when there is no such entry.
def get_size_from_image_sizes(src, image_sizes)
  return unless image_sizes.present?

  image_sizes.each do |image_size|
    url, size = image_size[0], image_size[1]
    next unless url && url.include?(src)
    next unless size && size[" width "].to_i > 0 && size[" height "].to_i > 0

    return [size[" width "], size[" height "]]
  end

  # `each` evaluates to its receiver; without this explicit nil a miss would
  # return the whole image_sizes collection, which is truthy.
  nil
end
2013-02-21 12:07:36 +11:00
2013-11-05 19:04:47 +01:00
# Looks up the dimensions of the image at `url`, memoised in @size_cache.
#
# Returns whatever FastImage.size yields, or nil when the resolved URL is
# not accepted by is_valid_image_url?, when crawling is not allowed for it,
# or when FastImage raises Zlib::BufError.
def get_size(url)
  return @size_cache[url] if @size_cache.has_key?(url)

  # NOTE(review): the pattern and string literals below are kept exactly as
  # found, padding included; if the padding is an extraction artifact they
  # will not match as intended — confirm against upstream.
  absolute_url = url
  if absolute_url =~ / ^ \/ [^ \/ ] /
    absolute_url = Discourse.base_url_no_prefix + absolute_url
  end
  # FastImage fails when there's no scheme
  if absolute_url.start_with?(" // ")
    absolute_url = SiteSetting.scheme + " : " + absolute_url
  end

  return unless is_valid_image_url?(absolute_url)

  # we can *always* crawl our own images
  return if !SiteSetting.crawl_images? && !Discourse.store.has_been_uploaded?(url)

  @size_cache[url] ||= FastImage.size(absolute_url)
rescue Zlib::BufError
  # FastImage.size raises BufError for some gifs
  nil
end
2013-11-05 19:04:47 +01:00
# Tells whether `url` parses as a URI with an http or https scheme.
#
# Returns true or false; a URL that URI.parse rejects yields false rather
# than an implicit nil.
def is_valid_image_url?(url)
  %w( http https ).include?(URI.parse(url).scheme)
rescue URI::InvalidURIError
  false
end
2013-11-05 19:04:47 +01:00
# Wraps `img` in a lightbox when the original image is larger than both the
# displayed size and the site's maximum image size.
#
# Skipped when the image has no src, when its original dimensions cannot be
# fetched, or when it already sits inside a hyperlink.
def convert_to_link!(img)
  src = img[" src "]
  return if src.blank?

  width, height = img[" width "].to_i, img[" height "].to_i
  original_width, original_height = get_size(src)

  # can't reach the image...
  if [original_width, original_height].any?(&:nil?)
    Rails.logger.info " Can't reach ' #{ src } ' to get its dimension. "
    return
  end

  full_width, full_height = original_width.to_i, original_height.to_i
  # already displayed at (or above) its original size
  return if full_width <= width && full_height <= height
  # original fits within the site-wide maximum
  return if full_width <= SiteSetting.max_image_width && full_height <= SiteSetting.max_image_height
  return if is_a_hyperlink?(img)

  upload = Upload.get_from_url(src)
  upload.create_thumbnail!(width, height) if upload

  add_lightbox!(img, original_width, original_height, upload)
end
2013-11-05 19:04:47 +01:00
# Tells whether `img` is nested, at any depth, inside a link element.
#
# Walks up the parent chain and stops at the first ancestor that is nil or
# that does not respond to `parent`.
#
# Returns true when an ancestor's name equals the link tag literal below,
# false otherwise.
def is_a_hyperlink?(img)
  ancestor = img.parent

  while ancestor
    return true if ancestor.name == " a "
    return false unless ancestor.respond_to?(:parent)

    ancestor = ancestor.parent
  end

  false
end
2013-02-19 17:57:14 +11:00
2013-07-08 01:39:08 +02:00
# Builds the lightbox markup around `img`: a wrapper div, a link to the
# larger image, and a "meta" overlay with filename and size information.
#
# img             - the image node to wrap
# original_width  - width shown in the overlay
# original_height - height shown in the overlay
# upload          - the matching upload, if any; used for the download link,
#                   the thumbnail src and the file size
def add_lightbox!(img, original_width, original_height, upload = nil)
  # first, create a div to hold our lightbox
  wrapper = Nokogiri::XML::Node.new(" div ", @doc)
  wrapper[" class "] = " lightbox-wrapper "
  img.add_next_sibling(wrapper)
  wrapper.add_child(img)

  # then, the link to our larger image
  link = Nokogiri::XML::Node.new(" a ", @doc)
  img.add_next_sibling(link)
  if upload && Discourse.store.internal?
    link[" data-download-href "] = Discourse.store.download_url(upload)
  end
  # href is set before the src swap below, so it keeps the pre-thumbnail src
  link[" href "] = img[" src "]
  link[" class "] = " lightbox "
  link.add_child(img)

  # replace the image by its thumbnail
  w, h = img[" width "].to_i, img[" height "].to_i
  if upload && upload.has_thumbnail?(w, h)
    img[" src "] = upload.thumbnail(w, h).url
  end

  # then, some overlay informations
  meta = Nokogiri::XML::Node.new(" div ", @doc)
  meta[" class "] = " meta "
  img.add_next_sibling(meta)

  filename = get_filename(upload, img[" src "])
  informations = " #{ original_width } x #{ original_height } "
  informations << " #{ number_to_human_size ( upload . filesize ) } " if upload

  # the image's own title wins over the derived filename
  caption = img[" title "] || filename
  link[" title "] = caption

  meta.add_child create_span_node(" filename ", caption)
  meta.add_child create_span_node(" informations ", informations)
  meta.add_child create_span_node(" expand ")
end
2013-02-19 17:57:14 +11:00
2013-06-26 21:53:31 +02:00
# Picks the filename shown in the lightbox overlay.
#
# upload - the matching upload, or nil
# src    - the image's src, used only when there is no upload
#
# Returns the basename of `src` when there is no upload; otherwise the
# upload's original filename, unless that filename matches the "blob"
# pattern below, in which case the translated placeholder is returned.
def get_filename(upload, src)
  return File.basename(src) unless upload

  # NOTE(review): the pattern and translation key are kept exactly as found,
  # padding included; if the padding is an extraction artifact neither will
  # match as intended — confirm against upstream.
  if upload.original_filename =~ / ^blob( \ .png)?$ /i
    I18n.t(" upload.pasted_image_filename ")
  else
    upload.original_filename
  end
end
2013-06-21 18:29:40 +02:00
# Builds a span node owned by @doc.
#
# klass   - value for the node's class attribute
# content - optional text content; left unset when nil
#
# Returns the new node.
def create_span_node(klass, content = nil)
  Nokogiri::XML::Node.new(" span ", @doc).tap do |node|
    node.content = content if content
    node[" class "] = klass
  end
end
2013-11-05 19:04:47 +01:00
# On the first post of a topic, stores the first image's src (cut to 255
# characters) as the topic's image_url.
#
# images - the images extracted from the post; the first one is used
def update_topic_image(images)
  return unless @post.is_first_post?

  src = images.first[" src "]
  return unless src.present?

  @post.topic.update_column(:image_url, src[0...255])
end
2013-11-05 19:04:47 +01:00
# Replaces oneboxable links in the document with their onebox, then sizes
# the images those oneboxes contain.
def post_process_oneboxes
  onebox_args = {
    post_id: @post.id,
    # coerced to a strict boolean
    invalidate_oneboxes: @opts[:invalidate_oneboxes] ? true : false
  }

  # apply oneboxes
  Oneboxer.apply(@doc) do |url|
    Oneboxer.onebox(url, onebox_args)
  end

  # make sure we grab dimensions for oneboxed images
  oneboxed_images.each { |image| limit_size!(image) }
end
2013-11-05 19:04:47 +01:00
# Rewrites every local link target and image source as the schemaless form
# of its absolute URL; non-local URLs are left alone.
def optimize_urls
  %w{ href data-download-href }.each do |selector|
    attribute = " #{ selector } "

    @doc.css(" a[ #{ selector } ] ").each do |link|
      target = link[attribute].to_s
      next unless UrlHelper.is_local(target)

      link[attribute] = UrlHelper.schemaless(UrlHelper.absolute(target))
    end
  end

  @doc.css(" img[src] ").each do |image|
    source = image[" src "].to_s
    next unless UrlHelper.is_local(source)

    image[" src "] = UrlHelper.schemaless(UrlHelper.absolute(source))
  end
end
2013-11-22 01:52:26 +01:00
# Schedules the :pull_hotlinked_images job for this post, replacing any
# previously scheduled one.
#
# bypass_bump - passed through to the job
def pull_hotlinked_images(bypass_bump = false)
  # nothing to do when the feature is switched off
  return unless SiteSetting.download_remote_images_to_local?
  # nor when disk space is low (this check may itself switch the feature off)
  return if disable_if_low_on_disk_space
  # only run for changes made by a user, not by the system user
  return if @post.last_editor_id == Discourse.system_user.id

  # make sure no other job is scheduled
  Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)

  # schedule the job just after the ninja-edit window
  delay = SiteSetting.ninja_edit_window + 1
  Jobs.enqueue_in(
    delay.seconds.to_i,
    :pull_hotlinked_images,
    post_id: @post.id,
    bypass_bump: bypass_bump
  )
end
2013-11-15 16:46:41 +01:00
# Switches remote image downloading off when the available disk space is
# below the configured threshold.
#
# Returns true when the setting was switched off by this call, false when
# there is enough space.
def disable_if_low_on_disk_space
  if available_disk_space >= SiteSetting.download_remote_images_threshold
    false
  else
    SiteSetting.download_remote_images_to_local = false

    # log the site setting change
    details = { details: I18n.t(" disable_remote_images_download_reason ") }
    StaffActionLogger.new(Discourse.system_user)
                     .log_site_setting_change(" download_remote_images_to_local ", true, false, details)

    # also send a private message to the site contact user
    SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled)

    true
  end
end
# Returns 100 minus the integer parsed from the fifth field of the last
# line of `df -P` output.
#
# NOTE(review): the command text is kept exactly as found, including the
# space before "/public/uploads"; if that space is an extraction artifact,
# df receives two paths instead of one — confirm against upstream.
def available_disk_space
  used_percent = ` df -P #{ Rails . root } /public/uploads | tail -1 | tr -s ' ' | cut -d ' ' -f 5 `.to_i
  100 - used_percent
end
2013-06-15 12:29:20 +02:00
# True when the processed document no longer serialises to the cooked HTML
# captured in the constructor.
def dirty?
  current = html
  @previous_cooked != current
end
# Serialises the processed document via to_html, called through `try`.
def html
  markup = @doc.try(:to_html)
  markup
end
end