diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb
new file mode 100644
index 000000000..6c2ff7c7a
--- /dev/null
+++ b/app/jobs/scheduled/clean_up_uploads.rb
@@ -0,0 +1,31 @@
+module Jobs
+
+  # Scheduled job that walks over the uploads referenced by no post (through the
+  # post_uploads reverse index) and by no user avatar.
+  # Deletion is still commented out, so for now the job only iterates.
+  class CleanUpUploads < Jobs::Scheduled
+    recurrence { hourly }
+
+    def execute(args)
+
+      uploads_used_in_posts = PostUpload.uniq.pluck(:upload_id)
+      uploads_used_as_avatars = User.uniq.where('uploaded_avatar_id IS NOT NULL').pluck(:uploaded_avatar_id)
+
+      # the grace period is never shorter than one hour
+      grace_period = [SiteSetting.uploads_grace_period_in_hours, 1].max
+
+      # NOTE(review): ActiveRecord binds an empty list to "(?)" as NULL, so this
+      # selects nothing while no upload is referenced anywhere -- confirm that is
+      # the wanted behaviour before enabling the destroy below.
+      Upload.where("created_at < ?", grace_period.hour.ago)
+            .where("id NOT IN (?)", uploads_used_in_posts + uploads_used_as_avatars)
+            .find_each do |upload|
+        # disable this for now.
+        #upload.destroy
+      end
+
+    end
+
+  end
+
+end
diff --git a/app/models/site_setting.rb b/app/models/site_setting.rb
index 63a32622c..a0d577062 100644
--- a/app/models/site_setting.rb
+++ b/app/models/site_setting.rb
@@ -184,6 +184,7 @@ class SiteSetting < ActiveRecord::Base
   setting(:enforce_global_nicknames, true)
   setting(:discourse_org_access_key, '')
 
+  setting(:uploads_grace_period_in_hours, 1)
   setting(:enable_s3_uploads, false)
   setting(:s3_access_key_id, '')
   setting(:s3_secret_access_key, '')
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 495d8e4d9..9ab54fe43 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -619,6 +619,7 @@ en:
 
     suggested_topics: "Number of suggested topics shown at the bottom of a topic"
 
+    uploads_grace_period_in_hours: "Grace period (in hours) before an orphan upload is removed."
     enable_s3_uploads: "Place uploads on Amazon S3"
     s3_upload_bucket: "The Amazon S3 bucket name that files will be uploaded into. WARNING: must be lowercase (cf. http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html)"
     s3_access_key_id: "The Amazon S3 access key id that will be used to upload images"
diff --git a/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb b/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
new file mode 100644
index 000000000..0f2771bf6
--- /dev/null
+++ b/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
@@ -0,0 +1,51 @@
+# Rebuilds the post_uploads reverse index from the cooked HTML of every post.
+class BackfillPostUploadReverseIndex < ActiveRecord::Migration
+
+  def up
+    # clean the reverse index
+    execute "TRUNCATE TABLE post_uploads"
+
+    # fill the reverse index up
+    Post.select([:id, :cooked]).find_each do |post|
+      doc = Nokogiri::HTML::fragment(post.cooked)
+      # images
+      doc.search("img").each { |img| add_to_reverse_index(img['src'], post.id) }
+      # thumbnails and/or attachments
+      doc.search("a").each { |a| add_to_reverse_index(a['href'], post.id) }
+    end
+  end
+
+  # Adds an (upload, post) row for every upload whose url matches +url+.
+  # Does nothing when the url is blank, is neither a local nor an S3 upload url,
+  # or when the row already exists.
+  def add_to_reverse_index(url, post_id)
+    # make sure we have a url to insert
+    return unless url.present?
+    # local uploads are relative
+    if index = url.index(local_base_url)
+      url = url[index..-1]
+    end
+    # filter out non-uploads
+    return unless url.starts_with?(local_base_url) || url.starts_with?(s3_base_url)
+    # the url comes from user-authored markup: quote it so that an apostrophe
+    # can neither break the statement nor inject SQL
+    quoted_url = connection.quote(url)
+    # update the reverse index
+    execute "INSERT INTO post_uploads (upload_id, post_id)
+             SELECT u.id, #{post_id}
+             FROM   uploads u
+             WHERE  u.url = #{quoted_url}
+             AND    NOT EXISTS (SELECT 1 FROM post_uploads WHERE upload_id = u.id AND post_id = #{post_id})"
+  end
+
+  # url prefix of local uploads, built from the current multisite db name
+  def local_base_url
+    @local_base_url ||= "/uploads/#{RailsMultisite::ConnectionManagement.current_db}"
+  end
+
+  # url prefix of uploads stored in the S3 bucket named by the site setting
+  def s3_base_url
+    @s3_base_url ||= "//#{SiteSetting.s3_upload_bucket.downcase}.s3.amazonaws.com"
+  end
+
+end
diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb
index 321ae5fce..4eb0cd6dc 100644
--- a/lib/cooked_post_processor.rb
+++ b/lib/cooked_post_processor.rb
@@ -16,11 +16,17 @@ class CookedPostProcessor
   end
 
   def post_process
+    clean_up_reverse_index
     post_process_attachments
     post_process_images
     post_process_oneboxes
   end
 
+  # removes every post_uploads row that points at this post
+  def clean_up_reverse_index
+    PostUpload.delete_all(post_id: @post.id)
+  end
+
   def post_process_attachments
     attachments.each do |attachment|
       href = attachment['href']
diff --git a/lib/tasks/images.rake b/lib/tasks/images.rake
index 3dd59330f..3d9a61985 100644
--- a/lib/tasks/images.rake
+++ b/lib/tasks/images.rake
@@ -10,25 +10,6 @@ task "images:compress" => :environment do
   end
 end
 
-desc "clean orphan uploaded files"
-task "images:clean_orphans" => :environment do
-  RailsMultisite::ConnectionManagement.each_connection do |db|
-    puts "Cleaning up #{db}"
-    # ligthweight safety net to prevent users from wiping all their uploads out
-    if PostUpload.count == 0 && Upload.count > 0
-      puts "The reverse index is empty. Make sure you run the `images:reindex` task"
-      next
-    end
-    Upload.joins("LEFT OUTER JOIN post_uploads ON uploads.id = post_uploads.upload_id")
-          .where("post_uploads.upload_id IS NULL")
-          .find_each do |u|
-      u.destroy
-      putc "."
-    end
-  end
-  puts "\ndone."
-end
-
 desc "download all hotlinked images"
 task "images:pull_hotlinked" => :environment do
   RailsMultisite::ConnectionManagement.each_connection do |db|
diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb
index 0141e3718..480bb01c7 100644
--- a/spec/components/cooked_post_processor_spec.rb
+++ b/spec/components/cooked_post_processor_spec.rb
@@ -10,6 +10,7 @@ describe CookedPostProcessor do
     let(:post_process) { sequence("post_process") }
 
     it "post process in sequence" do
+      cpp.expects(:clean_up_reverse_index).in_sequence(post_process)
       cpp.expects(:post_process_attachments).in_sequence(post_process)
       cpp.expects(:post_process_images).in_sequence(post_process)
       cpp.expects(:post_process_oneboxes).in_sequence(post_process)
@@ -18,6 +19,18 @@ describe CookedPostProcessor do
 
   end
 
+  context "clean_up_reverse_index" do
+
+    let(:post) { build(:post) }
+    let(:cpp) { CookedPostProcessor.new(post) }
+
+    it "cleans the reverse index up for the current post" do
+      PostUpload.expects(:delete_all).with(post_id: post.id)
+      cpp.clean_up_reverse_index
+    end
+
+  end
+
   context "post_process_attachments" do
 
     context "with attachment" do