added a job to clean up orphan uploads

This commit is contained in:
Régis Hanol 2013-10-14 14:27:41 +02:00
parent cf08d2c751
commit 4536b5fe04
7 changed files with 86 additions and 19 deletions

View file

@ -0,0 +1,24 @@
module Jobs
  # Hourly scheduled job that walks over orphan uploads: uploads created
  # before the grace period cutoff whose id is referenced neither by a
  # PostUpload row nor as a user's uploaded avatar.
  #
  # The removal itself is currently commented out, so for now the job only
  # iterates over the candidates without touching them.
  class CleanUpUploads < Jobs::Scheduled
    recurrence { hourly }

    # args - job arguments; this job does not read them.
    def execute(args)
      post_upload_ids = PostUpload.uniq.pluck(:upload_id)
      avatar_upload_ids = User.uniq.where('uploaded_avatar_id IS NOT NULL').pluck(:uploaded_avatar_id)

      # the configured grace period is never allowed to drop below one hour
      grace_period = [SiteSetting.uploads_grace_period_in_hours, 1].max
      cutoff = grace_period.hour.ago

      # NOTE(review): with an empty id list Rails is believed to render
      # `NOT IN (NULL)`, which selects no rows at all -- confirm whether that
      # implicit safety net is intended before re-enabling the destroy below.
      referenced_ids = post_upload_ids + avatar_upload_ids
      orphans = Upload.where("created_at < ?", cutoff)
                      .where("id NOT IN (?)", referenced_ids)

      orphans.find_each do |upload|
        # disable this for now.
        #upload.destroy
      end
    end
  end
end

View file

@ -184,6 +184,7 @@ class SiteSetting < ActiveRecord::Base
setting(:enforce_global_nicknames, true)
setting(:discourse_org_access_key, '')
# age (in hours) an upload must exceed before Jobs::CleanUpUploads considers it
# an orphan candidate; that job raises any value below 1 up to 1
setting(:uploads_grace_period_in_hours, 1)
setting(:enable_s3_uploads, false)
setting(:s3_access_key_id, '')
setting(:s3_secret_access_key, '')

View file

@ -619,6 +619,7 @@ en:
suggested_topics: "Number of suggested topics shown at the bottom of a topic"
# NOTE: Jobs::CleanUpUploads enforces a floor of 1 hour on this value, whatever is configured.
uploads_grace_period_in_hours: "Grace period (in hours) before an orphan upload is removed."
enable_s3_uploads: "Place uploads on Amazon S3"
s3_upload_bucket: "The Amazon S3 bucket name that files will be uploaded into. WARNING: must be lowercase (cf. http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html)"
s3_access_key_id: "The Amazon S3 access key id that will be used to upload images"

View file

@ -0,0 +1,42 @@
# Rebuilds the post_uploads reverse index (upload_id <-> post_id) from scratch
# by scanning the cooked HTML of every post for image sources and link targets
# that point at a known upload.
class BackfillPostUploadReverseIndex < ActiveRecord::Migration
  # Empties post_uploads, then re-adds one row per (upload, post) pair found
  # in the cooked HTML of each post.
  def up
    # clean the reverse index
    execute "TRUNCATE TABLE post_uploads"
    # fill the reverse index up
    Post.select([:id, :cooked]).find_each do |post|
      doc = Nokogiri::HTML::fragment(post.cooked)
      # images
      doc.search("img").each { |img| add_to_reverse_index(img['src'], post.id) }
      # thumbnails and/or attachments
      doc.search("a").each { |a| add_to_reverse_index(a['href'], post.id) }
    end
  end

  # Inserts a post_uploads row linking +post_id+ to the upload whose url equals
  # +url+, unless that row already exists or no such upload is found.
  #
  # url     - src/href value taken from the cooked post HTML (may be nil/blank).
  # post_id - id of the post the url was found in.
  def add_to_reverse_index(url, post_id)
    # make sure we have a url to insert
    return unless url.present?

    # local uploads are relative: drop whatever precedes the local base url
    index = url.index(local_base_url)
    url = url[index..-1] if index

    # filter out non-uploads
    return unless url.starts_with?(local_base_url) || url.starts_with?(s3_base_url)

    # The url comes straight from user-authored post content, so it must never
    # be spliced into the statement raw: let the adapter quote/escape it, and
    # force the post id to an integer.
    quoted_url = connection.quote(url)
    safe_post_id = post_id.to_i

    # update the reverse index
    execute <<-SQL
      INSERT INTO post_uploads (upload_id, post_id)
      SELECT u.id, #{safe_post_id}
      FROM uploads u
      WHERE u.url = #{quoted_url}
      AND NOT EXISTS (SELECT 1 FROM post_uploads WHERE upload_id = u.id AND post_id = #{safe_post_id})
    SQL
  end

  # Path prefix of locally stored uploads for the current multisite database
  # (memoized).
  def local_base_url
    @local_base_url ||= "/uploads/#{RailsMultisite::ConnectionManagement.current_db}"
  end

  # Protocol-relative host prefix built from the configured S3 bucket name,
  # lowercased (memoized).
  def s3_base_url
    @s3_base_url ||= "//#{SiteSetting.s3_upload_bucket.downcase}.s3.amazonaws.com"
  end
end

View file

@ -16,11 +16,16 @@ class CookedPostProcessor
end
# Runs every post-processing step, in this fixed order: reverse-index clean-up,
# attachments, images, oneboxes.
# NOTE(review): the clean-up presumably goes first so the later steps can
# re-register the uploads they find -- confirm against those steps.
def post_process
clean_up_reverse_index
post_process_attachments
post_process_images
post_process_oneboxes
end
# Removes the PostUpload rows whose post_id is the id of the post being processed.
def clean_up_reverse_index
PostUpload.delete_all(post_id: @post.id)
end
def post_process_attachments
attachments.each do |attachment|
href = attachment['href']

View file

@ -10,25 +10,6 @@ task "images:compress" => :environment do
end
end
desc "clean orphan uploaded files"
task "images:clean_orphans" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    puts "Cleaning up #{db}"

    # lightweight safety net to prevent users from wiping all their uploads out:
    # an empty reverse index next to existing uploads means it was never built
    if PostUpload.count.zero? && Upload.count > 0
      puts "The reverse index is empty. Make sure you run the `images:reindex` task"
      next
    end

    # uploads that have no matching row in post_uploads
    orphans = Upload.joins("LEFT OUTER JOIN post_uploads ON uploads.id = post_uploads.upload_id")
                    .where("post_uploads.upload_id IS NULL")

    orphans.find_each do |orphan|
      orphan.destroy
      # one dot of progress per destroyed upload
      putc "."
    end
  end

  puts "\ndone."
end
desc "download all hotlinked images"
task "images:pull_hotlinked" => :environment do
RailsMultisite::ConnectionManagement.each_connection do |db|

View file

@ -10,6 +10,7 @@ describe CookedPostProcessor do
let(:post_process) { sequence("post_process") }
it "post process in sequence" do
cpp.expects(:clean_up_reverse_index).in_sequence(post_process)
cpp.expects(:post_process_attachments).in_sequence(post_process)
cpp.expects(:post_process_images).in_sequence(post_process)
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
@ -18,6 +19,18 @@ describe CookedPostProcessor do
end
context "clean_up_reverse_index" do
  it "cleans the reverse index up for the current post" do
    # an unsaved post is enough: only its id is handed to delete_all
    post = build(:post)

    # the expectation is set before the processor is built, as before
    PostUpload.expects(:delete_all).with(post_id: post.id)

    CookedPostProcessor.new(post).clean_up_reverse_index
  end
end
context "post_process_attachments" do
context "with attachment" do