mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-23 15:48:43 -05:00
added a job to clean up orphan uploads
This commit is contained in:
parent
cf08d2c751
commit
4536b5fe04
7 changed files with 86 additions and 19 deletions
24
app/jobs/scheduled/clean_up_uploads.rb
Normal file
24
app/jobs/scheduled/clean_up_uploads.rb
Normal file
|
@ -0,0 +1,24 @@
|
|||
module Jobs

  # Scheduled job that looks for orphan uploads: uploads older than the
  # configured grace period whose id appears neither in PostUpload#upload_id
  # nor in User#uploaded_avatar_id.
  #
  # NOTE: the actual removal is still disabled (see the commented-out
  # `upload.destroy`), so the job currently only walks the orphan set.
  class CleanUpUploads < Jobs::Scheduled
    recurrence { hourly }

    # Finds the orphan uploads and iterates over them in batches.
    #
    # args - job arguments; not read by this job.
    def execute(args)
      uploads_used_in_posts = PostUpload.uniq.pluck(:upload_id)
      uploads_used_as_avatars = User.uniq.where('uploaded_avatar_id IS NOT NULL').pluck(:uploaded_avatar_id)

      # Drop NULLs and duplicates: a single NULL inside `NOT IN (...)` makes the
      # predicate evaluate to NULL for every row, i.e. nothing would match.
      used_upload_ids = (uploads_used_in_posts + uploads_used_as_avatars).compact.uniq

      # never go below one hour, whatever the site setting says
      grace_period = [SiteSetting.uploads_grace_period_in_hours, 1].max

      orphans = Upload.where("created_at < ?", grace_period.hour.ago)
      # An empty array is rendered as `NOT IN (NULL)`, which matches no row at
      # all; when nothing is referenced every old-enough upload is an orphan,
      # so the filter is simply skipped.
      orphans = orphans.where("id NOT IN (?)", used_upload_ids) unless used_upload_ids.empty?

      orphans.find_each do |upload|
        # disable this for now.
        #upload.destroy
      end
    end

  end

end
|
|
@ -184,6 +184,7 @@ class SiteSetting < ActiveRecord::Base
|
|||
setting(:enforce_global_nicknames, true)
|
||||
setting(:discourse_org_access_key, '')
|
||||
|
||||
setting(:uploads_grace_period_in_hours, 1)
|
||||
setting(:enable_s3_uploads, false)
|
||||
setting(:s3_access_key_id, '')
|
||||
setting(:s3_secret_access_key, '')
|
||||
|
|
|
@ -619,6 +619,7 @@ en:
|
|||
|
||||
suggested_topics: "Number of suggested topics shown at the bottom of a topic"
|
||||
|
||||
uploads_grace_period_in_hours: "Grace period (in hours) before an orphan upload is removed."
|
||||
enable_s3_uploads: "Place uploads on Amazon S3"
|
||||
s3_upload_bucket: "The Amazon S3 bucket name that files will be uploaded into. WARNING: must be lowercase (cf. http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html)"
|
||||
s3_access_key_id: "The Amazon S3 access key id that will be used to upload images"
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
# Rebuilds the post_uploads reverse index (upload_id <-> post_id) from the
# cooked HTML of every post.
class BackfillPostUploadReverseIndex < ActiveRecord::Migration

  # Empties post_uploads, then re-inserts one row per (upload, post) pair found
  # by scanning each post's <img src> and <a href> attributes.
  def up
    # clean the reverse index
    execute "TRUNCATE TABLE post_uploads"

    # fill the reverse index up
    Post.select([:id, :cooked]).find_each do |post|
      doc = Nokogiri::HTML::fragment(post.cooked)
      # images
      doc.search("img").each { |img| add_to_reverse_index(img['src'], post.id) }
      # thumbnails and/or attachments
      doc.search("a").each { |a| add_to_reverse_index(a['href'], post.id) }
    end
  end

  # Inserts a post_uploads row linking post_id to the upload whose url matches
  # the given url, unless that row already exists.
  #
  # url     - value of a src/href attribute taken from the cooked post; may be
  #           nil or blank, in which case nothing happens.
  # post_id - id of the post the url was found in.
  def add_to_reverse_index(url, post_id)
    # make sure we have a url to insert
    return unless url.present?

    # local uploads are relative: strip whatever precedes the local base url
    if index = url.index(local_base_url)
      url = url[index..-1]
    end

    # filter out non-uploads
    return unless url.starts_with?(local_base_url) || url.starts_with?(s3_base_url)

    # The url comes from user-authored post content, so it must be quoted by
    # the connection rather than interpolated between literal quotes.
    quoted_url = ActiveRecord::Base.connection.quote(url)
    safe_post_id = post_id.to_i

    # update the reverse index
    execute <<-SQL
      INSERT INTO post_uploads (upload_id, post_id)
      SELECT u.id, #{safe_post_id}
      FROM uploads u
      WHERE u.url = #{quoted_url}
      AND NOT EXISTS (SELECT 1 FROM post_uploads WHERE upload_id = u.id AND post_id = #{safe_post_id})
    SQL
  end

  # Path prefix of uploads stored locally for the current multisite database
  # (memoized).
  def local_base_url
    @local_base_url ||= "/uploads/#{RailsMultisite::ConnectionManagement.current_db}"
  end

  # Protocol-relative host prefix of uploads stored in the configured S3
  # bucket (memoized).
  def s3_base_url
    @s3_base_url ||= "//#{SiteSetting.s3_upload_bucket.downcase}.s3.amazonaws.com"
  end

end
|
|
@ -16,11 +16,16 @@ class CookedPostProcessor
|
|||
end
|
||||
|
||||
# Runs the post-processing steps in a fixed order: the post's reverse-index
# rows are removed first, then attachments, images and oneboxes are processed.
# NOTE(review): presumably the later steps re-create the reverse-index rows,
# which is why the clean-up has to run first -- confirm against those methods.
def post_process
|
||||
clean_up_reverse_index
|
||||
post_process_attachments
|
||||
post_process_images
|
||||
post_process_oneboxes
|
||||
end
|
||||
|
||||
# Deletes every PostUpload row whose post_id is the id of the post being
# processed. Uses delete_all, so the rows are removed without being loaded.
def clean_up_reverse_index
|
||||
PostUpload.delete_all(post_id: @post.id)
|
||||
end
|
||||
|
||||
def post_process_attachments
|
||||
attachments.each do |attachment|
|
||||
href = attachment['href']
|
||||
|
|
|
@ -10,25 +10,6 @@ task "images:compress" => :environment do
|
|||
end
|
||||
end
|
||||
|
||||
# For each multisite connection, destroys every Upload that has no matching
# row in post_uploads. A database is skipped when the reverse index is empty
# while uploads exist, since every upload would then look like an orphan.
desc "clean orphan uploaded files"
|
||||
task "images:clean_orphans" => :environment do
|
||||
RailsMultisite::ConnectionManagement.each_connection do |db|
|
||||
puts "Cleaning up #{db}"
|
||||
# lightweight safety net to prevent users from wiping all their uploads out
|
||||
if PostUpload.count == 0 && Upload.count > 0
|
||||
puts "The reverse index is empty. Make sure you run the `images:reindex` task"
|
||||
next
|
||||
end
|
||||
Upload.joins("LEFT OUTER JOIN post_uploads ON uploads.id = post_uploads.upload_id")
|
||||
.where("post_uploads.upload_id IS NULL")
|
||||
.find_each do |u|
|
||||
u.destroy
|
||||
putc "."
|
||||
end
|
||||
end
|
||||
puts "\ndone."
|
||||
end
|
||||
|
||||
desc "download all hotlinked images"
|
||||
task "images:pull_hotlinked" => :environment do
|
||||
RailsMultisite::ConnectionManagement.each_connection do |db|
|
||||
|
|
|
@ -10,6 +10,7 @@ describe CookedPostProcessor do
|
|||
let(:post_process) { sequence("post_process") }
|
||||
|
||||
it "post process in sequence" do
|
||||
cpp.expects(:clean_up_reverse_index).in_sequence(post_process)
|
||||
cpp.expects(:post_process_attachments).in_sequence(post_process)
|
||||
cpp.expects(:post_process_images).in_sequence(post_process)
|
||||
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
|
||||
|
@ -18,6 +19,18 @@ describe CookedPostProcessor do
|
|||
|
||||
end
|
||||
|
||||
# Verifies that clean_up_reverse_index asks PostUpload to delete the rows
# belonging to the post handed to the processor. PostUpload.delete_all is
# mocked, so no database rows are involved.
context "clean_up_reverse_index" do
|
||||
|
||||
let(:post) { build(:post) }
|
||||
let(:cpp) { CookedPostProcessor.new(post) }
|
||||
|
||||
it "cleans the reverse index up for the current post" do
|
||||
PostUpload.expects(:delete_all).with(post_id: post.id)
|
||||
cpp.clean_up_reverse_index
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "post_process_attachments" do
|
||||
|
||||
context "with attachment" do
|
||||
|
|
Loading…
Reference in a new issue