From 4a17d6dca6e8e5f66434600b0ffae7aaf9303c9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9gis=20Hanol?= <regis@hanol.fr>
Date: Wed, 19 Jun 2013 21:51:41 +0200
Subject: [PATCH] added a rake task to clean orphan uploaded files

---
 app/models/upload.rb                | 12 ++++++++
 lib/local_store.rb                  |  5 +++
 lib/s3.rb                           | 48 ++++++++++++++++++++---------
 lib/tasks/images.rake               | 19 ++++++++++++
 spec/components/local_store_spec.rb |  2 +-
 spec/models/upload_spec.rb          |  8 ++---
 6 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/app/models/upload.rb b/app/models/upload.rb
index 869e69fb5..f935e9241 100644
--- a/app/models/upload.rb
+++ b/app/models/upload.rb
@@ -36,6 +36,13 @@ class Upload < ActiveRecord::Base
     optimized_images << thumbnail if thumbnail
   end
 
+  def delete
+    Upload.transaction do
+      Upload.remove_file url
+      super
+    end
+  end
+
   def self.create_for(user_id, file)
     # compute the sha
     sha1 = Digest::SHA1.file(file.tempfile).hexdigest
@@ -74,6 +81,11 @@ class Upload < ActiveRecord::Base
     return LocalStore.store_file(file, sha1, image_info, upload_id)
   end
 
+  def self.remove_file(url)
+    S3.remove_file(url) if SiteSetting.enable_s3_uploads?
+    LocalStore.remove_file(url)
+  end
+
   def self.uploaded_regex
     /\/uploads\/#{RailsMultisite::ConnectionManagement.current_db}\/(?<upload_id>\d+)\/[0-9a-f]{16}\.(png|jpg|jpeg|gif|tif|tiff|bmp)/
   end
diff --git a/lib/local_store.rb b/lib/local_store.rb
index 11ae1a6ea..37539b1f9 100644
--- a/lib/local_store.rb
+++ b/lib/local_store.rb
@@ -15,4 +15,9 @@ module LocalStore
     return Discourse::base_uri + "#{url_root}/#{clean_name}"
   end
 
+  def self.remove_file(url)
+    File.delete("#{Rails.root}/public#{url}")
+  rescue Errno::ENOENT
+  end
+
 end
diff --git a/lib/s3.rb b/lib/s3.rb
index 7ef07e0e6..c7c0c791e 100644
--- a/lib/s3.rb
+++ b/lib/s3.rb
@@ -1,22 +1,44 @@
 module S3
 
   def self.store_file(file, sha1, image_info, upload_id)
-    raise Discourse::SiteSettingMissing.new("s3_upload_bucket")     if SiteSetting.s3_upload_bucket.blank?
-    raise Discourse::SiteSettingMissing.new("s3_access_key_id")     if SiteSetting.s3_access_key_id.blank?
-    raise Discourse::SiteSettingMissing.new("s3_secret_access_key") if SiteSetting.s3_secret_access_key.blank?
+    S3.check_missing_site_settings
 
-    @fog_loaded = require 'fog' unless @fog_loaded
+    directory = S3.get_or_create_directory(SiteSetting.s3_upload_bucket)
 
     remote_filename = "#{upload_id}#{sha1}.#{image_info.type}"
 
-    options = S3.generate_options
-    directory = S3.get_or_create_directory(SiteSetting.s3_upload_bucket, options)
     # if this fails, it will throw an exception
     file = S3.upload(file, remote_filename, directory)
-
     return "//#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/#{remote_filename}"
   end
 
+  def self.remove_file(url)
+    S3.check_missing_site_settings
+
+    directory = S3.get_or_create_directory(SiteSetting.s3_upload_bucket)
+
+    file = S3.destroy(url, directory)
+  end
+
+  def self.check_missing_site_settings
+    raise Discourse::SiteSettingMissing.new("s3_upload_bucket")     if SiteSetting.s3_upload_bucket.blank?
+    raise Discourse::SiteSettingMissing.new("s3_access_key_id")     if SiteSetting.s3_access_key_id.blank?
+    raise Discourse::SiteSettingMissing.new("s3_secret_access_key") if SiteSetting.s3_secret_access_key.blank?
+  end
+
+  def self.get_or_create_directory(name)
+    @fog_loaded = require 'fog' unless @fog_loaded
+
+    options = S3.generate_options
+
+    fog = Fog::Storage.new(options)
+
+    directory = fog.directories.get(name)
+    directory = fog.directories.create(key: name) unless directory
+
+    directory
+  end
+
   def self.generate_options
     options = {
       provider: 'AWS',
@@ -28,14 +50,6 @@ module S3
     options
   end
 
-  def self.get_or_create_directory(name, options)
-    fog = Fog::Storage.new(options)
-    directory = fog.directories.get(name)
-    directory = fog.directories.create(key: name) unless directory
-
-    directory
-  end
-
   def self.upload(file, name, directory)
     directory.files.create(
       key: name,
@@ -45,4 +59,8 @@ module S3
     )
   end
 
+  def self.destroy(name, directory)
+    directory.files.destroy(key: name)
+  end
+
 end
diff --git a/lib/tasks/images.rake b/lib/tasks/images.rake
index d1376bd3e..4fcac3e67 100644
--- a/lib/tasks/images.rake
+++ b/lib/tasks/images.rake
@@ -30,3 +30,22 @@ task "images:reindex" => :environment do
   end
   puts "\ndone."
 end
+
+desc "clean orphan uploaded files"
+task "images:clean_orphans" => :environment do
+  RailsMultisite::ConnectionManagement.each_connection do |db|
+    puts "Cleaning up #{db}"
+    # ligthweight safety net to prevent users from wiping all their uploads out
+    if PostUpload.count == 0 && Upload.count > 0
+      puts "The reverse index is empty. Make sure you run the `images:reindex` task"
+      next
+    end
+    Upload.joins("LEFT OUTER JOIN post_uploads ON uploads.id = post_uploads.upload_id")
+          .where("post_uploads.upload_id IS NULL")
+          .find_each do |u|
+      u.delete
+      putc "."
+    end
+  end
+  puts "\ndone."
+end
diff --git a/spec/components/local_store_spec.rb b/spec/components/local_store_spec.rb
index 7421969d6..2b79ef676 100644
--- a/spec/components/local_store_spec.rb
+++ b/spec/components/local_store_spec.rb
@@ -15,7 +15,7 @@ describe LocalStore do
 
     let(:image_info) { FastImage.new(file) }
 
-    it 'returns the url of the S3 upload if successful' do
+    it 'returns the url of the uploaded file if successful' do
       # prevent the tests from creating directories & files...
       FileUtils.stubs(:mkdir_p)
       File.stubs(:open)
diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb
index 68d47f720..42e22f2fd 100644
--- a/spec/models/upload_spec.rb
+++ b/spec/models/upload_spec.rb
@@ -62,17 +62,17 @@ describe Upload do
 
     it "identifies internal or relatives urls" do
       Discourse.expects(:base_url_no_prefix).returns("http://discuss.site.com")
-      Upload.has_been_uploaded?("http://discuss.site.com/upload/1234/42/ABCD.jpg").should == true
-      Upload.has_been_uploaded?("/upload/42/ABCD.jpg").should == true
+      Upload.has_been_uploaded?("http://discuss.site.com/upload/1234/42/0123456789ABCDEF.jpg").should == true
+      Upload.has_been_uploaded?("/upload/42/0123456789ABCDEF.jpg").should == true
     end
 
     it "identifies internal urls when using a CDN" do
       ActionController::Base.expects(:asset_host).returns("http://my.cdn.com").twice
-      Upload.has_been_uploaded?("http://my.cdn.com/upload/1234/42/ABCD.jpg").should == true
+      Upload.has_been_uploaded?("http://my.cdn.com/upload/1234/42/0123456789ABCDEF.jpg").should == true
     end
 
     it "identifies external urls" do
-      Upload.has_been_uploaded?("http://domain.com/upload/1234/42/ABCD.jpg").should == false
+      Upload.has_been_uploaded?("http://domain.com/upload/1234/42/0123456789ABCDEF.jpg").should == false
     end
 
   end