diff --git a/lib/file_store/base_store.rb b/lib/file_store/base_store.rb index 5523c4a2c82..2cd9dad0e31 100644 --- a/lib/file_store/base_store.rb +++ b/lib/file_store/base_store.rb @@ -60,6 +60,10 @@ module FileStore not_implemented end + def list_missing_uploads(skip_optimized: false) + not_implemented + end + def download(upload) DistributedMutex.synchronize("download_#{upload.sha1}") do filename = "#{upload.sha1}#{File.extname(upload.original_filename)}" diff --git a/lib/file_store/local_store.rb b/lib/file_store/local_store.rb index 6d4e733a359..63ae443aad6 100644 --- a/lib/file_store/local_store.rb +++ b/lib/file_store/local_store.rb @@ -97,6 +97,36 @@ module FileStore "#{public_dir}#{relative_base_url.sub("/uploads/", "/uploads/tombstone/")}" end - end + def list_missing_uploads(skip_optimized: false) + list_missing(Upload) + list_missing(OptimizedImage) unless skip_optimized + end + private + + def list_missing(model) + public_directory = "#{Rails.root}/public" + + count = 0 + model.find_each do |upload| + + # could be a remote image + next unless upload.url =~ /^\/[^\/]/ + + path = "#{public_directory}#{upload.url}" + bad = true + begin + bad = false if File.size(path) != 0 + rescue + # something is messed up + end + if bad + count += 1 + puts path + end + end + puts "#{count} of #{model.count} #{model.name.underscore.pluralize} are missing" if count > 0 + end + + end end diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb index 6aca50f9f4d..f5b12d59264 100644 --- a/lib/file_store/s3_store.rb +++ b/lib/file_store/s3_store.rb @@ -110,5 +110,46 @@ module FileStore raise Discourse::SiteSettingMissing.new("s3_upload_bucket") if SiteSetting.Upload.s3_upload_bucket.blank? SiteSetting.Upload.s3_upload_bucket.downcase end + + def list_missing_uploads(skip_optimized: false) + list_missing(Upload, "original/") + list_missing(OptimizedImage, "optimized/") unless skip_optimized + end + + private + + def list_missing(model, prefix) + connection = ActiveRecord::Base.connection.raw_connection + connection.exec('CREATE TEMP TABLE verified_ids(val integer PRIMARY KEY)') + marker = nil + files = @s3_helper.list(prefix, marker) + + while files.count > 0 do + verified_ids = [] + + files.each do |f| + id = model.where("url LIKE '%#{f.key}' AND filesize = #{f.size}").pluck(:id).first + verified_ids << id if id.present? + marker = f.key + end + + verified_id_clause = verified_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",") + connection.exec("INSERT INTO verified_ids VALUES #{verified_id_clause}") + files = @s3_helper.list(prefix, marker) + end + + missing_uploads = model.joins('LEFT JOIN verified_ids ON val = id').where(val: nil) + missing_count = missing_uploads.count + + if missing_count > 0 + missing_uploads.find_each do |upload| + puts upload.url + end + + puts "#{missing_count} of #{model.count} #{model.name.underscore.pluralize} are missing" + end + ensure + connection.exec('DROP TABLE verified_ids') unless connection.nil? + end end end diff --git a/lib/s3_helper.rb b/lib/s3_helper.rb index b4bcb445197..f12497bc1ae 100644 --- a/lib/s3_helper.rb +++ b/lib/s3_helper.rb @@ -136,9 +136,10 @@ class S3Helper update_lifecycle("purge_tombstone", grace_period, prefix: @tombstone_prefix) end - def list(prefix = "") - prefix = get_path_for_s3_upload(prefix) - s3_bucket.objects(prefix: prefix) + def list(prefix = "", marker = nil) + options = { prefix: get_path_for_s3_upload(prefix) } + options[:marker] = marker if marker.present? + s3_bucket.objects(options) end def tag_file(key, tags) diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake index fbf162d436b..cf175150227 100644 --- a/lib/tasks/uploads.rake +++ b/lib/tasks/uploads.rake @@ -404,54 +404,7 @@ task "uploads:missing" => :environment do end def list_missing_uploads(skip_optimized: false) - if Discourse.store.external? - puts "This task only works for internal storages." - return - end - - public_directory = "#{Rails.root}/public" - - count = 0 - Upload.find_each do |upload| - - # could be a remote image - next unless upload.url =~ /^\/[^\/]/ - - path = "#{public_directory}#{upload.url}" - bad = true - begin - bad = false if File.size(path) != 0 - rescue - # something is messed up - end - if bad - count += 1 - puts path - end - end - puts "#{count} of #{Upload.count} uploads are missing" if count > 0 - - unless skip_optimized - count = 0 - OptimizedImage.find_each do |optimized_image| - # remote? - next unless optimized_image.url =~ /^\/[^\/]/ - - path = "#{public_directory}#{optimized_image.url}" - - bad = true - begin - bad = false if File.size(path) != 0 - rescue - # something is messed up - end - if bad - count += 1 - puts path - end - end - puts "#{count} of #{OptimizedImage.count} optimized images are missing" if count > 0 - end + Discourse.store.list_missing_uploads(skip_optimized: skip_optimized) end ################################################################################