diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb
index d4bb6647d29..e51ea23e435 100644
--- a/lib/file_store/s3_store.rb
+++ b/lib/file_store/s3_store.rb
@@ -123,11 +123,11 @@ module FileStore
       SiteSetting.Upload.s3_upload_bucket.downcase
     end
 
-    def list_missing_uploads(skip_optimized: false)
+    def list_missing_uploads(skip_optimized: false, backfill_etags: false)
       if SiteSetting.enable_s3_inventory
         require 's3_inventory'
-        S3Inventory.new(s3_helper, :upload).list_missing
-        S3Inventory.new(s3_helper, :optimized).list_missing unless skip_optimized
+        S3Inventory.new(s3_helper, :upload).list_missing(backfill_etags: backfill_etags)
+        S3Inventory.new(s3_helper, :optimized).list_missing(backfill_etags: backfill_etags) unless skip_optimized
       else
         list_missing(Upload, "original/")
         list_missing(OptimizedImage, "optimized/") unless skip_optimized
diff --git a/lib/s3_inventory.rb b/lib/s3_inventory.rb
index 19afad59a17..3d9a5596f94 100644
--- a/lib/s3_inventory.rb
+++ b/lib/s3_inventory.rb
@@ -24,7 +24,7 @@ class S3Inventory
     end
   end
 
-  def list_missing
+  def list_missing(backfill_etags: false)
     if files.blank?
       error("Failed to list inventory from S3")
       return
@@ -46,6 +46,16 @@ class S3Inventory
             end
           end
 
+          if backfill_etags
+            # Copy etags from the inventory table onto records that have none,
+            # matching a record when its url ends (case-insensitively) with the inventory key.
+            # Inner join: records without an inventory match are left untouched.
+            uploads = model.where(etag: nil).joins("JOIN #{table_name} ON #{model.table_name}.url ILIKE '%' || #{table_name}.key")
+            uploads.select(:id, :"#{table_name}.etag").find_each do |upload|
+              model.where(id: upload.id).update_all(etag: upload.etag)
+            end
+          end
+
           uploads = (model == Upload) ? model.where("created_at < ?", last_modified) : model
           missing_uploads = uploads.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag").where("#{table_name}.etag is NULL")
 
diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake
index 40b33e81e1c..ed11824becc 100644
--- a/lib/tasks/uploads.rake
+++ b/lib/tasks/uploads.rake
@@ -482,16 +482,16 @@ end
 # list all missing uploads and optimized images
 task "uploads:missing" => :environment do
   if ENV["RAILS_DB"]
-    list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
+    list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'], backfill_etags: ENV['BACKFILL_ETAGS'])
   else
     RailsMultisite::ConnectionManagement.each_connection do |db|
-      list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
+      list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'], backfill_etags: ENV['BACKFILL_ETAGS'])
     end
   end
 end
 
-def list_missing_uploads(skip_optimized: false)
-  Discourse.store.list_missing_uploads(skip_optimized: skip_optimized)
+def list_missing_uploads(skip_optimized: false, backfill_etags: false)
+  Discourse.store.list_missing_uploads(skip_optimized: skip_optimized, backfill_etags: backfill_etags)
 end
 
 ################################################################################
diff --git a/spec/components/s3_inventory_spec.rb b/spec/components/s3_inventory_spec.rb
index 1156d5db01e..54b6585b9ef 100644
--- a/spec/components/s3_inventory_spec.rb
+++ b/spec/components/s3_inventory_spec.rb
@@ -74,4 +74,15 @@ describe "S3Inventory" do
 
     expect(output).to eq("Downloading inventory file 'Key' to tmp directory...\n#{upload.url}\n1 of 4 uploads are missing\n")
   end
+
+  it "should backfill etags to uploads table correctly" do
+    Fabricate(:upload, url: "//bucket.amazonaws.com/original/0184537a4f419224404d013414e913a4f56018f2.jpg", created_at: 2.days.ago)
+
+    inventory.expects(:decompress_inventory_files)
+    inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).at_least(1)
+
+    capture_stdout do
+      expect { inventory.list_missing(backfill_etags: true) }.to change { Upload.where(etag: nil).count }.by(-1)
+    end
+  end
 end