diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake index 431563b0a28..1c7a7432419 100644 --- a/lib/tasks/uploads.rake +++ b/lib/tasks/uploads.rake @@ -85,131 +85,6 @@ task "uploads:backfill_shas" => :environment do puts "", "Done" end -################################################################################ -# migrate_from_s3 # -################################################################################ - -task "uploads:migrate_from_s3" => :environment do - ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3 -end - -task "uploads:batch_migrate_from_s3", [:limit] => :environment do |_, args| - ENV["RAILS_DB"] ? migrate_from_s3(limit: args[:limit]) : migrate_all_from_s3(limit: args[:limit]) -end - -def guess_filename(url, raw) - begin - uri = URI.parse("http:#{url}") - f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all) - filename = if f.meta && f.meta["content-disposition"] - f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence - end - filename ||= raw[/([^<]+)<\/a>/, 1].presence - filename ||= File.basename(url) - filename - rescue - nil - ensure - f.try(:close!) rescue nil - end -end - -def migrate_all_from_s3(limit: nil) - RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3(limit: limit) } -end - -def migrate_from_s3(limit: nil) - require "file_store/s3_store" - - # make sure S3 is disabled - if SiteSetting.Upload.enable_s3_uploads - puts "You must disable S3 uploads before running that task." - exit 1 - end - - db = RailsMultisite::ConnectionManagement.current_db - - puts "Migrating uploads from S3 to local storage for '#{db}'..." - - max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes - - migrate_posts = Post - .where("user_id > 0") - .where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%#{SiteSetting.Upload.absolute_base_url}%' OR raw LIKE '%(upload://%'") - migrate_posts = migrate_posts.limit(limit.to_i) if limit - - migrate_posts.find_each do |post| - begin - updated = false - - post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url| - begin - if filename = guess_filename(url, post.raw) - file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true) - sha1 = Upload.generate_digest(file) - origin = nil - - existing_upload = Upload.find_by(sha1: sha1) - if existing_upload&.url&.start_with?("//") - filename = existing_upload.original_filename - origin = existing_upload.origin - existing_upload.destroy - end - - new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1) - if new_upload&.save - updated = true - url = new_upload.url - end - end - - url - rescue - url - end - end - - post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url| - begin - if sha1 = Upload.sha1_from_short_url(url) - if upload = Upload.find_by(sha1: sha1) - if upload.url.start_with?("//") - file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true) - filename = upload.original_filename - origin = upload.origin - upload.destroy - - new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1) - if new_upload&.save - updated = true - url = new_upload.url - end - end - end - end - - url - rescue - url - end - end - - if updated - post.save! - post.rebake! - putc "#" - else - putc "." - end - - rescue - putc "X" - end - end - - puts "Done!" -end - ################################################################################ # migrate_to_s3 # ################################################################################ diff --git a/spec/tasks/uploads_spec.rb b/spec/tasks/uploads_spec.rb index bf817624129..1c20173c83b 100644 --- a/spec/tasks/uploads_spec.rb +++ b/spec/tasks/uploads_spec.rb @@ -139,120 +139,6 @@ RSpec.describe "tasks/uploads" do end end - describe "uploads:batch_migrate_from_s3" do - let!(:uploads) do - [ - upload1, - upload2, - ] - end - - let(:upload1) { Fabricate(:upload_s3) } - let(:upload2) { Fabricate(:upload_s3) } - - let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" } - let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" } - - let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") } - let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") } - - before do - global_setting :s3_bucket, 'file-uploads/folder' - global_setting :s3_region, 'us-east-1' - - setup_s3 - uploads.each { |upload| stub_upload(upload) } - - upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png" - upload1.save! - upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png" - upload2.save! - - PostUpload.create(post: post1, upload: upload1) - PostUpload.create(post: post2, upload: upload2) - SiteSetting.enable_s3_uploads = false - end - - def invoke_task - capture_stdout do - Rake::Task['uploads:batch_migrate_from_s3'].invoke('1') - end - end - - it "applies the limit" do - FileHelper.stubs(:download).returns(file_from_fixtures("logo.png")).once() - - freeze_time - - post1.update_columns(baked_at: 1.week.ago) - post2.update_columns(baked_at: 1.week.ago) - invoke_task - - expect(post1.reload.baked_at).not_to eq_time(1.week.ago) - expect(post2.reload.baked_at).to eq_time(1.week.ago) - end - - end - - describe "uploads:migrate_from_s3" do - let!(:uploads) do - [ - upload1, - upload2, - ] - end - - let(:upload1) { Fabricate(:upload_s3) } - let(:upload2) { Fabricate(:upload_s3) } - - let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" } - let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" } - - let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") } - let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") } - - before do - global_setting :s3_bucket, 'file-uploads/folder' - global_setting :s3_region, 'us-east-1' - setup_s3 - uploads.each { |upload| stub_upload(upload) } - - upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png" - upload1.save! - upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png" - upload2.save! - - PostUpload.create(post: post1, upload: upload1) - PostUpload.create(post: post2, upload: upload2) - SiteSetting.enable_s3_uploads = false - end - - def invoke_task - capture_stdout do - Rake::Task['uploads:migrate_from_s3'].invoke - end - end - - it "fails if s3 uploads are still enabled" do - SiteSetting.enable_s3_uploads = true - expect { invoke_task }.to raise_error(SystemExit) - end - - it "does not apply a limit" do - FileHelper.stubs(:download).with("http:#{upload1.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once() - FileHelper.stubs(:download).with("http:#{upload2.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once() - - freeze_time - - post1.update_columns(baked_at: 1.week.ago) - post2.update_columns(baked_at: 1.week.ago) - invoke_task - - expect(post1.reload.baked_at).not_to eq_time(1.week.ago) - expect(post2.reload.baked_at).not_to eq_time(1.week.ago) - end - end - describe "uploads:disable_secure_media" do def invoke_task capture_stdout do