FIX: remove migrate_from_s3 task that silently corrupts data (#11703)

Transient errors in migration are ignored, silently corrupting
data, and the migration is incomplete and misses many sources of
uploads, which will lead to an incorrect expectation of independence
from the remote object storage after announcing that the migration
was successful, regardles of whether transient errors permanently
corrupted the data.

Remove this migration until such time as it is re-written to
follow the same pattern has the migration to s3, moving the
core logic out of the task.
This commit is contained in:
Michael K Johnson
2021-01-17 16:33:29 -05:00
committed by GitHub
parent 869d25b7d3
commit 2a23e54632
2 changed files with 0 additions and 239 deletions

View File

@ -85,131 +85,6 @@ task "uploads:backfill_shas" => :environment do
puts "", "Done"
end
################################################################################
# migrate_from_s3 #
################################################################################
task "uploads:migrate_from_s3" => :environment do
ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
end
task "uploads:batch_migrate_from_s3", [:limit] => :environment do |_, args|
ENV["RAILS_DB"] ? migrate_from_s3(limit: args[:limit]) : migrate_all_from_s3(limit: args[:limit])
end
def guess_filename(url, raw)
begin
uri = URI.parse("http:#{url}")
f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)
filename = if f.meta && f.meta["content-disposition"]
f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence
end
filename ||= raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence
filename ||= File.basename(url)
filename
rescue
nil
ensure
f.try(:close!) rescue nil
end
end
def migrate_all_from_s3(limit: nil)
RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3(limit: limit) }
end
def migrate_from_s3(limit: nil)
require "file_store/s3_store"
# make sure S3 is disabled
if SiteSetting.Upload.enable_s3_uploads
puts "You must disable S3 uploads before running that task."
exit 1
end
db = RailsMultisite::ConnectionManagement.current_db
puts "Migrating uploads from S3 to local storage for '#{db}'..."
max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
migrate_posts = Post
.where("user_id > 0")
.where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%#{SiteSetting.Upload.absolute_base_url}%' OR raw LIKE '%(upload://%'")
migrate_posts = migrate_posts.limit(limit.to_i) if limit
migrate_posts.find_each do |post|
begin
updated = false
post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url|
begin
if filename = guess_filename(url, post.raw)
file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
sha1 = Upload.generate_digest(file)
origin = nil
existing_upload = Upload.find_by(sha1: sha1)
if existing_upload&.url&.start_with?("//")
filename = existing_upload.original_filename
origin = existing_upload.origin
existing_upload.destroy
end
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
if new_upload&.save
updated = true
url = new_upload.url
end
end
url
rescue
url
end
end
post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url|
begin
if sha1 = Upload.sha1_from_short_url(url)
if upload = Upload.find_by(sha1: sha1)
if upload.url.start_with?("//")
file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
filename = upload.original_filename
origin = upload.origin
upload.destroy
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
if new_upload&.save
updated = true
url = new_upload.url
end
end
end
end
url
rescue
url
end
end
if updated
post.save!
post.rebake!
putc "#"
else
putc "."
end
rescue
putc "X"
end
end
puts "Done!"
end
################################################################################
# migrate_to_s3 #
################################################################################