mirror of
https://github.com/discourse/discourse.git
synced 2025-05-30 15:28:37 +08:00
FIX: remove migrate_from_s3 task that silently corrupts data (#11703)
Transient errors in migration are ignored, silently corrupting data, and the migration is incomplete and misses many sources of uploads, which will lead to an incorrect expectation of independence from the remote object storage after announcing that the migration was successful, regardles of whether transient errors permanently corrupted the data. Remove this migration until such time as it is re-written to follow the same pattern has the migration to s3, moving the core logic out of the task.
This commit is contained in:

committed by
GitHub

parent
869d25b7d3
commit
2a23e54632
@ -85,131 +85,6 @@ task "uploads:backfill_shas" => :environment do
|
||||
puts "", "Done"
|
||||
end
|
||||
|
||||
################################################################################
|
||||
# migrate_from_s3 #
|
||||
################################################################################
|
||||
|
||||
task "uploads:migrate_from_s3" => :environment do
|
||||
ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
|
||||
end
|
||||
|
||||
task "uploads:batch_migrate_from_s3", [:limit] => :environment do |_, args|
|
||||
ENV["RAILS_DB"] ? migrate_from_s3(limit: args[:limit]) : migrate_all_from_s3(limit: args[:limit])
|
||||
end
|
||||
|
||||
def guess_filename(url, raw)
|
||||
begin
|
||||
uri = URI.parse("http:#{url}")
|
||||
f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)
|
||||
filename = if f.meta && f.meta["content-disposition"]
|
||||
f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence
|
||||
end
|
||||
filename ||= raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence
|
||||
filename ||= File.basename(url)
|
||||
filename
|
||||
rescue
|
||||
nil
|
||||
ensure
|
||||
f.try(:close!) rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
def migrate_all_from_s3(limit: nil)
|
||||
RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3(limit: limit) }
|
||||
end
|
||||
|
||||
def migrate_from_s3(limit: nil)
|
||||
require "file_store/s3_store"
|
||||
|
||||
# make sure S3 is disabled
|
||||
if SiteSetting.Upload.enable_s3_uploads
|
||||
puts "You must disable S3 uploads before running that task."
|
||||
exit 1
|
||||
end
|
||||
|
||||
db = RailsMultisite::ConnectionManagement.current_db
|
||||
|
||||
puts "Migrating uploads from S3 to local storage for '#{db}'..."
|
||||
|
||||
max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
|
||||
|
||||
migrate_posts = Post
|
||||
.where("user_id > 0")
|
||||
.where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%#{SiteSetting.Upload.absolute_base_url}%' OR raw LIKE '%(upload://%'")
|
||||
migrate_posts = migrate_posts.limit(limit.to_i) if limit
|
||||
|
||||
migrate_posts.find_each do |post|
|
||||
begin
|
||||
updated = false
|
||||
|
||||
post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url|
|
||||
begin
|
||||
if filename = guess_filename(url, post.raw)
|
||||
file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
|
||||
sha1 = Upload.generate_digest(file)
|
||||
origin = nil
|
||||
|
||||
existing_upload = Upload.find_by(sha1: sha1)
|
||||
if existing_upload&.url&.start_with?("//")
|
||||
filename = existing_upload.original_filename
|
||||
origin = existing_upload.origin
|
||||
existing_upload.destroy
|
||||
end
|
||||
|
||||
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
|
||||
if new_upload&.save
|
||||
updated = true
|
||||
url = new_upload.url
|
||||
end
|
||||
end
|
||||
|
||||
url
|
||||
rescue
|
||||
url
|
||||
end
|
||||
end
|
||||
|
||||
post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url|
|
||||
begin
|
||||
if sha1 = Upload.sha1_from_short_url(url)
|
||||
if upload = Upload.find_by(sha1: sha1)
|
||||
if upload.url.start_with?("//")
|
||||
file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
|
||||
filename = upload.original_filename
|
||||
origin = upload.origin
|
||||
upload.destroy
|
||||
|
||||
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
|
||||
if new_upload&.save
|
||||
updated = true
|
||||
url = new_upload.url
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
url
|
||||
rescue
|
||||
url
|
||||
end
|
||||
end
|
||||
|
||||
if updated
|
||||
post.save!
|
||||
post.rebake!
|
||||
putc "#"
|
||||
else
|
||||
putc "."
|
||||
end
|
||||
|
||||
rescue
|
||||
putc "X"
|
||||
end
|
||||
end
|
||||
|
||||
puts "Done!"
|
||||
end
|
||||
|
||||
################################################################################
|
||||
# migrate_to_s3 #
|
||||
################################################################################
|
||||
|
Reference in New Issue
Block a user