PERF: Preload S3 inventory data for multisite clusters

David Taylor
2020-07-29 01:49:45 +01:00
parent 620c223d50
commit 16c65a94f7
4 changed files with 139 additions and 21 deletions


@@ -12,9 +12,15 @@ class S3Inventory
   INVENTORY_PREFIX ||= "inventory"
   INVENTORY_VERSION ||= "1"

-  def initialize(s3_helper, type)
+  def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil)
     @s3_helper = s3_helper

+    if preloaded_inventory_file && preloaded_inventory_date
+      # Data preloaded, so we don't need to fetch it again
+      @preloaded_inventory_file = preloaded_inventory_file
+      @inventory_date = preloaded_inventory_date
+    end
+
     if type == :upload
       @type = "original"
       @model = Upload
@@ -25,32 +31,25 @@ class S3Inventory
   end

   def backfill_etags_and_list_missing
-    if files.blank?
+    if !@preloaded_inventory_file && files.blank?
       error("Failed to list inventory from S3")
       return
     end

     DistributedMutex.synchronize("s3_inventory_list_missing_#{type}", validity: 30.minutes) do
       begin
-        files.each do |file|
-          next if File.exists?(file[:filename][0...-3])
-
-          download_inventory_file_to_tmp_directory(file)
-          decompress_inventory_file(file)
-        end
+        download_and_decompress_files if !@preloaded_inventory_file

         multisite_prefix = Discourse.store.upload_path
         ActiveRecord::Base.transaction do
           begin
             connection.exec("CREATE TEMP TABLE #{table_name}(url text UNIQUE, etag text, PRIMARY KEY(etag, url))")
             connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
-              files.each do |file|
-                CSV.foreach(file[:filename][0...-3], headers: false) do |row|
-                  key = row[CSV_KEY_INDEX]
-                  next if Rails.configuration.multisite && key.exclude?(multisite_prefix)
-                  url = File.join(Discourse.store.absolute_base_url, key)
-                  connection.put_copy_data("#{url},#{row[CSV_ETAG_INDEX]}\n")
-                end
+              for_each_inventory_row do |row|
+                key = row[CSV_KEY_INDEX]
+                next if Rails.configuration.multisite && key.exclude?(multisite_prefix)
+                url = File.join(Discourse.store.absolute_base_url, key)
+                connection.put_copy_data("#{url},#{row[CSV_ETAG_INDEX]}\n")
               end
             end

@@ -87,6 +86,16 @@ class S3Inventory
     end
   end

+  def for_each_inventory_row
+    if @preloaded_inventory_file
+      CSV.foreach(@preloaded_inventory_file) { |row| yield(row) }
+    else
+      files.each do |file|
+        CSV.foreach(file[:filename][0...-3]) { |row| yield(row) }
+      end
+    end
+  end
+
   def download_inventory_file_to_tmp_directory(file)
     return if File.exists?(file[:filename])

@@ -136,9 +145,36 @@ class S3Inventory
     )
   end

+  def prepare_for_all_sites
+    db_names = RailsMultisite::ConnectionManagement.all_dbs
+    db_files = {}
+
+    db_names.each do |db|
+      db_files[db] = Tempfile.new("#{db}-inventory.csv")
+    end
+
+    download_and_decompress_files
+    for_each_inventory_row do |row|
+      key = row[CSV_KEY_INDEX]
+      row_db = key.match(/uploads\/([^\/]+)\//)&.[](1)
+      if row_db && file = db_files[row_db]
+        file.write(row.to_csv)
+      end
+    end
+
+    db_names.each do |db|
+      db_files[db].rewind
+    end
+
+    db_files
+  ensure
+    cleanup!
+  end
+
   private

   def cleanup!
+    return if @preloaded_inventory_file
     files.each do |file|
       File.delete(file[:filename]) if File.exists?(file[:filename])
       File.delete(file[:filename][0...-3]) if File.exists?(file[:filename][0...-3])
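
For context, a minimal sketch of how a multisite caller could consume prepare_for_all_sites: one preloading pass splits the cluster-wide inventory per database, then each site runs its check against its own preloaded CSV instead of re-downloading from S3. This is illustrative only, not the rake task touched elsewhere in this commit; Discourse.store.s3_helper, each_connection yielding the db name, and reading inventory_date off the preloading instance are assumptions here.

# Sketch only: split the cluster-wide inventory once, then reuse it per site.
global_inventory = S3Inventory.new(Discourse.store.s3_helper, :upload) # s3_helper accessor assumed
preloaded_files = global_inventory.prepare_for_all_sites               # { db_name => rewound Tempfile }
inventory_date = global_inventory.inventory_date                       # assumed populated by the preloading pass

RailsMultisite::ConnectionManagement.each_connection do |db|
  preloaded = preloaded_files[db]
  next if preloaded.nil?

  S3Inventory.new(
    Discourse.store.s3_helper,
    :upload,
    preloaded_inventory_file: preloaded.path,
    preloaded_inventory_date: inventory_date
  ).backfill_etags_and_list_missing
end

The Tempfile is handed over by path because for_each_inventory_row reads the preloaded file with CSV.foreach.
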
@@ -154,6 +190,7 @@ class S3Inventory
   end

   def files
+    return if @preloaded_inventory_file
     @files ||= begin
       symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
       return [] if symlink_file.blank?
@@ -171,6 +208,15 @@ class S3Inventory
     end
   end

+  def download_and_decompress_files
+    files.each do |file|
+      next if File.exists?(file[:filename][0...-3])
+
+      download_inventory_file_to_tmp_directory(file)
+      decompress_inventory_file(file)
+    end
+  end
+
   def tmp_directory
     @tmp_directory ||= begin
       current_db = RailsMultisite::ConnectionManagement.current_db