FIX: S3Inventory to ignore files older than last backup restore date (#27166)

This commit updates `S3Inventory#files` to ignore S3 inventory files
which have a `last_modified` timestamp which are not at least 2 days
older than `BackupMetadata.last_restore_date` timestamp.

This check was previously only in `Jobs::EnsureS3UploadsExistence` but
`S3Inventory` can also be used via Rake tasks so this protection needs
to be in `S3Inventory` and not in the scheduled job.
This commit is contained in:
Alan Guo Xiang Tan
2024-05-24 10:54:06 +08:00
committed by GitHub
parent a4c5f85b10
commit df16ab0758
5 changed files with 77 additions and 72 deletions

View File

@ -6,11 +6,12 @@ require "csv"
class S3Inventory
attr_reader :type, :model, :inventory_date
CSV_KEY_INDEX ||= 1
CSV_ETAG_INDEX ||= 2
INVENTORY_PREFIX ||= "inventory"
INVENTORY_VERSION ||= "1"
INVENTORY_LAG ||= 2.days
CSV_KEY_INDEX = 1
CSV_ETAG_INDEX = 2
INVENTORY_PREFIX = "inventory"
INVENTORY_VERSION = "1"
INVENTORY_LAG = 2.days
WAIT_AFTER_RESTORE_DAYS = 2
def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil)
@s3_helper = s3_helper
@ -41,11 +42,13 @@ class S3Inventory
download_and_decompress_files if !@preloaded_inventory_file
multisite_prefix = Discourse.store.upload_path
ActiveRecord::Base.transaction do
begin
connection.exec(
"CREATE TEMP TABLE #{table_name}(url text UNIQUE, etag text, PRIMARY KEY(etag, url))",
)
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
for_each_inventory_row do |row|
key = row[CSV_KEY_INDEX]
@ -258,17 +261,28 @@ class S3Inventory
def files
return if @preloaded_inventory_file
@files ||=
begin
symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
return [] if symlink_file.blank?
if BackupMetadata.last_restore_date.present? &&
(symlink_file.last_modified - WAIT_AFTER_RESTORE_DAYS.days) <
BackupMetadata.last_restore_date
return []
end
@inventory_date = symlink_file.last_modified - INVENTORY_LAG
log "Downloading symlink file to tmp directory..."
failure_message = "Failed to download symlink file to tmp directory."
filename = File.join(tmp_directory, File.basename(symlink_file.key))
@s3_helper.download_file(symlink_file.key, filename, failure_message)
return [] if !File.exist?(filename)
File
.readlines(filename)
.map do |key|