FEATURE: Mark bad uploads with :invalid_url (#29640)

A "bad upload" in this context is an upload with a mismatched URL. This can happen when the S3 bucket used for uploads is changed and the upload records in the database have not been remapped correctly.
This commit is contained in:
Bianca Nenciu
2024-11-08 02:05:14 +02:00
committed by GitHub
parent 81e171070d
commit 5a00a041f1
3 changed files with 67 additions and 7 deletions

View File

@ -74,20 +74,29 @@ RSpec.describe S3Inventory do
differing_etag = Upload.find_by(etag: "defcaac0b4aca535c284e95f30d608d0")
differing_etag.update_columns(etag: "somethingelse")
differing_url = Upload.find_by(etag: "0cdc623af39cde0adb382670a6dc702a")
differing_url.update_columns(url: differing_url.url.gsub("default", "notdefault"))
output = capture_stdout { inventory.backfill_etags_and_list_missing }
expect(output).to eq(<<~TEXT)
#{differing_etag.url} has different etag
#{differing_url.url} has different url
#{@upload_1.url}
#{@no_etag.url}
3 of 5 uploads are missing
4 of 5 uploads are missing
1 of these are caused by differing etags
Null the etag column and re-run for automatic backfill
1 of these are caused by differing urls
Empty the url column and re-run for automatic backfill
TEXT
expect(Discourse.stats.get("missing_s3_uploads")).to eq(3)
expect(Discourse.stats.get("missing_s3_uploads")).to eq(4)
end
it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
differing_url = Upload.find_by(etag: "0cdc623af39cde0adb382670a6dc702a")
differing_url.update_columns(url: differing_url.url.gsub("default", "notdefault"))
expect(
Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count,
).to eq(12)
@ -96,9 +105,10 @@ RSpec.describe S3Inventory do
verification_status = Upload.pluck(:verification_status)
expect(
Upload.where(verification_status: Upload.verification_statuses[:verified]).count,
).to eq(3)
).to eq(2)
expect(Upload.with_invalid_etag_verification_status.count).to eq(2)
expect(Upload.with_invalid_url_verification_status.count).to eq(1)
expect(
Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count,
@ -198,7 +208,12 @@ RSpec.describe S3Inventory do
CSV.foreach(csv_filename, headers: false) do |row|
next if row[S3Inventory::CSV_KEY_INDEX].exclude?("default")
Fabricate(:upload, etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
Fabricate(
:upload,
url: File.join(Discourse.store.absolute_base_url, row[S3Inventory::CSV_KEY_INDEX]),
etag: row[S3Inventory::CSV_ETAG_INDEX],
updated_at: 2.days.ago,
)
end
upload = Fabricate(:upload, etag: "ETag", updated_at: 1.days.ago)