PERF: Split queries when cleaning uploads.

This reduces the number of scans that the db has to do in the query
to fetch orphan uploads. Futheremore, we were not batching our
records which bloats memory.
This commit is contained in:
Guo Xiang Tan
2016-07-01 15:22:30 +08:00
parent 4657d22eb0
commit bd07658a37
7 changed files with 111 additions and 31 deletions

View File

@ -1,25 +1,35 @@
module Jobs module Jobs
class CleanUpUploads < Jobs::Scheduled class CleanUpUploads < Jobs::Scheduled
every 1.hour every 1.hour
def execute(args) def execute(args)
return unless SiteSetting.clean_up_uploads? return unless SiteSetting.clean_up_uploads?
ignore_urls = []
ignore_urls |= UserProfile.uniq.select(:profile_background).where("profile_background IS NOT NULL AND profile_background != ''").pluck(:profile_background)
ignore_urls |= UserProfile.uniq.select(:card_background).where("card_background IS NOT NULL AND card_background != ''").pluck(:card_background)
ignore_urls |= Category.uniq.select(:logo_url).where("logo_url IS NOT NULL AND logo_url != ''").pluck(:logo_url)
ignore_urls |= Category.uniq.select(:background_url).where("background_url IS NOT NULL AND background_url != ''").pluck(:background_url)
ids = []
ids |= PostUpload.uniq.select(:upload_id).pluck(:upload_id)
ids |= User.uniq.select(:uploaded_avatar_id).where("uploaded_avatar_id IS NOT NULL").pluck(:uploaded_avatar_id)
ids |= UserAvatar.uniq.select(:gravatar_upload_id).where("gravatar_upload_id IS NOT NULL").pluck(:gravatar_upload_id)
grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max
Upload.where("created_at < ?", grace_period.hour.ago) result = Upload.where("created_at < ?", grace_period.hour.ago)
.where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours") .where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours")
.where("id NOT IN (SELECT upload_id FROM post_uploads WHERE upload_id IS NOT NULL)")
.where("id NOT IN (SELECT uploaded_avatar_id FROM users WHERE uploaded_avatar_id IS NOT NULL)") if !ids.empty?
.where("id NOT IN (SELECT gravatar_upload_id FROM user_avatars WHERE gravatar_upload_id IS NOT NULL)") result = result.where("id NOT IN (?)", ids)
.where("url NOT IN (SELECT profile_background FROM user_profiles WHERE LENGTH(COALESCE(profile_background, '')) > 0)")
.where("url NOT IN (SELECT card_background FROM user_profiles WHERE LENGTH(COALESCE(card_background, '')) > 0)")
.where("url NOT IN (SELECT logo_url FROM categories WHERE LENGTH(COALESCE(logo_url, '')) > 0)")
.where("url NOT IN (SELECT background_url FROM categories WHERE LENGTH(COALESCE(background_url, '')) > 0)")
.destroy_all
end end
if !ignore_urls.empty?
result = result.where("url NOT IN (?)", ignore_urls)
end end
result.find_each { |upload| upload.destroy }
end
end
end end

View File

@ -120,9 +120,9 @@ describe CookedPostProcessor do
it "generates overlay information" do it "generates overlay information" do
cpp.post_process_images cpp.post_process_images
expect(cpp.html).to match_html '<p><div class="lightbox-wrapper"><a data-download-href="/uploads/default/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" href="/uploads/default/1/1234567890123456.jpg" class="lightbox" title="logo.png"><img src="/uploads/default/optimized/1X/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_1_690x788.png" width="690" height="788"><div class="meta"> expect(cpp.html).to match_html "<p><div class=\"lightbox-wrapper\"><a data-download-href=\"/uploads/default/#{upload.sha1}\" href=\"/uploads/default/1/1234567890123456.jpg\" class=\"lightbox\" title=\"logo.png\"><img src=\"/uploads/default/optimized/1X/#{upload.sha1}_1_690x788.png\" width=\"690\" height=\"788\"><div class=\"meta\">
<span class="filename">logo.png</span><span class="informations">1750x2000 1.21 KB</span><span class="expand"></span> <span class=\"filename\">logo.png</span><span class=\"informations\">1750x2000 1.21 KB</span><span class=\"expand\"></span>
</div></a></div></p>' </div></a></div></p>"
expect(cpp).to be_dirty expect(cpp).to be_dirty
end end
@ -153,9 +153,9 @@ describe CookedPostProcessor do
it "generates overlay information" do it "generates overlay information" do
cpp.post_process_images cpp.post_process_images
expect(cpp.html).to match_html '<p><div class="lightbox-wrapper"><a data-download-href="/subfolder/uploads/default/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" href="/subfolder/uploads/default/1/1234567890123456.jpg" class="lightbox" title="logo.png"><img src="/subfolder/uploads/default/optimized/1X/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_1_690x788.png" width="690" height="788"><div class="meta"> expect(cpp.html).to match_html "<p><div class=\"lightbox-wrapper\"><a data-download-href=\"/subfolder/uploads/default/#{upload.sha1}\" href=\"/subfolder/uploads/default/1/1234567890123456.jpg\" class=\"lightbox\" title=\"logo.png\"><img src=\"/subfolder/uploads/default/optimized/1X/#{upload.sha1}_1_690x788.png\" width=\"690\" height=\"788\"><div class=\"meta\">
<span class="filename">logo.png</span><span class="informations">1750x2000 1.21 KB</span><span class="expand"></span> <span class=\"filename\">logo.png</span><span class=\"informations\">1750x2000 1.21 KB</span><span class=\"expand\"></span>
</div></a></div></p>' </div></a></div></p>"
expect(cpp).to be_dirty expect(cpp).to be_dirty
end end
@ -181,9 +181,9 @@ describe CookedPostProcessor do
it "generates overlay information" do it "generates overlay information" do
cpp.post_process_images cpp.post_process_images
expect(cpp.html).to match_html '<p><div class="lightbox-wrapper"><a data-download-href="/uploads/default/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" href="/uploads/default/1/1234567890123456.jpg" class="lightbox" title="WAT"><img src="/uploads/default/optimized/1X/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_1_690x788.png" title="WAT" width="690" height="788"><div class="meta"> expect(cpp.html).to match_html "<p><div class=\"lightbox-wrapper\"><a data-download-href=\"/uploads/default/#{upload.sha1}\" href=\"/uploads/default/1/1234567890123456.jpg\" class=\"lightbox\" title=\"WAT\"><img src=\"/uploads/default/optimized/1X/#{upload.sha1}_1_690x788.png\" title=\"WAT\" width=\"690\" height=\"788\"><div class=\"meta\">
<span class="filename">WAT</span><span class="informations">1750x2000 1.21 KB</span><span class="expand"></span> <span class=\"filename\">WAT</span><span class=\"informations\">1750x2000 1.21 KB</span><span class=\"expand\"></span>
</div></a></div></p>' </div></a></div></p>"
expect(cpp).to be_dirty expect(cpp).to be_dirty
end end

View File

@ -14,7 +14,7 @@ describe FileStore::LocalStore do
it "returns a relative url" do it "returns a relative url" do
store.expects(:copy_file) store.expects(:copy_file)
expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/) expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+#{upload.sha1}\.png/)
end end
end end
@ -23,7 +23,7 @@ describe FileStore::LocalStore do
it "returns a relative url" do it "returns a relative url" do
store.expects(:copy_file) store.expects(:copy_file)
expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/) expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/)
end end
end end

View File

@ -23,7 +23,7 @@ describe FileStore::S3Store do
it "returns an absolute schemaless url" do it "returns an absolute schemaless url" do
s3_helper.expects(:upload) s3_helper.expects(:upload)
expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/) expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+#{upload.sha1}\.png/)
end end
end end
@ -32,7 +32,7 @@ describe FileStore::S3Store do
it "returns an absolute schemaless url" do it "returns an absolute schemaless url" do
s3_helper.expects(:upload) s3_helper.expects(:upload)
expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/) expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/)
end end
end end

View File

@ -1,11 +1,11 @@
Fabricator(:upload) do Fabricator(:upload) do
user user
sha1 "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } }
original_filename "logo.png" original_filename "logo.png"
filesize 1234 filesize 1234
width 100 width 100
height 200 height 200
url "/uploads/default/1/1234567890123456.png" url { sequence(:url) { |n| "/uploads/default/#{n}/1234567890123456.png" } }
end end
Fabricator(:attachment, from: :upload) do Fabricator(:attachment, from: :upload) do

View File

@ -0,0 +1,3 @@
Fabricator(:user_avatar) do
user
end

View File

@ -3,16 +3,14 @@ require 'rails_helper'
require_dependency 'jobs/scheduled/clean_up_uploads' require_dependency 'jobs/scheduled/clean_up_uploads'
describe Jobs::CleanUpUploads do describe Jobs::CleanUpUploads do
before do before do
Upload.destroy_all Upload.destroy_all
SiteSetting.clean_up_uploads = true SiteSetting.clean_up_uploads = true
SiteSetting.clean_orphan_uploads_grace_period_hours = 1 SiteSetting.clean_orphan_uploads_grace_period_hours = 1
@upload = Fabricate(:upload, created_at: 2.hours.ago)
end end
it "deletes orphan uploads" do it "deletes orphan uploads" do
Fabricate(:upload, created_at: 2.hours.ago)
expect(Upload.count).to be(1) expect(Upload.count).to be(1)
Jobs::CleanUpUploads.new.execute(nil) Jobs::CleanUpUploads.new.execute(nil)
@ -20,4 +18,73 @@ describe Jobs::CleanUpUploads do
expect(Upload.count).to be(0) expect(Upload.count).to be(0)
end end
it "does not delete profile background uploads" do
profile_background_upload = Fabricate(:upload, created_at: 2.hours.ago)
UserProfile.last.update_attributes!(profile_background: profile_background_upload.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: profile_background_upload.id)).to eq(profile_background_upload)
end
it "does not delete card background uploads" do
card_background_upload = Fabricate(:upload, created_at: 2.hours.ago)
UserProfile.last.update_attributes!(card_background: card_background_upload.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: card_background_upload.id)).to eq(card_background_upload)
end
it "does not delete category logo uploads" do
category_logo_upload = Fabricate(:upload, created_at: 2.hours.ago)
category = Fabricate(:category, logo_url: category_logo_upload.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: category_logo_upload.id)).to eq(category_logo_upload)
end
it "does not delete category background url uploads" do
category_background_url = Fabricate(:upload, created_at: 2.hours.ago)
category = Fabricate(:category, background_url: category_background_url.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: category_background_url.id)).to eq(category_background_url)
end
it "does not delete post uploads" do
upload = Fabricate(:upload, created_at: 2.hours.ago)
post = Fabricate(:post, uploads: [upload])
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: upload.id)).to eq(upload)
end
it "does not delete user uploaded avatar" do
upload = Fabricate(:upload, created_at: 2.hours.ago)
user = Fabricate(:user, uploaded_avatar: upload)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: upload.id)).to eq(upload)
end
it "does not delete user gravatar" do
upload = Fabricate(:upload, created_at: 2.hours.ago)
user = Fabricate(:user, user_avatar: Fabricate(:user_avatar, gravatar_upload: upload))
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: upload.id)).to eq(upload)
end
end end