diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake index 97973557254..79a966b1c54 100644 --- a/lib/tasks/uploads.rake +++ b/lib/tasks/uploads.rake @@ -545,6 +545,10 @@ task "uploads:sync_s3_acls" => :environment do end end +def secure_upload_rebake_warning + puts "This task may mark a lot of posts for rebaking. To get through these quicker, the max_old_rebakes_per_15_minutes global setting (current value #{GlobalSetting.max_old_rebakes_per_15_minutes}) should be changed and the rebake_old_posts_count site setting (current value #{SiteSetting.rebake_old_posts_count}) increased as well. Do you want to proceed? (y/n)" +end + # NOTE: This needs to be updated to use the _first_ UploadReference # record for each upload to determine security, and do not mark things # as secure if the first record is something public e.g. a site setting. @@ -558,6 +562,9 @@ task "uploads:disable_secure_uploads" => :environment do exit 1 end + secure_upload_rebake_warning + exit 1 if STDIN.gets.chomp.downcase != "y" + puts "Disabling secure upload and resetting uploads to not secure in #{db}...", "" SiteSetting.secure_uploads = false @@ -583,8 +590,7 @@ task "uploads:disable_secure_uploads" => :environment do secure_upload_ids, ) adjust_acls(secure_upload_ids) - post_rebake_errors = rebake_upload_posts(post_ids_to_rebake) - log_rebake_errors(post_rebake_errors) + mark_upload_posts_for_rebake(post_ids_to_rebake) puts "", "Rebaking and uploading complete!", "" end @@ -612,6 +618,9 @@ task "uploads:secure_upload_analyse_and_update" => :environment do exit 1 end + secure_upload_rebake_warning + exit 1 if STDIN.gets.chomp.downcase != "y" + puts "Analyzing security for uploads in #{db}...", "" all_upload_ids_changed, post_ids_to_rebake = nil Upload.transaction do @@ -627,11 +636,17 @@ task "uploads:secure_upload_analyse_and_update" => :environment do # Simply mark all uploads linked to posts secure if login_required because # no anons will be able to access them; however if secure_uploads_pm_only is # true then login_required will not mark other uploads secure. + puts "", + "Site is login_required, and secure_uploads_pm_only is false. Continuing with strategy to mark all post uploads as secure.", + "" post_ids_to_rebake, all_upload_ids_changed = mark_all_as_secure_login_required else # Otherwise only mark uploads linked to posts either: # * In secure categories or PMs if !SiteSetting.secure_uploads_pm_only # * In PMs if SiteSetting.secure_uploads_pm_only + puts "", + "Site is not login_required. Continuing with normal strategy to mark uploads in secure contexts as secure.", + "" post_ids_to_rebake, all_upload_ids_changed = update_specific_upload_security_no_login_required end @@ -639,8 +654,14 @@ task "uploads:secure_upload_analyse_and_update" => :environment do # Enqueue rebakes AFTER upload transaction complete, so there is no race condition # between updating the DB and the rebakes occurring. - post_rebake_errors = rebake_upload_posts(post_ids_to_rebake) - log_rebake_errors(post_rebake_errors) + # + # This is done asynchronously by changing the baked_version to NULL on + # affected posts and relying on Post.rebake_old in the PeriodicalUpdates + # job. To speed this up, these levers can be adjusted: + # + # * SiteSetting.rebake_old_posts_count + # * GlobalSetting.max_old_rebakes_per_15_minutes + mark_upload_posts_for_rebake(post_ids_to_rebake) # Also do this AFTER upload transaction complete so we don't end up with any # errors leaving ACLs in a bad state (the ACL sync task can be run to fix any @@ -677,7 +698,7 @@ def mark_all_as_secure_login_required security_last_changed_reason = 'upload security rake task mark as secure', security_last_changed_at = NOW() FROM upl - WHERE uploads.id = upl.upload_id AND NOT uploads.secure + WHERE uploads.id = upl.upload_id RETURNING uploads.id SQL puts "Marked #{post_upload_ids_marked_secure.count} upload(s) as secure because login_required is true.", @@ -687,7 +708,7 @@ def mark_all_as_secure_login_required SET secure = false, security_last_changed_reason = 'upload security rake task mark as not secure', security_last_changed_at = NOW() - WHERE id NOT IN (?) AND uploads.secure + WHERE id NOT IN (?) RETURNING uploads.id SQL puts "Marked #{upload_ids_marked_not_secure.count} upload(s) as not secure because they are not linked to posts.", @@ -797,24 +818,20 @@ def update_uploads_access_control_post SQL end -def rebake_upload_posts(post_ids_to_rebake) +def mark_upload_posts_for_rebake(post_ids_to_rebake) posts_to_rebake = Post.where(id: post_ids_to_rebake) post_rebake_errors = [] - puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", "" - begin - i = 0 - posts_to_rebake.each do |post| - RakeHelpers.print_status_with_label("Rebaking posts.....", i, posts_to_rebake.length) - post.rebake! - i += 1 - end - - RakeHelpers.print_status_with_label("Rebaking complete! ", i, posts_to_rebake.length) - puts "" - rescue => e - post_rebake_errors << e.message - end - post_rebake_errors + puts "", + "Marking #{posts_to_rebake.length} posts with affected uploads for rebake. Every 15 minutes, a batch of these will be enqueued for rebaking.", + "" + posts_to_rebake.update_all(baked_version: nil) + File.write( + "secure_upload_analyse_and_update_posts_for_rebake.json", + MultiJson.dump({ post_ids: post_ids_to_rebake }), + ) + puts "", + "Post IDs written to secure_upload_analyse_and_update_posts_for_rebake.json for reference", + "" end def inline_uploads(post) diff --git a/spec/tasks/uploads_spec.rb b/spec/tasks/uploads_spec.rb index 2b251b22ae6..72325a06699 100644 --- a/spec/tasks/uploads_spec.rb +++ b/spec/tasks/uploads_spec.rb @@ -5,25 +5,26 @@ RSpec.describe "tasks/uploads" do Rake::Task.clear Discourse::Application.load_tasks SiteSetting.authorized_extensions += "|pdf" + STDIN.stubs(:gets).returns("y\n") end describe "uploads:secure_upload_analyse_and_update" do - let!(:uploads) { [multi_post_upload1, upload1, upload2, upload3] } - let(:multi_post_upload1) { Fabricate(:upload_s3) } - let(:upload1) { Fabricate(:upload_s3) } - let(:upload2) { Fabricate(:upload_s3) } - let(:upload3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") } + let!(:uploads) { [multi_post_upload_1, upload_1, upload_2, upload_3] } + let(:multi_post_upload_1) { Fabricate(:upload_s3) } + let(:upload_1) { Fabricate(:upload_s3) } + let(:upload_2) { Fabricate(:upload_s3) } + let(:upload_3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") } - let!(:post1) { Fabricate(:post) } - let!(:post2) { Fabricate(:post) } - let!(:post3) { Fabricate(:post) } + let!(:post_1) { Fabricate(:post) } + let!(:post_2) { Fabricate(:post) } + let!(:post_3) { Fabricate(:post) } before do - UploadReference.create(target: post1, upload: multi_post_upload1) - UploadReference.create(target: post2, upload: multi_post_upload1) - UploadReference.create(target: post2, upload: upload1) - UploadReference.create(target: post3, upload: upload2) - UploadReference.create(target: post3, upload: upload3) + UploadReference.create(target: post_1, upload: multi_post_upload_1) + UploadReference.create(target: post_2, upload: multi_post_upload_1) + UploadReference.create(target: post_2, upload: upload_1) + UploadReference.create(target: post_3, upload: upload_2) + UploadReference.create(target: post_3, upload: upload_3) end def invoke_task @@ -48,105 +49,121 @@ RSpec.describe "tasks/uploads" do it "sets an access_control_post for each post upload, using the first linked post in the case of multiple links" do invoke_task - expect(multi_post_upload1.reload.access_control_post).to eq(post1) - expect(upload1.reload.access_control_post).to eq(post2) - expect(upload2.reload.access_control_post).to eq(post3) - expect(upload3.reload.access_control_post).to eq(post3) + expect(multi_post_upload_1.reload.access_control_post).to eq(post_1) + expect(upload_1.reload.access_control_post).to eq(post_2) + expect(upload_2.reload.access_control_post).to eq(post_3) + expect(upload_3.reload.access_control_post).to eq(post_3) end context "when login_required" do before { SiteSetting.login_required = true } - it "sets everything attached to a post as secure and rebakes all those posts" do - freeze_time - - post1.update_columns(baked_at: 1.week.ago) - post2.update_columns(baked_at: 1.week.ago) - post3.update_columns(baked_at: 1.week.ago) + after do + if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json") + File.delete("secure_upload_analyse_and_update_posts_for_rebake.json") + end + end + it "sets everything attached to a post as secure" do invoke_task - expect(post1.reload.baked_at).not_to eq_time(1.week.ago) - expect(post2.reload.baked_at).not_to eq_time(1.week.ago) - expect(post3.reload.baked_at).not_to eq_time(1.week.ago) - expect(upload2.reload.secure).to eq(true) - expect(upload1.reload.secure).to eq(true) - expect(upload3.reload.secure).to eq(true) + expect(upload_2.reload.secure).to eq(true) + expect(upload_1.reload.secure).to eq(true) + expect(upload_3.reload.secure).to eq(true) + end + + it "writes a file with the post IDs to rebake" do + invoke_task + + expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq( + true, + ) + expect( + JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json")), + ).to eq({ "post_ids" => [post_1.id, post_2.id, post_3.id] }) + end + + it "sets the baked_version to NULL for affected posts" do + invoke_task + + expect(post_1.reload.baked_version).to eq(nil) + expect(post_2.reload.baked_version).to eq(nil) + expect(post_3.reload.baked_version).to eq(nil) end context "when secure_uploads_pm_only" do before { SiteSetting.secure_uploads_pm_only = true } it "only sets everything attached to a private message post as secure and rebakes all those posts" do - freeze_time - - post1.update_columns(baked_at: 1.week.ago) - post2.update_columns(baked_at: 1.week.ago) - post3.update_columns(baked_at: 1.week.ago) - post3.topic.update(archetype: "private_message", category: nil) + post_3.topic.update(archetype: "private_message", category: nil) invoke_task - expect(post1.reload.baked_at).to eq_time(1.week.ago) - expect(post2.reload.baked_at).to eq_time(1.week.ago) - expect(post3.reload.baked_at).not_to eq_time(1.week.ago) - expect(upload1.reload.secure).to eq(false) - expect(upload2.reload.secure).to eq(true) - expect(upload3.reload.secure).to eq(true) + expect(post_1.reload.baked_version).not_to eq(nil) + expect(post_2.reload.baked_version).not_to eq(nil) + expect(post_3.reload.baked_version).to eq(nil) + expect(upload_1.reload.secure).to eq(false) + expect(upload_2.reload.secure).to eq(true) + expect(upload_3.reload.secure).to eq(true) end end end + context "when secure_uploads_pm_only" do + before { SiteSetting.secure_uploads_pm_only = true } + + it "sets a non-PM post upload to not secure" do + upload_2.update!(secure: true) + invoke_task + expect(upload_2.reload.secure).to eq(false) + end + end + it "sets the uploads that are media and attachments in the read restricted topic category to secure" do - post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group))) + post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group))) invoke_task - expect(upload2.reload.secure).to eq(true) - expect(upload1.reload.secure).to eq(false) - expect(upload3.reload.secure).to eq(true) + expect(upload_2.reload.secure).to eq(true) + expect(upload_1.reload.secure).to eq(false) + expect(upload_3.reload.secure).to eq(true) end it "sets the upload in the PM topic to secure" do - post3.topic.update(archetype: "private_message", category: nil) + post_3.topic.update(archetype: "private_message", category: nil) invoke_task - expect(upload2.reload.secure).to eq(true) - expect(upload1.reload.secure).to eq(false) + expect(upload_2.reload.secure).to eq(true) + expect(upload_1.reload.secure).to eq(false) end - it "rebakes the posts attached for uploads that change secure status" do - post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group))) - freeze_time - - post1.update_columns(baked_at: 1.week.ago) - post2.update_columns(baked_at: 1.week.ago) - post3.update_columns(baked_at: 1.week.ago) + it "sets the baked_version version to NULL for the posts attached for uploads that change secure status" do + post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group))) invoke_task - expect(post1.reload.baked_at).to eq_time(1.week.ago) - expect(post2.reload.baked_at).to eq_time(1.week.ago) - expect(post3.reload.baked_at).not_to eq_time(1.week.ago) + expect(post_1.reload.baked_version).not_to eq(nil) + expect(post_2.reload.baked_version).not_to eq(nil) + expect(post_3.reload.baked_version).to eq(nil) end context "for an upload that is already secure and does not need to change" do before do - post3.topic.update(archetype: "private_message", category: nil) - upload2.update(access_control_post: post3) - upload2.update_secure_status - upload3.update(access_control_post: post3) - upload3.update_secure_status + post_3.topic.update(archetype: "private_message", category: nil) + upload_2.update(access_control_post: post_3) + upload_2.update_secure_status + upload_3.update(access_control_post: post_3) + upload_3.update_secure_status end it "does not rebake the associated post" do freeze_time - post3.update_columns(baked_at: 1.week.ago) + post_3.update_columns(baked_at: 1.week.ago) invoke_task - expect(post3.reload.baked_at).to eq_time(1.week.ago) + expect(post_3.reload.baked_at).to eq_time(1.week.ago) end it "does not attempt to update the acl" do - FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload2).never + FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload_2).never invoke_task end end @@ -179,19 +196,25 @@ RSpec.describe "tasks/uploads" do uploads.each { |upload| stub_upload(upload) } SiteSetting.secure_uploads = true - UploadReference.create(target: post1, upload: upload1) - UploadReference.create(target: post1, upload: upload2) - UploadReference.create(target: post2, upload: upload3) - UploadReference.create(target: post2, upload: upload4) + UploadReference.create(target: post_1, upload: upload_1) + UploadReference.create(target: post_1, upload: upload_2) + UploadReference.create(target: post_2, upload: upload_3) + UploadReference.create(target: post_2, upload: upload4) end - let!(:uploads) { [upload1, upload2, upload3, upload4, upload5] } - let(:post1) { Fabricate(:post) } - let(:post2) { Fabricate(:post) } - let(:upload1) { Fabricate(:upload_s3, secure: true, access_control_post: post1) } - let(:upload2) { Fabricate(:upload_s3, secure: true, access_control_post: post1) } - let(:upload3) { Fabricate(:upload_s3, secure: true, access_control_post: post2) } - let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post2) } + after do + if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json") + File.delete("secure_upload_analyse_and_update_posts_for_rebake.json") + end + end + + let!(:uploads) { [upload_1, upload_2, upload_3, upload4, upload5] } + let(:post_1) { Fabricate(:post) } + let(:post_2) { Fabricate(:post) } + let(:upload_1) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) } + let(:upload_2) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) } + let(:upload_3) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) } + let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) } let(:upload5) { Fabricate(:upload_s3, secure: false) } it "disables the secure upload setting" do @@ -201,24 +224,29 @@ RSpec.describe "tasks/uploads" do it "updates all secure uploads to secure: false" do invoke_task - [upload1, upload2, upload3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) } + [upload_1, upload_2, upload_3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) } end - it "rebakes the associated posts" do - freeze_time - - post1.update_columns(baked_at: 1.week.ago) - post2.update_columns(baked_at: 1.week.ago) + it "sets the baked_version to NULL for affected posts" do invoke_task - expect(post1.reload.baked_at).not_to eq_time(1.week.ago) - expect(post2.reload.baked_at).not_to eq_time(1.week.ago) + expect(post_1.reload.baked_version).to eq(nil) + expect(post_2.reload.baked_version).to eq(nil) + end + + it "writes a file with the post IDs to rebake" do + invoke_task + + expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq(true) + expect(JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json"))).to eq( + { "post_ids" => [post_1.id, post_2.id] }, + ) end it "updates the affected ACLs via the SyncAclsForUploads job" do invoke_task expect(Jobs::SyncAclsForUploads.jobs.last["args"][0]["upload_ids"]).to match_array( - [upload1.id, upload2.id, upload3.id, upload4.id], + [upload_1.id, upload_2.id, upload_3.id, upload4.id], ) end end