discourse/lib/tasks/s3.rake
David Taylor e7450cc6da DEV: Migrate from sprockets to propshaft for assets (#32475)
We are no longer using any of the transpilation/bundling features of
Sprockets. We only use it to serve assets in development, and then
collect & fingerprint them in production. This commit switches us to use
the more modern "Propshaft" gem for that functionality.

Propshaft is much simpler than Sprockets. Instead of taking a
combination of paths + "precompile" list, Propshaft simply assumes all
files in the configured directory are required in production. Previously
we had some base paths configured quite high in the directory structure,
and then only precompiled selected assets within the directory. That's
no longer possible, so this commit refactors those places (mostly
plugin-related) to use dedicated directories under
`app/assets/generated/`.
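
As a rough illustration (not the code from this commit), a dedicated generated-assets
directory is registered through the standard Rails asset paths, which Propshaft reads;
the `app/assets/generated/` location comes from this commit, while the wiring below is
an assumed sketch:

# config/application.rb (illustrative sketch only)
module Discourse
  class Application < Rails::Application
    # Propshaft fingerprints and serves every file it finds on config.assets.paths,
    # so pointing it at a dedicated directory publishes exactly what is generated there.
    config.assets.paths << Rails.root.join("app/assets/generated")
  end
end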

Another difference is that Propshaft applies asset digests in
development as well as production. This is great for caching & dev/prod
consistency, but does mean some small changes were required in tests.

We previously had some freedom-patches applied to Sprockets. Some of
those had to be ported across to Propshaft. We now have three patches:

1. Skip adding digest hashes to webpack-generated chunks (which are
already digested, and referred to from other js files)

2. Avoid raising errors for missing assets in test mode. We don't always
compile assets before running basic RSpec tests.

3. Maintain relative paths for sourcemap URLs, so that files don't need
to be recompiled depending on their CDN path

Significant refactors are made to the `assets.rake` and `s3.rake` tasks,
which rely on implementation details of Sprockets/Propshaft.
2025-04-30 08:59:32 +01:00


# frozen_string_literal: true
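
# Map a local asset path to the S3 key used for its precompressed variant,
# e.g. "assets/foo.js" -> "assets/foo.br.js" (brotli) or "assets/foo.gz.js" (gzip).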
def brotli_s3_path(path)
  ext = File.extname(path)
  "#{path[0..-ext.length]}br#{ext}"
end

def gzip_s3_path(path)
  ext = File.extname(path)
  "#{path[0..-ext.length]}gz#{ext}"
end

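# Memoized set of every object key currently stored under "assets/" in the S3 bucket.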
def existing_assets
  @existing_assets ||= Set.new(helper.list("assets/").map(&:key))
end

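# Prepend the configured bucket folder path (if any) so local paths match stored S3 keys.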
def prefix_s3_path(path)
  path = File.join(helper.s3_bucket_folder_path, path) if helper.s3_bucket_folder_path
  path
end

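# Skip uploads for assets that already exist on S3, unless FORCE_S3_UPLOADS is set.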
def should_skip?(path)
  return false if ENV["FORCE_S3_UPLOADS"]
  existing_assets.include?(prefix_s3_path(path))
end

def upload(path, remote_path, content_type, content_encoding = nil, logger:)
  options = {
    cache_control: "max-age=31556952, public, immutable",
    content_type: content_type,
    acl: SiteSetting.s3_use_acls ? "public-read" : nil,
  }

  options[:content_encoding] = content_encoding if content_encoding

  if should_skip?(remote_path)
    logger << "Skipping: #{remote_path}\n"
  else
    logger << "Uploading: #{remote_path}\n"
    File.open(path) { |file| helper.upload(file, remote_path, options) }
  end
end

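# When USE_DB_S3_CONFIG is set, the S3 helper is built from database-backed site settings
# rather than the global configuration.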
def use_db_s3_config
  ENV["USE_DB_S3_CONFIG"]
end

def helper
  @helper ||= S3Helper.build_from_config(use_db_s3_config: use_db_s3_config)
end

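# Enumerate every digested asset on the Propshaft load path as
# [local_path, s3_path, content_type(, content_encoding)] tuples, including any
# precompressed .br/.gz variants that exist on disk.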
def assets
  load_path = Rails.application.assets.load_path

  results = Set.new

  load_path.assets.each do |asset|
    fullpath = "#{Rails.root}/public/assets/#{asset.digested_path}"

    content_type = MiniMime.lookup_by_filename(fullpath)&.content_type
    content_type ||= "application/json" if fullpath.end_with?(".map")
    next unless content_type

    asset_path = "assets/#{asset.digested_path}"
    results << [fullpath, asset_path, content_type]

    if File.exist?(fullpath + ".br")
      results << [fullpath + ".br", brotli_s3_path(asset_path), content_type, "br"]
    end

    if File.exist?(fullpath + ".gz")
      results << [fullpath + ".gz", gzip_s3_path(asset_path), content_type, "gzip"]
    end
  end

  results.to_a
end

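# Just the remote S3 paths for the current local assets.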
def asset_paths
  Set.new(assets.map { |_, asset_path| asset_path })
end

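# Abort unless S3 is configured via GlobalSetting or USE_DB_S3_CONFIG.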
def ensure_s3_configured!
  unless GlobalSetting.use_s3? || use_db_s3_config
    STDERR.puts "ERROR: Ensure S3 is configured in config/discourse.conf or environment vars"
    exit 1
  end
end

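# Reapply the public-read ACL to every upload and optimized image stored on S3.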
task "s3:correct_acl" => :environment do
ensure_s3_configured!
if !SiteSetting.s3_use_acls
$stderr.puts "Not correcting ACLs as the site is configured to not use ACLs"
return
end
puts "ensuring public-read is set on every upload and optimized image"
i = 0
base_url = Discourse.store.absolute_base_url
objects = Upload.pluck(:id, :url).map { |array| array << :upload }
objects.concat(OptimizedImage.pluck(:id, :url).map { |array| array << :optimized_image })
puts "#{objects.length} objects found"
objects.each do |id, url, type|
i += 1
if !url.start_with?(base_url)
puts "Skipping #{type} #{id} since it is not stored on s3, url is #{url}"
else
begin
key = url[(base_url.length + 1)..-1]
object = Discourse.store.s3_helper.object(key)
object.acl.put(acl: "public-read")
rescue => e
puts "Skipping #{type} #{id} url is #{url} #{e}"
end
end
puts "#{i} done" if i % 100 == 0
end
end
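# Rewrite every upload and optimized image in place to set immutable cache-control headers.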
task "s3:correct_cachecontrol" => :environment do
ensure_s3_configured!
puts "ensuring cache-control is set on every upload and optimized image"
i = 0
base_url = Discourse.store.absolute_base_url
cache_control = "max-age=31556952, public, immutable"
objects = Upload.pluck(:id, :url).map { |array| array << :upload }
objects.concat(OptimizedImage.pluck(:id, :url).map { |array| array << :optimized_image })
puts "#{objects.length} objects found"
objects.each do |id, url, type|
i += 1
if !url.start_with?(base_url)
puts "Skipping #{type} #{id} since it is not stored on s3, url is #{url}"
else
begin
key = url[(base_url.length + 1)..-1]
object = Discourse.store.s3_helper.object(key)
object.copy_from(
copy_source: "#{object.bucket_name}/#{object.key}",
acl: SiteSetting.s3_use_acls ? "public-read" : nil,
cache_control: cache_control,
content_type: object.content_type,
content_disposition: object.content_disposition,
metadata_directive: "REPLACE",
)
rescue => e
puts "Skipping #{type} #{id} url is #{url} #{e}"
end
end
puts "#{i} done" if i % 100 == 0
end
end
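# Install the CORS rulesets needed for assets, backups, and direct uploads.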
task "s3:ensure_cors_rules" => :environment do
ensure_s3_configured!
puts "Installing CORS rules..."
result = S3CorsRulesets.sync(use_db_s3_config: use_db_s3_config)
if !result
puts "skipping"
next
end
puts "Assets rules status: #{result[:assets_rules_status]}."
puts "Backup rules status: #{result[:backup_rules_status]}."
puts "Direct upload rules status: #{result[:direct_upload_rules_status]}."
end
task "s3:upload_assets" => [:environment, "s3:ensure_cors_rules"] do
logger = Logger.new(STDOUT)
assets.each { |asset| upload(*asset, logger:) }
end
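# Delete S3 objects under "assets/" that no longer correspond to a current local asset.
# Refuses to run unless every current asset is already present on S3.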
task "s3:expire_missing_assets" => :environment do
ensure_s3_configured!
puts "Checking for stale S3 assets..."
if Discourse.readonly_mode?
puts "Discourse is in readonly mode. Skipping s3 asset deletion in case this is a read-only mirror of a live site."
exit 0
end
assets_to_delete = existing_assets.dup
# Check that all current assets are uploaded, and remove them from the to_delete list
asset_paths.each do |current_asset_path|
uploaded = assets_to_delete.delete?(prefix_s3_path(current_asset_path))
if !uploaded
puts "A current asset does not exist on S3 (#{current_asset_path}). Aborting cleanup task."
exit 1
end
end
if assets_to_delete.size > 0
puts "Found #{assets_to_delete.size} assets to delete..."
assets_to_delete.each do |to_delete|
if !to_delete.start_with?(prefix_s3_path("assets/"))
# Sanity check, this should never happen
raise "Attempted to delete a non-/asset S3 path (#{to_delete}). Aborting"
end
end
assets_to_delete.each_slice(500) do |slice|
message = "Deleting #{slice.size} assets...\n"
message += slice.join("\n").indent(2)
puts message
helper.delete_objects(slice)
puts "... done"
end
else
puts "No stale assets found"
end
end