diff --git a/Gemfile b/Gemfile index 57fd0d4d5b2..1599efa815b 100644 --- a/Gemfile +++ b/Gemfile @@ -210,6 +210,7 @@ gem 'rubyzip', require: false gem 'sshkey', require: false gem 'rchardet', require: false +gem 'lz4-ruby', require: false, platform: :mri if ENV["IMPORT"] == "1" gem 'mysql2' diff --git a/Gemfile.lock b/Gemfile.lock index aa52c66bdf9..64fd90002c2 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -174,6 +174,7 @@ GEM crass (~> 1.0.2) nokogiri (>= 1.5.9) lru_redux (1.1.0) + lz4-ruby (0.3.3) mail (2.7.1) mini_mime (>= 0.1.1) maxminddb (0.1.22) @@ -467,6 +468,7 @@ DEPENDENCIES logstash-logger logster lru_redux + lz4-ruby mail maxminddb memory_profiler diff --git a/config/discourse_defaults.conf b/config/discourse_defaults.conf index b841af9e613..2b0bd3149b3 100644 --- a/config/discourse_defaults.conf +++ b/config/discourse_defaults.conf @@ -266,3 +266,15 @@ enable_js_error_reporting = true # Having a high number here is very low risk. Regular jobs are limited in scope and scale. 
mini_scheduler_workers = 5 +# enable compression on anonymous cache redis entries +# this slightly increases the cost of storing cache entries but can make it much +# cheaper to retrieve cache entries when redis is stored on a different machine to the one +# running the web server +compress_anon_cache = false + +# Only store entries in redis for anonymous cache if they are observed more than N times +# for a specific key +# +# This ensures there are no pathological cases where we keep storing data in anonymous cache +# never to use it. Set to 1 to store immediately, set to 0 to disable anon cache +anon_cache_store_threshold = 2 diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb index c64ae6a90de..559605c69a2 100644 --- a/lib/middleware/anonymous_cache.rb +++ b/lib/middleware/anonymous_cache.rb @@ -73,7 +73,7 @@ module Middleware end def cache_key - @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}" + @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}#{GlobalSetting.compress_anon_cache}" end def theme_ids @@ -86,6 +86,10 @@ module Middleware end end + def cache_key_count + @cache_key_count ||= "#{cache_key}_count" + end + def cache_key_body @cache_key_body ||= "#{cache_key}_body" end @@ -154,8 +158,26 @@ module Middleware !!(!has_auth_cookie? && get? 
&& no_cache_bypass) end + def compress(val) + if val && GlobalSetting.compress_anon_cache + require "lz4-ruby" if !defined?(LZ4) + LZ4::compress(val) + else + val + end + end + + def decompress(val) + if val && GlobalSetting.compress_anon_cache + require "lz4-ruby" if !defined?(LZ4) + LZ4::uncompress(val) + else + val + end + end + def cached(env = {}) - if body = $redis.get(cache_key_body) + if body = decompress($redis.get(cache_key_body)) if other = $redis.get(cache_key_other) other = JSON.parse(other) if req_params = other[1].delete(ADP) @@ -174,9 +196,27 @@ module Middleware # that fills it up, this avoids a herd killing you, we can probably do this using a job or redis tricks # but coordinating this is tricky def cache(result, env = {}) + return result if GlobalSetting.anon_cache_store_threshold == 0 + status, headers, response = result if status == 200 && cache_duration + + if GlobalSetting.anon_cache_store_threshold > 1 + count = $redis.eval(<<~REDIS, [cache_key_count], [cache_duration]) + local current = redis.call("incr", KEYS[1]) + redis.call("expire",KEYS[1],ARGV[1]) + return current + REDIS + + # technically lua will cast for us, but might as well be + # prudent here, hence the to_i + if count.to_i < GlobalSetting.anon_cache_store_threshold + headers["X-Discourse-Cached"] = "skip" + return [status, headers, response] + end + end + headers_stripped = headers.dup.delete_if { |k, _| ["Set-Cookie", "X-MiniProfiler-Ids"].include? 
k } headers_stripped["X-Discourse-Cached"] = "true" parts = [] @@ -191,7 +231,7 @@ module Middleware } end - $redis.setex(cache_key_body, cache_duration, parts.join) + $redis.setex(cache_key_body, cache_duration, compress(parts.join)) $redis.setex(cache_key_other, cache_duration, [status, headers_stripped].to_json) headers["X-Discourse-Cached"] = "store" diff --git a/spec/components/middleware/anonymous_cache_spec.rb b/spec/components/middleware/anonymous_cache_spec.rb index 9ec7682f128..74fe7f31745 100644 --- a/spec/components/middleware/anonymous_cache_spec.rb +++ b/spec/components/middleware/anonymous_cache_spec.rb @@ -129,6 +129,24 @@ describe Middleware::AnonymousCache::Helper do crawler.clear_cache end + before do + global_setting :anon_cache_store_threshold, 1 + end + + it "compresses body on demand" do + global_setting :compress_anon_cache, true + + payload = "x" * 1000 + helper.cache([200, { "HELLO" => "WORLD" }, [payload]]) + + helper = new_helper("ANON_CACHE_DURATION" => 10) + expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]]) + + # depends on the lz4 implementation, but let's assume it is stable unless we discover + # otherwise + expect($redis.get(helper.cache_key_body).length).to eq(16) + end + it "handles brotli switching" do helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]]) diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb index ca0f613db84..394e26013c4 100644 --- a/spec/components/middleware/request_tracker_spec.rb +++ b/spec/components/middleware/request_tracker_spec.rb @@ -285,7 +285,9 @@ describe Middleware::RequestTracker do } tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params)) + expect(@data[:cache]).to eq("skip") + tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params)) 
expect(@data[:cache]).to eq("store") tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60))