From ed00f3530621d61a272b237746b8bbb1eaea7998 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Wed, 4 Sep 2019 17:18:32 +1000 Subject: [PATCH] FEATURE: improve performance of anonymous cache This commit introduces 2 features: 1. DISCOURSE_COMPRESS_ANON_CACHE (true|false, default false): this allows you to optionally compress the anon cache body entries in Redis, can be useful for high load sites with Redis that lives on a separate server to to webs 2. DISCOURSE_ANON_CACHE_STORE_THRESHOLD (default 2), only pop entries into redis if we observe them more than N times. This avoids situations where a crawler can walk a big pile of topics and store them all in Redis never to be used. Our default anon cache time for topics is only 60 seconds. Anon cache is in place to avoid the "slashdot" effect where a single topic is hit by 100s of people in one minute. --- Gemfile | 1 + Gemfile.lock | 2 + config/discourse_defaults.conf | 12 +++++ lib/middleware/anonymous_cache.rb | 46 +++++++++++++++++-- .../middleware/anonymous_cache_spec.rb | 18 ++++++++ .../middleware/request_tracker_spec.rb | 2 + 6 files changed, 78 insertions(+), 3 deletions(-) diff --git a/Gemfile b/Gemfile index 57fd0d4d5b2..1599efa815b 100644 --- a/Gemfile +++ b/Gemfile @@ -210,6 +210,7 @@ gem 'rubyzip', require: false gem 'sshkey', require: false gem 'rchardet', require: false +gem 'lz4-ruby', require: false, platform: :mri if ENV["IMPORT"] == "1" gem 'mysql2' diff --git a/Gemfile.lock b/Gemfile.lock index aa52c66bdf9..64fd90002c2 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -174,6 +174,7 @@ GEM crass (~> 1.0.2) nokogiri (>= 1.5.9) lru_redux (1.1.0) + lz4-ruby (0.3.3) mail (2.7.1) mini_mime (>= 0.1.1) maxminddb (0.1.22) @@ -467,6 +468,7 @@ DEPENDENCIES logstash-logger logster lru_redux + lz4-ruby mail maxminddb memory_profiler diff --git a/config/discourse_defaults.conf b/config/discourse_defaults.conf index b841af9e613..2b0bd3149b3 100644 --- a/config/discourse_defaults.conf +++ b/config/discourse_defaults.conf @@ -266,3 +266,15 @@ enable_js_error_reporting = true # Having a high number here is very low risk. Regular jobs are limited in scope and scale. mini_scheduler_workers = 5 +# enable compression on anonymous cache redis entries +# this slightly increases the cost of storing cache entries but can make it much +# cheaper to retrieve cache entries when redis is stores on a different machine to the one +# running the web +compress_anon_cache = false + +# Only store entries in redis for anonymous cache if they are observed more than N times +# for a specific key +# +# This ensures there are no pathological cases where we keep storing data in anonymous cache +# never to use it, set to 1 to store immediately, set to 0 to disable anon cache +anon_cache_store_threshold = 2 diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb index c64ae6a90de..559605c69a2 100644 --- a/lib/middleware/anonymous_cache.rb +++ b/lib/middleware/anonymous_cache.rb @@ -73,7 +73,7 @@ module Middleware end def cache_key - @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}" + @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}#{GlobalSetting.compress_anon_cache}" end def theme_ids @@ -86,6 +86,10 @@ module Middleware end end + def cache_key_count + @cache_key_count ||= "#{cache_key}_count" + end + def cache_key_body @cache_key_body ||= "#{cache_key}_body" end @@ -154,8 +158,26 @@ module Middleware !!(!has_auth_cookie? && get? && no_cache_bypass) end + def compress(val) + if val && GlobalSetting.compress_anon_cache + require "lz4-ruby" if !defined?(LZ4) + LZ4::compress(val) + else + val + end + end + + def decompress(val) + if val && GlobalSetting.compress_anon_cache + require "lz4-ruby" if !defined?(LZ4) + LZ4::uncompress(val) + else + val + end + end + def cached(env = {}) - if body = $redis.get(cache_key_body) + if body = decompress($redis.get(cache_key_body)) if other = $redis.get(cache_key_other) other = JSON.parse(other) if req_params = other[1].delete(ADP) @@ -174,9 +196,27 @@ module Middleware # that fills it up, this avoids a herd killing you, we can probably do this using a job or redis tricks # but coordinating this is tricky def cache(result, env = {}) + return result if GlobalSetting.anon_cache_store_threshold == 0 + status, headers, response = result if status == 200 && cache_duration + + if GlobalSetting.anon_cache_store_threshold > 1 + count = $redis.eval(<<~REDIS, [cache_key_count], [cache_duration]) + local current = redis.call("incr", KEYS[1]) + redis.call("expire",KEYS[1],ARGV[1]) + return current + REDIS + + # technically lua will cast for us, but might as well be + # prudent here, hence the to_i + if count.to_i < GlobalSetting.anon_cache_store_threshold + headers["X-Discourse-Cached"] = "skip" + return [status, headers, response] + end + end + headers_stripped = headers.dup.delete_if { |k, _| ["Set-Cookie", "X-MiniProfiler-Ids"].include? k } headers_stripped["X-Discourse-Cached"] = "true" parts = [] @@ -191,7 +231,7 @@ module Middleware } end - $redis.setex(cache_key_body, cache_duration, parts.join) + $redis.setex(cache_key_body, cache_duration, compress(parts.join)) $redis.setex(cache_key_other, cache_duration, [status, headers_stripped].to_json) headers["X-Discourse-Cached"] = "store" diff --git a/spec/components/middleware/anonymous_cache_spec.rb b/spec/components/middleware/anonymous_cache_spec.rb index 9ec7682f128..74fe7f31745 100644 --- a/spec/components/middleware/anonymous_cache_spec.rb +++ b/spec/components/middleware/anonymous_cache_spec.rb @@ -129,6 +129,24 @@ describe Middleware::AnonymousCache::Helper do crawler.clear_cache end + before do + global_setting :anon_cache_store_threshold, 1 + end + + it "compresses body on demand" do + global_setting :compress_anon_cache, true + + payload = "x" * 1000 + helper.cache([200, { "HELLO" => "WORLD" }, [payload]]) + + helper = new_helper("ANON_CACHE_DURATION" => 10) + expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]]) + + # depends on i7z implementation, but lets assume it is stable unless we discover + # otherwise + expect($redis.get(helper.cache_key_body).length).to eq(16) + end + it "handles brotli switching" do helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]]) diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb index ca0f613db84..394e26013c4 100644 --- a/spec/components/middleware/request_tracker_spec.rb +++ b/spec/components/middleware/request_tracker_spec.rb @@ -285,7 +285,9 @@ describe Middleware::RequestTracker do } tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params)) + expect(@data[:cache]).to eq("skip") + tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params)) expect(@data[:cache]).to eq("store") tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60))