
When the server gets overloaded and lots of requests start queuing, the server will attempt to shed load by returning 429 errors on background requests. The client can flag a request as background by setting the `Discourse-Background` header to `true`.

Out of the box we shed load when the queue time goes above 0.5 seconds. The only request we shed at the moment is the request to load up a new post when someone posts to a topic. We can extend this as we go with a more general pattern on the client.

Prior to this change, rate limiting would "break" the post stream: suggested topics would vanish and users would have to scroll the page to see more posts in the topic.

The server needs this protection for cases where tons of clients are navigated to a topic and a new post is made, which can lead to a self-inflicted denial of service if enough clients are viewing the topic. Due to the internal security design of Discourse it is hard for a large number of clients to share a channel where we would pass the full post body via the message bus.

This also renames (and deprecates) triggerNewPostInStream to triggerNewPostsInStream, which allows us to load a batch of new posts cleanly, so the controller can keep track of a backlog.

Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com>
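As a rough illustration (not part of this change), here is a minimal sketch of how an HTTP client might flag a request as background and honor the shed-load response. The URL is a placeholder, and the `extras.wait_seconds` field in the 429 JSON body matches what the middleware returns in the specs below:

    require "net/http"
    require "json"

    # Hypothetical endpoint; any background poll would work the same way.
    uri = URI("https://forum.example.com/t/1234/posts.json")

    request = Net::HTTP::Get.new(uri)
    # Flag the request as background so the server may shed it under load.
    request["Discourse-Background"] = "true"

    response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
      http.request(request)
    end

    if response.code.to_i == 429
      # The middleware answers with JSON telling us how long to back off.
      wait = JSON.parse(response.body).dig("extras", "wait_seconds")
      sleep(wait) if wait
      # ... retry the request after waiting ...
    end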
358 lines
10 KiB
Ruby
# frozen_string_literal: true

require "rails_helper"

describe Middleware::AnonymousCache do
  let(:middleware) { Middleware::AnonymousCache.new(lambda { |_| [200, {}, []] }) }

  def env(opts = {})
    Rack::MockRequest.env_for("http://test.com/path?bla=1").merge(opts)
  end

  describe Middleware::AnonymousCache::Helper do
    def new_helper(opts = {})
      Middleware::AnonymousCache::Helper.new(env(opts))
    end

context "cachable?" do
|
|
it "true by default" do
|
|
expect(new_helper.cacheable?).to eq(true)
|
|
end
|
|
|
|
it "is false for non GET" do
|
|
expect(new_helper("ANON_CACHE_DURATION" => 10, "REQUEST_METHOD" => "POST").cacheable?).to eq(false)
|
|
end
|
|
|
|
it "is false if it has an auth cookie" do
|
|
expect(new_helper("HTTP_COOKIE" => "jack=1; _t=#{"1" * 32}; jill=2").cacheable?).to eq(false)
|
|
end
|
|
end
|
|
|
|
context "per theme cache" do
|
|
it "handles theme keys" do
|
|
theme = Fabricate(:theme, user_selectable: true)
|
|
|
|
with_bad_theme_key = new_helper("HTTP_COOKIE" => "theme_ids=abc").cache_key
|
|
with_no_theme_key = new_helper().cache_key
|
|
|
|
expect(with_bad_theme_key).to eq(with_no_theme_key)
|
|
|
|
with_good_theme_key = new_helper("HTTP_COOKIE" => "theme_ids=#{theme.id}").cache_key
|
|
|
|
expect(with_good_theme_key).not_to eq(with_no_theme_key)
|
|
end
|
|
end
|
|
|
|
context "with header-based locale locale" do
|
|
it "handles different languages" do
|
|
# Normally does not check the language header
|
|
french1 = new_helper("HTTP_ACCEPT_LANGUAGE" => "fr").cache_key
|
|
french2 = new_helper("HTTP_ACCEPT_LANGUAGE" => "FR").cache_key
|
|
english = new_helper("HTTP_ACCEPT_LANGUAGE" => SiteSetting.default_locale).cache_key
|
|
none = new_helper.cache_key
|
|
|
|
expect(none).to eq(french1)
|
|
expect(none).to eq(french2)
|
|
expect(none).to eq(english)
|
|
|
|
SiteSetting.allow_user_locale = true
|
|
SiteSetting.set_locale_from_accept_language_header = true
|
|
|
|
french1 = new_helper("HTTP_ACCEPT_LANGUAGE" => "fr").cache_key
|
|
french2 = new_helper("HTTP_ACCEPT_LANGUAGE" => "FR").cache_key
|
|
english = new_helper("HTTP_ACCEPT_LANGUAGE" => SiteSetting.default_locale).cache_key
|
|
none = new_helper.cache_key
|
|
|
|
expect(none).to eq(english)
|
|
expect(french1).to eq(french2)
|
|
expect(french1).not_to eq(none)
|
|
end
|
|
end
|
|
|
|
context "cached" do
|
|
let!(:helper) do
|
|
new_helper("ANON_CACHE_DURATION" => 10)
|
|
end
|
|
|
|
let!(:crawler) do
|
|
new_helper("ANON_CACHE_DURATION" => 10, "HTTP_USER_AGENT" => "AdsBot-Google (+http://www.google.com/adsbot.html)")
|
|
end
|
|
|
|
after do
|
|
helper.clear_cache
|
|
crawler.clear_cache
|
|
end
|
|
|
|
before do
|
|
global_setting :anon_cache_store_threshold, 1
|
|
end
|
|
|
|
it "compresses body on demand" do
|
|
global_setting :compress_anon_cache, true
|
|
|
|
payload = "x" * 1000
|
|
helper.cache([200, { "HELLO" => "WORLD" }, [payload]])
|
|
|
|
helper = new_helper("ANON_CACHE_DURATION" => 10)
|
|
expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]])
|
|
|
|
# depends on i7z implementation, but lets assume it is stable unless we discover
|
|
# otherwise
|
|
expect(Discourse.redis.get(helper.cache_key_body).length).to eq(16)
|
|
end
|
|
|
|
it "handles brotli switching" do
|
|
helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])
|
|
|
|
helper = new_helper("ANON_CACHE_DURATION" => 10)
|
|
expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]])
|
|
|
|
helper = new_helper("ANON_CACHE_DURATION" => 10, "HTTP_ACCEPT_ENCODING" => "gz, br")
|
|
expect(helper.cached).to eq(nil)
|
|
end
|
|
|
|
it "returns cached data for cached requests" do
|
|
helper.is_mobile = true
|
|
expect(helper.cached).to eq(nil)
|
|
helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])
|
|
|
|
helper = new_helper("ANON_CACHE_DURATION" => 10)
|
|
helper.is_mobile = true
|
|
expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]])
|
|
|
|
expect(crawler.cached).to eq(nil)
|
|
crawler.cache([200, { "HELLO" => "WORLD" }, ["hello ", "world"]])
|
|
expect(crawler.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello world"]])
|
|
end
|
|
end
|
|
end
|
|
|
|
  context "background request rate limit" do
    it "will rate limit background requests" do
      app = Middleware::AnonymousCache.new(
        lambda do |env|
          [200, {}, ["ok"]]
        end
      )

      global_setting :background_requests_max_queue_length, 1

      env = {
        "HTTP_COOKIE" => "_t=#{SecureRandom.hex}",
        "HOST" => "site.com",
        "REQUEST_METHOD" => "GET",
        "REQUEST_URI" => "/somewhere/rainbow",
        "REQUEST_QUEUE_SECONDS" => 2.1,
        "rack.input" => StringIO.new
      }

      # a long-queued request that is not flagged as background is not shed
      env["REQUEST_QUEUE_SECONDS"] = 2

      status, _ = app.call(env.dup)
      expect(status).to eq(200)

      env["HTTP_DISCOURSE_BACKGROUND"] = "true"

      status, headers, body = app.call(env.dup)
      expect(status).to eq(429)
      expect(headers["content-type"]).to eq("application/json; charset=utf-8")
      json = JSON.parse(body.join)
      expect(json["extras"]["wait_seconds"]).to be > 4.9

      env["REQUEST_QUEUE_SECONDS"] = 0.5

      status, _ = app.call(env.dup)
      expect(status).to eq(200)
    end
  end

  context "force_anonymous!" do
    before do
      RateLimiter.enable
    end

    after do
      RateLimiter.disable
    end

    it "will revert to anonymous once we reach the limit" do
      RateLimiter.clear_all!

      is_anon = false

      app = Middleware::AnonymousCache.new(
        lambda do |env|
          is_anon = env["HTTP_COOKIE"].nil?
          [200, {}, ["ok"]]
        end
      )

      global_setting :force_anonymous_min_per_10_seconds, 2
      global_setting :force_anonymous_min_queue_seconds, 1

      env = {
        "HTTP_COOKIE" => "_t=#{SecureRandom.hex}",
        "HOST" => "site.com",
        "REQUEST_METHOD" => "GET",
        "REQUEST_URI" => "/somewhere/rainbow",
        "REQUEST_QUEUE_SECONDS" => 2.1,
        "rack.input" => StringIO.new
      }

      is_anon = false
      app.call(env.dup)
      expect(is_anon).to eq(false)

      is_anon = false
      app.call(env.dup)
      expect(is_anon).to eq(false)

      is_anon = false
      app.call(env.dup)
      expect(is_anon).to eq(true)

      is_anon = false
      _status, headers, _body = app.call(env.dup)
      expect(is_anon).to eq(true)
      expect(headers["Set-Cookie"]).to eq("dosp=1; Path=/")

      # tricky: even a 50ms queue delay will still trigger the protection
      # once it has been tripped
      env["REQUEST_QUEUE_SECONDS"] = 0.05
      is_anon = false

      app.call(env.dup)
      expect(is_anon).to eq(true)

      is_anon = false
      env["REQUEST_QUEUE_SECONDS"] = 0.01

      app.call(env.dup)
      expect(is_anon).to eq(false)
    end
  end

  context "invalid request payload" do
    it "returns 413 for GET request with payload" do
      status, _, _ = middleware.call(env.tap do |environment|
        environment[Rack::RACK_INPUT].write("test")
      end)

      expect(status).to eq(413)
    end
  end

context "crawler blocking" do
|
|
let :non_crawler do
|
|
{
|
|
"HTTP_USER_AGENT" =>
|
|
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
|
|
}
|
|
end
|
|
|
|
def get(path, options)
|
|
@env = env({
|
|
"REQUEST_URI" => path,
|
|
"PATH_INFO" => path,
|
|
"REQUEST_PATH" => path
|
|
}.merge(options[:headers]))
|
|
@status, @response_header, @response = middleware.call(@env)
|
|
end
|
|
|
|
it "applies allowed_crawler_user_agents correctly" do
|
|
SiteSetting.allowed_crawler_user_agents = 'Googlebot'
|
|
|
|
get '/', headers: {
|
|
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
|
|
}
|
|
|
|
expect(@status).to eq(200)
|
|
|
|
get '/', headers: {
|
|
'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)'
|
|
}
|
|
|
|
expect(@status).to eq(403)
|
|
expect(@response).to be_an(Array)
|
|
|
|
get '/', headers: non_crawler
|
|
expect(@status).to eq(200)
|
|
end
|
|
|
|
it "doesn't block api requests" do
|
|
SiteSetting.allowed_crawler_user_agents = 'Googlebot'
|
|
api_key = Fabricate(:api_key)
|
|
|
|
get "/latest?api_key=#{api_key.key}&api_username=system", headers: {
|
|
"QUERY_STRING" => "api_key=#{api_key.key}&api_username=system"
|
|
}
|
|
expect(@status).to eq(200)
|
|
end
|
|
|
|
it "applies blocked_crawler_user_agents correctly" do
|
|
SiteSetting.blocked_crawler_user_agents = 'Googlebot'
|
|
|
|
get '/', headers: non_crawler
|
|
expect(@status).to eq(200)
|
|
|
|
get '/', headers: {
|
|
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
|
|
}
|
|
|
|
expect(@status).to eq(403)
|
|
|
|
get '/', headers: {
|
|
'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)'
|
|
}
|
|
|
|
expect(@status).to eq(200)
|
|
end
|
|
|
|
it "should never block robots.txt" do
|
|
SiteSetting.blocked_crawler_user_agents = 'Googlebot'
|
|
|
|
get '/robots.txt', headers: {
|
|
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
|
|
}
|
|
|
|
expect(@status).to eq(200)
|
|
end
|
|
|
|
it "should never block srv/status" do
|
|
SiteSetting.blocked_crawler_user_agents = 'Googlebot'
|
|
|
|
get '/srv/status', headers: {
|
|
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
|
|
}
|
|
|
|
expect(@status).to eq(200)
|
|
end
|
|
|
|
it "blocked crawlers shouldn't log page views" do
|
|
SiteSetting.blocked_crawler_user_agents = 'Googlebot'
|
|
|
|
get '/', headers: {
|
|
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
|
|
}
|
|
|
|
expect(@env["discourse.request_tracker.skip"]).to eq(true)
|
|
end
|
|
|
|
it "blocks json requests" do
|
|
SiteSetting.blocked_crawler_user_agents = 'Googlebot'
|
|
|
|
get '/srv/status.json', headers: {
|
|
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
|
|
}
|
|
|
|
expect(@status).to eq(403)
|
|
end
|
|
end
|
|
|
|
end
|