From 379384ae1e839da347ffc13ef3ed562f7a75f5da Mon Sep 17 00:00:00 2001
From: Sam
Date: Wed, 18 Jul 2018 12:33:06 +1000
Subject: [PATCH] FIX: never block /srv/status which is used for health checks

This route is also very cheap, so blocking it is not required.
It is still rate limited (and so on) elsewhere.

---
 lib/middleware/anonymous_cache.rb      |  1 +
 .../middleware/anonymous_cache_spec.rb | 24 +++++++++++++------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb
index 4cd9cd53586..93620be0c9d 100644
--- a/lib/middleware/anonymous_cache.rb
+++ b/lib/middleware/anonymous_cache.rb
@@ -25,6 +25,7 @@ module Middleware
       @request.get? &&
       !@request.xhr? &&
       !@request.path.ends_with?('robots.txt') &&
+      !@request.path.ends_with?('srv/status') &&
       CrawlerDetection.is_blocked_crawler?(@request.env['HTTP_USER_AGENT'])
     end
 
diff --git a/spec/components/middleware/anonymous_cache_spec.rb b/spec/components/middleware/anonymous_cache_spec.rb
index 6c153115ab3..ce0b01e5765 100644
--- a/spec/components/middleware/anonymous_cache_spec.rb
+++ b/spec/components/middleware/anonymous_cache_spec.rb
@@ -173,35 +173,35 @@ describe Middleware::AnonymousCache::Helper do
     it "applies whitelisted_crawler_user_agents correctly" do
       SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
       }
 
       expect(@status).to eq(200)
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)'
       }
 
       expect(@status).to eq(403)
 
-      get '/srv/status', headers: non_crawler
+      get '/', headers: non_crawler
       expect(@status).to eq(200)
     end
 
     it "applies blacklisted_crawler_user_agents correctly" do
       SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
 
-      get '/srv/status', headers: non_crawler
+      get '/', headers: non_crawler
       expect(@status).to eq(200)
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
       }
 
       expect(@status).to eq(403)
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)'
       }
 
@@ -218,13 +218,23 @@ describe Middleware::AnonymousCache::Helper do
       expect(@status).to eq(200)
     end
 
-    it "blocked crawlers shouldn't log page views" do
+    it "should never block srv/status" do
       SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
 
       get '/srv/status', headers: {
         'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
       }
 
+      expect(@status).to eq(200)
+    end
+
+    it "blocked crawlers shouldn't log page views" do
+      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
+
+      get '/', headers: {
+        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
+      }
+
       expect(@env["discourse.request_tracker.skip"]).to eq(true)
     end
 
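Reviewer note, not part of the patch: for context, here is a sketch of how the
crawler-blocking predicate in lib/middleware/anonymous_cache.rb reads once the
change above is applied. The hunk does not show the enclosing method's name,
so `blocked_crawler?` below is an assumption; the body itself comes straight
from the hunk's context lines.

    # Post-patch sketch (method name assumed; body taken from the diff context).
    # Returns true only for non-XHR GETs from blocked crawlers, and now
    # explicitly exempts the health-check route alongside robots.txt.
    def blocked_crawler?
      @request.get? &&
      !@request.xhr? &&
      !@request.path.ends_with?('robots.txt') &&
      !@request.path.ends_with?('srv/status') &&  # new: health checks always pass
      CrawlerDetection.is_blocked_crawler?(@request.env['HTTP_USER_AGENT'])
    end

With this guard, a load balancer probing GET /srv/status reaches the (cheap)
status route regardless of its User-Agent, while blacklisted crawlers still
receive a 403 on ordinary routes, which is exactly what the new and updated
specs assert.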