From 379384ae1e839da347ffc13ef3ed562f7a75f5da Mon Sep 17 00:00:00 2001
From: Sam
Date: Wed, 18 Jul 2018 12:33:06 +1000
Subject: [PATCH] FIX: never block /srv/status which is used for health checks

This route is also very cheap, so blocking it is not required.
It is still rate limited (and so on) elsewhere.

---
 lib/middleware/anonymous_cache.rb      |  1 +
 .../middleware/anonymous_cache_spec.rb | 24 +++++++++++++------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb
index 4cd9cd53586..93620be0c9d 100644
--- a/lib/middleware/anonymous_cache.rb
+++ b/lib/middleware/anonymous_cache.rb
@@ -25,6 +25,7 @@ module Middleware
       @request.get? &&
       !@request.xhr? &&
       !@request.path.ends_with?('robots.txt') &&
+      !@request.path.ends_with?('srv/status') &&
       CrawlerDetection.is_blocked_crawler?(@request.env['HTTP_USER_AGENT'])
     end
 
diff --git a/spec/components/middleware/anonymous_cache_spec.rb b/spec/components/middleware/anonymous_cache_spec.rb
index 6c153115ab3..ce0b01e5765 100644
--- a/spec/components/middleware/anonymous_cache_spec.rb
+++ b/spec/components/middleware/anonymous_cache_spec.rb
@@ -173,35 +173,35 @@ describe Middleware::AnonymousCache::Helper do
     it "applies whitelisted_crawler_user_agents correctly" do
       SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
       }
 
       expect(@status).to eq(200)
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)'
       }
 
       expect(@status).to eq(403)
 
-      get '/srv/status', headers: non_crawler
+      get '/', headers: non_crawler
       expect(@status).to eq(200)
     end
 
     it "applies blacklisted_crawler_user_agents correctly" do
       SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
 
-      get '/srv/status', headers: non_crawler
+      get '/', headers: non_crawler
       expect(@status).to eq(200)
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
       }
 
       expect(@status).to eq(403)
 
-      get '/srv/status', headers: {
+      get '/', headers: {
         'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)'
       }
 
@@ -218,13 +218,23 @@ describe Middleware::AnonymousCache::Helper do
       expect(@status).to eq(200)
     end
 
-    it "blocked crawlers shouldn't log page views" do
+    it "should never block srv/status" do
       SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
 
       get '/srv/status', headers: {
         'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
       }
 
+      expect(@status).to eq(200)
+    end
+
+    it "blocked crawlers shouldn't log page views" do
+      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
+
+      get '/', headers: {
+        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
+      }
+
       expect(@env["discourse.request_tracker.skip"]).to eq(true)
     end
 
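Reviewer note, not part of the patch: for context, here is a sketch of how the
crawler-blocking predicate in lib/middleware/anonymous_cache.rb reads once the
change above is applied. The hunk does not show the enclosing method's name,
so `blocked_crawler?` below is an assumption; the body itself comes straight
from the hunk's context lines.

    # Post-patch sketch (method name assumed; body taken from the diff context).
    # Returns true only for non-XHR GETs from blocked crawlers, and now
    # explicitly exempts the health-check route alongside robots.txt.
    def blocked_crawler?
      @request.get? &&
      !@request.xhr? &&
      !@request.path.ends_with?('robots.txt') &&
      !@request.path.ends_with?('srv/status') &&  # new: health checks always pass
      CrawlerDetection.is_blocked_crawler?(@request.env['HTTP_USER_AGENT'])
    end

With this guard, a load balancer probing GET /srv/status reaches the (cheap)
status route regardless of its User-Agent, while blacklisted crawlers still
receive a 403 on ordinary routes, which is exactly what the new and updated
specs assert.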