mirror of
https://github.com/discourse/discourse.git
synced 2025-06-03 19:39:30 +08:00
FIX: move crawler blocking into anon cache
This refinement of the previous fix moves the crawler blocking into the anonymous cache. This ensures we never poison the cache incorrectly when blocking crawlers.
This commit is contained in:
@ -33,71 +33,4 @@ RSpec.describe ApplicationController do
|
||||
end
|
||||
end
|
||||
|
||||
context "crawler blocking" do
  # A browser-like user agent that must never be treated as a crawler.
  let(:non_crawler) do
    {
      "HTTP_USER_AGENT" =>
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
    }
  end

  it "applies whitelisted_crawler_user_agents correctly" do
    SiteSetting.whitelisted_crawler_user_agents = "Googlebot"

    # A crawler matching the whitelist is allowed through.
    get "/srv/status", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
    expect(response.status).to eq(200)

    # Any crawler NOT on the whitelist is rejected.
    get "/srv/status", headers: { "HTTP_USER_AGENT" => "Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)" }
    expect(response.status).to eq(403)

    # Ordinary browsers are unaffected by the whitelist.
    get "/srv/status", headers: non_crawler
    expect(response.status).to eq(200)
  end

  it "applies blacklisted_crawler_user_agents correctly" do
    SiteSetting.blacklisted_crawler_user_agents = "Googlebot"

    # Ordinary browsers are unaffected by the blacklist.
    get "/srv/status", headers: non_crawler
    expect(response.status).to eq(200)

    # A crawler matching the blacklist is rejected.
    get "/srv/status", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
    expect(response.status).to eq(403)

    # Crawlers not on the blacklist are still allowed through.
    get "/srv/status", headers: { "HTTP_USER_AGENT" => "Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)" }
    expect(response.status).to eq(200)
  end

  it "blocked crawlers shouldn't log page views" do
    ApplicationRequest.clear_cache!
    SiteSetting.blacklisted_crawler_user_agents = "Googlebot"

    # A blocked request must not increment the page-view counters,
    # even after the request cache is flushed to the database.
    expect {
      get "/srv/status", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
      ApplicationRequest.write_cache!
    }.to_not change { ApplicationRequest.count }
  end

  it "blocks json requests" do
    SiteSetting.blacklisted_crawler_user_agents = "Googlebot"

    # Blocking applies to JSON endpoints as well, not just HTML pages.
    get "/srv/status.json", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
    expect(response.status).to eq(403)
  end
end
|
||||
|
||||
end
|
||||
|
Reference in New Issue
Block a user