FIX: move crawler blocking to app controller
We need access to site settings per multisite instance; they are not yet available when we attempt to read them in the request tracker middleware, so the crawler blocking has to happen in the application controller.
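For orientation, here is a minimal sketch (not the actual Discourse implementation) of what controller-level crawler blocking can look like once per-site settings are resolvable. The setting names mirror the ones exercised by the spec below; `block_crawlers_if_required`, `looks_like_crawler?`, and the pipe-delimited setting format are assumptions introduced for illustration only.

    # Sketch only: assumes a Rails app where SiteSetting already resolves
    # values for the current multisite instance by the time a controller runs.
    class ApplicationController < ActionController::Base
      before_action :block_crawlers_if_required

      private

      # Hypothetical filter; Discourse's real implementation differs.
      def block_crawlers_if_required
        agent = request.user_agent.to_s

        # Assume the settings are pipe-delimited lists of user agent fragments.
        whitelist = SiteSetting.whitelisted_crawler_user_agents.to_s.split('|')
        blacklist = SiteSetting.blacklisted_crawler_user_agents.to_s.split('|')

        blocked =
          if whitelist.any?
            # With a whitelist, only listed crawlers may pass; ordinary
            # browsers are still allowed through.
            looks_like_crawler?(agent) && whitelist.none? { |w| agent.include?(w) }
          else
            blacklist.any? { |b| agent.include?(b) }
          end

        head :forbidden if blocked
      end

      # Hypothetical heuristic standing in for proper crawler detection.
      def looks_like_crawler?(agent)
        agent.match?(/bot|crawl|spider/i)
      end
    end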
@@ -32,4 +32,72 @@ RSpec.describe ApplicationController do
      end
    end
  end

  context "crawler blocking" do
    let :non_crawler do
      {
        "HTTP_USER_AGENT" =>
          "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
      }
    end

    it "applies whitelisted_crawler_user_agents correctly" do
      SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'

      get '/srv/status', headers: {
        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
      }

      expect(response.status).to eq(200)

      get '/srv/status', headers: {
        'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)'
      }

      expect(response.status).to eq(403)

      get '/srv/status', headers: non_crawler
      expect(response.status).to eq(200)
    end

    it "applies blacklisted_crawler_user_agents correctly" do
      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

      get '/srv/status', headers: non_crawler
      expect(response.status).to eq(200)

      get '/srv/status', headers: {
        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
      }

      expect(response.status).to eq(403)

      get '/srv/status', headers: {
        'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)'
      }

      expect(response.status).to eq(200)
    end

    it "blocked crawlers shouldn't log page views" do
      ApplicationRequest.clear_cache!
      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
      expect {
        get '/srv/status', headers: {
          'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
        }
        ApplicationRequest.write_cache!
      }.to_not change { ApplicationRequest.count }
    end

    it "blocks json requests" do
      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

      get '/srv/status.json', headers: {
        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
      }

      expect(response.status).to eq(403)
    end
  end
end