Mirror of https://github.com/discourse/discourse.git
remove crawler blocking until multisite support
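This commit comments out the crawler blocking branch in Middleware::RequestTracker and deletes the specs that exercised it; per the commit title, blocking stays off until the crawler settings can be honored under multisite.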
@@ -175,11 +175,11 @@ class Middleware::RequestTracker
       return result
     end
 
-    if block_crawler(request)
-      log_request = false
-      result = [403, { 'Content-Type' => 'text/plain' }, ['Crawler is not allowed']]
-      return result
-    end
+    # if block_crawler(request)
+    #   log_request = false
+    #   result = [403, { 'Content-Type' => 'text/plain' }, ["Crawler is not allowed."]]
+    #   return result
+    # end
 
     env["discourse.request_tracker"] = self
     MethodProfiler.start
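For context, a minimal sketch of what the now-commented-out branch did, written as a standalone Rack middleware. Everything beyond the 403 response itself is inferred from the removed specs below; the matching rules, the empty-user-agent pass-through, and the names (CrawlerBlocker, block_crawler?) are illustrative assumptions, not Discourse's actual implementation.

# A sketch only, not Discourse's implementation: a standalone Rack middleware
# reproducing the disabled branch. Matching rules are inferred from the
# removed specs below; all names here are illustrative.
class CrawlerBlocker
  def initialize(app, whitelist: [], blacklist: [])
    @app = app
    @whitelist = whitelist # cf. SiteSetting.whitelisted_crawler_user_agents
    @blacklist = blacklist # cf. SiteSetting.blacklisted_crawler_user_agents
  end

  def call(env)
    if block_crawler?(env['HTTP_USER_AGENT'].to_s)
      # Mirrors the commented-out branch: short-circuit with a plain-text 403
      # before any page-view tracking can run.
      return [403, { 'Content-Type' => 'text/plain' }, ['Crawler is not allowed']]
    end
    @app.call(env)
  end

  private

  # Inferred from the specs: requests without a user agent always pass; when
  # a whitelist is set, any agent not on it is blocked; otherwise only agents
  # on the blacklist are blocked.
  def block_crawler?(ua)
    return false if ua.empty?
    return @whitelist.none? { |name| ua.include?(name) } if @whitelist.any?
    @blacklist.any? { |name| ua.include?(name) }
  end
end

Under those assumptions, with whitelist ['Googlebot'] a Twitterbot request gets the 403 and a Googlebot request passes through, matching the removed expectations.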
@@ -286,57 +286,4 @@ describe Middleware::RequestTracker do
     end
   end
 
-  context "crawler blocking" do
-    let :middleware do
-      app = lambda do |env|
-        [200, {}, ['OK']]
-      end
-
-      Middleware::RequestTracker.new(app)
-    end
-
-    def expect_success_response(status, _, response)
-      expect(status).to eq(200)
-      expect(response).to eq(['OK'])
-    end
-
-    def expect_blocked_response(status, _, response)
-      expect(status).to eq(403)
-      expect(response).to eq(['Crawler is not allowed'])
-    end
-
-    it "applies whitelisted_crawler_user_agents correctly" do
-      SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'
-      expect_success_response(*middleware.call(env))
-      expect_blocked_response(*middleware.call(env('HTTP_USER_AGENT' => 'Twitterbot')))
-      expect_success_response(*middleware.call(env('HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)')))
-      expect_blocked_response(*middleware.call(env('HTTP_USER_AGENT' => 'DiscourseAPI Ruby Gem 0.19.0')))
-    end
-
-    it "applies blacklisted_crawler_user_agents correctly" do
-      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-      expect_success_response(*middleware.call(env))
-      expect_blocked_response(*middleware.call(env('HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)')))
-      expect_success_response(*middleware.call(env('HTTP_USER_AGENT' => 'Twitterbot')))
-      expect_success_response(*middleware.call(env('HTTP_USER_AGENT' => 'DiscourseAPI Ruby Gem 0.19.0')))
-    end
-
-    it "blocked crawlers shouldn't log page views" do
-      ApplicationRequest.clear_cache!
-      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-      expect {
-        middleware.call(env('HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'))
-        ApplicationRequest.write_cache!
-      }.to_not change { ApplicationRequest.count }
-    end
-
-    it "blocks json requests" do
-      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-      expect_blocked_response(*middleware.call(env(
-        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
-        'HTTP_ACCEPT' => 'application/json'
-      )))
-    end
-  end
-
 end
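The removed specs also depend on an env helper that this hunk does not show. A hypothetical stand-in, assuming it merely builds a baseline Rack env and merges in per-example header overrides:

require 'rack/mock'

# Hypothetical stand-in for the spec file's env helper, which is defined
# elsewhere in the real spec file: build a GET request env and merge in
# overrides such as 'HTTP_USER_AGENT' or 'HTTP_ACCEPT'.
def env(overrides = {})
  Rack::MockRequest.env_for('http://test.localhost/', method: :get).merge(overrides)
end

Under that assumption, env('HTTP_ACCEPT' => 'application/json') is an ordinary request whose Accept header marks it as a JSON call, which the last removed example expected to be blocked like any other crawler request.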