Mirror of https://github.com/discourse/discourse.git, synced 2025-05-22 20:11:11 +08:00
FEATURE: control which web crawlers can access using a whitelist or blacklist
@@ -28,4 +28,25 @@ module CrawlerDetection
     end
   end
 
+  # Given a user_agent that returns true from crawler?, should its request be allowed?
+  def self.allow_crawler?(user_agent)
+    return true if SiteSetting.whitelisted_crawler_user_agents.blank? &&
+      SiteSetting.blacklisted_crawler_user_agents.blank?
+
+    @whitelisted_matchers ||= {}
+    @blacklisted_matchers ||= {}
+
+    if SiteSetting.whitelisted_crawler_user_agents.present?
+      whitelisted = @whitelisted_matchers[SiteSetting.whitelisted_crawler_user_agents] ||= to_matcher(SiteSetting.whitelisted_crawler_user_agents)
+      !user_agent.nil? && user_agent.match?(whitelisted)
+    else
+      blacklisted = @blacklisted_matchers[SiteSetting.blacklisted_crawler_user_agents] ||= to_matcher(SiteSetting.blacklisted_crawler_user_agents)
+      user_agent.nil? || !user_agent.match?(blacklisted)
+    end
+  end
+
+  def self.is_blocked_crawler?(user_agent)
+    crawler?(user_agent) && !allow_crawler?(user_agent)
+  end
+
 end
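The decision rule the patch introduces: if neither setting is filled in, every crawler is allowed; if whitelisted_crawler_user_agents is set it takes precedence and only matching agents may crawl; otherwise any agent not matching blacklisted_crawler_user_agents may crawl. Below is a minimal standalone sketch of that logic, assuming to_matcher builds a case-insensitive regex from a pipe-delimited list of user-agent fragments; the allowed? helper and the sample agent strings are made up for illustration and are not part of this commit.

# Standalone sketch, not part of the diff above. Mirrors the whitelist/blacklist
# decision in CrawlerDetection.allow_crawler? without the SiteSetting plumbing.
def to_matcher(string)
  # Assumption: the setting is a pipe-delimited list of user-agent fragments,
  # matched case-insensitively.
  escaped = string.split('|').map { |agent| Regexp.escape(agent) }.join('|')
  Regexp.new(escaped, Regexp::IGNORECASE)
end

def allowed?(user_agent, whitelist: "", blacklist: "")
  # No settings configured: every crawler may proceed.
  return true if whitelist.empty? && blacklist.empty?

  if !whitelist.empty?
    # Whitelist takes precedence: only agents matching it are allowed.
    !user_agent.nil? && user_agent.match?(to_matcher(whitelist))
  else
    # Blacklist mode: anything not matching it (including a missing UA) is allowed.
    user_agent.nil? || !user_agent.match?(to_matcher(blacklist))
  end
end

puts allowed?("Googlebot/2.1", whitelist: "Googlebot|bingbot")  # true
puts allowed?("BadBot/1.0",    whitelist: "Googlebot|bingbot")  # false
puts allowed?("BadBot/1.0",    blacklist: "BadBot")             # false
puts allowed?("Googlebot/2.1", blacklist: "BadBot")             # true

Note the asymmetry for requests with no User-Agent header: in whitelist mode a nil user agent is rejected, while in blacklist mode it is allowed. is_blocked_crawler? only blocks agents that crawler? already classifies as crawlers, so ordinary browser traffic is unaffected by either list.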