FEATURE: An API to help sites build robots.txt files programmatically

This is mainly useful for subfolder sites, which need to expose their
robots.txt contents to a parent site.
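
For illustration, the new builder action below serializes the same data the index
template renders: a header comment plus one entry per user agent. The JSON payload
is built as a Ruby hash of roughly this shape (the agent name, the /forum base path,
and the disallow paths are placeholders; the real values come from DISALLOWED_PATHS,
Discourse.base_uri, and the crawler site settings):

# Illustrative shape only -- the values below are placeholders, not defaults.
{
  header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
  agents: [
    { name: '*', disallow: ['/forum/admin/'] },
    # delay entries appear only for agents listed in slow_down_crawler_user_agents
    { name: 'SlowCrawler', delay: 60, disallow: ['/forum/admin/'] }
  ]
}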
Robin Ward
2018-04-16 15:43:20 -04:00
parent cfe88a67e1
commit 3d7dbdedc0
4 changed files with 67 additions and 35 deletions

app/controllers/robots_txt_controller.rb

@@ -36,26 +36,59 @@ class RobotsTxtController < ApplicationController
   }
 
   def index
-    if SiteSetting.allow_index_in_robots_txt
-      path = :index
-
-      @crawler_delayed_agents = SiteSetting.slow_down_crawler_user_agents.split('|').map { |agent|
-        [agent, SiteSetting.slow_down_crawler_rate]
-      }
-
-      if SiteSetting.whitelisted_crawler_user_agents.present?
-        @allowed_user_agents = SiteSetting.whitelisted_crawler_user_agents.split('|')
-        @disallowed_user_agents = ['*']
-      elsif SiteSetting.blacklisted_crawler_user_agents.present?
-        @allowed_user_agents = ['*']
-        @disallowed_user_agents = SiteSetting.blacklisted_crawler_user_agents.split('|')
-      else
-        @allowed_user_agents = ['*']
-      end
-    else
-      path = :no_index
-    end
-
-    render path, content_type: 'text/plain'
-  end
+    if SiteSetting.allow_index_in_robots_txt?
+      @robots_info = fetch_robots_info
+      render :index, content_type: 'text/plain'
+    else
+      render :no_index, content_type: 'text/plain'
+    end
+  end
+
+  # If you are hosting Discourse in a subfolder, you will need to create your robots.txt
+  # in the root of your web server with the appropriate paths. This method will return
+  # JSON that can be used by a script to create a robots.txt that works well with your
+  # existing site.
+  def builder
+    render json: fetch_robots_info
+  end
+
+  protected
+
+  def fetch_robots_info
+    deny_paths = DISALLOWED_PATHS.map { |p| Discourse.base_uri + p }
+    deny_all = [ "#{Discourse.base_uri}/" ]
+
+    result = {
+      header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
+      agents: []
+    }
+
+    if SiteSetting.whitelisted_crawler_user_agents.present?
+      SiteSetting.whitelisted_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << { name: agent, disallow: deny_paths }
+      end
+
+      result[:agents] << { name: '*', disallow: deny_all }
+    elsif SiteSetting.blacklisted_crawler_user_agents.present?
+      result[:agents] << { name: '*', disallow: deny_paths }
+      SiteSetting.blacklisted_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << { name: agent, disallow: deny_all }
+      end
+    else
+      result[:agents] << { name: '*', disallow: deny_paths }
+    end
+
+    if SiteSetting.slow_down_crawler_user_agents.present?
+      SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << {
+          name: agent,
+          delay: SiteSetting.slow_down_crawler_rate,
+          disallow: deny_paths
+        }
+      end
+    end
+
+    result
+  end
 end
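
A minimal sketch of the kind of script a parent site could run against this endpoint
to regenerate its robots.txt. The /robots-builder.json path and all hosts/paths below
are assumptions for illustration (the routes change is part of this commit but not
shown in this file):

#!/usr/bin/env ruby
# Fetch the builder JSON from a subfolder Discourse install (hypothetical URL)
# and write a robots.txt at the parent site's web root (hypothetical path).
require 'net/http'
require 'json'
require 'uri'

builder_url = URI('https://example.com/forum/robots-builder.json')
output_path = '/var/www/example.com/robots.txt'

info = JSON.parse(Net::HTTP.get(builder_url))

lines = [info['header'], '']
info['agents'].each do |agent|
  lines << "User-agent: #{agent['name']}"
  lines << "Crawl-delay: #{agent['delay']}" if agent['delay']
  agent['disallow'].each { |path| lines << "Disallow: #{path}" }
  lines << ''
end

# Merge in the parent site's own rules here before writing, if it has any.
File.write(output_path, lines.join("\n"))

Because fetch_robots_info prefixes every path with Discourse.base_uri, the Disallow
rules already point at the subfolder, so the parent site can append them to its own
rules without rewriting paths.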