FEATURE: An API to help sites build robots.txt files programmatically
This is mainly useful for subfolder sites, which need to expose their robots.txt contents to a parent site.
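For context, the new builder action serializes the same data the robots.txt view renders. A hypothetical response might look like the sketch below, reconstructed from fetch_robots_info in the diff that follows; the exact disallow paths depend on Discourse.base_uri and the crawler site settings, and the delay key appears only for agents listed in slow_down_crawler_user_agents:

# Hypothetical value returned by fetch_robots_info for a site served
# under /forum with no crawler allow/block lists configured
# (paths are illustrative, not taken from the commit):
{
  header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
  agents: [
    { name: '*', disallow: ['/forum/auth/', '/forum/admin/'] }
  ]
}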
@@ -36,26 +36,59 @@ class RobotsTxtController < ApplicationController
   }
 
   def index
-    if SiteSetting.allow_index_in_robots_txt
-      path = :index
-
-      @crawler_delayed_agents = SiteSetting.slow_down_crawler_user_agents.split('|').map { |agent|
-        [agent, SiteSetting.slow_down_crawler_rate]
-      }
-
-      if SiteSetting.whitelisted_crawler_user_agents.present?
-        @allowed_user_agents = SiteSetting.whitelisted_crawler_user_agents.split('|')
-        @disallowed_user_agents = ['*']
-      elsif SiteSetting.blacklisted_crawler_user_agents.present?
-        @allowed_user_agents = ['*']
-        @disallowed_user_agents = SiteSetting.blacklisted_crawler_user_agents.split('|')
-      else
-        @allowed_user_agents = ['*']
-      end
+    if SiteSetting.allow_index_in_robots_txt?
+      @robots_info = fetch_robots_info
+      render :index, content_type: 'text/plain'
     else
-      path = :no_index
+      render :no_index, content_type: 'text/plain'
     end
+  end
 
-    render path, content_type: 'text/plain'
+  # If you are hosting Discourse in a subfolder, you will need to create your robots.txt
+  # in the root of your web server with the appropriate paths. This method will return
+  # JSON that can be used by a script to create a robots.txt that works well with your
+  # existing site.
+  def builder
+    render json: fetch_robots_info
   end
+
+  protected
+
+  def fetch_robots_info
+    deny_paths = DISALLOWED_PATHS.map { |p| Discourse.base_uri + p }
+    deny_all = [ "#{Discourse.base_uri}/" ]
+
+    result = {
+      header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
+      agents: []
+    }
+
+    if SiteSetting.whitelisted_crawler_user_agents.present?
+      SiteSetting.whitelisted_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << { name: agent, disallow: deny_paths }
+      end
+
+      result[:agents] << { name: '*', disallow: deny_all }
+    elsif SiteSetting.blacklisted_crawler_user_agents.present?
+      result[:agents] << { name: '*', disallow: deny_paths }
+      SiteSetting.blacklisted_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << { name: agent, disallow: deny_all }
+      end
+    else
+      result[:agents] << { name: '*', disallow: deny_paths }
+    end
+
+    if SiteSetting.slow_down_crawler_user_agents.present?
+      SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << {
+          name: agent,
+          delay: SiteSetting.slow_down_crawler_rate,
+          disallow: deny_paths
+        }
+      end
+    end
+
+    result
+  end
 end
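Since the point of the commit is letting a parent site assemble its own robots.txt, here is a minimal sketch of a consumer script for that JSON. It assumes the builder action is exposed at /forum/robots-builder.json (the route is not part of this hunk) and that the delay field maps to a Crawl-delay directive, which is the conventional way to express it:

# A minimal sketch of a parent-site script that turns the builder JSON
# into a robots.txt. The /forum/robots-builder.json URL is an assumption
# for illustration; only the controller action appears in this hunk.
require 'net/http'
require 'json'
require 'uri'

info = JSON.parse(Net::HTTP.get(URI('https://example.com/forum/robots-builder.json')))

lines = [info['header'], '']
info['agents'].each do |agent|
  lines << "User-agent: #{agent['name']}"
  # delay is only present for agents in slow_down_crawler_user_agents
  lines << "Crawl-delay: #{agent['delay']}" if agent['delay']
  agent['disallow'].each { |path| lines << "Disallow: #{path}" }
  lines << ''
end

File.write('robots.txt', lines.join("\n"))

A parent site could equally merge the returned Disallow rules into an existing robots.txt instead of writing the file wholesale; the JSON carries everything the subfolder site wants crawlers to see.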