prefix the robots.txt rules with the directory when using subfolder
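Concretely (an illustrative sketch, assuming a hypothetical subfolder install served at example.com/forum so that Discourse.base_uri returns "/forum"): every Disallow rule in the generated robots.txt is now routed through Discourse.base_uri, so a rule that previously rendered as

Disallow: /admin

renders as

Disallow: /forum/admin

Root-level installs are unaffected, because base_uri is an empty string there and the concatenation leaves the paths exactly as before.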
@@ -2,14 +2,46 @@ class RobotsTxtController < ApplicationController
   layout false
   skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required

+  # NOTE: order is important!
+  DISALLOWED_PATHS ||= %w{
+    /auth/cas
+    /auth/facebook/callback
+    /auth/twitter/callback
+    /auth/google/callback
+    /auth/yahoo/callback
+    /auth/github/callback
+    /auth/cas/callback
+    /assets/browser-update*.js
+    /users/
+    /u/
+    /badges/
+    /search
+    /search/
+    /tags
+    /tags/
+    /email/
+    /session
+    /session/
+    /admin
+    /admin/
+    /user-api-key
+    /user-api-key/
+    /*?api_key*
+    /*?*api_key*
+    /groups
+    /groups/
+    /t/*/*.rss
+    /tags/*.rss
+    /c/*.rss
+  }
+
   def index
     if SiteSetting.allow_index_in_robots_txt
       path = :index
-      @crawler_delayed_agents = []

-      SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
-        @crawler_delayed_agents << [agent, SiteSetting.slow_down_crawler_rate]
-      end
+      @crawler_delayed_agents = SiteSetting.slow_down_crawler_user_agents.split('|').map { |agent|
+        [agent, SiteSetting.slow_down_crawler_rate]
+      }

       if SiteSetting.whitelisted_crawler_user_agents.present?
         @allowed_user_agents = SiteSetting.whitelisted_crawler_user_agents.split('|')
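The controller hunk also folds the crawler-delay loop into a single map over the configured agents. A minimal sketch of the equivalence, assuming a pipe-delimited setting value such as "Googlebot|bingbot" and a crawl rate of 10:

agents = "Googlebot|bingbot".split('|')

# before: build the pairs by pushing into an accumulator
delayed = []
agents.each { |agent| delayed << [agent, 10] }

# after: map yields the same array of [agent, rate] pairs directly
delayed = agents.map { |agent| [agent, 10] }
# => [["Googlebot", 10], ["bingbot", 10]]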
@@ -3,40 +3,14 @@
 <% @allowed_user_agents.each do |user_agent| %>
 User-agent: <%= user_agent %>
 <% end %>
-Disallow: /auth/cas
-Disallow: /auth/facebook/callback
-Disallow: /auth/twitter/callback
-Disallow: /auth/google/callback
-Disallow: /auth/yahoo/callback
-Disallow: /auth/github/callback
-Disallow: /auth/cas/callback
-Disallow: /assets/browser-update*.js
-Disallow: /users/
-Disallow: /u/
-Disallow: /badges/
-Disallow: /search
-Disallow: /search/
-Disallow: /tags
-Disallow: /tags/
-Disallow: /email/
-Disallow: /session
-Disallow: /session/
-Disallow: /admin
-Disallow: /admin/
-Disallow: /user-api-key
-Disallow: /user-api-key/
-Disallow: /*?api_key*
-Disallow: /*?*api_key*
-Disallow: /groups
-Disallow: /groups/
-Disallow: /t/*/*.rss
-Disallow: /tags/*.rss
-Disallow: /c/*.rss
+<% RobotsTxtController::DISALLOWED_PATHS.each do |path| %>
+Disallow: <%= Discourse.base_uri + path %>
+<% end %>

 <% if @disallowed_user_agents %>
 <% @disallowed_user_agents.each do |user_agent| %>
 User-agent: <%= user_agent %>
-Disallow: /
+Disallow: <%= Discourse.base_uri + "/" %>

 <% end %>
 <% end %>
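This template hunk (apparently the robots.txt index view) replaces the hard-coded Disallow lines with a loop over RobotsTxtController::DISALLOWED_PATHS, so the path list lives in one place and the base URI prefix is applied in a single spot. For an assumed subfolder install where Discourse.base_uri is "/forum", the rendered section would begin roughly like:

Disallow: /forum/auth/cas
Disallow: /forum/auth/facebook/callback
Disallow: /forum/auth/twitter/callback
...
Disallow: /forum/c/*.rss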
@@ -1,4 +1,4 @@
 # See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
 #
 User-agent: *
-Disallow: /
+Disallow: <%= Discourse.base_uri + "/" %>
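This hunk (apparently the template served when indexing is disallowed) gets the same prefix. Since Discourse.base_uri is an empty string on a root install, the output there is still

Disallow: /

while an assumed subfolder install at /forum would emit

Disallow: /forum/

which blocks crawling of the forum's own paths only, not the rest of the host.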
@@ -3,6 +3,14 @@ require 'rails_helper'
 RSpec.describe RobotsTxtController do
   describe '#index' do

+    context 'subfolder' do
+      it 'prefixes the rules with the directory' do
+        Discourse.stubs(:base_uri).returns('/forum')
+        get '/robots.txt'
+        expect(response.body).to include("\nDisallow: /forum/admin")
+      end
+    end
+
     context 'crawl delay' do
       it 'allows you to set crawl delay on particular bots' do
         SiteSetting.allow_index_in_robots_txt = true
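The new example stubs Discourse.base_uri to return "/forum" and then fetches /robots.txt; the leading "\n" in the expected string pins the match to the start of a rendered rule line, so an incidental substring elsewhere in the body would not satisfy the expectation.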