From 038a391cfee84c9ab0026dc78ba9adeb52f25eaa Mon Sep 17 00:00:00 2001
From: Natalie Tay
Date: Wed, 15 Jan 2025 13:42:08 +0800
Subject: [PATCH] FIX: Remove /u/ from robots (#30782)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow up from https://github.com/discourse/discourse/pull/27712.

Currently, we already add `noindex` to /u routes. However, because
robots.txt blocks these routes, search engines are not able to see the
header. This commit removes /u from our robots.txt to allow search engines
to see the header.

This commit also includes a migration to remove the /u rule for admins who
have overridden the file. I had contemplated not including this migration,
but seeing there are existing site admins who are trying to remove errors
from their dashboard, they would probably welcome this change.

The migration replaces the overridden text in this area, and will not
modify it if additional routes have been added in between:

[image: Screenshot 2025-01-15 at 11 28 43 AM copy]

Side effect note: This might potentially result in more pageviews* from
GoogleBot (for example) for a period of time as Google starts visiting the
user routes they have been denied before.
---
 app/controllers/robots_txt_controller.rb      |  2 +-
 ...ove_user_profile_from_overridden_robots.rb | 49 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 db/migrate/20250115031117_remove_user_profile_from_overridden_robots.rb

diff --git a/app/controllers/robots_txt_controller.rb b/app/controllers/robots_txt_controller.rb
index c3498b11644..4b80c0ba3f8 100644
--- a/app/controllers/robots_txt_controller.rb
+++ b/app/controllers/robots_txt_controller.rb
@@ -21,7 +21,7 @@ class RobotsTxtController < ApplicationController
     /*?*api_key*
   ]
 
-  DISALLOWED_WITH_HEADER_PATHS = %w[/badges /u/ /my /search /tag/*/l /g /t/*/*.rss /c/*.rss]
+  DISALLOWED_WITH_HEADER_PATHS = %w[/badges /my /search /tag/*/l /g /t/*/*.rss /c/*.rss]
 
   def index
     if (overridden = SiteSetting.overridden_robots_txt.dup).present?
diff --git a/db/migrate/20250115031117_remove_user_profile_from_overridden_robots.rb b/db/migrate/20250115031117_remove_user_profile_from_overridden_robots.rb
new file mode 100644
index 00000000000..fa9f8fc56ea
--- /dev/null
+++ b/db/migrate/20250115031117_remove_user_profile_from_overridden_robots.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+# Strips the `Disallow: /u/` rule from an admin-overridden robots.txt so that
+# crawlers can reach user routes and see the `noindex` header served there.
+#
+# Only the stock `User-agent: *` section is touched: the rule is removed when
+# it directly follows the rules listed in RULES_BEFORE_USER_RULE, and overrides
+# with other rules inserted in between are left exactly as the admin wrote them.
+class RemoveUserProfileFromOverriddenRobots < ActiveRecord::Migration[7.2]
+  # Rules that must immediately precede `Disallow: /u/` for a rewrite to happen.
+  RULES_BEFORE_USER_RULE = <<~TXT.chomp
+    User-agent: *
+    Disallow: /admin/
+    Disallow: /auth/
+    Disallow: /assets/browser-update*.js
+    Disallow: /email/
+    Disallow: /session
+    Disallow: /user-api-key
+    Disallow: /*?api_key*
+    Disallow: /*?*api_key*
+    Disallow: /badges
+  TXT
+  USER_RULE = "Disallow: /u/"
+
+  # Rewrites the stored override in Ruby rather than with SQL REPLACE so the
+  # match can be anchored to the end of the line: a longer rule such as
+  # `Disallow: /u/*/summary` must not be cut down to `Disallow: /badges*/summary`.
+  def up
+    current =
+      select_value("SELECT value FROM site_settings WHERE name = 'overridden_robots_txt'")
+    return if current.blank?
+
+    pattern = /(#{Regexp.escape(RULES_BEFORE_USER_RULE)})\n#{Regexp.escape(USER_RULE)}(?=\n|\z)/
+    updated = current.gsub(pattern) { Regexp.last_match(1) }
+    return if updated == current
+
+    execute <<~SQL
+      UPDATE site_settings
+      SET value = #{connection.quote(updated)}
+      WHERE name = 'overridden_robots_txt'
+    SQL
+  end
+
+  # The removed rule is not recorded anywhere, so the previous override text
+  # cannot be restored.
+  def down
+    raise ActiveRecord::IrreversibleMigration
+  end
+end