DEV: Add a user agent to all HTTP requests that Discourse makes. (#31555)

This change standardises the `User-Agent` header that Discourse will send when talking to other sites.

`Discourse.user_agent` is now the authority on what the user agent value should be. For Onebox requests, this changes the user agent from its existing value to the new value (unless overridden).

For all other requests, `Net::HTTPHeader` is monkey-patched to add a default `User-Agent` header when one hasn't been provided.
This commit is contained in:
Gary Pendergast
2025-03-03 16:32:25 +11:00
committed by GitHub
parent 8325d42e56
commit 8615fc6cbb
13 changed files with 71 additions and 19 deletions

View File

@ -13,5 +13,11 @@ class TestRequestsController < ApplicationController
max_retries: net_http.max_retries,
}
end
# Test-only action: constructs a Net::HTTP GET request object without passing
# any headers and renders that object as JSON, so a spec can inspect which
# headers the request was given by default.
def test_net_http_headers
  render json: Net::HTTP::Get.new("example.com")
end
end
end

View File

@ -133,7 +133,7 @@ module Jobs
"Content-Length" => web_hook_body.bytesize.to_s,
"Content-Type" => content_type,
"Host" => uri.host,
"User-Agent" => "Discourse/#{Discourse::VERSION::STRING}",
"User-Agent" => Discourse.user_agent,
"X-Discourse-Instance" => Discourse.base_url,
"X-Discourse-Event-Id" => web_hook_event.id.to_s,
"X-Discourse-Event-Type" => @arguments[:event_type],

View File

@ -5,14 +5,9 @@ Rails.application.config.to_prepare do
Onebox.options = {
twitter_client: TwitterApi,
redirect_limit: 3,
user_agent: "Discourse Forum Onebox",
allowed_ports: [80, 443, SiteSetting.port.to_i],
}
else
Onebox.options = {
twitter_client: TwitterApi,
redirect_limit: 3,
user_agent: "Discourse Forum Onebox",
}
Onebox.options = { twitter_client: TwitterApi, redirect_limit: 3 }
end
end

View File

@ -1729,6 +1729,7 @@ Discourse::Application.routes.draw do
if Rails.env.test?
# Routes that are only used for testing
get "/test_net_http_timeouts" => "test_requests#test_net_http_timeouts"
get "/test_net_http_headers" => "test_requests#test_net_http_headers"
end
end
end

View File

@ -850,6 +850,14 @@ module Discourse
GitUtils.try_git(git_cmd, default_value)
end
# Returns the User-Agent string used for outgoing requests.
#
# The value is memoized in @user_agent on first use. When a git version is
# available at that point it is appended to the version string; otherwise only
# the version string is used.
#
# @return [String]
def self.user_agent
  includes_git_version = git_version.present?

  @user_agent ||=
    if includes_git_version
      "Discourse/#{VERSION::STRING}-#{git_version}; +https://www.discourse.org/"
    else
      "Discourse/#{VERSION::STRING}; +https://www.discourse.org/"
    end
end
# Either returns the site_contact_username user or the first admin.
def self.site_contact_user
user =

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
# Prepended onto Net::HTTPHeader so that headers initialised through
# #initialize_http_header always include a User-Agent entry.
#
# NOTE(review): the webhook controller spec expects stored response headers to
# contain "user-agent" as well, so this apparently applies to response objects
# too, not only outgoing requests - confirm that is intended.
module NetHTTPHeaderPatch
  # Adds Discourse.user_agent as the User-Agent header when the caller did not
  # supply one (matched case-insensitively, String or Symbol keys), then
  # delegates to the original implementation.
  #
  # @param initheader [Hash, nil] headers supplied by the caller. It is never
  #   modified: a copy is used, so frozen or shared header hashes are safe.
  def initialize_http_header(initheader)
    # Copy first - writing the default into the caller's hash would raise on a
    # frozen hash and would leak the header into shared header constants.
    headers = initheader ? initheader.dup : {}

    user_agent_key =
      headers.keys.find { |key| key.to_s.downcase == "user-agent" } || "User-Agent"

    # `||=` also replaces an explicit nil value with the default.
    headers[user_agent_key] ||= Discourse.user_agent

    super(headers)
  end
end

Net::HTTPHeader.prepend(NetHTTPHeaderPatch)

View File

@ -15,10 +15,6 @@ module Onebox
path.match?(%r{^/.+?/status(es)?/\d+(/(video|photo)/\d?)?(/?\?.*)?/?$})
end
# Returns a Hash containing a single "User-Agent" entry with a fixed value.
#
# @return [Hash{String => String}]
def http_params
  params = {}
  params["User-Agent"] = "DiscourseBot/1.0"
  params
end
# Returns the parent implementation's HTML when `raw` is present, and an empty
# string otherwise.
def to_html
  return "" unless raw.present?

  super
end

View File

@ -232,9 +232,14 @@ module Onebox
end
# Returns the User-Agent string for Onebox requests.
#
# Precedence:
# 1. SiteSetting.onebox_user_agent, when present
# 2. Onebox.options.user_agent, when present
# 3. Discourse.user_agent
#
# The first two are suffixed with " v<Discourse::VERSION::STRING>";
# Discourse.user_agent is returned unchanged.
#
# @return [String]
def self.user_agent
  custom_agent = SiteSetting.onebox_user_agent.presence || Onebox.options.user_agent.presence
  return "#{custom_agent} v#{Discourse::VERSION::STRING}" if custom_agent

  Discourse.user_agent
end
# Percent-encodes a URI string per RFC3986 - https://tools.ietf.org/html/rfc3986

View File

@ -194,6 +194,16 @@ RSpec.describe Discourse do
end
end
# Checks the format of Discourse.user_agent when a git version is available:
# "Discourse/<version>-<git version>; +https://www.discourse.org/".
describe "#user_agent" do
it "returns a user agent string" do
# NOTE(review): Discourse.user_agent memoizes its result in @user_agent. If it
# was already called earlier in the same process, the stubbed version and git
# version below would presumably not be reflected - confirm this example is
# not order-dependent.
stub_const(Discourse::VERSION, :STRING, "1.2.3") do
Discourse.stubs(:git_version).returns("123456")
expect(Discourse.user_agent).to eq("Discourse/1.2.3-123456; +https://www.discourse.org/")
end
end
end
describe "#site_contact_user" do
fab!(:admin)
fab!(:another_admin) { Fabricate(:admin) }

View File

@ -219,7 +219,7 @@ RSpec.describe Onebox::Helpers do
it "has the default Discourse user agent" do
stub_request(:get, "http://example.com/some-resource").with(
headers: {
"user-agent" => /Discourse Forum Onebox/,
"user-agent" => Discourse.user_agent,
},
).to_return(status: 200, body: "test")

View File

@ -626,9 +626,7 @@ RSpec.describe Oneboxer do
end
describe "onebox custom user agent" do
let!(:default_onebox_user_agent) do
"#{Onebox.options.user_agent} v#{Discourse::VERSION::STRING}"
end
let!(:default_onebox_user_agent) { Discourse.user_agent }
it "uses the site setting value" do
SiteSetting.force_custom_user_agent_hosts = "http://codepen.io|https://video.discourse.org/"

View File

@ -311,7 +311,11 @@ RSpec.describe Admin::WebHooksController do
expect(parsed_event["payload"]).to eq("abc")
expect(JSON.parse(parsed_event["response_headers"])).to eq(
{ "content-type" => "application/json", "yoo" => "man" },
{
"content-type" => "application/json",
"user-agent" => Discourse.user_agent,
"yoo" => "man",
},
)
expect(parsed_event["response_body"]).to eq("efg")
end

View File

@ -0,0 +1,14 @@
# frozen_string_literal: true
# Requests the test-only /test_net_http_headers endpoint and checks that the
# JSON it renders contains a "user-agent" entry whose first value equals
# Discourse.user_agent, i.e. that a default is applied when none was supplied.
RSpec.describe "Net::HTTPHeader sets a default user-agent" do
it "should set a user-agent when none has been set" do
get "/test_net_http_headers.json"
expect(response).to have_http_status(:success)
parsed_body = JSON.parse(response.body)
expect(parsed_body).to have_key("user-agent")
expect(parsed_body["user-agent"].first).to eq(Discourse.user_agent)
end
end