diff --git a/config/site_settings.yml b/config/site_settings.yml index 80658f628c4..8da736a7efb 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1637,6 +1637,10 @@ onebox: force_custom_user_agent_hosts: default: "http://codepen.io" type: list + force_get_hosts: + default: "us.battle.net|news.yahoo.com|*.medium.com" + type: list + hidden: true facebook_app_access_token: default: "" secret: true diff --git a/lib/final_destination.rb b/lib/final_destination.rb index 73f7ce30302..348a44f4297 100644 --- a/lib/final_destination.rb +++ b/lib/final_destination.rb @@ -215,7 +215,7 @@ class FinalDestination @status = :resolved return @uri - when 400, 405, 406, 409, 501 + when 400, 405, 406, 409, 500, 501 response_status, small_headers = small_get(request_headers) if response_status == 200 @@ -300,7 +300,17 @@ class FinalDestination def hostname_matches?(url) url = uri(url) - @uri && url.present? && @uri.hostname == url&.hostname + + if @uri&.hostname.present? && url&.hostname.present? + hostname_parts = url.hostname.split('.') + has_wildcard = hostname_parts.first == '*' + + if has_wildcard + @uri.hostname.end_with?(hostname_parts[1..-1].join('.')) + else + @uri.hostname == url.hostname + end + end end def is_dest_valid? diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb index 4e0b96ef9aa..954e810bd2c 100644 --- a/lib/oneboxer.rb +++ b/lib/oneboxer.rb @@ -32,7 +32,8 @@ module Oneboxer end def self.force_get_hosts - hosts = ['http://us.battle.net', 'https://news.yahoo.com'] + hosts = [] + hosts += SiteSetting.force_get_hosts.split('|').collect { |domain| "https://#{domain}" } hosts += SiteSetting.cache_onebox_response_body_domains.split('|').collect { |domain| "https://www.#{domain}" } hosts += amazon_domains @@ -394,8 +395,13 @@ module Oneboxer allowed += SiteSetting.allowed_iframes.split("|") end - def self.external_onebox(url) + def self.external_onebox(url, available_strategies = nil) Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do + + uri = URI(url) + available_strategies ||= Oneboxer.ordered_strategies(uri.hostname) + strategy = available_strategies.shift + fd_options = { ignore_redirects: ignore_redirects, ignore_hostnames: blocked_domains, @@ -404,6 +410,13 @@ module Oneboxer preserve_fragment_url_hosts: preserve_fragment_url_hosts } + if strategy && Oneboxer.strategies[strategy][:force_get_host] + fd_options[:force_get_hosts] = ["https://#{uri.hostname}"] + end + if strategy && Oneboxer.strategies[strategy][:force_custom_user_agent_host] + fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"] + end + user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present? fd_options[:default_user_agent] = user_agent_override if user_agent_override @@ -415,6 +428,11 @@ module Oneboxer if fd.status == :invalid_address args[:error_message] = I18n.t("errors.onebox.invalid_address", hostname: fd.hostname) elsif fd.status_code + # Try a different oneboxing strategy, if we have any options left: + if available_strategies.present? + return external_onebox(url, available_strategies) + end + args[:error_message] = I18n.t("errors.onebox.error_response", status_code: fd.status_code) end @@ -466,6 +484,8 @@ module Oneboxer end end + Oneboxer.cache_preferred_strategy(uri.hostname, strategy) + result end end @@ -490,4 +510,44 @@ module Oneboxer end end + def self.ordered_strategies(hostname) + all = strategies.keys + preferred = Oneboxer.preferred_strategy(hostname) + + all.insert(0, all.delete(preferred)) if all.include?(preferred) + + all + end + + def self.strategies + { + default: {}, # don't override anything by default + force_get_and_ua: { + force_get_host: true, + force_custom_user_agent_host: true, + }, + } + end + + def self.cache_preferred_strategy(hostname, strategy) + return if strategy == :default + + key = redis_oneboxer_strategy_key(hostname) + Discourse.redis.without_namespace.setex(key, 2.weeks.to_i, strategy.to_s) + end + + def self.clear_preferred_strategy!(hostname) + key = redis_oneboxer_strategy_key(hostname) + Discourse.redis.without_namespace.del(key) + end + + def self.preferred_strategy(hostname) + key = redis_oneboxer_strategy_key(hostname) + Discourse.redis.without_namespace.get(key)&.to_sym + end + + def self.redis_oneboxer_strategy_key(hostname) + "ONEBOXER_STRATEGY_#{hostname}" + end + end diff --git a/spec/components/final_destination_spec.rb b/spec/components/final_destination_spec.rb index 585ca610976..678423e0cf1 100644 --- a/spec/components/final_destination_spec.rb +++ b/spec/components/final_destination_spec.rb @@ -9,7 +9,7 @@ describe FinalDestination do { ignore_redirects: ['https://ignore-me.com'], - force_get_hosts: ['https://force.get.com'], + force_get_hosts: ['https://force.get.com', 'https://*.ihaveawildcard.com/'], preserve_fragment_url_hosts: ['https://eviltrout.com'], @@ -17,6 +17,7 @@ describe FinalDestination do lookup_ip: lambda do |host| case host when 'eviltrout.com' then '52.84.143.152' + when 'particularly.eviltrout.com' then '52.84.143.152' when 'codinghorror.com' then '91.146.108.148' when 'discourse.org' then '104.25.152.10' when 'some_thing.example.com' then '104.25.152.10' @@ -24,6 +25,7 @@ describe FinalDestination do when 'internal-ipv6.com' then '2001:abc:de:01:3:3d0:6a65:c2bf' when 'ignore-me.com' then '53.84.143.152' when 'force.get.com' then '22.102.29.40' + when 'any-subdomain.ihaveawildcard.com' then '104.25.152.11' when 'wikipedia.com' then '1.2.3.4' else as_ip = IPAddr.new(host) @@ -170,8 +172,11 @@ describe FinalDestination do before do stub_request(:head, 'https://force.get.com/posts?page=4') stub_request(:get, 'https://force.get.com/posts?page=4') + stub_request(:get, 'https://any-subdomain.ihaveawildcard.com/some/other/content') stub_request(:head, 'https://eviltrout.com/posts?page=2') stub_request(:get, 'https://eviltrout.com/posts?page=2') + stub_request(:head, 'https://particularly.eviltrout.com/has/a/secret/plan') + stub_request(:get, 'https://particularly.eviltrout.com/has/a/secret/plan') end it "will do a GET when forced" do @@ -189,6 +194,23 @@ describe FinalDestination do expect(WebMock).to_not have_requested(:get, 'https://eviltrout.com/posts?page=2') expect(WebMock).to have_requested(:head, 'https://eviltrout.com/posts?page=2') end + + it "will do a GET when forced on a wildcard subdomain" do + final = FinalDestination.new('https://any-subdomain.ihaveawildcard.com/some/other/content', opts) + expect(final.resolve.to_s).to eq('https://any-subdomain.ihaveawildcard.com/some/other/content') + expect(final.status).to eq(:resolved) + expect(WebMock).to have_requested(:get, 'https://any-subdomain.ihaveawildcard.com/some/other/content') + expect(WebMock).to_not have_requested(:head, 'https://any-subdomain.ihaveawildcard.com/some/other/content') + end + + it "will do a HEAD if on a subdomain of a forced get domain without a wildcard" do + final = FinalDestination.new('https://particularly.eviltrout.com/has/a/secret/plan', opts) + expect(final.resolve.to_s).to eq('https://particularly.eviltrout.com/has/a/secret/plan') + expect(final.status).to eq(:resolved) + expect(WebMock).to_not have_requested(:get, 'https://particularly.eviltrout.com/has/a/secret/plan') + expect(WebMock).to have_requested(:head, 'https://particularly.eviltrout.com/has/a/secret/plan') + end + end context "HEAD not supported" do diff --git a/spec/components/oneboxer_spec.rb b/spec/components/oneboxer_spec.rb index b3c27c52e06..fcc0e13ef8d 100644 --- a/spec/components/oneboxer_spec.rb +++ b/spec/components/oneboxer_spec.rb @@ -355,6 +355,60 @@ describe Oneboxer do end end + context 'strategies' do + it "has a 'default' strategy" do + expect(Oneboxer.strategies.keys.first).to eq(:default) + end + + it "has a strategy with overrides" do + strategy = Oneboxer.strategies.keys[1] + expect(Oneboxer.strategies[strategy].keys).not_to eq([]) + end + + context "using a non-default strategy" do + let(:hostname) { "my.interesting.site" } + let(:url) { "https://#{hostname}/cool/content" } + let(:html) do + <<~HTML + +
+ + + + +body
+ + + HTML + end + + before do + stub_request(:head, url).to_return(status: 509) + stub_request(:get, url).to_return(status: 200, body: html) + end + + after do + Oneboxer.clear_preferred_strategy!(hostname) + end + + it "uses mutiple strategies" do + default_ordered = Oneboxer.strategies.keys + custom_ordered = Oneboxer.ordered_strategies(hostname) + expect(custom_ordered).to eq(default_ordered) + + expect(Oneboxer.preferred_strategy(hostname)).to eq(nil) + expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to include("Here is some cool content") + + custom_ordered = Oneboxer.ordered_strategies(hostname) + + expect(custom_ordered.count).to eq(default_ordered.count) + expect(custom_ordered).not_to eq(default_ordered) + + expect(Oneboxer.preferred_strategy(hostname)).not_to eq(:default) + end + end + end + describe 'cache_onebox_response_body' do let(:html) do <<~HTML