diff --git a/lib/inline_oneboxer.rb b/lib/inline_oneboxer.rb
index 019da0826d4..2e2dcf2c60f 100644
--- a/lib/inline_oneboxer.rb
+++ b/lib/inline_oneboxer.rb
@@ -51,7 +51,6 @@ class InlineOneboxer
 
     always_allow = SiteSetting.enable_inline_onebox_on_all_domains
     allowed_domains = SiteSetting.allowed_inline_onebox_domains&.split('|') unless always_allow
-    blocked_domains = SiteSetting.blocked_onebox_domains&.split('|')
 
     if always_allow || allowed_domains
       uri = begin
@@ -62,7 +61,7 @@ class InlineOneboxer
       if uri.present? &&
         uri.hostname.present? &&
         (always_allow || allowed_domains.include?(uri.hostname)) &&
-        !blocked_domains.include?(uri.hostname)
+        !domain_is_blocked?(uri.hostname)
         title = RetrieveTitle.crawl(url)
         title = nil if title && title.length < MIN_TITLE_LENGTH
         return onebox_for(url, title, opts)
@@ -74,6 +73,16 @@ class InlineOneboxer
 
   private
 
+  # Returns true when +hostname+ equals, or is a subdomain of, any entry in the
+  # pipe-delimited `blocked_onebox_domains` site setting.
+  # `.to_s` keeps an unset (nil) setting from raising: `&.split(...).any?` would
+  # still call `any?` on nil, because `&.` only guards the call it is attached to.
+  def self.domain_is_blocked?(hostname)
+    SiteSetting.blocked_onebox_domains.to_s.split('|').any? do |blocked|
+      hostname == blocked || hostname.end_with?(".#{blocked}")
+    end
+  end
+
   def self.onebox_for(url, title, opts)
     title = title && Emoji.gsub_emoji_to_unicode(title)
     if title && opts[:post_number]
diff --git a/lib/retrieve_title.rb b/lib/retrieve_title.rb
index 043fb6a68a4..2fd361ca902 100644
--- a/lib/retrieve_title.rb
+++ b/lib/retrieve_title.rb
@@ -60,6 +60,10 @@ module RetrieveTitle
     encoding = nil
 
     fd.get do |_response, chunk, uri|
+      if (uri.present? && InlineOneboxer.domain_is_blocked?(uri.hostname))
+        throw :done
+      end
+
       unless Net::HTTPRedirection === _response
         if current
           current << chunk
diff --git a/spec/components/inline_oneboxer_spec.rb b/spec/components/inline_oneboxer_spec.rb
index 6a5917af313..8f89e48a32c 100644
--- a/spec/components/inline_oneboxer_spec.rb
+++ b/spec/components/inline_oneboxer_spec.rb
@@ -153,12 +153,6 @@ describe InlineOneboxer do
       expect(onebox).to be_blank
     end
 
-    it "will not crawl domains that are blocked" do
-      SiteSetting.blocked_onebox_domains = "eviltrout.com"
-      onebox = InlineOneboxer.lookup("https://eviltrout.com", skip_cache: true)
-      expect(onebox).to be_blank
-    end
-
     it "will crawl anything if allowed to" do
       SiteSetting.enable_inline_onebox_on_all_domains = true
 
@@ -204,6 +198,38 @@ describe InlineOneboxer do
 
       expect(onebox[:title]).to eq("Evil Trout's Blog")
     end
-  end
+    describe "lookups for blocked domains in the hostname" do
+      shared_examples "blocks the domain" do |setting, domain_to_test|
+        it "does not retrieve title" do
+          SiteSetting.blocked_onebox_domains = setting
+          onebox = InlineOneboxer.lookup(domain_to_test, skip_cache: true)
+
+          expect(onebox).to be_blank
+        end
+      end
+
+      shared_examples "does not fulfil blocked domain" do |setting, domain_to_test|
+        it "retrieves title" do
+          SiteSetting.blocked_onebox_domains = setting
+
+          onebox = InlineOneboxer.lookup(domain_to_test, skip_cache: true)
+
+          expect(onebox).to be_present
+        end
+      end
+
+      include_examples "blocks the domain", "api.cat.org|kitten.cloud", "https://api.cat.org"
+      include_examples "blocks the domain", "api.cat.org|kitten.cloud", "http://kitten.cloud"
+
+      include_examples "blocks the domain", "kitten.cloud", "http://cat.kitten.cloud"
+
+      include_examples "blocks the domain", "api.cat.org", "https://api.cat.org/subdirectory/moar"
+      include_examples "blocks the domain", "kitten.cloud", "https://cat.kitten.cloud/subd"
+
+      include_examples "does not fulfil blocked domain", "kitten.cloud", "https://cat.2kitten.cloud"
+      include_examples "does not fulfil blocked domain", "kitten.cloud", "https://cat.kitten.cloud9"
+      include_examples "does not fulfil blocked domain", "api.cat.org", "https://api-cat.org"
+    end
+  end
 
 end
diff --git a/spec/components/retrieve_title_spec.rb b/spec/components/retrieve_title_spec.rb
index f760a6a1f0f..6b16a9174d3 100644
--- a/spec/components/retrieve_title_spec.rb
+++ b/spec/components/retrieve_title_spec.rb
@@ -100,6 +100,35 @@ describe RetrieveTitle do
       IPSocket.stubs(:getaddress).returns('100.2.3.4')
       expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq("very amazing")
     end
+
+    it "returns empty title if redirect uri is in blacklist" do
+      SiteSetting.blocked_onebox_domains = "wikipedia.com"
+
+      stub_request(:get, "http://foobar.com/amazing")
+        .to_return(status: 301, body: "", headers: { "location" => "https://wikipedia.com/amazing" })
+
+      stub_request(:get, "https://wikipedia.com/amazing")
+        .to_return(status: 200, body: "very amazing", headers: {})
+
+      IPSocket.stubs(:getaddress).returns('100.2.3.4')
+      expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq(nil)
+    end
+
+    it "returns title if 'midway redirect' is blocked but final redirect uri is not blocked" do
+      SiteSetting.blocked_onebox_domains = "wikipedia.com"
+
+      stub_request(:get, "http://foobar.com/amazing")
+        .to_return(status: 301, body: "", headers: { "location" => "https://wikipedia.com/amazing" })
+
+      stub_request(:get, "https://wikipedia.com/amazing")
+        .to_return(status: 301, body: "", headers: { "location" => "https://cat.com/meow" })
+
+      stub_request(:get, "https://cat.com/meow")
+        .to_return(status: 200, body: "very amazing", headers: {})
+
+      IPSocket.stubs(:getaddress).returns('100.2.3.4')
+      expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq("very amazing")
+    end
   end
 
   context 'fetch_title' do