diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb
index 6b8f4d3f251..10dd9fa0e63 100644
--- a/lib/onebox/helpers.rb
+++ b/lib/onebox/helpers.rb
@@ -36,7 +36,7 @@ module Onebox
         # prefer canonical link
         canonical_link = doc.at('//link[@rel="canonical"]/@href')
         canonical_uri = Addressable::URI.parse(canonical_link)
-        if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
+        if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" && canonical_uri.host != "localhost"
           response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
           doc = Nokogiri::HTML(response) if response
         end
diff --git a/spec/lib/onebox/helpers_spec.rb b/spec/lib/onebox/helpers_spec.rb
index 6532905f501..f24719ff20f 100644
--- a/spec/lib/onebox/helpers_spec.rb
+++ b/spec/lib/onebox/helpers_spec.rb
@@ -52,6 +52,23 @@ RSpec.describe Onebox::Helpers do
 
       expect(described_class.fetch_html_doc(uri).to_s).to match("success")
     end
+
+    context "canonical link" do
+      it "follows canonical link" do
+        uri = 'https://www.example.com'
+        stub_request(:get, uri).to_return(status: 200, body: "<!DOCTYPE html><link rel='canonical' href='http://foobar.com/'/><p>invalid</p>")
+        stub_request(:get, 'http://foobar.com').to_return(status: 200, body: "<!DOCTYPE html><p>success</p>")
+
+        expect(described_class.fetch_html_doc(uri).to_s).to match("success")
+      end
+
+      it "does not follow canonical link pointing at localhost" do
+        uri = 'https://www.example.com'
+        stub_request(:get, uri).to_return(status: 200, body: "<!DOCTYPE html><link rel='canonical' href='http://localhost:3000/'/><p>success</p>")
+
+        expect(described_class.fetch_html_doc(uri).to_s).to match("success")
+      end
+    end
   end
 
   describe "redirects" do