FIX: Follow the canonical URL when importing a remote topic. (#14489)

FinalDestination now supports the `follow_canonical` option, which will perform an initial GET request, parse the canonical link if present, and perform a HEAD request to it.

We use this mode during embeds to avoid treating URLs with different query parameters as different topics.
This commit is contained in:
Roman Rizzi
2021-10-01 12:48:21 -03:00
committed by GitHub
parent 0359adc0b8
commit 4c2d5158c5
4 changed files with 122 additions and 53 deletions

View File

@ -49,6 +49,13 @@ describe FinalDestination do
}
end
def canonical_follow(from, dest)
stub_request(:get, from).to_return(
status: 200,
body: "<head><link rel=\"canonical\" href=\"#{dest}\"></head>"
)
end
def redirect_response(from, dest)
stub_request(:head, from).to_return(
status: 302,
@ -175,6 +182,39 @@ describe FinalDestination do
end
end
context 'follows canonical links' do
it 'resolves the canonical link as the final destination' do
canonical_follow("https://eviltrout.com", "https://codinghorror.com/blog")
stub_request(:head, "https://codinghorror.com/blog").to_return(doc_response)
final = FinalDestination.new('https://eviltrout.com', opts.merge(follow_canonical: true))
expect(final.resolve.to_s).to eq("https://codinghorror.com/blog")
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
it "does not follow the canonical link if it's the same as the current URL" do
canonical_follow("https://eviltrout.com", "https://eviltrout.com")
final = FinalDestination.new('https://eviltrout.com', opts.merge(follow_canonical: true))
expect(final.resolve.to_s).to eq("https://eviltrout.com")
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
it "does not follow the canonical link if it's invalid" do
canonical_follow("https://eviltrout.com", "")
final = FinalDestination.new('https://eviltrout.com', opts.merge(follow_canonical: true))
expect(final.resolve.to_s).to eq("https://eviltrout.com")
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
end
context "GET can be forced" do
before do
stub_request(:head, 'https://force.get.com/posts?page=4')