From 8f7a3e5b29faf20c5c49a5f043f7318ee4f6571d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Tue, 21 May 2024 21:53:03 +0200 Subject: [PATCH] FIX: subfolder absolute links in summaries This fixes the `PrettyText.make_all_links_absolute` to better handle subfolder. In subfolder, when given the cooked version of a post, links to mentions includes the `Discourse.base_path` prefix. Adding the `Discourse.base_url` was doubling the `Discourse.base_path`. The issue was hidden behind the specs which was stubbing `Discourse.base_url` instead of relying on `Discourse.base_path`. This fixes both the "algorithm" used in `PrettyText.make_all_links_absolute` to better handle this case and correct the specs to properly handle subfolder cases. There are lots of changes in the specs due to a refactoring to use squiggly heredoc strings for easier reading and less escaping. --- lib/pretty_text.rb | 24 +-- spec/lib/pretty_text_spec.rb | 304 +++++++++++++++++++++-------------- 2 files changed, 194 insertions(+), 134 deletions(-) diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index dc7520d50ad..9a86cf04ef6 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -506,17 +506,23 @@ module PrettyText end def self.make_all_links_absolute(doc) - site_uri = nil doc - .css("a") - .each do |link| - href = link["href"].to_s + .css("a[href]") + .each do |a| begin - uri = URI(href) - site_uri ||= URI(Discourse.base_url) - unless uri.host.present? || href.start_with?("mailto") - link["href"] = "#{site_uri}#{link["href"]}" - end + href = a["href"].to_s + next if href.blank? + next if href.start_with?("mailto:") + next if href.start_with?(Discourse.base_url) + next if URI(href).host.present? + + a["href"] = ( + if href.start_with?(Discourse.base_path) + "#{Discourse.base_url_no_prefix}#{href}" + else + "#{Discourse.base_url}#{href}" + end + ) rescue URI::Error # leave it end diff --git a/spec/lib/pretty_text_spec.rb b/spec/lib/pretty_text_spec.rb index ff4b2533aa4..b182dc92154 100644 --- a/spec/lib/pretty_text_spec.rb +++ b/spec/lib/pretty_text_spec.rb @@ -1322,153 +1322,207 @@ RSpec.describe PrettyText do end describe "format_for_email" do - let(:base_url) { "http://baseurl.net" } + context "when (sub)domain" do + before { Discourse.stubs(:base_path).returns("") } - before { Discourse.stubs(:base_url).returns(base_url) } - - it "does not crash" do - PrettyText.format_for_email( - 'test', - post, - ) - end - - it "adds base url to relative links" do - html = - "

@wiseguy, @trollol what do you guys think?

" - output = described_class.format_for_email(html, post) - expect(output).to eq( - "

@wiseguy, @trollol what do you guys think?

", - ) - end - - it "doesn't change external absolute links" do - html = "

Check out this guy.

" - expect(described_class.format_for_email(html, post)).to eq(html) - end - - it "doesn't change internal absolute links" do - html = "

Check out this guy.

" - expect(described_class.format_for_email(html, post)).to eq(html) - end - - it "can tolerate invalid URLs" do - html = "

Check out this guy.

" - expect { described_class.format_for_email(html, post) }.to_not raise_error - end - - it "doesn't change mailto" do - html = "

Contact me at this address.

" - expect(PrettyText.format_for_email(html, post)).to eq(html) - end - - it "prefers data-original-href attribute to get Vimeo iframe link and escapes it" do - html = - "

Check out this video – .

" - expect(PrettyText.format_for_email(html, post)).to match( - Regexp.escape("https://vimeo.com/329875646/%3E%20%3Cscript%3Ealert(1)%3C/script%3E"), - ) - end - - it "creates a valid URL when data-original-href is missing from Vimeo link" do - html = - '' - expect(PrettyText.format_for_email(html, post)).to match( - "https://vimeo.com/508864124/fcbbcc92fa", - ) - end - - describe "#convert_vimeo_iframes" do - it "converts + test HTML - md = PrettyText.format_for_email(html, post) - - expect(md).not_to include("This is a Vimeo link:

-

https://vimeo.com/1

+ expect(described_class.format_for_email(html, post)).to eq <<~HTML + test HTML end + + it "adds base url to relative links" do + html = <<~HTML +

@wiseguy, @trollol what do you guys think?

+ HTML + + expect(described_class.format_for_email(html, post)).to eq <<~HTML +

@wiseguy, @trollol what do you guys think?

+ HTML + end + + it "doesn't change external absolute links" do + html = <<~HTML +

Check out this guy.

+ HTML + + expect(described_class.format_for_email(html, post)).to eq(html) + end + + it "doesn't change internal absolute links" do + html = <<~HTML +

Check out this guy.

+ HTML + + expect(described_class.format_for_email(html, post)).to eq(html) + end + + it "can tolerate invalid URLs" do + html = <<~HTML +

Check out this guy.

+ HTML + + expect(described_class.format_for_email(html, post)).to eq(html) + end + + it "doesn't change mailto" do + html = <<~HTML +

Contact me at this address.

+ HTML + + expect(described_class.format_for_email(html, post)).to eq(html) + end + + it "prefers data-original-href attribute to get Vimeo iframe link and escapes it" do + html = <<~HTML +

Check out this video – .

+ HTML + + expect(described_class.format_for_email(html, post)).to match( + Regexp.escape("https://vimeo.com/329875646/%3E%20%3Cscript%3Ealert(1)%3C/script%3E"), + ) + end + + it "creates a valid URL when data-original-href is missing from Vimeo link" do + html = <<~HTML + + HTML + + expect(described_class.format_for_email(html, post)).to match( + "https://vimeo.com/508864124/fcbbcc92fa", + ) + end + + describe "#convert_vimeo_iframes" do + it "converts + HTML + + md = described_class.format_for_email(html, post) + + expect(md).not_to include("This is a Vimeo link:

+

https://vimeo.com/1

+ HTML + end + end + + describe "#strip_secure_uploads" do + before do + setup_s3 + SiteSetting.s3_cdn_url = "https://s3.cdn.com" + SiteSetting.secure_uploads = true + SiteSetting.login_required = true + end + + it "replaces secure video content" do + html = <<~HTML + + HTML + + md = described_class.format_for_email(html, post) + + expect(md).not_to include(" + + Audio label + + + HTML + + md = described_class.format_for_email(html, post) + + expect(md).not_to include(" + HTML + + md = described_class.format_for_email(html, post) + + expect(md).not_to include(" + HTML + + md = described_class.format_for_email(html, post) + + expect(md.scan(/stripped-secure-view-upload/).length).to eq(1) + expect(md.scan(/Redacted/).length).to eq(1) + end + + it "replaces secure images with a placeholder, keeping the url in an attribute" do + url = "/secure-uploads/original/1X/testimage.png" + html = <<~HTML + + HTML + + md = described_class.format_for_email(html, post) + + expect(md).not_to include(" - - Video label - - +

@wiseguy, @trollol what do you guys think?

HTML - md = PrettyText.format_for_email(html, post) - - expect(md).not_to include("@wiseguy, @trollol what do you guys think?

+ HTML end - it "replaces secure audio content" do + it "doesn't change external absolute links" do html = <<~HTML - +

Check out this guy.

HTML - md = PrettyText.format_for_email(html, post) - - expect(md).not_to include(" +

Check out this guy.

HTML - md = PrettyText.format_for_email(html, post) - expect(md).not_to include(" - HTML - md = PrettyText.format_for_email(html, post) - md = PrettyText.format_for_email(md, post) - - expect(md.scan(/stripped-secure-view-upload/).length).to eq(1) - expect(md.scan(/Redacted/).length).to eq(1) - end - - it "replaces secure images with a placeholder, keeping the url in an attribute" do - url = "/secure-uploads/original/1X/testimage.png" - html = <<~HTML - - HTML - md = PrettyText.format_for_email(html, post) - expect(md).not_to include("