diff --git a/app/models/topic_link.rb b/app/models/topic_link.rb index 8876057833e..a7be7fa00c3 100644 --- a/app/models/topic_link.rb +++ b/app/models/topic_link.rb @@ -117,11 +117,7 @@ SQL PrettyText .extract_links(post.cooked) .map do |u| - uri = begin - URI.parse(u.url) - rescue URI::Error - end - + uri = UrlHelper.relaxed_parse(u.url) [u, uri] end .reject { |_, p| p.nil? || "mailto".freeze == p.scheme } diff --git a/app/models/topic_link_click.rb b/app/models/topic_link_click.rb index 7ae52e6b09d..6caaff82d4a 100644 --- a/app/models/topic_link_click.rb +++ b/app/models/topic_link_click.rb @@ -15,11 +15,7 @@ class TopicLinkClick < ActiveRecord::Base url = args[:url][0...TopicLink.max_url_length] return nil if url.blank? - uri = begin - URI.parse(url) - rescue URI::Error - end - + uri = UrlHelper.relaxed_parse(url) urls = Set.new urls << url if url =~ /^http/ diff --git a/lib/url_helper.rb b/lib/url_helper.rb index 377c7d38cd1..976a63dd9a5 100644 --- a/lib/url_helper.rb +++ b/lib/url_helper.rb @@ -1,5 +1,20 @@ class UrlHelper + # At the moment this handles invalid URLs that browser address bar accepts + # where second # is not encoded + # + # Longer term we can add support of simpleidn and encode unicode domains + def self.relaxed_parse(url) + url, fragment = url.split("#", 2) + uri = URI.parse(url) + if uri + fragment = URI.escape(fragment) if fragment&.include?('#') + uri.fragment = fragment + uri + end + rescue URI::Error + end + def self.is_local(url) url.present? && ( Discourse.store.has_been_uploaded?(url) || diff --git a/spec/components/url_helper_spec.rb b/spec/components/url_helper_spec.rb index 2dd48c726c7..0ca6596d7d1 100644 --- a/spec/components/url_helper_spec.rb +++ b/spec/components/url_helper_spec.rb @@ -3,6 +3,15 @@ require_dependency 'url_helper' describe UrlHelper do + describe "#relaxed parse" do + + it "can handle double #" do + url = UrlHelper.relaxed_parse("https://test.com#test#test") + expect(url.to_s).to eq("https://test.com#test%23test") + end + + end + describe "#is_local" do it "is true when the file has been uploaded" do diff --git a/spec/fabricators/post_fabricator.rb b/spec/fabricators/post_fabricator.rb index 6f1a9a91622..b01f7e17a45 100644 --- a/spec/fabricators/post_fabricator.rb +++ b/spec/fabricators/post_fabricator.rb @@ -95,27 +95,28 @@ Fabricator(:post_with_uploads, from: :post) do end Fabricator(:post_with_uploads_and_links, from: :post) do - raw ' -Link - -Google - -text.txt (20 Bytes) -:smile: -' + raw <<~RAW + Link + + Google + + text.txt (20 Bytes) + :smile: + RAW end Fabricator(:post_with_external_links, from: :post) do user topic - raw " -Here's a link to twitter: http://twitter.com -And a link to google: http://google.com -And a secure link to google: https://google.com -And a markdown link: [forumwarz](http://forumwarz.com) -And a markdown link with a period after it [codinghorror](http://www.codinghorror.com/blog). -And one with a hash http://discourse.org#faq - " + raw <<~RAW + Here's a link to twitter: http://twitter.com + And a link to google: http://google.com + And a secure link to google: https://google.com + And a markdown link: [forumwarz](http://forumwarz.com) + And a markdown link with a period after it [codinghorror](http://www.codinghorror.com/blog). + And one with a hash http://discourse.org#faq + And one with a two hash http://discourse.org#a#b + RAW end Fabricator(:private_message_post, from: :post) do diff --git a/spec/models/topic_link_click_spec.rb b/spec/models/topic_link_click_spec.rb index 5b440f7265d..20da8c354ad 100644 --- a/spec/models/topic_link_click_spec.rb +++ b/spec/models/topic_link_click_spec.rb @@ -54,6 +54,10 @@ describe TopicLinkClick do TopicLinkClick.create_from(url: @topic_link.url, post_id: @post.id, ip: '127.0.0.0', user_id: @post.user_id) }.not_to change(TopicLinkClick, :count) + # can handle double # in a url + # NOTE: this is not compliant but exists in the wild + click = TopicLinkClick.create_from(url: "http://discourse.org#a#b", post_id: @post.id, ip: '127.0.0.1') + expect(click).to eq("http://discourse.org#a#b") end context 'with a valid url and post_id' do