DEV: Rename UriHelper.escape_uri to .normalized_encode

This is a much better description of its function. It performs idempotent normalization of a URL. If consumers truly need to `encode` a URL (including double-encoding of existing encoded entities), they can use the existing `.encode` method.
This commit is contained in:
David Taylor
2022-08-09 11:28:29 +01:00
parent f0a0252526
commit 3c81683955
9 changed files with 40 additions and 34 deletions

View File

@ -16,7 +16,7 @@ class EmbeddableHost < ActiveRecord::Base
def self.record_for_url(uri) def self.record_for_url(uri)
if uri.is_a?(String) if uri.is_a?(String)
uri = begin uri = begin
URI(UrlHelper.escape_uri(uri)) URI(UrlHelper.normalized_encode(uri))
rescue URI::Error, Addressable::URI::InvalidURIError rescue URI::Error, Addressable::URI::InvalidURIError
end end
end end
@ -50,7 +50,7 @@ class EmbeddableHost < ActiveRecord::Base
return true if url&.starts_with?(Discourse.base_url) && EmbeddableHost.exists? return true if url&.starts_with?(Discourse.base_url) && EmbeddableHost.exists?
uri = begin uri = begin
URI(UrlHelper.escape_uri(url)) URI(UrlHelper.normalized_encode(url))
rescue URI::Error rescue URI::Error
end end

View File

@ -110,7 +110,7 @@ class TopicEmbed < ActiveRecord::Base
def self.find_remote(url) def self.find_remote(url)
require 'ruby-readability' require 'ruby-readability'
url = UrlHelper.escape_uri(url) url = UrlHelper.normalized_encode(url)
original_uri = URI.parse(url) original_uri = URI.parse(url)
fd = FinalDestination.new( fd = FinalDestination.new(
url, url,
@ -164,7 +164,7 @@ class TopicEmbed < ActiveRecord::Base
unless (src.nil? || src.empty?) unless (src.nil? || src.empty?)
begin begin
# convert URL to absolute form # convert URL to absolute form
node[url_param] = URI.join(url, UrlHelper.escape_uri(src)).to_s node[url_param] = URI.join(url, UrlHelper.normalized_encode(src)).to_s
rescue URI::Error, Addressable::URI::InvalidURIError rescue URI::Error, Addressable::URI::InvalidURIError
# If there is a mistyped URL, just do nothing # If there is a mistyped URL, just do nothing
end end
@ -200,7 +200,7 @@ class TopicEmbed < ActiveRecord::Base
def self.absolutize_urls(url, contents) def self.absolutize_urls(url, contents)
url = normalize_url(url) url = normalize_url(url)
begin begin
uri = URI(UrlHelper.escape_uri(url)) uri = URI(UrlHelper.normalized_encode(url))
rescue URI::Error rescue URI::Error
return contents return contents
end end

View File

@ -36,7 +36,7 @@ class FinalDestination
def initialize(url, opts = nil) def initialize(url, opts = nil)
@url = url @url = url
@uri = uri(escape_url) if @url @uri = uri(normalized_url) if @url
@opts = opts || {} @opts = opts || {}
@force_get_hosts = @opts[:force_get_hosts] || [] @force_get_hosts = @opts[:force_get_hosts] || []
@ -417,8 +417,8 @@ class FinalDestination
false false
end end
def escape_url def normalized_url
UrlHelper.escape_uri(@url) UrlHelper.normalized_encode(@url)
end end
def private_ranges def private_ranges

View File

@ -237,7 +237,7 @@ module Oneboxer
end end
def self.onebox_raw(url, opts = {}) def self.onebox_raw(url, opts = {})
url = UrlHelper.escape_uri(url).to_s url = UrlHelper.normalized_encode(url).to_s
local_onebox(url, opts) || external_onebox(url) local_onebox(url, opts) || external_onebox(url)
rescue => e rescue => e
# no point warning here, just cause we have an issue oneboxing a url # no point warning here, just cause we have an issue oneboxing a url

View File

@ -488,7 +488,7 @@ module PrettyText
def self.convert_vimeo_iframes(doc) def self.convert_vimeo_iframes(doc)
doc.css("iframe[src*='player.vimeo.com']").each do |iframe| doc.css("iframe[src*='player.vimeo.com']").each do |iframe|
if iframe["data-original-href"].present? if iframe["data-original-href"].present?
vimeo_url = UrlHelper.escape_uri(iframe["data-original-href"]) vimeo_url = UrlHelper.normalized_encode(iframe["data-original-href"])
else else
vimeo_id = iframe['src'].split('/').last vimeo_id = iframe['src'].split('/').last
vimeo_url = "https://vimeo.com/#{vimeo_id}" vimeo_url = "https://vimeo.com/#{vimeo_id}"

View File

@ -60,10 +60,16 @@ class UrlHelper
self.absolute(Upload.secure_media_url_from_upload_url(url), nil) self.absolute(Upload.secure_media_url_from_upload_url(url), nil)
end end
# This is a poorly named method. In reality, it **normalizes** the given URL,
# and does not escape it. Therefore it's idempotent, and can be used on user
# input which includes a mix of escaped and unescaped characters
def self.escape_uri(uri) def self.escape_uri(uri)
Discourse.deprecate(
"UrlHelper.escape_uri is deprecated. For normalization of user input use `.normalized_encode`. For true encoding, use `.encode`",
output_in_test: true,
drop_from: '3.0'
)
normalized_encode(uri)
end
def self.normalized_encode(uri)
validated = nil validated = nil
url = uri.to_s url = uri.to_s

View File

@ -147,7 +147,7 @@ after_initialize do
post = options[:post] post = options[:post]
replacement = post&.url.present? ? replacement = post&.url.present? ?
"<a href='#{UrlHelper.escape_uri(post.url)}'>#{I18n.t("poll.poll")}</a>" : "<a href='#{UrlHelper.normalized_encode(post.url)}'>#{I18n.t("poll.poll")}</a>" :
I18n.t("poll.poll") I18n.t("poll.poll")
doc.css("div.poll").each do |poll| doc.css("div.poll").each do |poll|

View File

@ -606,21 +606,21 @@ RSpec.describe FinalDestination do
end end
end end
describe "#escape_url" do describe "#normalized_url" do
it "correctly escapes url" do it "correctly normalizes url" do
fragment_url = "https://eviltrout.com/2016/02/25/fixing-android-performance.html#discourse-comments" fragment_url = "https://eviltrout.com/2016/02/25/fixing-android-performance.html#discourse-comments"
expect(fd(fragment_url).escape_url.to_s).to eq(fragment_url) expect(fd(fragment_url).normalized_url.to_s).to eq(fragment_url)
expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").escape_url.to_s) expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").normalized_url.to_s)
.to eq("https://eviltrout.com?s=180&#038;d=mm&%23038;r=g") .to eq("https://eviltrout.com?s=180&#038;d=mm&%23038;r=g")
expect(fd("http://example.com/?a=\11\15").escape_url.to_s).to eq("http://example.com/?a=%09%0D") expect(fd("http://example.com/?a=\11\15").normalized_url.to_s).to eq("http://example.com/?a=%09%0D")
expect(fd("https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE").escape_url.to_s) expect(fd("https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE").normalized_url.to_s)
.to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE') .to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE')
expect(fd('https://ru.wikipedia.org/wiki/Свобо').escape_url.to_s) expect(fd('https://ru.wikipedia.org/wiki/Свобо').normalized_url.to_s)
.to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE') .to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE')
end end
end end

View File

@ -85,40 +85,40 @@ RSpec.describe UrlHelper do
end end
end end
describe "#escape_uri" do describe "#normalized_encode" do
it "does not double escape %3A (:)" do it "does not double escape %3A (:)" do
url = "http://discourse.org/%3A/test" url = "http://discourse.org/%3A/test"
expect(UrlHelper.escape_uri(url)).to eq(url) expect(UrlHelper.normalized_encode(url)).to eq(url)
end end
it "does not double escape %2F (/)" do it "does not double escape %2F (/)" do
url = "http://discourse.org/%2F/test" url = "http://discourse.org/%2F/test"
expect(UrlHelper.escape_uri(url)).to eq(url) expect(UrlHelper.normalized_encode(url)).to eq(url)
end end
it "doesn't escape simple URL" do it "doesn't escape simple URL" do
url = UrlHelper.escape_uri('http://example.com/foo/bar') url = UrlHelper.normalized_encode('http://example.com/foo/bar')
expect(url).to eq('http://example.com/foo/bar') expect(url).to eq('http://example.com/foo/bar')
end end
it "escapes unsafe chars" do it "escapes unsafe chars" do
url = UrlHelper.escape_uri("http://example.com/?a=\11\15") url = UrlHelper.normalized_encode("http://example.com/?a=\11\15")
expect(url).to eq('http://example.com/?a=%09%0D') expect(url).to eq('http://example.com/?a=%09%0D')
end end
it "escapes non-ascii chars" do it "escapes non-ascii chars" do
url = UrlHelper.escape_uri('http://example.com/ماهی') url = UrlHelper.normalized_encode('http://example.com/ماهی')
expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C') expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C')
end end
it "doesn't escape already escaped chars (space)" do it "doesn't escape already escaped chars (space)" do
url = UrlHelper.escape_uri('http://example.com/foo%20bar/foo bar/') url = UrlHelper.normalized_encode('http://example.com/foo%20bar/foo bar/')
expect(url).to eq('http://example.com/foo%20bar/foo%20bar/') expect(url).to eq('http://example.com/foo%20bar/foo%20bar/')
end end
it "doesn't escape already escaped chars (hash)" do it "doesn't escape already escaped chars (hash)" do
url = 'https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe%2FLondon' url = 'https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe%2FLondon'
escaped = UrlHelper.escape_uri(url) escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(url) expect(escaped).to eq(url)
end end
@ -126,27 +126,27 @@ RSpec.describe UrlHelper do
skip "see: https://github.com/sporkmonger/addressable/issues/472" skip "see: https://github.com/sporkmonger/addressable/issues/472"
url = "https://example.com/ article/id%3A1.2%2F1/bar" url = "https://example.com/ article/id%3A1.2%2F1/bar"
expected = "https://example.com/%20article/id%3A1.2%2F1/bar" expected = "https://example.com/%20article/id%3A1.2%2F1/bar"
escaped = UrlHelper.escape_uri(url) escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(expected) expect(escaped).to eq(expected)
end end
it "handles emoji domain names" do it "handles emoji domain names" do
url = "https://💻.example/💻?computer=💻" url = "https://💻.example/💻?computer=💻"
expected = "https://xn--3s8h.example/%F0%9F%92%BB?computer=%F0%9F%92%BB" expected = "https://xn--3s8h.example/%F0%9F%92%BB?computer=%F0%9F%92%BB"
escaped = UrlHelper.escape_uri(url) escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(expected) expect(escaped).to eq(expected)
end end
it "handles special-character domain names" do it "handles special-character domain names" do
url = "https://éxample.com/test" url = "https://éxample.com/test"
expected = "https://xn--xample-9ua.com/test" expected = "https://xn--xample-9ua.com/test"
escaped = UrlHelper.escape_uri(url) escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(expected) expect(escaped).to eq(expected)
end end
it "performs basic normalization" do it "performs basic normalization" do
url = "http://EXAMPLE.com/a" url = "http://EXAMPLE.com/a"
escaped = UrlHelper.escape_uri(url) escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq("http://example.com/a") expect(escaped).to eq("http://example.com/a")
end end
@ -155,7 +155,7 @@ RSpec.describe UrlHelper do
# sensitive information stripped # sensitive information stripped
presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977%2F20200130%2Fus-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah%2Bblahblah%2Fblah%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQAR&X-Amz-Signature=test" presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977%2F20200130%2Fus-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah%2Bblahblah%2Fblah%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQAR&X-Amz-Signature=test"
encoded_presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977/20200130/us-west-1/s3/aws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah+blahblah/blah//////////wEQA==&X-Amz-Signature=test" encoded_presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977/20200130/us-west-1/s3/aws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah+blahblah/blah//////////wEQA==&X-Amz-Signature=test"
expect(UrlHelper.escape_uri(presigned_url)).not_to eq(encoded_presigned_url) expect(UrlHelper.normalized_encode(presigned_url)).not_to eq(encoded_presigned_url)
end end
end end