mirror of
https://github.com/discourse/discourse.git
synced 2025-06-04 22:26:02 +08:00
DEV: Extract out html cleanup so it can be used on other types of cooked content (#31385)
`PrettyText.cook` does two things: 1️⃣ it converts raw Markdown to cooked HTML, and 2️⃣ it partially sanitises the result. This commit extracts step 2️⃣ into a standalone `PrettyText.cleanup` method so that it can also be applied to other types of cooked content.
This commit is contained in:
@ -295,25 +295,13 @@ module PrettyText
|
||||
JS
|
||||
end
|
||||
|
||||
def self.cook(text, opts = {})
|
||||
def self.cook(raw, opts = {})
|
||||
options = opts.dup
|
||||
working_text = text.dup
|
||||
working_text = raw.dup
|
||||
|
||||
sanitized = markdown(working_text, options)
|
||||
html = markdown(working_text, options)
|
||||
|
||||
doc = Nokogiri::HTML5.fragment(sanitized)
|
||||
|
||||
add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
|
||||
add_rel_attributes_to_user_content(doc, add_nofollow)
|
||||
strip_hidden_unicode_bidirectional_characters(doc)
|
||||
sanitize_hotlinked_media(doc)
|
||||
add_video_placeholder_image(doc)
|
||||
|
||||
add_mentions(doc, user_id: opts[:user_id]) if SiteSetting.enable_mentions
|
||||
|
||||
scrubber = Loofah::Scrubber.new { |node| node.remove if node.name == "script" }
|
||||
loofah_fragment = Loofah.html5_fragment(doc.to_html)
|
||||
loofah_fragment.scrub!(scrubber).to_html
|
||||
cleanup(html, opts)
|
||||
end
|
||||
|
||||
def self.strip_hidden_unicode_bidirectional_characters(doc)
|
||||
@ -692,6 +680,22 @@ module PrettyText
|
||||
rval
|
||||
end
|
||||
|
||||
def self.cleanup(html, opts = {})
|
||||
doc = Nokogiri::HTML5.fragment(html)
|
||||
|
||||
add_nofollow = !opts[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
|
||||
add_rel_attributes_to_user_content(doc, add_nofollow)
|
||||
strip_hidden_unicode_bidirectional_characters(doc)
|
||||
sanitize_hotlinked_media(doc)
|
||||
add_video_placeholder_image(doc)
|
||||
|
||||
add_mentions(doc, user_id: opts[:user_id]) if SiteSetting.enable_mentions
|
||||
|
||||
scrubber = Loofah::Scrubber.new { |node| node.remove if node.name == "script" }
|
||||
loofah_fragment = Loofah.html5_fragment(doc.to_html)
|
||||
loofah_fragment.scrub!(scrubber).to_html
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
USER_TYPE = "user"
|
||||
|
@ -731,6 +731,26 @@ RSpec.describe PrettyText do
|
||||
expect(cooked).to eq(html.strip)
|
||||
end
|
||||
|
||||
it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do
|
||||
cooked = <<~HTML
|
||||
<p>X</p>
|
||||
<pre><code class="lang-auto">var isAdmin = false;
|
||||
\u202E
|
||||
</code></pre>
|
||||
HTML
|
||||
cleaned = PrettyText.cleanup(cooked)
|
||||
hidden_bidi_title = I18n.t("post.hidden_bidi_character")
|
||||
|
||||
html = <<~HTML
|
||||
<p>X</p>
|
||||
<pre><code class="lang-auto">var isAdmin = false;
|
||||
<span class="bidi-warning" title="#{hidden_bidi_title}"><U+202E></span>
|
||||
</code></pre>
|
||||
HTML
|
||||
|
||||
expect(cleaned.strip).to eq(html.strip)
|
||||
end
|
||||
|
||||
it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters, making sure they are replaced" do
|
||||
bad_bidi = [
|
||||
"\u202A",
|
||||
@ -2389,6 +2409,9 @@ HTML
|
||||
it "should strip SCRIPT" do
|
||||
expect(PrettyText.cook("<script>alert(42)</script>")).to eq ""
|
||||
expect(PrettyText.cook("<div><script>alert(42)</script></div>")).to eq "<div></div>"
|
||||
|
||||
expect(PrettyText.cleanup("<script>alert(42)</script>")).to eq ""
|
||||
expect(PrettyText.cleanup("<div><script>alert(42)</script></div>")).to eq "<div></div>"
|
||||
end
|
||||
|
||||
it "strips script regardless of sanitize" do
|
||||
|
Reference in New Issue
Block a user