From ae5ad250f671471f7e6601f443a6dfb9f1ea12c1 Mon Sep 17 00:00:00 2001
From: Natalie Tay
Date: Tue, 25 Feb 2025 10:36:36 +0800
Subject: [PATCH] DEV: Extract out html cleanup so it can be used on other types of cooked content (#31385)

`PrettyText.cook` does two things: :one: convert raw to cooked, :two: partial sanitisation. This commit splits the :two: up so that it can be applied to other cooked content.
---
 lib/pretty_text.rb           | 36 ++++++++++++++++++++----------------
 spec/lib/pretty_text_spec.rb | 23 +++++++++++++++++++++++
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb
index e73e0af4499..04ec6d9a718 100644
--- a/lib/pretty_text.rb
+++ b/lib/pretty_text.rb
@@ -295,25 +295,13 @@ module PrettyText
     JS
   end
 
-  def self.cook(text, opts = {})
+  def self.cook(raw, opts = {})
     options = opts.dup
-    working_text = text.dup
+    working_text = raw.dup
 
-    sanitized = markdown(working_text, options)
+    html = markdown(working_text, options)
 
-    doc = Nokogiri::HTML5.fragment(sanitized)
-
-    add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
-    add_rel_attributes_to_user_content(doc, add_nofollow)
-    strip_hidden_unicode_bidirectional_characters(doc)
-    sanitize_hotlinked_media(doc)
-    add_video_placeholder_image(doc)
-
-    add_mentions(doc, user_id: opts[:user_id]) if SiteSetting.enable_mentions
-
-    scrubber = Loofah::Scrubber.new { |node| node.remove if node.name == "script" }
-    loofah_fragment = Loofah.html5_fragment(doc.to_html)
-    loofah_fragment.scrub!(scrubber).to_html
+    cleanup(html, opts)
   end
 
   def self.strip_hidden_unicode_bidirectional_characters(doc)
@@ -692,6 +680,22 @@ module PrettyText
     rval
   end
 
+  def self.cleanup(html, opts = {})
+    doc = Nokogiri::HTML5.fragment(html)
+
+    add_nofollow = !opts[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
+    add_rel_attributes_to_user_content(doc, add_nofollow)
+    strip_hidden_unicode_bidirectional_characters(doc)
+    sanitize_hotlinked_media(doc)
+    add_video_placeholder_image(doc)
+
+    add_mentions(doc, user_id: opts[:user_id]) if SiteSetting.enable_mentions
+
+    scrubber = Loofah::Scrubber.new { |node| node.remove if node.name == "script" }
+    loofah_fragment = Loofah.html5_fragment(doc.to_html)
+    loofah_fragment.scrub!(scrubber).to_html
+  end
+
   private
 
   USER_TYPE = "user"
diff --git a/spec/lib/pretty_text_spec.rb b/spec/lib/pretty_text_spec.rb
index 819c1832188..95d6d60bd97 100644
--- a/spec/lib/pretty_text_spec.rb
+++ b/spec/lib/pretty_text_spec.rb
@@ -731,6 +731,26 @@ RSpec.describe PrettyText do
       expect(cooked).to eq(html.strip)
     end
 
+    it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do
+      cooked = <<~HTML
+

X

+
var isAdmin = false;
+        \u202E
+        
+ HTML + cleaned = PrettyText.cleanup(cooked) + hidden_bidi_title = I18n.t("post.hidden_bidi_character") + + html = <<~HTML +

X

+
var isAdmin = false;
+        <U+202E>
+        
+ HTML + + expect(cleaned.strip).to eq(html.strip) + end + it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters, making sure they are replaced" do bad_bidi = [ "\u202A", @@ -2389,6 +2409,9 @@ HTML it "should strip SCRIPT" do expect(PrettyText.cook("")).to eq "" expect(PrettyText.cook("
")).to eq "
" + + expect(PrettyText.cleanup("")).to eq "" + expect(PrettyText.cleanup("
")).to eq "
" end it "strips script regardless of sanitize" do