From 9bff0882c3df594850de68ade2697407c84dcd7b Mon Sep 17 00:00:00 2001
From: Krzysztof Kotlarek <kotlarek.krzysztof@gmail.com>
Date: Tue, 5 May 2020 13:46:57 +1000
Subject: [PATCH] FEATURE: Nokogumbo (#9577)

* FEATURE: Nokogumbo

Use the Nokogumbo HTML5 parser: replace Nokogiri::HTML parsing calls with Nokogiri::HTML5.
---
 app/helpers/user_notifications_helper.rb      |   4 +-
 app/jobs/onceoff/grant_emoji.rb               |   2 +-
 app/jobs/onceoff/grant_onebox.rb              |   2 +-
 app/jobs/regular/pull_hotlinked_images.rb     |   2 +-
 app/jobs/regular/update_username.rb           |   4 +-
 app/models/category.rb                        |   2 +-
 app/models/post.rb                            |   2 +-
 app/models/post_analyzer.rb                   |   2 +-
 app/models/quoted_post.rb                     |   2 +-
 app/models/theme_field.rb                     |   2 +-
 app/models/topic_embed.rb                     |   8 +-
 app/services/inline_uploads.rb                |   6 +-
 app/services/search_indexer.rb                |   2 +-
 ...3951_backfill_post_upload_reverse_index.rb |   2 +-
 db/migrate/20140715055242_add_quoted_posts.rb |   2 +-
 lib/content_security_policy/extension.rb      |   3 +-
 lib/cooked_post_processor.rb                  |   4 +-
 lib/discourse_diff.rb                         |   2 +-
 lib/email/receiver.rb                         |   2 +-
 lib/email/styles.rb                           |  10 +-
 .../engine/whitelisted_generic_onebox.rb      |   2 +-
 lib/oneboxer.rb                               |   6 +-
 lib/post_revisor.rb                           |   2 +-
 lib/pretty_text.rb                            |  26 +++--
 lib/quote_comparer.rb                         |   2 +-
 lib/retrieve_title.rb                         |   2 +-
 lib/reviewable/conversation.rb                |   2 +-
 lib/tasks/emoji.rake                          |   2 +-
 .../spec/components/pretty_text_spec.rb       |   4 +-
 .../lib/discourse_narrative_bot/actions.rb    |   2 +-
 .../advanced_user_narrative.rb                |   6 +-
 .../new_user_narrative.rb                     |   8 +-
 plugins/poll/plugin.rb                        |   2 +-
 plugins/poll/spec/lib/pretty_text_spec.rb     |   2 +-
 script/import_scripts/ipboard3.rb             |   2 +-
 script/import_scripts/jive.rb                 |   2 +-
 script/import_scripts/jive_api.rb             |   2 +-
 script/import_scripts/lithium.rb              |   2 +-
 spec/components/cooked_post_processor_spec.rb | 107 ++++++++----------
 spec/components/email/styles_spec.rb          |   4 +-
 spec/components/excerpt_parser_spec.rb        |   2 +-
 spec/components/pretty_text_spec.rb           |  32 +++---
 spec/lib/content_security_policy_spec.rb      |   6 +-
 spec/models/topic_embed_spec.rb               |   4 +-
 spec/requests/categories_controller_spec.rb   |   2 +-
 spec/requests/email_controller_spec.rb        |   6 +-
 spec/requests/embed_controller_spec.rb        |   2 +-
 .../requests/user_api_keys_controller_spec.rb |   2 +-
 spec/services/username_changer_spec.rb        |  34 +++---
 spec/support/match_html_matcher.rb            |   2 +-
 50 files changed, 165 insertions(+), 179 deletions(-)

diff --git a/app/helpers/user_notifications_helper.rb b/app/helpers/user_notifications_helper.rb
index eb0293183b2..ffbf7352c2a 100644
--- a/app/helpers/user_notifications_helper.rb
+++ b/app/helpers/user_notifications_helper.rb
@@ -13,7 +13,7 @@ module UserNotificationsHelper
   end
 
   def correct_top_margin(html, desired)
-    fragment = Nokogiri::HTML.fragment(html)
+    fragment = Nokogiri::HTML5.fragment(html)
     if para = fragment.css("p:first").first
       para["style"] = "margin-top: #{desired};"
     end
@@ -32,7 +32,7 @@ module UserNotificationsHelper
   end
 
   def first_paragraphs_from(html)
-    doc = Nokogiri::HTML(html)
+    doc = Nokogiri::HTML5(html)
 
     result = +""
     length = 0
diff --git a/app/jobs/onceoff/grant_emoji.rb b/app/jobs/onceoff/grant_emoji.rb
index 5f85b431b82..5abdb34d5d4 100644
--- a/app/jobs/onceoff/grant_emoji.rb
+++ b/app/jobs/onceoff/grant_emoji.rb
@@ -14,7 +14,7 @@ module Jobs
         .where("cooked LIKE '%emoji%'")
         .find_in_batches do |group|
         group.each do |p|
-          doc = Nokogiri::HTML::fragment(p.cooked)
+          doc = Nokogiri::HTML5::fragment(p.cooked)
           if (doc.css("img.emoji") - doc.css(".quote img")).size > 0
             to_award[p.user_id] ||= { post_id: p.id, created_at: p.created_at }
           end
diff --git a/app/jobs/onceoff/grant_onebox.rb b/app/jobs/onceoff/grant_onebox.rb
index 59cf443f4e5..66d2cf26706 100644
--- a/app/jobs/onceoff/grant_onebox.rb
+++ b/app/jobs/onceoff/grant_onebox.rb
@@ -19,7 +19,7 @@ module Jobs
           begin
             # Note we can't use `p.cooked` here because oneboxes have been cooked out
             cooked = PrettyText.cook(p.raw)
-            doc = Nokogiri::HTML::fragment(cooked)
+            doc = Nokogiri::HTML5::fragment(cooked)
             if doc.search('a.onebox').size > 0
               to_award[p.user_id] ||= { post_id: p.id, created_at: p.created_at }
             end
diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index 25703659460..e7644e8ca84 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -157,7 +157,7 @@ module Jobs
     end
 
     def extract_images_from(html)
-      doc = Nokogiri::HTML::fragment(html)
+      doc = Nokogiri::HTML5::fragment(html)
 
       doc.css("img[src], a.lightbox[href]") -
         doc.css("img.avatar") -
diff --git a/app/jobs/regular/update_username.rb b/app/jobs/regular/update_username.rb
index d43c119060d..7c9fcedb5a2 100644
--- a/app/jobs/regular/update_username.rb
+++ b/app/jobs/regular/update_username.rb
@@ -154,11 +154,11 @@ module Jobs
     # and there is no reason to invalidate oneboxes, run the post analyzer etc.
     # when only the username changes.
     def update_cooked(cooked)
-      doc = Nokogiri::HTML.fragment(cooked)
+      doc = Nokogiri::HTML5.fragment(cooked)
 
       doc.css("a.mention").each do |a|
         a.content = a.content.gsub(@cooked_mention_username_regex, "@#{@new_username}")
-        a["href"] = a["href"].gsub(@cooked_mention_user_path_regex, "/u/#{@new_username}") if a["href"]
+        a["href"] = a["href"].gsub(@cooked_mention_user_path_regex, "/u/#{URI.escape(@new_username)}") if a["href"]
       end
 
       doc.css("aside.quote").each do |aside|
diff --git a/app/models/category.rb b/app/models/category.rb
index bbf1e9b98cb..ddab403c5d2 100644
--- a/app/models/category.rb
+++ b/app/models/category.rb
@@ -306,7 +306,7 @@ class Category < ActiveRecord::Base
 
     @@cache_text ||= LruRedux::ThreadSafeCache.new(1000)
     @@cache_text.getset(self.description) do
-      text = Nokogiri::HTML.fragment(self.description).text.strip
+      text = Nokogiri::HTML5.fragment(self.description).text.strip
       Rack::Utils.escape_html(text).html_safe
     end
   end
diff --git a/app/models/post.rb b/app/models/post.rb
index 5da5d163200..f2ee10543f3 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -953,7 +953,7 @@ class Post < ActiveRecord::Base
       /\/uploads\/short-url\/[a-zA-Z0-9]+(\.[a-z0-9]+)?/
     ]
 
-    fragments ||= Nokogiri::HTML::fragment(self.cooked)
+    fragments ||= Nokogiri::HTML5::fragment(self.cooked)
     selectors = fragments.css("a/@href", "img/@src", "source/@src", "track/@src", "video/@poster")
 
     links = selectors.map do |media|
diff --git a/app/models/post_analyzer.rb b/app/models/post_analyzer.rb
index 63fe9724b4d..bae36c39f86 100644
--- a/app/models/post_analyzer.rb
+++ b/app/models/post_analyzer.rb
@@ -131,7 +131,7 @@ class PostAnalyzer
 
   def cooked_stripped
     @cooked_stripped ||= begin
-      doc = Nokogiri::HTML.fragment(cook(@raw, topic_id: @topic_id))
+      doc = Nokogiri::HTML5.fragment(cook(@raw, topic_id: @topic_id))
       doc.css("pre .mention, aside.quote > .title, aside.quote .mention, aside.quote .mention-group, .onebox, .elided").remove
       doc
     end
diff --git a/app/models/quoted_post.rb b/app/models/quoted_post.rb
index 03b981e1fb3..9a6a96e9ebf 100644
--- a/app/models/quoted_post.rb
+++ b/app/models/quoted_post.rb
@@ -9,7 +9,7 @@ class QuotedPost < ActiveRecord::Base
   #  we are double parsing this fragment, this may be worth optimising later
   def self.extract_from(post)
 
-    doc = Nokogiri::HTML.fragment(post.cooked)
+    doc = Nokogiri::HTML5.fragment(post.cooked)
 
     uniq = {}
 
diff --git a/app/models/theme_field.rb b/app/models/theme_field.rb
index 351c7b90f1a..a9f98ce4078 100644
--- a/app/models/theme_field.rb
+++ b/app/models/theme_field.rb
@@ -78,7 +78,7 @@ class ThemeField < ActiveRecord::Base
 
     js_compiler = ThemeJavascriptCompiler.new(theme_id, self.theme.name)
 
-    doc = Nokogiri::HTML.fragment(html)
+    doc = Nokogiri::HTML5.fragment(html)
 
     doc.css('script[type="text/x-handlebars"]').each do |node|
       name = node["name"] || node["data-template-name"] || "broken"
diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb
index 43de234a159..a18e5b85dea 100644
--- a/app/models/topic_embed.rb
+++ b/app/models/topic_embed.rb
@@ -126,7 +126,7 @@ class TopicEmbed < ActiveRecord::Base
       return
     end
 
-    raw_doc = Nokogiri::HTML(html)
+    raw_doc = Nokogiri::HTML5(html)
     auth_element = raw_doc.at('meta[@name="author"]')
     if auth_element.present?
       response.author = User.where(username_lower: auth_element[:content].strip).first
@@ -142,7 +142,7 @@ class TopicEmbed < ActiveRecord::Base
       title.strip!
     end
     response.title = title
-    doc = Nokogiri::HTML(read_doc.content)
+    doc = Nokogiri::HTML5(read_doc.content)
 
     tags = { 'img' => 'src', 'script' => 'src', 'a' => 'href' }
     doc.search(tags.keys.join(',')).each do |node|
@@ -198,7 +198,7 @@ class TopicEmbed < ActiveRecord::Base
     prefix = "#{uri.scheme}://#{uri.host}"
     prefix += ":#{uri.port}" if uri.port != 80 && uri.port != 443
 
-    fragment = Nokogiri::HTML.fragment("<div>#{contents}</div>")
+    fragment = Nokogiri::HTML5.fragment("<div>#{contents}</div>")
     fragment.css('a').each do |a|
       href = a['href']
       if href.present? && href.start_with?('/')
@@ -220,7 +220,7 @@ class TopicEmbed < ActiveRecord::Base
   end
 
   def self.first_paragraph_from(html)
-    doc = Nokogiri::HTML(html)
+    doc = Nokogiri::HTML5(html)
 
     result = +""
     doc.css('p').each do |p|
diff --git a/app/services/inline_uploads.rb b/app/services/inline_uploads.rb
index e75348bccee..0facc329fba 100644
--- a/app/services/inline_uploads.rb
+++ b/app/services/inline_uploads.rb
@@ -16,7 +16,7 @@ class InlineUploads
       end
     end
 
-    cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown, disable_emojis: true))
+    cooked_fragment = Nokogiri::HTML5::fragment(PrettyText.cook(markdown, disable_emojis: true))
     link_occurences = []
 
     cooked_fragment.traverse do |node|
@@ -183,7 +183,7 @@ class InlineUploads
 
   def self.match_anchor(markdown, external_href: false)
     markdown.scan(/((<a[^<]+>)([^<\a>]*?)<\/a>)/i) do |match|
-      node = Nokogiri::HTML::fragment(match[0]).children[0]
+      node = Nokogiri::HTML5::fragment(match[0]).children[0]
       href =  node.attributes["href"]&.value
 
       if href && (matched_uploads(href).present? || external_href)
@@ -199,7 +199,7 @@ class InlineUploads
 
   def self.match_img(markdown, external_src: false)
     markdown.scan(/(<(?!img)[^<>]+\/?>)?(\s*)(<img [^>\n]+>)/i) do |match|
-      node = Nokogiri::HTML::fragment(match[2].strip).children[0]
+      node = Nokogiri::HTML5::fragment(match[2].strip).children[0]
       src =  node.attributes["src"]&.value
 
       if src && (matched_uploads(src).present? || external_src)
diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index 17817c4c099..ceeaaf27f25 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -191,7 +191,7 @@ class SearchIndexer
     def self.scrub(html, strip_diacritics: false)
       return +"" if html.blank?
 
-      document = Nokogiri::HTML("<div>#{html}</div>", nil, Encoding::UTF_8.to_s)
+      document = Nokogiri::HTML5("<div>#{html}</div>", nil, Encoding::UTF_8.to_s)
 
       nodes = document.css(
         "div.#{CookedPostProcessor::LIGHTBOX_WRAPPER_CSS_CLASS}"
diff --git a/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb b/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
index 793da0ce64a..0c5f4bb3e67 100644
--- a/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
+++ b/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
@@ -8,7 +8,7 @@ class BackfillPostUploadReverseIndex < ActiveRecord::Migration[4.2]
 
     # fill the reverse index up
     Post.select([:id, :cooked]).find_each do |post|
-      doc = Nokogiri::HTML::fragment(post.cooked)
+      doc = Nokogiri::HTML5::fragment(post.cooked)
       # images
       doc.search("img").each { |img| add_to_reverse_index(img['src'], post.id) }
       # thumbnails and/or attachments
diff --git a/db/migrate/20140715055242_add_quoted_posts.rb b/db/migrate/20140715055242_add_quoted_posts.rb
index 47550f92513..d3052b8a07c 100644
--- a/db/migrate/20140715055242_add_quoted_posts.rb
+++ b/db/migrate/20140715055242_add_quoted_posts.rb
@@ -30,7 +30,7 @@ SQL
 
     results.each do |row|
       post_id, max_id = row["id"].to_i
-      doc = Nokogiri::HTML.fragment(row["cooked"])
+      doc = Nokogiri::HTML5.fragment(row["cooked"])
 
       uniq = {}
 
diff --git a/lib/content_security_policy/extension.rb b/lib/content_security_policy/extension.rb
index 4c8231b60a1..93eab088e41 100644
--- a/lib/content_security_policy/extension.rb
+++ b/lib/content_security_policy/extension.rb
@@ -61,7 +61,8 @@ class ContentSecurityPolicy
       auto_script_src_extension = { script_src: [] }
       html_fields.each(&:ensure_baked!)
       doc = html_fields.map(&:value_baked).join("\n")
-      Nokogiri::HTML.fragment(doc).css('script[src]').each do |node|
+
+      Nokogiri::HTML5.fragment(doc).css('script[src]').each do |node|
         src = node['src']
         uri = URI(src)
 
diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb
index a2d75bfdf9e..725258cc10d 100644
--- a/lib/cooked_post_processor.rb
+++ b/lib/cooked_post_processor.rb
@@ -24,7 +24,7 @@ class CookedPostProcessor
     @cooking_options = @cooking_options.symbolize_keys
 
     cooked = post.cook(post.raw, @cooking_options)
-    @doc = Nokogiri::HTML::fragment(cooked)
+    @doc = Nokogiri::HTML5::fragment(cooked)
     @has_oneboxes = post.post_analyzer.found_oneboxes?
     @size_cache = {}
 
@@ -95,7 +95,7 @@ class CookedPostProcessor
 
     return if previous.blank?
 
-    previous_text = Nokogiri::HTML::fragment(previous).text.strip
+    previous_text = Nokogiri::HTML5::fragment(previous).text.strip
     quoted_text = @doc.css("aside.quote:first-child blockquote").first&.text&.strip || ""
 
     return if previous_text.gsub(/(\s){2,}/, '\1') != quoted_text.gsub(/(\s){2,}/, '\1')
diff --git a/lib/discourse_diff.rb b/lib/discourse_diff.rb
index c2b31716237..cdf37870201 100644
--- a/lib/discourse_diff.rb
+++ b/lib/discourse_diff.rb
@@ -168,7 +168,7 @@ class DiscourseDiff
   end
 
   def tokenize_html_blocks(html)
-    Nokogiri::HTML.fragment(html).search("./*").map(&:to_html)
+    Nokogiri::HTML5.fragment(html).search("./*").map(&:to_html)
   end
 
   def tokenize_html(html)
diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb
index 69f7ccf2704..3cb73a8976d 100644
--- a/lib/email/receiver.rb
+++ b/lib/email/receiver.rb
@@ -338,7 +338,7 @@ module Email
       markdown, elided_markdown = if html.present?
         # use the first html extracter that matches
         if html_extracter = HTML_EXTRACTERS.select { |_, r| html[r] }.min_by { |_, r| html =~ r }
-          doc = Nokogiri::HTML.fragment(html)
+          doc = Nokogiri::HTML5.fragment(html)
           self.public_send(:"extract_from_#{html_extracter[0]}", doc)
         else
           markdown = HtmlToMarkdown.new(html, keep_img_tags: true, keep_cid_imgs: true).to_markdown
diff --git a/lib/email/styles.rb b/lib/email/styles.rb
index 955398aad8c..69bfc7ec233 100644
--- a/lib/email/styles.rb
+++ b/lib/email/styles.rb
@@ -15,7 +15,7 @@ module Email
     def initialize(html, opts = nil)
       @html = html
       @opts = opts || {}
-      @fragment = Nokogiri::HTML.fragment(@html)
+      @fragment = Nokogiri::HTML5.parse(@html)
       @custom_styles = nil
     end
 
@@ -161,7 +161,7 @@ module Email
           src_uri = i["data-original-href"].present? ? URI(i["data-original-href"]) : URI(i['src'])
           # If an iframe is protocol relative, use SSL when displaying it
           display_src = "#{src_uri.scheme || 'https'}://#{src_uri.host}#{src_uri.path}#{src_uri.query.nil? ? '' : '?' + src_uri.query}#{src_uri.fragment.nil? ? '' : '#' + src_uri.fragment}"
-          i.replace "<p><a href='#{src_uri.to_s}'>#{CGI.escapeHTML(display_src)}</a><p>"
+          i.replace(Nokogiri::HTML5.fragment("<p><a href='#{src_uri.to_s}'>#{CGI.escapeHTML(display_src)}</a><p>"))
         rescue URI::Error
           # If the URL is weird, remove the iframe
           i.remove
@@ -242,7 +242,11 @@ module Email
       strip_classes_and_ids
       replace_relative_urls
       replace_secure_media_urls
-      @fragment.to_html
+      include_body? ? @fragment.at("body").to_html : @fragment.at("body").children.to_html
+    end
+
+    def include_body?
+      @html =~ /<body>/i
     end
 
     def strip_avatars_and_emojis
diff --git a/lib/onebox/engine/whitelisted_generic_onebox.rb b/lib/onebox/engine/whitelisted_generic_onebox.rb
index a10f22e83b4..7a46a0d1e5d 100644
--- a/lib/onebox/engine/whitelisted_generic_onebox.rb
+++ b/lib/onebox/engine/whitelisted_generic_onebox.rb
@@ -24,7 +24,7 @@ module Onebox
         return true if WhitelistedGenericOnebox.html_providers.include?(data[:provider_name])
 
         if data[:html]["iframe"]
-          fragment = Nokogiri::HTML::fragment(data[:html])
+          fragment = Nokogiri::HTML5::fragment(data[:html])
           if iframe = fragment.at_css("iframe")
             src = iframe["src"]
             return src.present? && SiteSetting.allowed_iframes.split("|").any? { |url| src.start_with?(url) }
diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb
index 4e3ab68cf8d..d3625f19d8b 100644
--- a/lib/oneboxer.rb
+++ b/lib/oneboxer.rb
@@ -78,7 +78,7 @@ module Oneboxer
   # Parse URLs out of HTML, returning the document when finished.
   def self.each_onebox_link(string_or_doc, extra_paths: [])
     doc = string_or_doc
-    doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
+    doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
 
     onebox_links = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths)
     if onebox_links.present?
@@ -94,14 +94,14 @@ module Oneboxer
 
   def self.apply(string_or_doc, extra_paths: nil)
     doc = string_or_doc
-    doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
+    doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
     changed = false
 
     each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
       onebox, _ = yield(url, element)
 
       if onebox
-        parsed_onebox = Nokogiri::HTML::fragment(onebox)
+        parsed_onebox = Nokogiri::HTML5::fragment(onebox)
         next unless parsed_onebox.children.count > 0
 
         if element&.parent&.node_name&.downcase == "p" &&
diff --git a/lib/post_revisor.rb b/lib/post_revisor.rb
index 9da306db091..fd85696cf06 100644
--- a/lib/post_revisor.rb
+++ b/lib/post_revisor.rb
@@ -579,7 +579,7 @@ class PostRevisor
   def update_category_description
     return unless category = Category.find_by(topic_id: @topic.id)
 
-    doc = Nokogiri::HTML.fragment(@post.cooked)
+    doc = Nokogiri::HTML5.fragment(@post.cooked)
     doc.css("img").remove
 
     if html = doc.css("p").first&.inner_html&.strip
diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb
index 73c18d0099c..9f3f65e3507 100644
--- a/lib/pretty_text.rb
+++ b/lib/pretty_text.rb
@@ -259,7 +259,7 @@ module PrettyText
 
     sanitized = markdown(working_text, options)
 
-    doc = Nokogiri::HTML.fragment(sanitized)
+    doc = Nokogiri::HTML5.fragment(sanitized)
 
     if !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
       add_rel_nofollow_to_user_content(doc)
@@ -269,7 +269,11 @@ module PrettyText
       add_mentions(doc, user_id: opts[:user_id])
     end
 
-    doc.to_html
+    scrubber = Loofah::Scrubber.new do |node|
+      node.remove if node.name == 'script'
+    end
+    loofah_fragment = Loofah.fragment(doc.to_html)
+    loofah_fragment.scrub!(scrubber).to_html
   end
 
   def self.add_rel_nofollow_to_user_content(doc)
@@ -282,7 +286,7 @@ module PrettyText
     doc.css("a").each do |l|
       href = l["href"].to_s
       begin
-        uri = URI(href)
+        uri = URI(URI.escape(href))
         site_uri ||= URI(Discourse.base_url)
 
         if !uri.host.present? ||
@@ -305,7 +309,7 @@ module PrettyText
 
   def self.extract_links(html)
     links = []
-    doc = Nokogiri::HTML.fragment(html)
+    doc = Nokogiri::HTML5.fragment(html)
 
     # remove href inside quotes & elided part
     doc.css("aside.quote a, .elided a").each { |a| a["href"] = "" }
@@ -338,7 +342,7 @@ module PrettyText
 
   def self.excerpt(html, max_length, options = {})
     # TODO: properly fix this HACK in ExcerptParser without introducing XSS
-    doc = Nokogiri::HTML.fragment(html)
+    doc = Nokogiri::HTML5.fragment(html)
     DiscourseEvent.trigger(:reduce_excerpt, doc, options)
     strip_image_wrapping(doc)
     strip_oneboxed_media(doc)
@@ -350,7 +354,7 @@ module PrettyText
     return string if string.blank?
 
     # If the user is not basic, strip links from their bio
-    fragment = Nokogiri::HTML.fragment(string)
+    fragment = Nokogiri::HTML5.fragment(string)
     fragment.css('a').each { |a| a.replace(a.inner_html) }
     fragment.to_html
   end
@@ -395,14 +399,14 @@ module PrettyText
   def self.strip_secure_media(doc)
     doc.css("a[href]").each do |a|
       if Upload.secure_media_url?(a["href"])
-        target = %w(video audio).include?(a&.parent&.parent&.name) ? a.parent.parent : a
+        target = %w(video audio).include?(a&.parent&.name) ? a.parent : a
         target.replace "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
       end
     end
   end
 
   def self.format_for_email(html, post = nil)
-    doc = Nokogiri::HTML.fragment(html)
+    doc = Nokogiri::HTML5.fragment(html)
     DiscourseEvent.trigger(:reduce_cooked, doc, post)
     strip_secure_media(doc) if post&.with_secure_media?
     strip_image_wrapping(doc)
@@ -462,13 +466,13 @@ module PrettyText
 
         case type
         when USER_TYPE
-          element['href'] = "#{Discourse::base_uri}/u/#{name}"
+          element['href'] = "#{Discourse::base_uri}/u/#{URI.escape(name)}"
         when GROUP_MENTIONABLE_TYPE
           element['class'] = 'mention-group notify'
-          element['href'] = "#{Discourse::base_uri}/groups/#{name}"
+          element['href'] = "#{Discourse::base_uri}/groups/#{URI.escape(name)}"
         when GROUP_TYPE
           element['class'] = 'mention-group'
-          element['href'] = "#{Discourse::base_uri}/groups/#{name}"
+          element['href'] = "#{Discourse::base_uri}/groups/#{URI.escape(name)}"
         end
       end
     end
diff --git a/lib/quote_comparer.rb b/lib/quote_comparer.rb
index 5da2891a7e1..74f39f84a77 100644
--- a/lib/quote_comparer.rb
+++ b/lib/quote_comparer.rb
@@ -18,7 +18,7 @@ class QuoteComparer
   def modified?
     return true if @text.blank? || @parent_post.blank?
 
-    parent_text = Nokogiri::HTML::fragment(@parent_post.cooked).text.delete(QuoteComparer.whitespace)
+    parent_text = Nokogiri::HTML5::fragment(@parent_post.cooked).text.delete(QuoteComparer.whitespace)
     text = @text.delete(QuoteComparer.whitespace)
 
     !parent_text.include?(text)
diff --git a/lib/retrieve_title.rb b/lib/retrieve_title.rb
index 3c55ec7aecf..227da9f0cbd 100644
--- a/lib/retrieve_title.rb
+++ b/lib/retrieve_title.rb
@@ -11,7 +11,7 @@ module RetrieveTitle
 
   def self.extract_title(html)
     title = nil
-    if doc = Nokogiri::HTML(html)
+    if doc = Nokogiri::HTML5(html)
 
       title = doc.at('title')&.inner_text
 
diff --git a/lib/reviewable/conversation.rb b/lib/reviewable/conversation.rb
index 53eba48ef1a..696959dd922 100644
--- a/lib/reviewable/conversation.rb
+++ b/lib/reviewable/conversation.rb
@@ -17,7 +17,7 @@ class Reviewable < ActiveRecord::Base
       def self.excerpt(cooked)
         excerpt = ::Post.excerpt(cooked, 250, keep_emoji_images: true)
         # remove the first link if it's the first node
-        fragment = Nokogiri::HTML.fragment(excerpt)
+        fragment = Nokogiri::HTML5.fragment(excerpt)
         if fragment.children.first == fragment.css("a:first").first && fragment.children.first
           fragment.children.first.remove
         end
diff --git a/lib/tasks/emoji.rake b/lib/tasks/emoji.rake
index ad05c0d1b24..ace173d55c2 100644
--- a/lib/tasks/emoji.rake
+++ b/lib/tasks/emoji.rake
@@ -353,7 +353,7 @@ def generate_emoji_groups(keywords, sections)
   puts "Generating groups..."
 
   list = open(EMOJI_ORDERING_URL).read
-  doc = Nokogiri::HTML(list)
+  doc = Nokogiri::HTML5(list)
   table = doc.css("table")[0]
 
   EMOJI_GROUPS.map do |group|
diff --git a/plugins/discourse-details/spec/components/pretty_text_spec.rb b/plugins/discourse-details/spec/components/pretty_text_spec.rb
index aa768305ecf..dd0cf14a722 100644
--- a/plugins/discourse-details/spec/components/pretty_text_spec.rb
+++ b/plugins/discourse-details/spec/components/pretty_text_spec.rb
@@ -8,7 +8,7 @@ describe PrettyText do
   let(:post) { Fabricate(:post) }
 
   it "supports details tag" do
-    cooked_html = <<~HTML
+    cooked_html = <<~HTML.gsub("\n", "")
       <details>
       <summary>
       foo</summary>
@@ -17,7 +17,7 @@ describe PrettyText do
     HTML
 
     expect(cooked_html).to match_html(cooked_html)
-    expect(PrettyText.cook("[details=foo]\nbar\n[/details]")).to match_html(cooked_html)
+    expect(PrettyText.cook("[details=foo]\nbar\n[/details]").gsub("\n", "")).to match_html(cooked_html)
   end
 
   it "deletes elided content" do
diff --git a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb
index 342e1491284..56afacf2af7 100644
--- a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb
+++ b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb
@@ -68,7 +68,7 @@ module DiscourseNarrativeBot
     end
 
     def bot_mentioned?(post)
-      doc = Nokogiri::HTML.fragment(post.cooked)
+      doc = Nokogiri::HTML5.fragment(post.cooked)
 
       valid = false
 
diff --git a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb
index aa66cb1802a..52f1cdab65a 100644
--- a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb
+++ b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb
@@ -280,7 +280,7 @@ module DiscourseNarrativeBot
       topic_id = @post.topic_id
       return unless valid_topic?(topic_id)
 
-      if Nokogiri::HTML.fragment(@post.cooked).css('.hashtag').size > 0
+      if Nokogiri::HTML5.fragment(@post.cooked).css('.hashtag').size > 0
         raw = <<~RAW
           #{I18n.t("#{I18N_KEY}.category_hashtag.reply", i18n_post_args)}
 
@@ -331,7 +331,7 @@ module DiscourseNarrativeBot
       topic_id = @post.topic_id
       return unless valid_topic?(topic_id)
 
-      if Nokogiri::HTML.fragment(@post.cooked).css(".poll").size > 0
+      if Nokogiri::HTML5.fragment(@post.cooked).css(".poll").size > 0
         raw = <<~RAW
           #{I18n.t("#{I18N_KEY}.poll.reply", i18n_post_args)}
 
@@ -354,7 +354,7 @@ module DiscourseNarrativeBot
 
       fake_delay
 
-      if Nokogiri::HTML.fragment(@post.cooked).css("details").size > 0
+      if Nokogiri::HTML5.fragment(@post.cooked).css("details").size > 0
         reply_to(@post, I18n.t("#{I18N_KEY}.details.reply", i18n_post_args))
       else
         reply_to(@post, I18n.t("#{I18N_KEY}.details.not_found", i18n_post_args)) unless @data[:attempted]
diff --git a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb
index 5e9a1f38a54..9898ba085e8 100644
--- a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb
+++ b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb
@@ -326,7 +326,7 @@ module DiscourseNarrativeBot
 
       cooked = @post.post_analyzer.cook(@post.raw, {})
 
-      if Nokogiri::HTML.fragment(cooked).css("img").size > 0
+      if Nokogiri::HTML5.fragment(cooked).css("img").size > 0
         set_state_data(:post_id, @post.id)
 
         if get_state_data(:liked)
@@ -366,7 +366,7 @@ module DiscourseNarrativeBot
       post_topic_id = @post.topic_id
       return unless valid_topic?(post_topic_id)
 
-      if Nokogiri::HTML.fragment(@post.cooked).css("b", "strong", "em", "i", ".bbcode-i", ".bbcode-b").size > 0
+      if Nokogiri::HTML5.fragment(@post.cooked).css("b", "strong", "em", "i", ".bbcode-i", ".bbcode-b").size > 0
         raw = <<~RAW
           #{I18n.t("#{I18N_KEY}.formatting.reply", i18n_post_args)}
 
@@ -390,7 +390,7 @@ module DiscourseNarrativeBot
       post_topic_id = @post.topic_id
       return unless valid_topic?(post_topic_id)
 
-      doc = Nokogiri::HTML.fragment(@post.cooked)
+      doc = Nokogiri::HTML5.fragment(@post.cooked)
 
       if doc.css(".quote").size > 0
         raw = <<~RAW
@@ -416,7 +416,7 @@ module DiscourseNarrativeBot
       post_topic_id = @post.topic_id
       return unless valid_topic?(post_topic_id)
 
-      doc = Nokogiri::HTML.fragment(@post.cooked)
+      doc = Nokogiri::HTML5.fragment(@post.cooked)
 
       if doc.css(".emoji").size > 0
         raw = <<~RAW
diff --git a/plugins/poll/plugin.rb b/plugins/poll/plugin.rb
index e0c1057e36d..78fa0ba548a 100644
--- a/plugins/poll/plugin.rb
+++ b/plugins/poll/plugin.rb
@@ -350,7 +350,7 @@ after_initialize do
         # in the validators instead of cooking twice
         cooked = PrettyText.cook(raw, topic_id: topic_id, user_id: user_id)
 
-        Nokogiri::HTML(cooked).css("div.poll").map do |p|
+        Nokogiri::HTML5(cooked).css("div.poll").map do |p|
           poll = { "options" => [], "name" => DiscoursePoll::DEFAULT_POLL_NAME }
 
           # attributes
diff --git a/plugins/poll/spec/lib/pretty_text_spec.rb b/plugins/poll/spec/lib/pretty_text_spec.rb
index db8af253a8e..c4d09d2aacd 100644
--- a/plugins/poll/spec/lib/pretty_text_spec.rb
+++ b/plugins/poll/spec/lib/pretty_text_spec.rb
@@ -131,7 +131,7 @@ describe PrettyText do
     MD
 
     onebox = Oneboxer.onebox_raw(post.full_url, user_id: Fabricate(:user).id)
-    doc = Nokogiri::HTML(onebox[:preview])
+    doc = Nokogiri::HTML5(onebox[:preview])
 
     expect(onebox[:preview]).to include("A post with a poll")
     expect(onebox[:preview]).to include("<a href=\"#{post.url}\">poll</a>")
diff --git a/script/import_scripts/ipboard3.rb b/script/import_scripts/ipboard3.rb
index 0791e2b3a00..4e7638633df 100644
--- a/script/import_scripts/ipboard3.rb
+++ b/script/import_scripts/ipboard3.rb
@@ -376,7 +376,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
 
     raw.gsub!(/<(.+)>&nbsp;<\/\1>/, "\n\n")
 
-    doc = Nokogiri::HTML.fragment(raw)
+    doc = Nokogiri::HTML5.fragment(raw)
 
     doc.css("blockquote.ipsBlockquote").each do |bq|
       post_id = post_id_from_imported_post_id(bq["data-cid"])
diff --git a/script/import_scripts/jive.rb b/script/import_scripts/jive.rb
index 8d386a52c58..f380b2bfee2 100644
--- a/script/import_scripts/jive.rb
+++ b/script/import_scripts/jive.rb
@@ -218,7 +218,7 @@ class ImportScripts::Jive < ImportScripts::Base
     raw = raw.dup
     raw = raw[5..-6]
 
-    doc = Nokogiri::HTML.fragment(raw)
+    doc = Nokogiri::HTML5.fragment(raw)
     doc.css('img').each do |img|
       img.remove if img['class'] == "jive-image"
     end
diff --git a/script/import_scripts/jive_api.rb b/script/import_scripts/jive_api.rb
index cf6df4d5bef..cee4928227d 100644
--- a/script/import_scripts/jive_api.rb
+++ b/script/import_scripts/jive_api.rb
@@ -297,7 +297,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
   end
 
   def process_raw(raw)
-    doc = Nokogiri::HTML.fragment(raw)
+    doc = Nokogiri::HTML5.fragment(raw)
 
     # convert emoticon
     doc.css("span.emoticon-inline").each do |span|
diff --git a/script/import_scripts/lithium.rb b/script/import_scripts/lithium.rb
index ac18a3fa36f..161618a7c16 100644
--- a/script/import_scripts/lithium.rb
+++ b/script/import_scripts/lithium.rb
@@ -913,7 +913,7 @@ SQL
       raw.sub!(match, content)
     end
 
-    doc = Nokogiri::HTML.fragment(raw)
+    doc = Nokogiri::HTML5.fragment(raw)
 
     doc.css("a,img,li-image").each do |l|
       upload_name, image, linked_upload = [nil] * 3
diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb
index 47262f1342e..51e9eadf69f 100644
--- a/spec/components/cooked_post_processor_spec.rb
+++ b/spec/components/cooked_post_processor_spec.rb
@@ -453,10 +453,8 @@ describe CookedPostProcessor do
         it "generates overlay information" do
           cpp.post_process
 
-          expect(cpp.html).to match_html <<~HTML
-            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="logo.png"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" width="690" height="788"><div class="meta">
-            <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">logo.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
-            </div></a></div></p>
+          expect(cpp.html).to match_html <<~HTML.rstrip
+            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="logo.png"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" width="690" height="788"><div class="meta"><svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">logo.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg></div></a></div></p>
           HTML
 
           expect(cpp).to be_dirty
@@ -475,7 +473,7 @@ describe CookedPostProcessor do
 
             cpp.post_process
 
-            expect(cpp.html).to match_html <<~HTML
+            expect(cpp.html).to match_html <<~HTML.rstrip
               <p><img class="onebox" src="//test.localhost/#{upload_path}/original/1X/1234567890123456.jpg" width="690" height="788"></p>
             HTML
           end
@@ -491,7 +489,7 @@ describe CookedPostProcessor do
 
             cpp.post_process
 
-            expect(cpp.html).to match_html <<~HTML
+            expect(cpp.html).to match_html <<~HTML.rstrip
               <p><img src="//test.localhost/#{upload_path}/original/1X/1234567890123456.svg" width="690" height="788"></p>
             HTML
           end
@@ -619,10 +617,8 @@ describe CookedPostProcessor do
         it "crops the image" do
           cpp.post_process
 
-          expect(cpp.html).to match_html <<~HTML
-            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="logo.png"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_230x500.png" width="230" height="500"><div class="meta">
-            <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">logo.png</span><span class="informations">1125×2436 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
-            </div></a></div></p>
+          expect(cpp.html).to match_html <<~HTML.rstrip
+            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="logo.png"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_230x500.png" width="230" height="500"><div class="meta"><svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">logo.png</span><span class="informations">1125×2436 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg></div></a></div></p>
           HTML
 
           expect(cpp).to be_dirty
@@ -652,10 +648,8 @@ describe CookedPostProcessor do
         it "generates overlay information" do
           cpp.post_process
 
-          expect(cpp.html). to match_html <<~HTML
-            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost/subfolder#{upload.url}" data-download-href="//test.localhost/subfolder/#{upload_path}/#{upload.sha1}" title="logo.png"><img src="//test.localhost/subfolder/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" width="690" height="788"><div class="meta">
-            <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">logo.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
-            </div></a></div></p>
+          expect(cpp.html). to match_html <<~HTML.rstrip
+            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost/subfolder#{upload.url}" data-download-href="//test.localhost/subfolder/#{upload_path}/#{upload.sha1}" title="logo.png"><img src="//test.localhost/subfolder/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" width="690" height="788"><div class="meta"><svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">logo.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg></div></a></div></p>
           HTML
 
           expect(cpp).to be_dirty
@@ -665,10 +659,8 @@ describe CookedPostProcessor do
           upload.update!(original_filename: "><img src=x onerror=alert('haha')>.png")
           cpp.post_process
 
-          expect(cpp.html).to match_html <<~HTML
-            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost/subfolder#{upload.url}" data-download-href="//test.localhost/subfolder/#{upload_path}/#{upload.sha1}" title="&amp;gt;&amp;lt;img src=x onerror=alert(&amp;#39;haha&amp;#39;)&amp;gt;.png"><img src="//test.localhost/subfolder/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" width="690" height="788"><div class="meta">
-            <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">&amp;gt;&amp;lt;img src=x onerror=alert(&amp;#39;haha&amp;#39;)&amp;gt;.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
-            </div></a></div></p>
+          expect(cpp.html).to match_html <<~HTML.rstrip
+            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost/subfolder#{upload.url}" data-download-href="//test.localhost/subfolder/#{upload_path}/#{upload.sha1}" title="&amp;gt;&amp;lt;img src=x onerror=alert(&amp;#39;haha&amp;#39;)&amp;gt;.png"><img src="//test.localhost/subfolder/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" width="690" height="788"><div class="meta"><svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">&amp;gt;&amp;lt;img src=x onerror=alert(&amp;#39;haha&amp;#39;)&amp;gt;.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg></div></a></div></p>
           HTML
         end
 
@@ -693,10 +685,8 @@ describe CookedPostProcessor do
         it "generates overlay information using image title and ignores alt" do
           cpp.post_process
 
-          expect(cpp.html).to match_html <<~HTML
-            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="WAT"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" title="WAT" alt="RED" width="690" height="788"><div class="meta">
-            <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">WAT</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
-            </div></a></div></p>
+          expect(cpp.html).to match_html <<~HTML.rstrip
+            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="WAT"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" title="WAT" alt="RED" width="690" height="788"><div class="meta"><svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">WAT</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg></div></a></div></p>
           HTML
 
           expect(cpp).to be_dirty
@@ -723,10 +713,8 @@ describe CookedPostProcessor do
         it "generates overlay information using image title" do
           cpp.post_process
 
-          expect(cpp.html).to match_html <<~HTML
-            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="WAT"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" title="WAT" width="690" height="788"><div class="meta">
-            <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">WAT</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
-            </div></a></div></p>
+          expect(cpp.html).to match_html <<~HTML.rstrip
+            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="WAT"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" title="WAT" width="690" height="788"><div class="meta"><svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">WAT</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg></div></a></div></p>
           HTML
 
           expect(cpp).to be_dirty
@@ -753,10 +741,8 @@ describe CookedPostProcessor do
         it "generates overlay information using image alt" do
           cpp.post_process
 
-          expect(cpp.html).to match_html <<~HTML
-            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="RED"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" alt="RED" width="690" height="788"><div class="meta">
-            <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">RED</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
-            </div></a></div></p>
+          expect(cpp.html).to match_html <<~HTML.rstrip
+            <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost#{upload.url}" data-download-href="//test.localhost/#{upload_path}/#{upload.sha1}" title="RED"><img src="//test.localhost/#{upload_path}/optimized/1X/#{upload.sha1}_#{OptimizedImage::VERSION}_690x788.png" alt="RED" width="690" height="788"><div class="meta"><svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">RED</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg></div></a></div></p>
           HTML
 
           expect(cpp).to be_dirty
@@ -993,7 +979,7 @@ describe CookedPostProcessor do
       cpp = CookedPostProcessor.new(post, disable_loading_image: true)
       cpp.post_process
 
-      doc = Nokogiri::HTML::fragment(cpp.html)
+      doc = Nokogiri::HTML5::fragment(cpp.html)
       expect(doc.css('.lightbox-wrapper').size).to eq(1)
       expect(doc.css('img').first['srcset']).to_not eq(nil)
     end
@@ -1008,7 +994,7 @@ describe CookedPostProcessor do
       cpp = CookedPostProcessor.new(post, disable_loading_image: true)
       cpp.post_process
 
-      doc = Nokogiri::HTML::fragment(cpp.html)
+      doc = Nokogiri::HTML5::fragment(cpp.html)
       expect(doc.css('.lightbox-wrapper').size).to eq(0)
       expect(doc.css('img').first['srcset']).to_not eq(nil)
     end
@@ -1023,7 +1009,7 @@ describe CookedPostProcessor do
       cpp = CookedPostProcessor.new(post, disable_loading_image: true)
       cpp.post_process
 
-      doc = Nokogiri::HTML::fragment(cpp.html)
+      doc = Nokogiri::HTML5::fragment(cpp.html)
       expect(doc.css('.lightbox-wrapper').size).to eq(0)
       expect(doc.css('img').first['srcset']).to_not eq(nil)
     end
@@ -1227,7 +1213,7 @@ describe CookedPostProcessor do
 
     it "uses schemaless url for uploads" do
       cpp.optimize_urls
-      expect(cpp.html).to match_html <<~HTML
+      expect(cpp.html).to match_html <<~HTML.rstrip
         <p><a href="//test.localhost/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
         <img src="//test.localhost/#{upload_path}/original/1X/1234567890123456.jpg"><br>
         <a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
@@ -1242,7 +1228,7 @@ describe CookedPostProcessor do
       it "uses schemaless CDN url for http uploads" do
         Rails.configuration.action_controller.stubs(:asset_host).returns("http://my.cdn.com")
         cpp.optimize_urls
-        expect(cpp.html).to match_html <<~HTML
+        expect(cpp.html).to match_html <<~HTML.rstrip
           <p><a href="//my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
           <img src="//my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
           <a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
@@ -1255,7 +1241,7 @@ describe CookedPostProcessor do
       it "doesn't use schemaless CDN url for https uploads" do
         Rails.configuration.action_controller.stubs(:asset_host).returns("https://my.cdn.com")
         cpp.optimize_urls
-        expect(cpp.html).to match_html <<~HTML
+        expect(cpp.html).to match_html <<~HTML.rstrip
           <p><a href="https://my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
           <img src="https://my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
           <a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
@@ -1269,7 +1255,7 @@ describe CookedPostProcessor do
         SiteSetting.login_required = true
         Rails.configuration.action_controller.stubs(:asset_host).returns("http://my.cdn.com")
         cpp.optimize_urls
-        expect(cpp.html).to match_html <<~HTML
+        expect(cpp.html).to match_html <<~HTML.rstrip
           <p><a href="//my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
           <img src="//my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
           <a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
@@ -1283,7 +1269,7 @@ describe CookedPostProcessor do
         SiteSetting.prevent_anons_from_downloading_files = true
         Rails.configuration.action_controller.stubs(:asset_host).returns("http://my.cdn.com")
         cpp.optimize_urls
-        expect(cpp.html).to match_html <<~HTML
+        expect(cpp.html).to match_html <<~HTML.rstrip
           <p><a href="//my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
           <img src="//my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
           <a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
@@ -1318,7 +1304,7 @@ describe CookedPostProcessor do
           cpp = CookedPostProcessor.new(the_post)
           cpp.optimize_urls
 
-          expect(cpp.html).to match_html <<~HTML
+          expect(cpp.html).to match_html <<~HTML.rstrip
             <p>This post has a local emoji <img src="https://local.cdn.com/images/emoji/twitter/+1.png?v=#{Emoji::EMOJI_VERSION}" title=":+1:" class="emoji" alt=":+1:"> and an external upload</p>
             <p><img src="https://s3.cdn.com/#{stored_path}" alt="smallest.png" data-base62-sha1="#{upload.base62_sha1}" width="10" height="20"></p>
           HTML
@@ -1336,7 +1322,7 @@ describe CookedPostProcessor do
           cpp = CookedPostProcessor.new(the_post)
           cpp.optimize_urls
 
-          expect(cpp.html).to match_html <<~HTML
+          expect(cpp.html).to match_html <<~HTML.rstrip
             <p>This post has a local emoji <img src="https://local.cdn.com/images/emoji/twitter/+1.png?v=#{Emoji::EMOJI_VERSION}" title=":+1:" class="emoji" alt=":+1:"> and an external upload</p>
             <p><img src="/secure-media-uploads/#{stored_path}" alt="smallest.png" data-base62-sha1="#{upload.base62_sha1}" width="10" height="20"></p>
           HTML
@@ -1357,18 +1343,20 @@ describe CookedPostProcessor do
 
             the_post = Fabricate(:post, raw: "This post has an S3 video onebox:\n#{video_upload.url}")
 
-            cpp = CookedPostProcessor.new(the_post)
+            cpp = CookedPostProcessor.new(the_post.reload)
+            cpp.post_process_oneboxes
+
+            cpp = CookedPostProcessor.new(the_post.reload)
             cpp.post_process_oneboxes
 
             expect(cpp.html).to match_html <<~HTML
-              <p>This post has an S3 video onebox:<br></p>
-              <div class="onebox video-onebox">
-                <video width="100%" height="100%" controls="">
-                  <source src="#{video_upload.url}">
-                    <a href="#{video_upload.url}" rel="nofollow ugc noopener">#{video_upload.url}</a>
-                  </source>
-                </video>
-              </div>
+              <p>This post has an S3 video onebox:<br>
+                        </p><div class="onebox video-onebox">
+                          <video width="100%" height="100%" controls="">
+                            <source src="#{video_upload.url}">
+                            <a href="#{video_upload.url}" rel="nofollow ugc noopener">#{video_upload.url}</a>
+                          </video>
+                        </div>
             HTML
           end
 
@@ -1384,13 +1372,12 @@ describe CookedPostProcessor do
 
             secure_url = video_upload.url.sub(SiteSetting.s3_cdn_url, "#{Discourse.base_url}/secure-media-uploads")
 
-            expect(cpp.html).to match_html <<~HTML
+            expect(cpp.html).to match_html <<~HTML.rstrip
               <p>This post has an S3 video onebox:<br>
               <div class="onebox video-onebox">
                 <video width="100%" height="100%" controls="">
                   <source src="#{secure_url}">
-                    <a href="#{secure_url}">#{secure_url}</a>
-                  </source>
+                  <a href="#{secure_url}">#{secure_url}</a>
                 </video>
               </div>
               </p>
@@ -1416,7 +1403,7 @@ describe CookedPostProcessor do
             stub_request(:head, audio_upload.url)
             stub_request(:get, image_upload.url)
 
-            raw = <<~RAW
+            raw = <<~RAW.rstrip
               This post has a video upload.
               #{video_upload.url}
 
@@ -1435,19 +1422,17 @@ describe CookedPostProcessor do
             secure_video_url = video_upload.url.sub(SiteSetting.s3_cdn_url, "#{Discourse.base_url}/secure-media-uploads")
             secure_audio_url = audio_upload.url.sub(SiteSetting.s3_cdn_url, "#{Discourse.base_url}/secure-media-uploads")
 
-            expect(cpp.html).to match_html <<~HTML
-              <p>This post has a video upload.<br></p>
+            expect(cpp.html).to match_html <<~HTML.rstrip
+              <p>This post has a video upload.<br>
               <div class="onebox video-onebox">
                 <video width="100%" height="100%" controls="">
                   <source src="#{secure_video_url}">
-                    <a href="#{secure_video_url}">#{secure_video_url}</a>
-                  </source>
+                  <a href="#{secure_video_url}">#{secure_video_url}</a>
                 </video>
               </div>
-
-             <p>This post has an audio upload.<br>
-                <audio controls><source src="#{secure_audio_url}"><a href="#{secure_audio_url}">#{secure_audio_url}</a></source></audio>
               </p>
+              <p>This post has an audio upload.<br>
+              <audio controls=""><source src="#{secure_audio_url}"><a href="#{secure_audio_url}">#{secure_audio_url}</a></audio></p>
               <p>And an image upload.<br>
               <img src="#{image_upload.url}" alt="#{image_upload.original_filename}" data-base62-sha1="#{image_upload.base62_sha1}"></p>
 
@@ -1616,7 +1601,7 @@ describe CookedPostProcessor do
 
     let(:post) { build(:post) }
     let(:cpp) { CookedPostProcessor.new(post) }
-    let(:doc) { Nokogiri::HTML::fragment('<body><div><a><img id="linked_image"></a><p><img id="standard_image"></p></div></body>') }
+    let(:doc) { Nokogiri::HTML5::fragment('<body><div><a><img id="linked_image"></a><p><img id="standard_image"></p></div></body>') }
 
     it "is true when the image is inside a link" do
       img = doc.css("img#linked_image").first
diff --git a/spec/components/email/styles_spec.rb b/spec/components/email/styles_spec.rb
index bdcd27aa066..74addfe29dd 100644
--- a/spec/components/email/styles_spec.rb
+++ b/spec/components/email/styles_spec.rb
@@ -8,14 +8,14 @@ describe Email::Styles do
   def basic_fragment(html)
     styler = Email::Styles.new(html)
     styler.format_basic
-    Nokogiri::HTML.fragment(styler.to_html)
+    Nokogiri::HTML5.fragment(styler.to_html)
   end
 
   def html_fragment(html)
     styler = Email::Styles.new(html)
     styler.format_basic
     styler.format_html
-    Nokogiri::HTML.fragment(styler.to_html)
+    Nokogiri::HTML5.fragment(styler.to_html)
   end
 
   context "basic formatter" do
diff --git a/spec/components/excerpt_parser_spec.rb b/spec/components/excerpt_parser_spec.rb
index 8f0654bab5f..363dd049e0a 100644
--- a/spec/components/excerpt_parser_spec.rb
+++ b/spec/components/excerpt_parser_spec.rb
@@ -18,7 +18,7 @@ describe ExcerptParser do
       </details>
     HTML
 
-    expect(ExcerptParser.get_excerpt(html, 50, {})).to match_html(<<~HTML)
+    expect(ExcerptParser.get_excerpt(html, 50, {})).to match_html(<<~HTML.rstrip)
       <details><summary>FOO</summary>BAR
       Lorem ipsum dolor sit amet, consectetur adi&hellip;</details>
     HTML
diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb
index 784b607972a..8969a526bd5 100644
--- a/spec/components/pretty_text_spec.rb
+++ b/spec/components/pretty_text_spec.rb
@@ -184,7 +184,7 @@ describe PrettyText do
           <aside class="quote no-group" data-username="#{user.username}" data-post="123" data-topic="456" data-full="true">
           <div class="title">
           <div class="quote-controls"></div>
-          <img alt width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"> #{user.username}:</div>
+          <img alt="" width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"> #{user.username}:</div>
           <blockquote>
           <p>ddd</p>
           </blockquote>
@@ -206,7 +206,7 @@ describe PrettyText do
           <aside class="quote no-group" data-username="#{user.username}" data-post="123" data-topic="456" data-full="true">
           <div class="title">
           <div class="quote-controls"></div>
-          <img alt width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"> #{user.username}:</div>
+          <img alt="" width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"> #{user.username}:</div>
           <blockquote>
           <p>ddd</p>
           </blockquote>
@@ -227,7 +227,7 @@ describe PrettyText do
           <aside class="quote no-group" data-username="#{user.username}" data-post="555" data-topic="666">
           <div class="title">
           <div class="quote-controls"></div>
-          <img alt width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"> #{user.username}:</div>
+          <img alt="" width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"> #{user.username}:</div>
           <blockquote>
           <p>ddd</p>
           </blockquote>
@@ -254,7 +254,7 @@ describe PrettyText do
           <aside class="quote group-#{group.name}" data-username="#{user.username}" data-post="2" data-topic="#{topic.id}">
           <div class="title">
           <div class="quote-controls"></div>
-          <img alt width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"><a href="http://test.localhost/t/this-is-a-test-topic/#{topic.id}/2">This is a test topic</a>
+          <img alt="" width="20" height="20" src="//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png" class="avatar"><a href="http://test.localhost/t/this-is-a-test-topic/#{topic.id}/2">This is a test topic</a>
           </div>
           <blockquote>
           <p>ddd</p>
@@ -828,7 +828,7 @@ describe PrettyText do
 
   describe "strip_image_wrapping" do
     def strip_image_wrapping(html)
-      doc = Nokogiri::HTML.fragment(html)
+      doc = Nokogiri::HTML5.fragment(html)
       described_class.strip_image_wrapping(doc)
       doc.to_html
     end
@@ -1122,7 +1122,7 @@ describe PrettyText do
   it "can handle mixed lists" do
     # known bug in old md engine
     cooked = PrettyText.cook("* a\n\n1. b")
-    expect(cooked).to match_html("<ul>\n<li>a</li>\n</ul><ol>\n<li>b</li>\n</ol>")
+    expect(cooked).to match_html("<ul>\n<li>a</li>\n</ul>\n<ol>\n<li>b</li>\n</ol>")
   end
 
   it "can handle traditional vs non traditional newlines" do
@@ -1342,13 +1342,13 @@ HTML
 
   it "supports img bbcode" do
     cooked = PrettyText.cook "[img]http://www.image/test.png[/img]"
-    html = "<p><img src=\"http://www.image/test.png\" alt></p>"
+    html = "<p><img src=\"http://www.image/test.png\" alt=\"\"></p>"
     expect(cooked).to eq(html)
   end
 
   it "provides safety for img bbcode" do
     cooked = PrettyText.cook "[img]http://aaa.com<script>alert(1);</script>[/img]"
-    html = '<p><img src="http://aaa.com&lt;script&gt;alert(1);&lt;/script&gt;" alt></p>'
+    html = '<p><img src="http://aaa.com&lt;script&gt;alert(1);&lt;/script&gt;" alt=""></p>'
     expect(cooked).to eq(html)
   end
 
@@ -1433,10 +1433,10 @@ HTML
 
       html = <<~HTML
         <p><img src="http://png.com/my.png" alt="title with | title" width="220" height="100"><br>
-        <img src="http://png.com/my.png" alt><br>
-        <img src="http://png.com/my.png" alt width="220" height="100"><br>
+        <img src="http://png.com/my.png" alt=""><br>
+        <img src="http://png.com/my.png" alt="" width="220" height="100"><br>
         <img src="http://png.com/my.png" alt="stuff"><br>
-        <img src="http://png.com/my.png" alt title="some title" width="110" height="50"></p>
+        <img src="http://png.com/my.png" alt="" title="some title" width="110" height="50"></p>
       HTML
 
       expect(cooked).to eq(html.strip)
@@ -1452,11 +1452,11 @@ HTML
       MD
 
       html = <<~HTML
-        <p><img src="http://png.com/my.png" alt width="110" height="50"><br>
-        <img src="http://png.com/my.png" alt width="110" height="50"><br>
-        <img src="http://png.com/my.png" alt width="110" height="50"><br>
-        <img src="http://png.com/my.png" alt width="150" height="68"><br>
-        <img src="http://png.com/my.png" alt width="110" height="50"></p>
+        <p><img src="http://png.com/my.png" alt="" width="110" height="50"><br>
+        <img src="http://png.com/my.png" alt="" width="110" height="50"><br>
+        <img src="http://png.com/my.png" alt="" width="110" height="50"><br>
+        <img src="http://png.com/my.png" alt="" width="150" height="68"><br>
+        <img src="http://png.com/my.png" alt="" width="110" height="50"></p>
       HTML
 
       expect(cooked).to eq(html.strip)
diff --git a/spec/lib/content_security_policy_spec.rb b/spec/lib/content_security_policy_spec.rb
index 168887cd473..4196b29d669 100644
--- a/spec/lib/content_security_policy_spec.rb
+++ b/spec/lib/content_security_policy_spec.rb
@@ -217,9 +217,9 @@ describe ContentSecurityPolicy do
       policy # call this first to make sure further actions clear the cache
 
       theme.set_field(target: :common, name: "header", value: <<~SCRIPT)
-        <script src='https://example.com/myscript.js'/>
-        <script src='//example2.com/protocol-less-script.js'/>
-        <script src='domain-only.com'/>
+        <script src='https://example.com/myscript.js'></script>
+        <script src='//example2.com/protocol-less-script.js'></script>
+        <script src='domain-only.com'></script>
         <script>console.log('inline script')</script>
       SCRIPT
 
diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb
index c45a2209e10..ddfbe115ecf 100644
--- a/spec/models/topic_embed_spec.rb
+++ b/spec/models/topic_embed_spec.rb
@@ -14,7 +14,7 @@ describe TopicEmbed do
     fab!(:user) { Fabricate(:user) }
     let(:title) { "How to turn a fish from good to evil in 30 seconds" }
     let(:url) { 'http://eviltrout.com/123' }
-    let(:contents) { "hello world new post <a href='/hello'>hello</a> <img src='/images/wat.jpg'>" }
+    let(:contents) { "<p>hello world new post <a href='/hello'>hello</a> <img src='/images/wat.jpg'></p>" }
     fab!(:embeddable_host) { Fabricate(:embeddable_host) }
 
     it "returns nil when the URL is malformed" do
@@ -46,7 +46,7 @@ describe TopicEmbed do
 
       it "Supports updating the post content" do
         expect do
-          TopicEmbed.import(user, url, "New title received", "muhahaha new contents!")
+          TopicEmbed.import(user, url, "New title received", "<p>muhahaha new contents!</p>")
         end.to change { topic_embed.reload.content_sha1 }
         expect(topic_embed.topic.title).to eq("New title received")
 
diff --git a/spec/requests/categories_controller_spec.rb b/spec/requests/categories_controller_spec.rb
index 97293742d35..e4b0cec35c6 100644
--- a/spec/requests/categories_controller_spec.rb
+++ b/spec/requests/categories_controller_spec.rb
@@ -11,7 +11,7 @@ describe CategoriesController do
     it 'web crawler view has correct urls for subfolder install' do
       set_subfolder "/forum"
       get '/categories', headers: { 'HTTP_USER_AGENT' => 'Googlebot' }
-      html = Nokogiri::HTML(response.body)
+      html = Nokogiri::HTML5(response.body)
       expect(html.css('body.crawler')).to be_present
       expect(html.css("a[href=\"/forum/c/#{category.slug}\"]")).to be_present
     end
diff --git a/spec/requests/email_controller_spec.rb b/spec/requests/email_controller_spec.rb
index 10658907b63..ec60265f667 100644
--- a/spec/requests/email_controller_spec.rb
+++ b/spec/requests/email_controller_spec.rb
@@ -231,7 +231,7 @@ RSpec.describe EmailController do
 
         navigate_to_unsubscribe
 
-        source = Nokogiri::HTML::fragment(response.body)
+        source = Nokogiri::HTML5::fragment(response.body)
         expect(source.css(".combobox option").map(&:inner_text)).to eq(slow_digest_frequencies)
       end
 
@@ -242,7 +242,7 @@ RSpec.describe EmailController do
 
         navigate_to_unsubscribe
 
-        source = Nokogiri::HTML::fragment(response.body)
+        source = Nokogiri::HTML5::fragment(response.body)
         expect(source.css(".combobox option[selected='selected']")[0]['value']).to eq(six_months_freq.to_s)
       end
 
@@ -253,7 +253,7 @@ RSpec.describe EmailController do
 
         navigate_to_unsubscribe
 
-        source = Nokogiri::HTML::fragment(response.body)
+        source = Nokogiri::HTML5::fragment(response.body)
         expect(source.css(".combobox option[selected='selected']")[0]['value']).to eq(never_frequency.to_s)
       end
     end
diff --git a/spec/requests/embed_controller_spec.rb b/spec/requests/embed_controller_spec.rb
index 71d6eef807d..ac0d6c3aa83 100644
--- a/spec/requests/embed_controller_spec.rb
+++ b/spec/requests/embed_controller_spec.rb
@@ -146,7 +146,7 @@ describe EmbedController do
 
       get '/embed/comments', params: { embed_url: embed_url }, headers: headers
 
-      html = Nokogiri::HTML.fragment(response.body)
+      html = Nokogiri::HTML5.fragment(response.body)
       css_link = html.at("link[data-target=embedded_theme]").attribute("href").value
 
       get css_link
diff --git a/spec/requests/user_api_keys_controller_spec.rb b/spec/requests/user_api_keys_controller_spec.rb
index 8148bef88a6..c612eb55a1d 100644
--- a/spec/requests/user_api_keys_controller_spec.rb
+++ b/spec/requests/user_api_keys_controller_spec.rb
@@ -238,7 +238,7 @@ describe UserApiKeysController do
       SiteSetting.min_trust_level_for_user_api_key = 0
       post "/user-api-key", params: args
       expect(response.status).not_to eq(302)
-      payload = Nokogiri::HTML(response.body).at('code').content
+      payload = Nokogiri::HTML5(response.body).at('code').content
       encrypted = Base64.decode64(payload)
       key = OpenSSL::PKey::RSA.new(private_key)
       parsed = JSON.parse(key.private_decrypt(encrypted))
diff --git a/spec/services/username_changer_spec.rb b/spec/services/username_changer_spec.rb
index 0013c82a942..416c986ca9e 100644
--- a/spec/services/username_changer_spec.rb
+++ b/spec/services/username_changer_spec.rb
@@ -142,7 +142,7 @@ describe UsernameChanger do
           post = create_post_and_change_username(raw: ".@foo -@foo %@foo _@foo ,@foo ;@foo @@foo")
 
           expect(post.raw).to eq(".@bar -@bar %@bar _@bar ,@bar ;@bar @@bar")
-          expect(post.cooked).to match_html(<<~HTML)
+          expect(post.cooked).to match_html(<<~HTML.rstrip)
             <p>.<a class="mention" href="/u/bar">@bar</a>
                -<a class="mention" href="/u/bar">@bar</a>
                %<a class="mention" href="/u/bar">@bar</a>
@@ -164,7 +164,7 @@ describe UsernameChanger do
           post = create_post_and_change_username(raw: "**@foo** *@foo* _@foo_ ~~@foo~~")
 
           expect(post.raw).to eq("**@bar** *@bar* _@bar_ ~~@bar~~")
-          expect(post.cooked).to match_html(<<~HTML)
+          expect(post.cooked).to match_html(<<~HTML.rstrip)
             <p><strong><a class="mention" href="/u/bar">@bar</a></strong>
                <em><a class="mention" href="/u/bar">@bar</a></em>
                <em><a class="mention" href="/u/bar">@bar</a></em>
@@ -176,7 +176,7 @@ describe UsernameChanger do
           post = create_post_and_change_username(raw: "@foo. @foo, @foo: @foo; @foo_ @foo-")
 
           expect(post.raw).to eq("@bar. @bar, @bar: @bar; @bar_ @bar-")
-          expect(post.cooked).to match_html(<<~HTML)
+          expect(post.cooked).to match_html(<<~HTML.rstrip)
             <p><a class="mention" href="/u/bar">@bar</a>.
                <a class="mention" href="/u/bar">@bar</a>,
                <a class="mention" href="/u/bar">@bar</a>:
@@ -220,12 +220,8 @@ describe UsernameChanger do
           post = create_post_and_change_username(raw: "@foo @foobar @foo-bar @foo_bar @foo1")
 
           expect(post.raw).to eq("@bar @foobar @foo-bar @foo_bar @foo1")
-          expect(post.cooked).to match_html(<<~HTML)
-            <p><a class="mention" href="/u/bar">@bar</a>
-               <a class="mention" href="/u/foobar">@foobar</a>
-               <a class="mention" href="/u/foo-bar">@foo-bar</a>
-               <a class="mention" href="/u/foo_bar">@foo_bar</a>
-               <a class="mention" href="/u/foo1">@foo1</a></p>
+          expect(post.cooked).to match_html(<<~HTML.rstrip)
+            <p><a class="mention" href="/u/bar">@bar</a> <a class="mention" href="/u/foobar">@foobar</a> <a class="mention" href="/u/foo-bar">@foo-bar</a> <a class="mention" href="/u/foo_bar">@foo_bar</a> <a class="mention" href="/u/foo1">@foo1</a></p>
           HTML
         end
 
@@ -311,12 +307,8 @@ describe UsernameChanger do
             post = create_post_and_change_username(raw: "@թռչուն @թռչուն鳥 @թռչուն-鳥 @թռչուն_鳥 @թռչուն٩", target_username: 'птица')
 
             expect(post.raw).to eq("@птица @թռչուն鳥 @թռչուն-鳥 @թռչուն_鳥 @թռչուն٩")
-            expect(post.cooked).to match_html(<<~HTML)
-              <p><a class="mention" href="/u/%D0%BF%D1%82%D0%B8%D1%86%D0%B0">@птица</a>
-                 <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6%E9%B3%A5">@թռչուն鳥</a>
-                 <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6-%E9%B3%A5">@թռչուն-鳥</a>
-                 <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6_%E9%B3%A5">@թռչուն_鳥</a>
-                 <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6%D9%A9">@թռչուն٩</a></p>
+            expect(post.cooked).to match_html(<<~HTML.rstrip)
+              <p><a class="mention" href="/u/%D0%BF%D1%82%D0%B8%D1%86%D0%B0">@птица</a> <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6%E9%B3%A5">@թռչուն鳥</a> <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6-%E9%B3%A5">@թռչուն-鳥</a> <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6_%E9%B3%A5">@թռչուն_鳥</a> <a class="mention" href="/u/%D5%A9%D5%BC%D5%B9%D5%B8%D6%82%D5%B6%D9%A9">@թռչուն٩</a></p>
             HTML
           end
 
@@ -364,7 +356,7 @@ describe UsernameChanger do
             dolor sit amet
           RAW
 
-          expect(post.cooked).to match_html(<<~HTML)
+          expect(post.cooked).to match_html(<<~HTML.rstrip)
             <p>Lorem ipsum</p>
             <aside class="quote no-group" data-username="bar" data-post="1" data-topic="#{quoted_post.topic.id}">
             <div class="title">
@@ -377,7 +369,7 @@ describe UsernameChanger do
             <aside class="quote no-group" data-username="bar">
             <div class="title">
             <div class="quote-controls"></div>
-            <img alt='' width="20" height="20" src="#{avatar_url}" class="avatar"> bar:</div>
+            <img alt="" width="20" height="20" src="#{avatar_url}" class="avatar"> bar:</div>
             <blockquote>
             <p>quoted post</p>
             </blockquote>
@@ -385,7 +377,7 @@ describe UsernameChanger do
             <aside class="quote no-group" data-username="bar" data-post="1" data-topic="#{quoted_post.topic.id}">
             <div class="title">
             <div class="quote-controls"></div>
-            <img alt='' width="20" height="20" src="#{avatar_url}" class="avatar"> bar:</div>
+            <img alt="" width="20" height="20" src="#{avatar_url}" class="avatar"> bar:</div>
             <blockquote>
             <p>quoted post</p>
             </blockquote>
@@ -415,7 +407,7 @@ describe UsernameChanger do
           end
 
           let(:expected_cooked) do
-            <<~HTML
+            <<~HTML.rstrip
               <p>Lorem ipsum</p>
               <aside class="quote no-group" data-username="bar" data-post="1" data-topic="#{quoted_post.topic.id}">
               <div class="title">
@@ -459,7 +451,7 @@ describe UsernameChanger do
 
           expect(post.raw).to eq(raw)
 
-          expect(post.cooked).to match_html(<<~HTML)
+          expect(post.cooked).to match_html(<<~HTML.rstrip)
             <p><aside class="quote" data-post="#{quoted_post.post_number}" data-topic="#{quoted_post.topic.id}">
               <div class="title">
                 <div class="quote-controls"></div>
@@ -491,7 +483,7 @@ describe UsernameChanger do
 
           expect(post.raw).to eq(raw)
 
-          expect(post.cooked).to match_html(<<~HTML)
+          expect(post.cooked).to match_html(<<~HTML.rstrip)
             <p><aside class="quote" data-post="#{quoted_post.post_number}" data-topic="#{quoted_post.topic.id}">
               <div class="title">
                 <div class="quote-controls"></div>
diff --git a/spec/support/match_html_matcher.rb b/spec/support/match_html_matcher.rb
index c6b6b771026..94519530e71 100644
--- a/spec/support/match_html_matcher.rb
+++ b/spec/support/match_html_matcher.rb
@@ -17,7 +17,7 @@ RSpec::Matchers.define :match_html do |expected|
   end
 
   def make_canonical_html(html)
-    Nokogiri::HTML(html) { |config| config.options = Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::COMPACT }
+    Nokogiri::HTML5(html) { |config| config[:options] = Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::COMPACT }
   end
 
 end