FIX: HtmlToMarkdown should keep HTML entities for <, > and & within HTML elements

Not all HTML elements are converted into Markdown. Some are kept as HTML.
Without this fix, XML/HTML entities that are formatted as text instead of code are swallowed by Discourse.
This also fixes the handling of quotes in the `title` attribute of the `<abbr>` tag.
This commit is contained in:
Gerhard Schlager
2024-06-09 14:17:32 +02:00
committed by Gerhard Schlager
parent 3c9d61d302
commit 7bdf47b864
2 changed files with 67 additions and 3 deletions

View File

@ -197,7 +197,9 @@ class HtmlToMarkdown
ALLOWED ||= %w[kbd del ins small big sub sup dl dd dt mark]
ALLOWED.each do |tag|
define_method("visit_#{tag}") { |node| "<#{tag}>#{traverse(node)}</#{tag}>" }
define_method("visit_#{tag}") do |node|
"<#{tag}>#{traverse(node, within_html_block: true)}</#{tag}>"
end
end
def visit_blockquote(node)
@ -250,8 +252,8 @@ class HtmlToMarkdown
def visit_abbr(node)
title = node["title"].presence
title_attr = title ? %[ title="#{title}"] : ""
"<abbr#{title_attr}>#{traverse(node)}</abbr>"
attributes = { title: } if title
create_element("abbr", traverse(node, within_html_block: true), attributes).to_html
end
def visit_acronym(node)