mirror of
https://github.com/discourse/discourse.git
synced 2025-05-22 06:14:12 +08:00
FIX: Improve HTML to Markdown conversion (#9231)
This commit ensures that whitespace is preserved inside <pre> elements but removed inside text paragraphs.
This commit is contained in:
@@ -31,7 +31,7 @@ class HtmlToMarkdown
|
||||
|
||||
def remove_whitespaces!
|
||||
@doc.traverse do |node|
|
||||
if node.is_a? Nokogiri::XML::Text
|
||||
if node.is_a?(Nokogiri::XML::Text) && node.parent.name != "pre"
|
||||
node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element&.description&.block?
|
||||
node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element.nil? && node.parent.description&.block?
|
||||
node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element&.description&.block?
|
||||
@@ -220,10 +220,21 @@ class HtmlToMarkdown
|
||||
end
|
||||
|
||||
def visit_text(node)
|
||||
top_block = @stack[-1]
|
||||
|
||||
if top_block.name == "pre"
|
||||
top_block.markdown << node.text
|
||||
return
|
||||
end
|
||||
|
||||
node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element.nil? && EMPHASIS.include?(node.parent.name)
|
||||
indent = node.text[/^\s+/] || ""
|
||||
|
||||
if top_block.markdown.present? && indent = node.text[/^\s+/]
|
||||
top_block.markdown << indent
|
||||
end
|
||||
|
||||
text = node.text.gsub(/^\s+/, "").gsub(/\s{2,}/, " ")
|
||||
@stack[-1].markdown << [indent, text].join("")
|
||||
top_block.markdown << text
|
||||
end
|
||||
|
||||
def format_block
|
||||
|
Reference in New Issue
Block a user