FIX: Hoisting linebreaks shouldn't fail for HTML5 elements (#14364)

This commit is contained in:
Gerhard Schlager
2021-09-17 10:41:34 +02:00
committed by GitHub
parent 2c7cc40be3
commit 962ccf0ab5
2 changed files with 14 additions and 7 deletions

View File

@ -53,7 +53,7 @@ class HtmlToMarkdown
doc.css("br.#{klass}").each do |br| doc.css("br.#{klass}").each do |br|
parent = br.parent parent = br.parent
if parent.description.block? if block?(parent)
br.remove_class(klass) br.remove_class(klass)
else else
before, after = parent.children.slice_when { |n| n == br }.to_a before, after = parent.children.slice_when { |n| n == br }.to_a
@ -194,7 +194,7 @@ class HtmlToMarkdown
BLOCKS ||= %w{div tr} BLOCKS ||= %w{div tr}
BLOCKS.each do |tag| BLOCKS.each do |tag|
define_method("visit_#{tag}") do |node| define_method("visit_#{tag}") do |node|
prefix = node.previous_element&.description&.block? ? "" : "\n" prefix = block?(node.previous_element) ? "" : "\n"
"#{prefix}#{traverse(node)}\n" "#{prefix}#{traverse(node)}\n"
end end
end end
@ -283,7 +283,7 @@ class HtmlToMarkdown
LISTS ||= %w{ul ol} LISTS ||= %w{ul ol}
LISTS.each do |tag| LISTS.each do |tag|
define_method("visit_#{tag}") do |node| define_method("visit_#{tag}") do |node|
prefix = node.previous_element&.description&.block? ? "" : "\n" prefix = block?(node.previous_element) ? "" : "\n"
suffix = node.ancestors("ul, ol, li").size > 0 ? "" : "\n" suffix = node.ancestors("ul, ol, li").size > 0 ? "" : "\n"
"#{prefix}#{traverse(node)}#{suffix}" "#{prefix}#{traverse(node)}#{suffix}"
end end
@ -358,4 +358,9 @@ class HtmlToMarkdown
node.text node.text
end end
# Tag names treated as block-level in addition to whatever
# node.description reports as a block element.
HTML5_BLOCK_ELEMENTS ||= %w[
  article aside details dialog figcaption figure
  footer header main nav section
]

# Tells whether +node+ should be handled as a block-level element.
#
# node - an element-like object responding to #description and #name,
#        or nil (e.g. when there is no previous element).
#
# Returns false for nil. Otherwise returns the truthy result of
# node.description&.block? when there is one, and falls back to a
# lookup of node.name in HTML5_BLOCK_ELEMENTS.
def block?(node)
  return false unless node

  # description can be nil (hence the safe navigation); presumably that is
  # the case for tags the parser has no description for -- TODO confirm.
  described_as_block = node.description&.block?
  return described_as_block if described_as_block

  HTML5_BLOCK_ELEMENTS.include?(node.name)
end
end end

View File

@ -38,9 +38,9 @@ describe HtmlToMarkdown do
HTML HTML
markdown = <<~MD markdown = <<~MD
Let me see if it happens by answering your message through Thunderbird. Let me see if it happens by answering your message through Thunderbird.
Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1
MD MD
expect(html_to_markdown(html)).to eq(markdown.strip) expect(html_to_markdown(html)).to eq(markdown.strip)
@ -70,13 +70,15 @@ describe HtmlToMarkdown do
html = <<~HTML html = <<~HTML
<aside class="quote no-group"> <aside class="quote no-group">
<blockquote> <blockquote>
<p>hello.</p> <p>Hello,<br>is it me you're looking for?</p>
</blockquote> </blockquote>
<br>
</aside> </aside>
HTML HTML
markdown = <<~MD markdown = <<~MD
> hello. > Hello,
> is it me you're looking for?
MD MD
expect(html_to_markdown(html)).to eq(markdown.strip) expect(html_to_markdown(html)).to eq(markdown.strip)