FIX: properly trim whitespaces (including those pesky &nbsp; html entities)

This commit is contained in:
Régis Hanol
2017-05-03 18:04:31 +02:00
parent dbb6e461aa
commit c880af8120
2 changed files with 18 additions and 4 deletions

View File

@@ -15,10 +15,10 @@ class HtmlToMarkdown
def remove_whitespaces!
@doc.traverse do |node|
if node.is_a? Nokogiri::XML::Text
node.content = node.content.lstrip if node.previous_element&.description&.block?
node.content = node.content.lstrip if node.previous_element.nil? && node.parent.description&.block?
node.content = node.content.rstrip if node.next_element&.description&.block?
node.content = node.content.rstrip if node.next_element.nil? && node.parent.description&.block?
node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element&.description&.block?
node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element.nil? && node.parent.description&.block?
node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element&.description&.block?
node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element.nil? && node.parent.description&.block?
node.remove if node.content.empty?
end
end