FEATURE: Nokogumbo (#9577)

* FEATURE: Nokogumbo

Use Nokogumbo HTML parser.
This commit is contained in:
Krzysztof Kotlarek
2020-05-05 13:46:57 +10:00
committed by GitHub
parent b8b1cbbfb9
commit 9bff0882c3
50 changed files with 165 additions and 179 deletions

View File

@ -78,7 +78,7 @@ module Oneboxer
# Parse URLs out of HTML, returning the document when finished.
def self.each_onebox_link(string_or_doc, extra_paths: [])
doc = string_or_doc
doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
onebox_links = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths)
if onebox_links.present?
@ -94,14 +94,14 @@ module Oneboxer
def self.apply(string_or_doc, extra_paths: nil)
doc = string_or_doc
doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
changed = false
each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
onebox, _ = yield(url, element)
if onebox
parsed_onebox = Nokogiri::HTML::fragment(onebox)
parsed_onebox = Nokogiri::HTML5::fragment(onebox)
next unless parsed_onebox.children.count > 0
if element&.parent&.node_name&.downcase == "p" &&