mirror of
https://github.com/discourse/discourse.git
synced 2025-06-04 23:36:11 +08:00
FIX: Wikipedia onebox images and sections (#31384)
Both article images and section extraction were not working for Wikipedia oneboxes. This commit fixes both and updates our spec fixture responses to use the new HTML.
This commit is contained in:
@ -30,14 +30,18 @@ module Onebox
|
||||
if m_url_hash.nil? # no hash found in url
|
||||
paras = raw.search("p") # default get all the paras
|
||||
else
|
||||
section_header_title = raw.xpath("//span[@id='#{CGI.unescape(m_url_hash_name)}']")
|
||||
section_header_title =
|
||||
raw.xpath(
|
||||
"//*[@id=\"#{CGI.unescape(m_url_hash_name)}\"][self::h1 or self::h2 or self::h3 or self::h4 or self::h5 or self::h6]",
|
||||
)
|
||||
|
||||
if section_header_title.empty?
|
||||
paras = raw.search("p") # default get all the paras
|
||||
else
|
||||
section_title_text = section_header_title.inner_text
|
||||
section_header = section_header_title[0].parent # parent element of the section span element should be an <h3> node
|
||||
cur_element = section_header
|
||||
|
||||
# Get .mw-heading which wraps the h* element
|
||||
cur_element = section_header_title[0].parent
|
||||
|
||||
# p|text|div covers the general case. We assume the presence of at least 1 P node. If the section has no P node, we may end up with a P node from the next section.
|
||||
# div tag is commonly used as an assets wrapper in an article section, often as the first element holding an image.
|
||||
@ -95,7 +99,7 @@ module Onebox
|
||||
description: text,
|
||||
}
|
||||
|
||||
img = raw.css(".image img")
|
||||
img = raw.css(".infobox-image img")
|
||||
|
||||
if img && img.size > 0
|
||||
img.each do |i|
|
||||
|
1535
spec/fixtures/onebox/wikipedia.response
vendored
1535
spec/fixtures/onebox/wikipedia.response
vendored
File diff suppressed because it is too large
Load Diff
1528
spec/fixtures/onebox/wikipedia_url_encoded.response
vendored
1528
spec/fixtures/onebox/wikipedia_url_encoded.response
vendored
File diff suppressed because one or more lines are too long
@ -21,7 +21,7 @@ RSpec.describe Onebox::Engine::WikipediaOnebox do
|
||||
end
|
||||
|
||||
it "includes summary" do
|
||||
expect(html).to include("Billy Jack is a 1971 action/drama")
|
||||
expect(html).to include("Billy Jack is a 1971 American action drama independent film")
|
||||
end
|
||||
end
|
||||
|
||||
@ -34,9 +34,7 @@ RSpec.describe Onebox::Engine::WikipediaOnebox do
|
||||
end
|
||||
|
||||
describe "url with url-encoded section hash" do
|
||||
let(:wp_link) do
|
||||
"https://fr.wikipedia.org/wiki/Th%C3%A9ologie#La_th%C3%A9ologie_selon_Aristote"
|
||||
end
|
||||
let(:wp_link) { "https://fr.wikipedia.org/wiki/Th%C3%A9ologie#L'ontoth%C3%A9ologie" }
|
||||
|
||||
before do
|
||||
stub_request(:get, "https://fr.wikipedia.org/wiki/Th%C3%A9ologie").to_return(
|
||||
@ -46,7 +44,7 @@ RSpec.describe Onebox::Engine::WikipediaOnebox do
|
||||
end
|
||||
|
||||
it "includes summary" do
|
||||
expect(html).to include("Le terme est repris par")
|
||||
expect(html).to include("investigation rationnelle sur les substances divines")
|
||||
end
|
||||
end
|
||||
|
||||
|
Reference in New Issue
Block a user