mirror of
https://github.com/discourse/discourse.git
synced 2025-06-01 03:35:15 +08:00
FIX: ignore malformed HTML for title extraction (#18040)
Certain HTML can be rejected by nokogumbo, specifically in cases where an element has an enormous number of attributes. This ensures that malformed HTML is simply skipped instead of leaking an exception and terminating downstream processes.
This commit is contained in:
@ -51,6 +51,18 @@ RSpec.describe RetrieveTitle do
|
||||
)
|
||||
expect(title).to eq("Video Title")
|
||||
end
|
||||
|
||||
it "will not exception out for invalid html" do
|
||||
attributes = (1..1000).map { |x| " attr#{x}='1' " }.join
|
||||
title = RetrieveTitle.extract_title <<~HTML
|
||||
<html>
|
||||
<title>test</title>
|
||||
<body #{attributes}>
|
||||
</html>
|
||||
HTML
|
||||
|
||||
expect(title).to eq(nil)
|
||||
end
|
||||
end
|
||||
|
||||
describe ".crawl" do
|
||||
|
Reference in New Issue
Block a user