mirror of
https://github.com/discourse/discourse.git
synced 2025-05-22 16:21:18 +08:00
FIX: Hack our title retriever so that it parses YouTube URLs
This commit is contained in:
@ -15,6 +15,12 @@ module RetrieveTitle
|
||||
|
||||
title = doc.at('title')&.inner_text
|
||||
|
||||
# A horrible hack - YouTube uses `document.title` to populate the title
|
||||
# for some reason. For any site other than YouTube this wouldn't be worth it.
|
||||
if title == "YouTube" && html =~ /document\.title *= *"(.*)";/
|
||||
title = Regexp.last_match[1].sub(/ - YouTube$/, '')
|
||||
end
|
||||
|
||||
if !title && node = doc.at('meta[property="og:title"]')
|
||||
title = node['content']
|
||||
end
|
||||
@ -32,9 +38,11 @@ module RetrieveTitle
|
||||
private
|
||||
|
||||
def self.max_chunk_size(uri)
|
||||
# Amazon leaves the title until very late. Normally it's a bad idea to make an exception for
|
||||
# one host but amazon is a big one.
|
||||
|
||||
# Amazon and YouTube leave the title until very late. Exceptions are bad
|
||||
# but these are large sites.
|
||||
return 80 if uri.host =~ /amazon\.(com|ca|co\.uk|es|fr|de|it|com\.au|com\.br|cn|in|co\.jp|com\.mx)$/
|
||||
return 300 if uri.host =~ /youtube\.com$/ || uri.host =~ /youtu.be/
|
||||
|
||||
# default is 10k
|
||||
10
|
||||
|
Reference in New Issue
Block a user