FIX: Don't error out when trying to retrieve title and URL won't encode (#24660)

This commit is contained in:
Ted Johansson
2023-12-01 15:03:06 +08:00
committed by GitHub
parent aadc104817
commit 54e813e964
4 changed files with 23 additions and 2 deletions

View File

@ -2,6 +2,11 @@
module RetrieveTitle
CRAWL_TIMEOUT = 1
# Exception classes that `crawl` rescues without re-raising: when fetching a
# title raises one of these, the error is swallowed and no title is returned.
# Frozen so the rescued set cannot be mutated at runtime.
UNRECOVERABLE_ERRORS = [
  Net::ReadTimeout,
  FinalDestination::SSRFError,
  FinalDestination::UrlEncodingError,
].freeze
def self.crawl(url, max_redirects: nil, initial_https_redirect_ignore_limit: false)
fetch_title(
@ -9,8 +14,8 @@ module RetrieveTitle
max_redirects: max_redirects,
initial_https_redirect_ignore_limit: initial_https_redirect_ignore_limit,
)
rescue Net::ReadTimeout, FinalDestination::SSRFError
# do nothing for Net::ReadTimeout errors
rescue *UNRECOVERABLE_ERRORS
# ¯\_(ツ)_/¯
end
def self.extract_title(html, encoding = nil)