Special case: When crawling a link to an image, just put the filename as the title.
This commit is contained in:
Robin Ward 2014-04-10 13:45:13 -04:00
parent 99e2bab62d
commit e80851b0fa

View File

@@ -89,6 +89,16 @@ module Jobs
crawled = false
# Special case: Images
# If the link is to an image, put the filename as the title
if topic_link.url =~ /\.(jpg|gif|png)$/
uri = URI(topic_link.url)
filename = File.basename(uri.path)
crawled = (TopicLink.where(id: topic_link.id).update_all(["title = ?, crawled_at = CURRENT_TIMESTAMP", filename]) == 1)
end
unless crawled
# Fetch the beginning of the document to find the title
result = CrawlTopicLink.fetch_beginning(topic_link.url)
doc = Nokogiri::HTML(result)
if doc
@@ -102,6 +112,7 @@ module Jobs
end
end
end
end
rescue Exception
# If there was a connection error, do nothing
ensure