FIX: search indexer had various cases where it could fail

Prior to this fix, if a post had the text www.test.com/abc it would fail
to index.

This also simplifies the rules to avoid full URL parsing, which can be
expensive.
This commit is contained in:
Sam Saffron
2019-06-04 16:19:27 +10:00
parent bb98785569
commit 6428aa5b1f
2 changed files with 17 additions and 10 deletions

View File

@ -21,16 +21,13 @@ class SearchIndexer
# insert some extra words for I.am.a.word so "word" is tokenized
# I.am.a.word becomes I.am.a.word am a word
raw.gsub(/[^[:space:]]*[\.]+[^[:space:]]*/) do |with_dot|
if with_dot.match?(PlainTextToMarkdown::URL_REGEX)
"#{with_dot} #{URI.parse(with_dot).hostname.gsub('.', ' ')}"
else
split = with_dot.split(".")
if split.length > 1
with_dot + ((+" ") << split[1..-1].join(" "))
else
with_dot
end
split = with_dot.split(/https?:\/\/|[?:;,.\/]/)
if split.length > 1
with_dot + ((+" ") << split[1..-1].reject { |x| x.blank? }.join(" "))
else
with_dot
end
end
end