mirror of
https://github.com/discourse/discourse.git
synced 2025-05-31 17:45:54 +08:00
FIX: Relevance search will now consider document length in ranking.
The default ranking option ranks by the number of matches, which is highly problematic when posts are stuffed with a keyword. The rank is now divided by the document length, which is a much fairer way to rank.
This commit is contained in:
@ -838,13 +838,14 @@ class Search
|
|||||||
posts = posts.order("posts.like_count DESC")
|
posts = posts.order("posts.like_count DESC")
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
# 0|32 default normalization scaled into the range zero to one
|
# 2|32 divides the rank by the document length and scales the range from
|
||||||
|
# zero to one
|
||||||
data_ranking = <<~SQL
|
data_ranking = <<~SQL
|
||||||
(
|
(
|
||||||
TS_RANK_CD(
|
TS_RANK_CD(
|
||||||
post_search_data.search_data,
|
post_search_data.search_data,
|
||||||
#{ts_query(weight_filter: weights)},
|
#{ts_query(weight_filter: weights)},
|
||||||
0|32
|
2|32
|
||||||
) *
|
) *
|
||||||
(
|
(
|
||||||
CASE categories.search_priority
|
CASE categories.search_priority
|
||||||
|
@ -334,6 +334,27 @@ describe Search do
|
|||||||
expect(result.posts).to contain_exactly(reply)
|
expect(result.posts).to contain_exactly(reply)
|
||||||
expect(result.blurb(reply)).to eq(expected_blurb)
|
expect(result.blurb(reply)).to eq(expected_blurb)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'does not allow a post with repeated words to dominate the ranking' do
|
||||||
|
category = Fabricate(:category, name: "winter is coming")
|
||||||
|
|
||||||
|
post = Fabricate(:post,
|
||||||
|
raw: "I think winter will end soon",
|
||||||
|
topic: Fabricate(:topic,
|
||||||
|
title: "dragon john snow winter",
|
||||||
|
category: category
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
post2 = Fabricate(:post,
|
||||||
|
raw: "I think winter winter winter winter winter will end soon",
|
||||||
|
topic: Fabricate(:topic, title: "dragon john snow summer", category: category)
|
||||||
|
)
|
||||||
|
|
||||||
|
result = Search.execute('winter')
|
||||||
|
|
||||||
|
expect(result.posts).to eq([post, post2, category.topic.first_post])
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'searching for quoted title' do
|
context 'searching for quoted title' do
|
||||||
@ -940,22 +961,45 @@ describe Search do
|
|||||||
today = Date.today
|
today = Date.today
|
||||||
yesterday = 1.day.ago
|
yesterday = 1.day.ago
|
||||||
two_days_ago = 2.days.ago
|
two_days_ago = 2.days.ago
|
||||||
|
category = Fabricate(:category)
|
||||||
|
|
||||||
|
old_topic = Fabricate(:topic,
|
||||||
|
title: 'First Topic, testing the created_at sort',
|
||||||
|
created_at: two_days_ago,
|
||||||
|
category: category
|
||||||
|
)
|
||||||
|
|
||||||
old_topic = Fabricate(:topic,
|
|
||||||
title: 'First Topic, testing the created_at sort',
|
|
||||||
created_at: two_days_ago)
|
|
||||||
latest_topic = Fabricate(:topic,
|
latest_topic = Fabricate(:topic,
|
||||||
title: 'Second Topic, testing the created_at sort',
|
title: 'Second Topic, testing the created_at sort',
|
||||||
created_at: yesterday)
|
created_at: yesterday,
|
||||||
|
category: category
|
||||||
|
)
|
||||||
|
|
||||||
old_relevant_topic_post = Fabricate(:post, topic: old_topic, created_at: yesterday, raw: 'Relevant Topic')
|
old_relevant_topic_post = Fabricate(:post,
|
||||||
latest_irelevant_topic_post = Fabricate(:post, topic: latest_topic, created_at: today, raw: 'Not Relevant')
|
topic: old_topic,
|
||||||
|
created_at: yesterday,
|
||||||
|
raw: 'Relevant Relevant Topic'
|
||||||
|
)
|
||||||
|
|
||||||
|
latest_irelevant_topic_post = Fabricate(:post,
|
||||||
|
topic: latest_topic,
|
||||||
|
created_at: today,
|
||||||
|
raw: 'Not Relevant'
|
||||||
|
)
|
||||||
|
|
||||||
# Expecting the default results
|
# Expecting the default results
|
||||||
expect(Search.execute('Topic').posts.map(&:id)).to eq([old_relevant_topic_post.id, latest_irelevant_topic_post.id])
|
expect(Search.execute('Topic').posts).to contain_exactly(
|
||||||
|
old_relevant_topic_post,
|
||||||
|
latest_irelevant_topic_post,
|
||||||
|
category.topic.first_post
|
||||||
|
)
|
||||||
|
|
||||||
# Expecting the ordered by topic creation results
|
# Expecting the ordered by topic creation results
|
||||||
expect(Search.execute('Topic order:latest_topic').posts.map(&:id)).to eq([latest_irelevant_topic_post.id, old_relevant_topic_post.id])
|
expect(Search.execute('Topic order:latest_topic').posts).to contain_exactly(
|
||||||
|
latest_irelevant_topic_post,
|
||||||
|
old_relevant_topic_post,
|
||||||
|
category.topic.first_post
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'can tokenize dots' do
|
it 'can tokenize dots' do
|
||||||
|
Reference in New Issue
Block a user