From 6385fbbfbf420994ca25c6e2cab32baf910fb9e8 Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Wed, 15 Jul 2020 13:25:15 +0800 Subject: [PATCH] FIX: Ignore document length in search when ranking by relevance. Considering document length in search introduced too much variance in our search results such that it made certain searches better but at the same time made certain searches worse. Instead, we want to have a more deterministic way of ranking search so that it is easier to reason about why a post is ranked higher in search than another. The long-term plan to tackle repeated terms is to restrict the number of positions for a given lexeme in our search index. --- config/site_settings.yml | 2 +- spec/components/search_spec.rb | 29 +++-------------------------- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/config/site_settings.yml b/config/site_settings.yml index c7f82ac2471..aeab2e006ee 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1748,7 +1748,7 @@ backups: search: search_ranking_normalization: - default: '1' + default: '0' hidden: true min_search_term_length: client: true diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 4f3fd88a673..90579f89956 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -402,29 +402,6 @@ describe Search do expect(result.blurb(reply)).to eq(expected_blurb) end - it 'does not allow a post with repeated words to dominate the ranking' do - category = Fabricate(:category_with_definition, name: "winter is coming") - - post = Fabricate(:post, - raw: "I think winter will end soon", - topic: Fabricate(:topic, - title: "dragon john snow winter", - category: category - ) - ) - - post2 = Fabricate(:post, - raw: "I think #{'winter' * 20} will end soon", - topic: Fabricate(:topic, title: "dragon john snow summer", category: category) - ) - - result = Search.execute('winter') - - expect(result.posts.pluck(:id)).to eq([ - post.id, 
category.topic.first_post.id, post2.id - ]) - end - it 'applies a small penalty to closed topic when ranking' do post = Fabricate(:post, raw: "My weekly update", @@ -698,12 +675,12 @@ describe Search do expect(search.posts.map(&:id)).to eq([ child_of_ignored_category.topic.first_post, category.topic.first_post, - post, - post2 + post2, + post ].map(&:id)) search = Search.execute("snow") - expect(search.posts).to eq([post, post2]) + expect(search.posts.map(&:id)).to eq([post2.id, post.id]) category.set_permissions({}) category.save