From d8704c11ca3b15da9fdfda7792f775ad67cb0f9e Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Tue, 2 Apr 2019 09:52:59 +0800 Subject: [PATCH] PERF: Better use of index when queueing a topci for search reindex. Also move `Search::INDEX_VERSION` to `SearchIndexer` which is where the version is actually being used. --- app/jobs/scheduled/reindex_search.rb | 10 ++++---- app/services/search_indexer.rb | 12 ++++++--- lib/search.rb | 1 - spec/jobs/reindex_search_spec.rb | 2 +- spec/services/search_indexer_spec.rb | 37 +++++++++++++++++++++------- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/app/jobs/scheduled/reindex_search.rb b/app/jobs/scheduled/reindex_search.rb index 408fd1f517d..b6dfc1e8061 100644 --- a/app/jobs/scheduled/reindex_search.rb +++ b/app/jobs/scheduled/reindex_search.rb @@ -71,7 +71,7 @@ module Jobs def load_problem_post_ids(limit) params = { locale: SiteSetting.default_locale, - version: Search::INDEX_VERSION, + version: SearchIndexer::INDEX_VERSION, limit: limit } @@ -96,7 +96,7 @@ module Jobs def load_problem_category_ids(limit) Category.joins(:category_search_data) .where('category_search_data.locale != ? - OR category_search_data.version != ?', SiteSetting.default_locale, Search::INDEX_VERSION) + OR category_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::INDEX_VERSION) .limit(limit) .pluck(:id) end @@ -104,7 +104,7 @@ module Jobs def load_problem_topic_ids(limit) Topic.joins(:topic_search_data) .where('topic_search_data.locale != ? - OR topic_search_data.version != ?', SiteSetting.default_locale, Search::INDEX_VERSION) + OR topic_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::INDEX_VERSION) .limit(limit) .pluck(:id) end @@ -112,7 +112,7 @@ module Jobs def load_problem_user_ids(limit) User.joins(:user_search_data) .where('user_search_data.locale != ? - OR user_search_data.version != ?', SiteSetting.default_locale, Search::INDEX_VERSION) + OR user_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::INDEX_VERSION) .limit(limit) .pluck(:id) end @@ -120,7 +120,7 @@ module Jobs def load_problem_tag_ids(limit) Tag.joins(:tag_search_data) .where('tag_search_data.locale != ? - OR tag_search_data.version != ?', SiteSetting.default_locale, Search::INDEX_VERSION) + OR tag_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::INDEX_VERSION) .limit(limit) .pluck(:id) end diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index a7278ad2341..88aec292ed8 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -2,6 +2,8 @@ require_dependency 'search' class SearchIndexer + INDEX_VERSION = 2 + REINDEX_VERSION = 0 def self.disable @disabled = true @@ -61,7 +63,7 @@ class SearchIndexer raw_data: indexed_data, id: id, locale: SiteSetting.default_locale, - version: Search::INDEX_VERSION + version: INDEX_VERSION } # Would be nice to use AR here but not sure how to execut Postgres functions @@ -116,10 +118,12 @@ class SearchIndexer def self.queue_post_reindex(topic_id) return if @disabled - DB.exec(<<~SQL, topic_id: topic_id) + DB.exec(<<~SQL, topic_id: topic_id, version: REINDEX_VERSION) UPDATE post_search_data - SET version = 0 - WHERE post_id IN (SELECT id FROM posts WHERE topic_id = :topic_id) + SET version = :version + FROM posts + WHERE post_search_data.post_id = posts.id + AND posts.topic_id = :topic_id SQL end diff --git a/lib/search.rb b/lib/search.rb index d0e296a9306..f7b89e4ded8 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -1,7 +1,6 @@ require_dependency 'search/grouped_search_results' class Search - INDEX_VERSION = 2.freeze DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/ cattr_accessor :preloaded_topic_custom_fields diff --git a/spec/jobs/reindex_search_spec.rb b/spec/jobs/reindex_search_spec.rb index 0fe89adabfe..3df9fa242fb 100644 --- a/spec/jobs/reindex_search_spec.rb +++ b/spec/jobs/reindex_search_spec.rb @@ -25,7 +25,7 @@ describe Jobs::ReindexSearch do model.reload subject.execute({}) - expect(model.send("#{m}_search_data").version).to eq Search::INDEX_VERSION + expect(model.send("#{m}_search_data").version).to eq SearchIndexer::INDEX_VERSION end end diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb index 9290b6737b8..5762ffd1467 100644 --- a/spec/services/search_indexer_spec.rb +++ b/spec/services/search_indexer_spec.rb @@ -3,6 +3,14 @@ require 'rails_helper' describe SearchIndexer do let(:post_id) { 99 } + before do + SearchIndexer.enable + end + + after do + SearchIndexer.disable + end + def scrub(html, strip_diacritics: false) SearchIndexer.scrub_html_for_search(html, strip_diacritics: strip_diacritics) end @@ -75,7 +83,7 @@ describe SearchIndexer do raw_data, locale, version = PostSearchData.where(post_id: post_id).pluck(:raw_data, :locale, :version)[0] expect(raw_data).to eq("This is a test") expect(locale).to eq("en") - expect(version).to eq(Search::INDEX_VERSION) + expect(version).to eq(SearchIndexer::INDEX_VERSION) SearchIndexer.update_posts_index(post_id, "tester", "", nil, nil) @@ -86,14 +94,6 @@ describe SearchIndexer do describe '.index' do let(:post) { Fabricate(:post) } - before do - SearchIndexer.enable - end - - after do - SearchIndexer.disable - end - it 'should index posts correctly' do expect { post }.to change { PostSearchData.count }.by(1) @@ -153,4 +153,23 @@ describe SearchIndexer do ) end end + + describe '.queue_post_reindex' do + let(:post) { Fabricate(:post) } + let(:topic) { post.topic } + + it 'should reset the version of search data for all posts in the topic' do + post2 = Fabricate(:post) + + SearchIndexer.queue_post_reindex(topic.id) + + expect(post.reload.post_search_data.version).to eq( + SearchIndexer::REINDEX_VERSION + ) + + expect(post2.reload.post_search_data.version).to eq( + SearchIndexer::INDEX_VERSION + ) + end + end end