mirror of
https://github.com/discourse/discourse.git
synced 2025-05-22 07:53:49 +08:00
PERF: Avoid parsing Post#cooked
with Nokogiri for every search.
This commit is contained in:

committed by
Alan Guo Xiang Tan

parent
b979579c1b
commit
181c4eb760
@ -10,7 +10,7 @@ class SimilarTopicsController < ApplicationController
|
|||||||
attr_reader :topic
|
attr_reader :topic
|
||||||
|
|
||||||
def blurb
|
def blurb
|
||||||
Search::GroupedSearchResults.blurb_for(@topic.try(:blurb))
|
Search::GroupedSearchResults.blurb_for(cooked: @topic.try(:blurb))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class SearchIndexer
|
class SearchIndexer
|
||||||
POST_INDEX_VERSION = 3
|
POST_INDEX_VERSION = 4
|
||||||
MIN_POST_REINDEX_VERSION = 3
|
MIN_POST_REINDEX_VERSION = 3
|
||||||
TOPIC_INDEX_VERSION = 3
|
TOPIC_INDEX_VERSION = 3
|
||||||
CATEGORY_INDEX_VERSION = 3
|
CATEGORY_INDEX_VERSION = 3
|
||||||
@ -39,8 +39,6 @@ class SearchIndexer
|
|||||||
setweight(to_tsvector('#{stemmer}', coalesce(:d,'')), 'D')
|
setweight(to_tsvector('#{stemmer}', coalesce(:d,'')), 'D')
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
indexed_data = search_data.select { |d| d.length > 0 }.join(' ')
|
|
||||||
|
|
||||||
ranked_params = {
|
ranked_params = {
|
||||||
a: search_data[0],
|
a: search_data[0],
|
||||||
b: search_data[1],
|
b: search_data[1],
|
||||||
@ -48,6 +46,13 @@ class SearchIndexer
|
|||||||
d: search_data[3],
|
d: search_data[3],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
indexed_data =
|
||||||
|
if table.to_s == "post"
|
||||||
|
ranked_params[:d]
|
||||||
|
else
|
||||||
|
search_data.select { |d| d.length > 0 }.join(' ')
|
||||||
|
end
|
||||||
|
|
||||||
tsvector = DB.query_single("SELECT #{ranked_index}", ranked_params)[0]
|
tsvector = DB.query_single("SELECT #{ranked_index}", ranked_params)[0]
|
||||||
additional_lexemes = []
|
additional_lexemes = []
|
||||||
|
|
||||||
@ -105,7 +110,7 @@ class SearchIndexer
|
|||||||
scrubbed_cooked = scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH]
|
scrubbed_cooked = scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH]
|
||||||
|
|
||||||
# a bit inconsistent that we use title as A and body as B when in
|
# a bit inconsistent that we use title as A and body as B when in
|
||||||
# the post index body is C
|
# the post index body is D
|
||||||
update_index(table: 'topic', id: topic_id, raw_data: [title, scrubbed_cooked])
|
update_index(table: 'topic', id: topic_id, raw_data: [title, scrubbed_cooked])
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -165,9 +170,11 @@ class SearchIndexer
|
|||||||
end
|
end
|
||||||
|
|
||||||
category_name = topic.category&.name if topic
|
category_name = topic.category&.name if topic
|
||||||
|
|
||||||
if topic
|
if topic
|
||||||
tags = topic.tags.select(:id, :name)
|
tags = topic.tags.select(:id, :name).to_a
|
||||||
unless tags.empty?
|
|
||||||
|
if tags.present?
|
||||||
tag_names = (tags.map(&:name) + Tag.where(target_tag_id: tags.map(&:id)).pluck(:name)).join(' ')
|
tag_names = (tags.map(&:name) + Tag.where(target_tag_id: tags.map(&:id)).pluck(:name)).join(' ')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -1128,7 +1128,7 @@ class Search
|
|||||||
end
|
end
|
||||||
|
|
||||||
def posts_eager_loads(query)
|
def posts_eager_loads(query)
|
||||||
query = query.includes(:user)
|
query = query.includes(:user, :post_search_data)
|
||||||
topic_eager_loads = [:category]
|
topic_eager_loads = [:category]
|
||||||
|
|
||||||
if SiteSetting.tagging_enabled
|
if SiteSetting.tagging_enabled
|
||||||
|
@ -58,7 +58,19 @@ class Search
|
|||||||
end
|
end
|
||||||
|
|
||||||
def blurb(post)
|
def blurb(post)
|
||||||
GroupedSearchResults.blurb_for(post.cooked, @blurb_term, @blurb_length)
|
opts = {
|
||||||
|
term: @blurb_term,
|
||||||
|
blurb_length: @blurb_length
|
||||||
|
}
|
||||||
|
|
||||||
|
if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
|
||||||
|
opts[:cooked] = post.post_search_data.raw_data
|
||||||
|
opts[:scrub] = false
|
||||||
|
else
|
||||||
|
opts[:cooked] = post.cooked
|
||||||
|
end
|
||||||
|
|
||||||
|
GroupedSearchResults.blurb_for(**opts)
|
||||||
end
|
end
|
||||||
|
|
||||||
def add(object)
|
def add(object)
|
||||||
@ -73,9 +85,9 @@ class Search
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.blurb_for(cooked, term = nil, blurb_length = BLURB_LENGTH)
|
def self.blurb_for(cooked: nil, term: nil, blurb_length: BLURB_LENGTH, scrub: true)
|
||||||
blurb = nil
|
blurb = nil
|
||||||
cooked = SearchIndexer.scrub_html_for_search(cooked)
|
cooked = SearchIndexer.scrub_html_for_search(cooked) if scrub
|
||||||
|
|
||||||
urls = Set.new
|
urls = Set.new
|
||||||
cooked.scan(URI.regexp(%w{http https})) { urls << $& }
|
cooked.scan(URI.regexp(%w{http https})) { urls << $& }
|
||||||
|
@ -38,7 +38,7 @@ describe Search do
|
|||||||
|
|
||||||
link to a video file: https://somesite.com/content/somethingelse.MOV
|
link to a video file: https://somesite.com/content/somethingelse.MOV
|
||||||
RAW
|
RAW
|
||||||
result = Search::GroupedSearchResults.blurb_for(cooked)
|
result = Search::GroupedSearchResults.blurb_for(cooked: cooked)
|
||||||
expect(result).to eq("link to an external page: https://google.com/?u=bar link to an audio file: #{I18n.t("search.audio")} link to a video file: #{I18n.t("search.video")}")
|
expect(result).to eq("link to an external page: https://google.com/?u=bar link to an audio file: #{I18n.t("search.audio")} link to a video file: #{I18n.t("search.video")}")
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -51,7 +51,7 @@ describe Search do
|
|||||||
http://localhost/uploads/default/original/1X/90adc0092b30c04b761541bc0322d0dce3d896e7.m4a
|
http://localhost/uploads/default/original/1X/90adc0092b30c04b761541bc0322d0dce3d896e7.m4a
|
||||||
RAW
|
RAW
|
||||||
|
|
||||||
result = Search::GroupedSearchResults.blurb_for(cooked)
|
result = Search::GroupedSearchResults.blurb_for(cooked: cooked)
|
||||||
expect(result).to eq("Here goes a test cooked with enough characters to hit the blurb limit. Something is very interesting about this audio file. #{I18n.t("search.audio")}")
|
expect(result).to eq("Here goes a test cooked with enough characters to hit the blurb limit. Something is very interesting about this audio file. #{I18n.t("search.audio")}")
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -59,7 +59,7 @@ describe Search do
|
|||||||
cooked = <<~RAW
|
cooked = <<~RAW
|
||||||
invalid URL: http:error] should not trip up blurb generation.
|
invalid URL: http:error] should not trip up blurb generation.
|
||||||
RAW
|
RAW
|
||||||
result = Search::GroupedSearchResults.blurb_for(cooked)
|
result = Search::GroupedSearchResults.blurb_for(cooked: cooked)
|
||||||
expect(result).to eq("invalid URL: http:error] should not trip up blurb generation.")
|
expect(result).to eq("invalid URL: http:error] should not trip up blurb generation.")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -3,10 +3,22 @@
|
|||||||
require 'rails_helper'
|
require 'rails_helper'
|
||||||
|
|
||||||
describe SearchController do
|
describe SearchController do
|
||||||
|
fab!(:awesome_topic) do
|
||||||
|
topic = Fabricate(:topic)
|
||||||
|
tag = Fabricate(:tag)
|
||||||
|
topic.tags << tag
|
||||||
|
Fabricate(:tag, target_tag_id: tag.id)
|
||||||
|
topic
|
||||||
|
end
|
||||||
|
|
||||||
fab!(:awesome_post) do
|
fab!(:awesome_post) do
|
||||||
SearchIndexer.enable
|
SearchIndexer.enable
|
||||||
Fabricate(:post, raw: 'this is my really awesome post')
|
Fabricate(:post, topic: awesome_topic, raw: 'this is my really awesome post')
|
||||||
|
end
|
||||||
|
|
||||||
|
fab!(:awesome_post_2) do
|
||||||
|
SearchIndexer.enable
|
||||||
|
Fabricate(:post, raw: 'this is my really awesome post 2')
|
||||||
end
|
end
|
||||||
|
|
||||||
fab!(:user) do
|
fab!(:user) do
|
||||||
@ -95,10 +107,14 @@ describe SearchController do
|
|||||||
|
|
||||||
data = response.parsed_body
|
data = response.parsed_body
|
||||||
|
|
||||||
expect(data['posts'].length).to eq(1)
|
expect(data['posts'].length).to eq(2)
|
||||||
expect(data['posts'][0]['id']).to eq(awesome_post.id)
|
expect(data['posts'][0]['id']).to eq(awesome_post_2.id)
|
||||||
expect(data['posts'][0]['blurb']).to eq(awesome_post.raw)
|
expect(data['posts'][0]['blurb']).to eq(awesome_post_2.raw)
|
||||||
expect(data['topics'][0]['id']).to eq(awesome_post.topic_id)
|
expect(data['topics'][0]['id']).to eq(awesome_post_2.topic_id)
|
||||||
|
|
||||||
|
expect(data['posts'][1]['id']).to eq(awesome_post.id)
|
||||||
|
expect(data['posts'][1]['blurb']).to eq(awesome_post.raw)
|
||||||
|
expect(data['topics'][1]['id']).to eq(awesome_post.topic_id)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "can search correctly with advanced search filters" do
|
it "can search correctly with advanced search filters" do
|
||||||
|
@ -20,12 +20,13 @@ describe SearchIndexer do
|
|||||||
it 'correctly indexes chinese' do
|
it 'correctly indexes chinese' do
|
||||||
SiteSetting.default_locale = 'zh_CN'
|
SiteSetting.default_locale = 'zh_CN'
|
||||||
data = "你好世界"
|
data = "你好世界"
|
||||||
expect(data.split(" ").length).to eq(1)
|
|
||||||
|
|
||||||
SearchIndexer.update_posts_index(post_id, "你好世界", "", "", nil)
|
SearchIndexer.update_posts_index(post_id, "", "", "", data)
|
||||||
|
|
||||||
raw_data = PostSearchData.where(post_id: post_id).pluck(:raw_data)[0]
|
post_search_data = PostSearchData.find_by(post_id: post_id)
|
||||||
expect(raw_data.split(' ').length).to eq(2)
|
|
||||||
|
expect(post_search_data.raw_data).to eq("你好 世界")
|
||||||
|
expect(post_search_data.search_data).to eq("'世界':2 '你好':1")
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'extract youtube title' do
|
it 'extract youtube title' do
|
||||||
@ -104,11 +105,6 @@ describe SearchIndexer do
|
|||||||
expect(raw_data).to eq("This is a test")
|
expect(raw_data).to eq("This is a test")
|
||||||
expect(locale).to eq(SiteSetting.default_locale)
|
expect(locale).to eq(SiteSetting.default_locale)
|
||||||
expect(version).to eq(SearchIndexer::POST_INDEX_VERSION)
|
expect(version).to eq(SearchIndexer::POST_INDEX_VERSION)
|
||||||
|
|
||||||
SearchIndexer.update_posts_index(post_id, "tester", "", nil, nil)
|
|
||||||
|
|
||||||
raw_data = PostSearchData.where(post_id: post_id).pluck(:raw_data)[0]
|
|
||||||
expect(raw_data).to eq("tester")
|
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '.index' do
|
describe '.index' do
|
||||||
@ -118,10 +114,10 @@ describe SearchIndexer do
|
|||||||
expect { post }.to change { PostSearchData.count }.by(1)
|
expect { post }.to change { PostSearchData.count }.by(1)
|
||||||
|
|
||||||
expect { post.update!(raw: "this is new content") }
|
expect { post.update!(raw: "this is new content") }
|
||||||
.to change { post.reload.post_search_data.raw_data }
|
.to change { post.reload.post_search_data.search_data }
|
||||||
|
|
||||||
expect { post.update!(topic_id: Fabricate(:topic).id) }
|
expect { post.update!(topic_id: Fabricate(:topic).id) }
|
||||||
.to change { post.reload.post_search_data.raw_data }
|
.to change { post.reload.post_search_data.search_data }
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'should not index posts with empty raw' do
|
it 'should not index posts with empty raw' do
|
||||||
@ -141,7 +137,7 @@ describe SearchIndexer do
|
|||||||
topic = post.topic
|
topic = post.topic
|
||||||
|
|
||||||
expect(post.post_search_data.raw_data).to eq(
|
expect(post.post_search_data.raw_data).to eq(
|
||||||
"#{topic.title} #{topic.category.name} https://meta.discourse.org/some.png"
|
"https://meta.discourse.org/some.png"
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -158,7 +154,7 @@ describe SearchIndexer do
|
|||||||
topic = post.topic
|
topic = post.topic
|
||||||
|
|
||||||
expect(post.post_search_data.raw_data).to eq(
|
expect(post.post_search_data.raw_data).to eq(
|
||||||
"#{topic.title} #{category.name} a https://cnn.com , http://stuff.com.au b http://abc.net/xyz=1 abc.net/xyz=1"
|
"a https://cnn.com , http://stuff.com.au b http://abc.net/xyz=1 abc.net/xyz=1"
|
||||||
)
|
)
|
||||||
|
|
||||||
expect(post.post_search_data.search_data).to eq(
|
expect(post.post_search_data.search_data).to eq(
|
||||||
@ -190,7 +186,7 @@ describe SearchIndexer do
|
|||||||
)
|
)
|
||||||
|
|
||||||
expect(post.post_search_data.raw_data).to eq(
|
expect(post.post_search_data.raw_data).to eq(
|
||||||
"#{topic.title} #{topic.category.name} Let me see how I can fix this image white walkers GOT"
|
"Let me see how I can fix this image white walkers GOT"
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Reference in New Issue
Block a user