mirror of
https://github.com/discourse/discourse.git
synced 2025-05-29 11:48:08 +08:00
PERF: Limit characters used to generate headline for search blurb.
We determined, using the following benchmark script, that limiting the text to 2500 chars would mean a maximum of 25ms spent generating headlines.

```
require 'benchmark/ips'

string = <<~STRING
Far far away, behind the word mountains...
STRING

def sql_excerpt(string, l = 1000000)
  DB.query_single(<<~SQL)
  SELECT TS_HEADLINE('english', left('#{string}', #{l}), PLAINTO_TSQUERY('mountains'))
  SQL
end

def ruby_excerpt(string)
  output = DB.query_single("SELECT '#{string}'")[0]
  Search::GroupedSearchResults::TextHelper.excerpt(output, 'mountains', radius: 100)
end

puts "Ruby Excerpt: #{ruby_excerpt(string)}"
puts "SQL Excerpt: #{sql_excerpt(string)}"
puts

Benchmark.ips do |x|
  x.time = 10

  [1000, 2500, 5000, 10000, 20000, 50000].each do |l|
    short_string = string[0..l]

    x.report("ts_headline excerpt #{l}") do
      sql_excerpt(short_string, l)
    end

    x.report("actionview excerpt #{l}") do
      ruby_excerpt(short_string)
    end
  end

  x.compare!
end
```

```
actionview excerpt 1000:    20570.7 i/s
actionview excerpt 2500:    17863.1 i/s - 1.15x (± 0.00) slower
actionview excerpt 5000:    14228.9 i/s - 1.45x (± 0.00) slower
actionview excerpt 10000:   10906.2 i/s - 1.89x (± 0.00) slower
actionview excerpt 20000:    6255.0 i/s - 3.29x (± 0.00) slower
ts_headline excerpt 1000:    4337.5 i/s - 4.74x (± 0.00) slower
actionview excerpt 50000:    3222.7 i/s - 6.38x (± 0.00) slower
ts_headline excerpt 2500:    2240.4 i/s - 9.18x (± 0.00) slower
ts_headline excerpt 5000:    1258.7 i/s - 16.34x (± 0.00) slower
ts_headline excerpt 10000:    667.2 i/s - 30.83x (± 0.00) slower
ts_headline excerpt 20000:    348.7 i/s - 58.98x (± 0.00) slower
ts_headline excerpt 50000:    131.9 i/s - 155.91x (± 0.00) slower
```
This commit is contained in:
@ -1164,6 +1164,10 @@ class Search
|
||||
query.includes(topic: topic_eager_loads)
|
||||
end
|
||||
|
||||
# Limited for performance reasons since `TS_HEADLINE` is slow when the text
|
||||
# document is too long.
|
||||
MAX_LENGTH_FOR_HEADLINE = 2500
|
||||
|
||||
def posts_scope(default_scope = Post.all)
|
||||
if SiteSetting.use_pg_headlines_for_excerpt
|
||||
search_term = @term.present? ? PG::Connection.escape_string(@term) : nil
|
||||
@ -1174,7 +1178,7 @@ class Search
|
||||
.joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
|
||||
.select(
|
||||
"TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS topic_title_headline",
|
||||
"TS_HEADLINE(#{ts_config}, pd.raw_data, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
||||
"TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
||||
default_scope.arel.projections
|
||||
)
|
||||
else
|
||||
|
Reference in New Issue
Block a user