FIX: Experimental hashtag search result matching and limit fixes (#19144)

This changes the hashtag search to first do a lookup to find
results where the slug exactly matches the
search term. Now when we search for hashtags, the
exact matches will be found first and put at the top of
the results.

`ChatChannelFetcher` has also been modified here to allow
for more options for performance -- we do not need to
query DM channels for secured IDs when looking up or searching
channels for hashtags, since they should never show in
results there (they have no slugs). Nor do we need to include
the channel archive records.

Also changes the limit of hashtag results to 20 by default
with a hidden site setting, and makes it so the scroll for the
results is overflowed.
This commit is contained in:
Martin Brennan
2022-11-24 10:07:59 +10:00
committed by GitHub
parent 6b7bdc991d
commit 274b21663e
7 changed files with 131 additions and 45 deletions

View File

@ -30,6 +30,9 @@ a.hashtag-cooked {
} }
.hashtag-autocomplete { .hashtag-autocomplete {
max-height: 300px;
overflow-y: scroll;
.hashtag-autocomplete__option { .hashtag-autocomplete__option {
.hashtag-autocomplete__link { .hashtag-autocomplete__link {
align-items: center; align-items: center;

View File

@ -2,7 +2,7 @@
class HashtagAutocompleteService class HashtagAutocompleteService
HASHTAGS_PER_REQUEST = 20 HASHTAGS_PER_REQUEST = 20
SEARCH_MAX_LIMIT = 20 SEARCH_MAX_LIMIT = 50
attr_reader :guardian attr_reader :guardian
cattr_reader :data_sources, :contexts cattr_reader :data_sources, :contexts
@ -77,7 +77,7 @@ class HashtagAutocompleteService
icon: self.icon, icon: self.icon,
type: self.type, type: self.type,
ref: self.ref, ref: self.ref,
slug: self.slug slug: self.slug,
} }
end end
end end
@ -122,9 +122,8 @@ class HashtagAutocompleteService
# composer we want a slug without a suffix to be a category first, tag second. # composer we want a slug without a suffix to be a category first, tag second.
if slugs_without_suffixes.any? if slugs_without_suffixes.any?
types_in_priority_order.each do |type| types_in_priority_order.each do |type|
found_from_slugs = set_refs(@@data_sources[type].lookup(guardian, slugs_without_suffixes)) found_from_slugs = execute_lookup!(lookup_results, type, guardian, slugs_without_suffixes)
found_from_slugs.each { |item| item.type = type }.sort_by! { |item| item.text.downcase }
lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs)
slugs_without_suffixes = slugs_without_suffixes - found_from_slugs.map(&:ref) slugs_without_suffixes = slugs_without_suffixes - found_from_slugs.map(&:ref)
break if slugs_without_suffixes.empty? break if slugs_without_suffixes.empty?
end end
@ -139,9 +138,7 @@ class HashtagAutocompleteService
.select { |slug| slug.ends_with?("::#{type}") } .select { |slug| slug.ends_with?("::#{type}") }
.map { |slug| slug.gsub("::#{type}", "") } .map { |slug| slug.gsub("::#{type}", "") }
next if slugs_for_type.empty? next if slugs_for_type.empty?
found_from_slugs = set_refs(@@data_sources[type].lookup(guardian, slugs_for_type)) execute_lookup!(lookup_results, type, guardian, slugs_for_type)
found_from_slugs.each { |item| item.type = type }.sort_by! { |item| item.text.downcase }
lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs)
end end
end end
@ -156,6 +153,9 @@ class HashtagAutocompleteService
# searching tags. The @guardian handles permissions around which results should # searching tags. The @guardian handles permissions around which results should
# be returned here. # be returned here.
# #
# Items which have a slug that exactly matches the search term via lookup will be found
# first and floated to the top of the results, and still be ordered by type.
#
# @param {String} term Search term, from the UI generally where the user is typing #has... # @param {String} term Search term, from the UI generally where the user is typing #has...
# @param {Array} types_in_priority_order The resource types we are searching for # @param {Array} types_in_priority_order The resource types we are searching for
# and the priority order in which we should # and the priority order in which we should
@ -164,32 +164,46 @@ class HashtagAutocompleteService
# bother searching subsequent types if the first types in # bother searching subsequent types if the first types in
# the array already reach the limit. # the array already reach the limit.
# @returns {Array} The results as HashtagItems # @returns {Array} The results as HashtagItems
def search(term, types_in_priority_order, limit: 5) def search(
term,
types_in_priority_order,
limit: SiteSetting.experimental_hashtag_search_result_limit
)
raise Discourse::InvalidParameters.new(:order) if !types_in_priority_order.is_a?(Array) raise Discourse::InvalidParameters.new(:order) if !types_in_priority_order.is_a?(Array)
limit = [limit, SEARCH_MAX_LIMIT].min limit = [limit, SEARCH_MAX_LIMIT].min
limited_results = [] limited_results = []
slugs_by_type = {} top_ranked_type = nil
term = term.downcase term = term.downcase
types_in_priority_order = types_in_priority_order =
types_in_priority_order.select { |type| @@data_sources.keys.include?(type) } types_in_priority_order.select { |type| @@data_sources.keys.include?(type) }
# Float exact matches by slug to the top of the list, any of these will be excluded
# from further results.
types_in_priority_order.each do |type|
search_results = execute_lookup!(nil, type, guardian, [term])
limited_results.concat(search_results) if search_results
break if limited_results.length >= limit
end
return limited_results if limited_results.length >= limit
# Search the data source for each type, validate and sort results, # Search the data source for each type, validate and sort results,
# and break off from searching more data sources if we reach our limit # and break off from searching more data sources if we reach our limit
types_in_priority_order.each do |type| types_in_priority_order.each do |type|
search_results = search_results = search_for_type(type, guardian, term, limit - limited_results.length)
set_refs(@@data_sources[type].search(guardian, term, limit - limited_results.length))
next if search_results.empty? next if search_results.empty?
all_data_items_valid = next if !all_data_items_valid?(search_results)
search_results.all? do |item|
item.kind_of?(HashtagItem) && item.slug.present? && item.text.present?
end
next if !all_data_items_valid
search_results.each { |item| item.type = type }.sort_by! { |item| item.text.downcase } search_results =
slugs_by_type[type] = search_results.map(&:slug) search_results
.reject do |item|
limited_results.any? { |exact| exact.type == type && exact.slug === item.slug }
end
.sort_by { |item| item.text.downcase }
top_ranked_type = type if top_ranked_type.nil?
limited_results.concat(search_results) limited_results.concat(search_results)
break if limited_results.length >= limit break if limited_results.length >= limit
end end
@ -205,7 +219,6 @@ class HashtagAutocompleteService
# #
# For example, if there is a category with the slug #general and a tag # For example, if there is a category with the slug #general and a tag
# with the slug #general, then the tag will have its ref changed to #general::tag # with the slug #general, then the tag will have its ref changed to #general::tag
top_ranked_type = slugs_by_type.keys.first
limited_results.each do |hashtag_item| limited_results.each do |hashtag_item|
next if hashtag_item.type == top_ranked_type next if hashtag_item.type == top_ranked_type
@ -272,4 +285,28 @@ class HashtagAutocompleteService
def set_refs(hashtag_items) def set_refs(hashtag_items)
hashtag_items.each { |item| item.ref ||= item.slug } hashtag_items.each { |item| item.ref ||= item.slug }
end end
def all_data_items_valid?(items)
items.all? { |item| item.kind_of?(HashtagItem) && item.slug.present? && item.text.present? }
end
def search_for_type(type, guardian, term, limit)
set_refs(@@data_sources[type].search(guardian, term, limit)).each { |item| item.type = type }
end
def execute_lookup!(lookup_results, type, guardian, slugs)
found_from_slugs = lookup_for_type(type, guardian, slugs)
return if !all_data_items_valid?(found_from_slugs)
found_from_slugs.sort_by! { |item| item.text.downcase }
if lookup_results.present?
lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs)
end
found_from_slugs
end
def lookup_for_type(type, guardian, slugs)
set_refs(@@data_sources[type].lookup(guardian, slugs)).each { |item| item.type = type }
end
end end

View File

@ -28,7 +28,6 @@ class TagHashtagDataSource
def self.lookup(guardian, slugs) def self.lookup(guardian, slugs)
return [] if !SiteSetting.tagging_enabled return [] if !SiteSetting.tagging_enabled
DiscourseTagging DiscourseTagging
.filter_visible(Tag.where_name(slugs), guardian) .filter_visible(Tag.where_name(slugs), guardian)
.map { |tag| tag_to_hashtag_item(tag) } .map { |tag| tag_to_hashtag_item(tag) }
@ -46,6 +45,7 @@ class TagHashtagDataSource
for_input: true, for_input: true,
order_search_results: true, order_search_results: true,
) )
TagsController TagsController
.tag_counts_json(tags_with_counts) .tag_counts_json(tags_with_counts)
.take(limit) .take(limit)

View File

@ -2036,6 +2036,10 @@ developer:
default: false default: false
client: true client: true
hidden: true hidden: true
experimental_hashtag_search_result_limit:
default: 20
client: true
hidden: true
enable_new_user_profile_nav_groups: enable_new_user_profile_nav_groups:
client: true client: true
type: group_list type: group_list

View File

@ -29,10 +29,8 @@ module Chat::ChatChannelFetcher
SQL SQL
end end
def self.generate_allowed_channel_ids_sql(guardian) def self.generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: false)
<<~SQL category_channel_sql =
-- secured category chat channels
#{
ChatChannel ChatChannel
.select(:id) .select(:id)
.joins( .joins(
@ -43,40 +41,66 @@ module Chat::ChatChannelFetcher
allowed_category_ids: guardian.allowed_category_ids, allowed_category_ids: guardian.allowed_category_ids,
) )
.to_sql .to_sql
}
dm_channel_sql = ""
if !exclude_dm_channels
dm_channel_sql = <<~SQL
UNION UNION
-- secured direct message chat channels -- secured direct message chat channels
#{ #{
ChatChannel ChatChannel
.select(:id) .select(:id)
.joins( .joins(
"INNER JOIN direct_message_channels ON direct_message_channels.id = chat_channels.chatable_id "INNER JOIN direct_message_channels ON direct_message_channels.id = chat_channels.chatable_id
AND chat_channels.chatable_type = 'DirectMessage' AND chat_channels.chatable_type = 'DirectMessage'
INNER JOIN direct_message_users ON direct_message_users.direct_message_channel_id = direct_message_channels.id", INNER JOIN direct_message_users ON direct_message_users.direct_message_channel_id = direct_message_channels.id",
) )
.where("direct_message_users.user_id = :user_id", user_id: guardian.user.id) .where("direct_message_users.user_id = :user_id", user_id: guardian.user.id)
.to_sql .to_sql
} }
SQL
end
<<~SQL
-- secured category chat channels
#{category_channel_sql}
#{dm_channel_sql}
SQL SQL
end end
def self.secured_public_channel_slug_lookup(guardian, slugs)
allowed_channel_ids = generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: true)
ChatChannel
.joins(
"LEFT JOIN categories ON categories.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'Category'",
)
.where(chatable_type: ChatChannel.public_channel_chatable_types)
.where("chat_channels.id IN (#{allowed_channel_ids})")
.where("chat_channels.slug IN (:slugs)", slugs: slugs)
.limit(1)
end
def self.secured_public_channel_search(guardian, options = {}) def self.secured_public_channel_search(guardian, options = {})
allowed_channel_ids =
generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: options[:exclude_dm_channels])
channels = ChatChannel.includes(chatable: [:topic_only_relative_url])
channels = channels.includes(:chat_channel_archive) if options[:include_archives]
channels = channels =
ChatChannel channels
.includes(:chat_channel_archive)
.includes(chatable: [:topic_only_relative_url])
.joins( .joins(
"LEFT JOIN categories ON categories.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'Category'", "LEFT JOIN categories ON categories.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'Category'",
) )
.where(chatable_type: ChatChannel.public_channel_chatable_types) .where(chatable_type: ChatChannel.public_channel_chatable_types)
.where("chat_channels.id IN (#{generate_allowed_channel_ids_sql(guardian)})") .where("chat_channels.id IN (#{allowed_channel_ids})")
channels = channels.where(status: options[:status]) if options[:status].present? channels = channels.where(status: options[:status]) if options[:status].present?
if options[:filter].present? if options[:filter].present?
sql = "chat_channels.name ILIKE :filter OR chat_channels.slug ILIKE :filter OR categories.name ILIKE :filter" sql =
"chat_channels.name ILIKE :filter OR chat_channels.slug ILIKE :filter OR categories.name ILIKE :filter"
channels = channels =
channels.where(sql, filter: "%#{options[:filter].downcase}%").order( channels.where(sql, filter: "%#{options[:filter].downcase}%").order(
"chat_channels.name ASC, categories.name ASC", "chat_channels.name ASC, categories.name ASC",
@ -115,7 +139,11 @@ module Chat::ChatChannelFetcher
end end
def self.secured_public_channels(guardian, memberships, options = { following: true }) def self.secured_public_channels(guardian, memberships, options = { following: true })
channels = secured_public_channel_search(guardian, options) channels =
secured_public_channel_search(
guardian,
options.merge(include_archives: true),
)
decorate_memberships_with_tracking_data(guardian, channels, memberships) decorate_memberships_with_tracking_data(guardian, channels, memberships)
channels = channels.to_a channels = channels.to_a
preload_custom_fields_for(channels) preload_custom_fields_for(channels)

View File

@ -19,7 +19,7 @@ class Chat::ChatChannelHashtagDataSource
def self.lookup(guardian, slugs) def self.lookup(guardian, slugs)
if SiteSetting.enable_experimental_hashtag_autocomplete if SiteSetting.enable_experimental_hashtag_autocomplete
Chat::ChatChannelFetcher Chat::ChatChannelFetcher
.secured_public_channel_search(guardian, slugs: slugs) .secured_public_channel_slug_lookup(guardian, slugs)
.map { |channel| channel_to_hashtag_item(guardian, channel) } .map { |channel| channel_to_hashtag_item(guardian, channel) }
else else
[] []
@ -29,7 +29,12 @@ class Chat::ChatChannelHashtagDataSource
def self.search(guardian, term, limit) def self.search(guardian, term, limit)
if SiteSetting.enable_experimental_hashtag_autocomplete if SiteSetting.enable_experimental_hashtag_autocomplete
Chat::ChatChannelFetcher Chat::ChatChannelFetcher
.secured_public_channel_search(guardian, filter: term, limit: limit) .secured_public_channel_search(
guardian,
filter: term,
limit: limit,
exclude_dm_channels: true,
)
.map { |channel| channel_to_hashtag_item(guardian, channel) } .map { |channel| channel_to_hashtag_item(guardian, channel) }
else else
[] []

View File

@ -92,7 +92,9 @@ RSpec.describe HashtagAutocompleteService do
end end
it "does not search other data sources if the limit is reached by earlier type data sources" do it "does not search other data sources if the limit is reached by earlier type data sources" do
Site.any_instance.expects(:categories).never # only expected once to try get the exact matches first
site_guardian_categories = Site.new(guardian).categories
Site.any_instance.expects(:categories).once.returns(site_guardian_categories)
subject.search("book", %w[tag category], limit: 1) subject.search("book", %w[tag category], limit: 1)
end end
@ -167,19 +169,26 @@ RSpec.describe HashtagAutocompleteService do
context "when multiple tags and categories are returned" do context "when multiple tags and categories are returned" do
fab!(:category2) { Fabricate(:category, name: "Book Zone", slug: "book-zone") } fab!(:category2) { Fabricate(:category, name: "Book Zone", slug: "book-zone") }
fab!(:category3) { Fabricate(:category, name: "Book Dome", slug: "book-dome") } fab!(:category3) { Fabricate(:category, name: "Book Dome", slug: "book-dome") }
fab!(:category4) { Fabricate(:category, name: "Bookworld", slug: "book") }
fab!(:tag2) { Fabricate(:tag, name: "mid-books") } fab!(:tag2) { Fabricate(:tag, name: "mid-books") }
fab!(:tag3) { Fabricate(:tag, name: "terrible-books") } fab!(:tag3) { Fabricate(:tag, name: "terrible-books") }
fab!(:tag4) { Fabricate(:tag, name: "book") } fab!(:tag4) { Fabricate(:tag, name: "book") }
it "orders them by name within their type order" do it "orders them by name within their type order" do
expect(subject.search("book", %w[category tag], limit: 10).map(&:ref)).to eq( expect(subject.search("book", %w[category tag], limit: 10).map(&:ref)).to eq(
%w[book-club book-dome book-zone book great-books mid-books terrible-books], %w[book book::tag book-club book-dome book-zone great-books mid-books terrible-books],
) )
end end
it "orders correctly with lower limits" do it "orders correctly with lower limits" do
expect(subject.search("book", %w[category tag], limit: 5).map(&:ref)).to eq( expect(subject.search("book", %w[category tag], limit: 5).map(&:ref)).to eq(
%w[book-club book-dome book-zone book great-books], %w[book book::tag book-club book-dome book-zone],
)
end
it "prioritises exact matches to the top of the list" do
expect(subject.search("book", %w[category tag], limit: 10).map(&:ref)).to eq(
%w[book book::tag book-club book-dome book-zone great-books mid-books terrible-books],
) )
end end
end end