From 274b21663e8283d0053b29ffabf3afa8042ee3cd Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Thu, 24 Nov 2022 10:07:59 +1000 Subject: [PATCH] FIX: Experimental hashtag search result matching and limit fixes (#19144) This changes the hashtag search to first do a lookup to find results where the slug exactly matches the search term. Now when we search for hashtags, the exact matches will be found first and put at the top of the results. `ChatChannelFetcher` has also been modified here to allow for more options for performance -- we do not need to query DM channels for secured IDs when looking up or searching channels for hashtags, since they should never show in results there (they have no slugs). Nor do we need to include the channel archive records. Also changes the limit of hashtag results to 20 by default with a hidden site setting, and makes it so the scroll for the results is overflowed. --- .../common/components/hashtag.scss | 3 + app/services/hashtag_autocomplete_service.rb | 77 ++++++++++++++----- app/services/tag_hashtag_data_source.rb | 2 +- config/site_settings.yml | 4 + plugins/chat/lib/chat_channel_fetcher.rb | 66 +++++++++++----- .../lib/chat_channel_hashtag_data_source.rb | 9 ++- .../hashtag_autocomplete_service_spec.rb | 15 +++- 7 files changed, 131 insertions(+), 45 deletions(-) diff --git a/app/assets/stylesheets/common/components/hashtag.scss b/app/assets/stylesheets/common/components/hashtag.scss index a45c421dc2a..35d40724676 100644 --- a/app/assets/stylesheets/common/components/hashtag.scss +++ b/app/assets/stylesheets/common/components/hashtag.scss @@ -30,6 +30,9 @@ a.hashtag-cooked { } .hashtag-autocomplete { + max-height: 300px; + overflow-y: scroll; + .hashtag-autocomplete__option { .hashtag-autocomplete__link { align-items: center; diff --git a/app/services/hashtag_autocomplete_service.rb b/app/services/hashtag_autocomplete_service.rb index 3af74922c05..085dd591ab0 100644 --- a/app/services/hashtag_autocomplete_service.rb +++ b/app/services/hashtag_autocomplete_service.rb @@ -2,7 +2,7 @@ class HashtagAutocompleteService HASHTAGS_PER_REQUEST = 20 - SEARCH_MAX_LIMIT = 20 + SEARCH_MAX_LIMIT = 50 attr_reader :guardian cattr_reader :data_sources, :contexts @@ -77,7 +77,7 @@ class HashtagAutocompleteService icon: self.icon, type: self.type, ref: self.ref, - slug: self.slug + slug: self.slug, } end end @@ -122,9 +122,8 @@ class HashtagAutocompleteService # composer we want a slug without a suffix to be a category first, tag second. if slugs_without_suffixes.any? types_in_priority_order.each do |type| - found_from_slugs = set_refs(@@data_sources[type].lookup(guardian, slugs_without_suffixes)) - found_from_slugs.each { |item| item.type = type }.sort_by! { |item| item.text.downcase } - lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs) + found_from_slugs = execute_lookup!(lookup_results, type, guardian, slugs_without_suffixes) + slugs_without_suffixes = slugs_without_suffixes - found_from_slugs.map(&:ref) break if slugs_without_suffixes.empty? end @@ -139,9 +138,7 @@ class HashtagAutocompleteService .select { |slug| slug.ends_with?("::#{type}") } .map { |slug| slug.gsub("::#{type}", "") } next if slugs_for_type.empty? - found_from_slugs = set_refs(@@data_sources[type].lookup(guardian, slugs_for_type)) - found_from_slugs.each { |item| item.type = type }.sort_by! { |item| item.text.downcase } - lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs) + execute_lookup!(lookup_results, type, guardian, slugs_for_type) end end @@ -156,6 +153,9 @@ class HashtagAutocompleteService # searching tags. The @guardian handles permissions around which results should # be returned here. # + # Items which have a slug that exactly matches the search term via lookup will be found + # first and floated to the top of the results, and still be ordered by type. + # # @param {String} term Search term, from the UI generally where the user is typing #has... # @param {Array} types_in_priority_order The resource types we are searching for # and the priority order in which we should @@ -164,32 +164,46 @@ class HashtagAutocompleteService # bother searching subsequent types if the first types in # the array already reach the limit. # @returns {Array} The results as HashtagItems - def search(term, types_in_priority_order, limit: 5) + def search( + term, + types_in_priority_order, + limit: SiteSetting.experimental_hashtag_search_result_limit + ) raise Discourse::InvalidParameters.new(:order) if !types_in_priority_order.is_a?(Array) limit = [limit, SEARCH_MAX_LIMIT].min limited_results = [] - slugs_by_type = {} + top_ranked_type = nil term = term.downcase types_in_priority_order = types_in_priority_order.select { |type| @@data_sources.keys.include?(type) } + # Float exact matches by slug to the top of the list, any of these will be excluded + # from further results. + types_in_priority_order.each do |type| + search_results = execute_lookup!(nil, type, guardian, [term]) + limited_results.concat(search_results) if search_results + break if limited_results.length >= limit + end + + return limited_results if limited_results.length >= limit + # Search the data source for each type, validate and sort results, # and break off from searching more data sources if we reach our limit types_in_priority_order.each do |type| - search_results = - set_refs(@@data_sources[type].search(guardian, term, limit - limited_results.length)) + search_results = search_for_type(type, guardian, term, limit - limited_results.length) next if search_results.empty? - all_data_items_valid = - search_results.all? do |item| - item.kind_of?(HashtagItem) && item.slug.present? && item.text.present? - end - next if !all_data_items_valid + next if !all_data_items_valid?(search_results) - search_results.each { |item| item.type = type }.sort_by! { |item| item.text.downcase } - slugs_by_type[type] = search_results.map(&:slug) + search_results = + search_results + .reject do |item| + limited_results.any? { |exact| exact.type == type && exact.slug === item.slug } + end + .sort_by { |item| item.text.downcase } + top_ranked_type = type if top_ranked_type.nil? limited_results.concat(search_results) break if limited_results.length >= limit end @@ -205,7 +219,6 @@ class HashtagAutocompleteService # # For example, if there is a category with the slug #general and a tag # with the slug #general, then the tag will have its ref changed to #general::tag - top_ranked_type = slugs_by_type.keys.first limited_results.each do |hashtag_item| next if hashtag_item.type == top_ranked_type @@ -272,4 +285,28 @@ class HashtagAutocompleteService def set_refs(hashtag_items) hashtag_items.each { |item| item.ref ||= item.slug } end + + def all_data_items_valid?(items) + items.all? { |item| item.kind_of?(HashtagItem) && item.slug.present? && item.text.present? } + end + + def search_for_type(type, guardian, term, limit) + set_refs(@@data_sources[type].search(guardian, term, limit)).each { |item| item.type = type } + end + + def execute_lookup!(lookup_results, type, guardian, slugs) + found_from_slugs = lookup_for_type(type, guardian, slugs) + return if !all_data_items_valid?(found_from_slugs) + found_from_slugs.sort_by! { |item| item.text.downcase } + + if lookup_results.present? + lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs) + end + + found_from_slugs + end + + def lookup_for_type(type, guardian, slugs) + set_refs(@@data_sources[type].lookup(guardian, slugs)).each { |item| item.type = type } + end end diff --git a/app/services/tag_hashtag_data_source.rb b/app/services/tag_hashtag_data_source.rb index 33e95f020ce..8e6b838244c 100644 --- a/app/services/tag_hashtag_data_source.rb +++ b/app/services/tag_hashtag_data_source.rb @@ -28,7 +28,6 @@ class TagHashtagDataSource def self.lookup(guardian, slugs) return [] if !SiteSetting.tagging_enabled - DiscourseTagging .filter_visible(Tag.where_name(slugs), guardian) .map { |tag| tag_to_hashtag_item(tag) } @@ -46,6 +45,7 @@ class TagHashtagDataSource for_input: true, order_search_results: true, ) + TagsController .tag_counts_json(tags_with_counts) .take(limit) diff --git a/config/site_settings.yml b/config/site_settings.yml index 0bb8f196161..96edaa9d6df 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -2036,6 +2036,10 @@ developer: default: false client: true hidden: true + experimental_hashtag_search_result_limit: + default: 20 + client: true + hidden: true enable_new_user_profile_nav_groups: client: true type: group_list diff --git a/plugins/chat/lib/chat_channel_fetcher.rb b/plugins/chat/lib/chat_channel_fetcher.rb index 2fcb6774d13..129c64cb6c6 100644 --- a/plugins/chat/lib/chat_channel_fetcher.rb +++ b/plugins/chat/lib/chat_channel_fetcher.rb @@ -29,10 +29,8 @@ module Chat::ChatChannelFetcher SQL end - def self.generate_allowed_channel_ids_sql(guardian) - <<~SQL - -- secured category chat channels - #{ + def self.generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: false) + category_channel_sql = ChatChannel .select(:id) .joins( @@ -43,40 +41,66 @@ module Chat::ChatChannelFetcher allowed_category_ids: guardian.allowed_category_ids, ) .to_sql - } + dm_channel_sql = "" + if !exclude_dm_channels + dm_channel_sql = <<~SQL UNION -- secured direct message chat channels #{ - ChatChannel - .select(:id) - .joins( - "INNER JOIN direct_message_channels ON direct_message_channels.id = chat_channels.chatable_id + ChatChannel + .select(:id) + .joins( + "INNER JOIN direct_message_channels ON direct_message_channels.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'DirectMessage' INNER JOIN direct_message_users ON direct_message_users.direct_message_channel_id = direct_message_channels.id", - ) - .where("direct_message_users.user_id = :user_id", user_id: guardian.user.id) - .to_sql - } + ) + .where("direct_message_users.user_id = :user_id", user_id: guardian.user.id) + .to_sql + } + SQL + end + + <<~SQL + -- secured category chat channels + #{category_channel_sql} + #{dm_channel_sql} SQL end + def self.secured_public_channel_slug_lookup(guardian, slugs) + allowed_channel_ids = generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: true) + ChatChannel + .joins( + "LEFT JOIN categories ON categories.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'Category'", + ) + .where(chatable_type: ChatChannel.public_channel_chatable_types) + .where("chat_channels.id IN (#{allowed_channel_ids})") + .where("chat_channels.slug IN (:slugs)", slugs: slugs) + .limit(1) + end + def self.secured_public_channel_search(guardian, options = {}) + allowed_channel_ids = + generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: options[:exclude_dm_channels]) + + channels = ChatChannel.includes(chatable: [:topic_only_relative_url]) + channels = channels.includes(:chat_channel_archive) if options[:include_archives] + channels = - ChatChannel - .includes(:chat_channel_archive) - .includes(chatable: [:topic_only_relative_url]) + channels .joins( "LEFT JOIN categories ON categories.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'Category'", ) .where(chatable_type: ChatChannel.public_channel_chatable_types) - .where("chat_channels.id IN (#{generate_allowed_channel_ids_sql(guardian)})") + .where("chat_channels.id IN (#{allowed_channel_ids})") channels = channels.where(status: options[:status]) if options[:status].present? if options[:filter].present? - sql = "chat_channels.name ILIKE :filter OR chat_channels.slug ILIKE :filter OR categories.name ILIKE :filter" + sql = + "chat_channels.name ILIKE :filter OR chat_channels.slug ILIKE :filter OR categories.name ILIKE :filter" channels = channels.where(sql, filter: "%#{options[:filter].downcase}%").order( "chat_channels.name ASC, categories.name ASC", @@ -115,7 +139,11 @@ module Chat::ChatChannelFetcher end def self.secured_public_channels(guardian, memberships, options = { following: true }) - channels = secured_public_channel_search(guardian, options) + channels = + secured_public_channel_search( + guardian, + options.merge(include_archives: true), + ) decorate_memberships_with_tracking_data(guardian, channels, memberships) channels = channels.to_a preload_custom_fields_for(channels) diff --git a/plugins/chat/lib/chat_channel_hashtag_data_source.rb b/plugins/chat/lib/chat_channel_hashtag_data_source.rb index 8c824ac4b0f..06b9eba2a33 100644 --- a/plugins/chat/lib/chat_channel_hashtag_data_source.rb +++ b/plugins/chat/lib/chat_channel_hashtag_data_source.rb @@ -19,7 +19,7 @@ class Chat::ChatChannelHashtagDataSource def self.lookup(guardian, slugs) if SiteSetting.enable_experimental_hashtag_autocomplete Chat::ChatChannelFetcher - .secured_public_channel_search(guardian, slugs: slugs) + .secured_public_channel_slug_lookup(guardian, slugs) .map { |channel| channel_to_hashtag_item(guardian, channel) } else [] @@ -29,7 +29,12 @@ class Chat::ChatChannelHashtagDataSource def self.search(guardian, term, limit) if SiteSetting.enable_experimental_hashtag_autocomplete Chat::ChatChannelFetcher - .secured_public_channel_search(guardian, filter: term, limit: limit) + .secured_public_channel_search( + guardian, + filter: term, + limit: limit, + exclude_dm_channels: true, + ) .map { |channel| channel_to_hashtag_item(guardian, channel) } else [] diff --git a/spec/services/hashtag_autocomplete_service_spec.rb b/spec/services/hashtag_autocomplete_service_spec.rb index 75fad31ac37..241c470ac6b 100644 --- a/spec/services/hashtag_autocomplete_service_spec.rb +++ b/spec/services/hashtag_autocomplete_service_spec.rb @@ -92,7 +92,9 @@ RSpec.describe HashtagAutocompleteService do end it "does not search other data sources if the limit is reached by earlier type data sources" do - Site.any_instance.expects(:categories).never + # only expected once to try get the exact matches first + site_guardian_categories = Site.new(guardian).categories + Site.any_instance.expects(:categories).once.returns(site_guardian_categories) subject.search("book", %w[tag category], limit: 1) end @@ -167,19 +169,26 @@ RSpec.describe HashtagAutocompleteService do context "when multiple tags and categories are returned" do fab!(:category2) { Fabricate(:category, name: "Book Zone", slug: "book-zone") } fab!(:category3) { Fabricate(:category, name: "Book Dome", slug: "book-dome") } + fab!(:category4) { Fabricate(:category, name: "Bookworld", slug: "book") } fab!(:tag2) { Fabricate(:tag, name: "mid-books") } fab!(:tag3) { Fabricate(:tag, name: "terrible-books") } fab!(:tag4) { Fabricate(:tag, name: "book") } it "orders them by name within their type order" do expect(subject.search("book", %w[category tag], limit: 10).map(&:ref)).to eq( - %w[book-club book-dome book-zone book great-books mid-books terrible-books], + %w[book book::tag book-club book-dome book-zone great-books mid-books terrible-books], ) end it "orders correctly with lower limits" do expect(subject.search("book", %w[category tag], limit: 5).map(&:ref)).to eq( - %w[book-club book-dome book-zone book great-books], + %w[book book::tag book-club book-dome book-zone], + ) + end + + it "prioritises exact matches to the top of the list" do + expect(subject.search("book", %w[category tag], limit: 10).map(&:ref)).to eq( + %w[book book::tag book-club book-dome book-zone great-books mid-books terrible-books], ) end end