SECURITY: Restrict allowed URL patterns

Restrict allowed URL patterns for oneboxes.
2025-05-22 22:43:33 +08:00 · 2024-12-19 11:01:54 -07:00
parent 17e1bfe069
commit 17116c440b
86 changed files with 1131 additions and 61 deletions
--- a/lib/onebox/engine.rb
+++ b/lib/onebox/engine.rb
@ -4,6 +4,13 @@ module Onebox
  module Engine
    def self.included(object)
      object.extend(ClassMethods)
+      # Install a method_added hook on the including class so that defining an
+      # *instance* method named matches_path fails loudly; engines are expected to
+      # define it as a class method (def self.matches_path).
+      # NOTE(review): the hook does not call super, so an existing method_added on
+      # the including class would be shadowed — confirm none exists.
+      object.singleton_class.class_eval do
+        def method_added(method_name)
+          if method_name == :matches_path
+            raise "Define matches_path as a class method (def self.matches_path) in #{self}"
+          end
+        end
+      end
    end

    def self.engines
@ -100,9 +107,15 @@ module Onebox
        end
      end

-      def ===(other)
-        if other.kind_of?(URI)
-          !!(other.to_s =~ class_variable_get(:@@matcher))
+      def ===(uri)
+        if uri.is_a?(URI)
+          # Check for new domain/path matching
+          if class_variable_defined?(:@@domains)
+            matches?(uri) && matches_path(uri.path)
+          else
+            # Fallback to matches_regexp if no domains are defined
+            class_variable_defined?(:@@matcher) && !!(uri.to_s =~ class_variable_get(:@@matcher))
+          end
        else
          super
        end
@ -116,6 +129,28 @@ module Onebox
        class_variable_set :@@matcher, r
      end

+      def matches_domain(*domains, allow_subdomains: false)
+        class_variable_set :@@domains, domains.map(&:downcase)
+        class_variable_set :@@allow_subdomains, allow_subdomains
+      end
+
+      def matches?(uri)
+        domains = class_variable_get(:@@domains)
+        allow_subdomains = class_variable_get(:@@allow_subdomains)
+
+        if allow_subdomains
+          domains.any? do |domain|
+            uri.host.downcase.end_with?(".#{domain}") || uri.host.downcase == domain
+          end
+        else
+          domains.include?(uri.host.downcase)
+        end
+      end
+
+      # Default path check: accepts every path. Engines that need to restrict paths
+      # override this with a class method (def self.matches_path).
+      def matches_path(path)
+        true
+      end
+
      def matches_content_type(ct)
        class_variable_set :@@matcher_content_type, ct
      end
--- a/lib/onebox/engine/animated_image_onebox.rb
+++ b/lib/onebox/engine/animated_image_onebox.rb
@ -6,8 +6,8 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://.*(giphy\.com|gph\.is|tenor\.com)/})
      always_https
+      matches_domain("giphy.com", "gph.is", "tenor.com")

      def to_html
        og = get_opengraph
--- a/lib/onebox/engine/asciinema_onebox.rb
+++ b/lib/onebox/engine/asciinema_onebox.rb
@ -7,7 +7,11 @@ module Onebox
      include StandardEmbed

      always_https
-      matches_regexp(/^https?:\/\/asciinema\.org\/a\/[\p{Alnum}_\-]+$/)
+      matches_domain("asciinema.org")
+
+      def self.matches_path(path)
+        path.match?(%r{^/a/[\p{Alnum}_\-]+$})
+      end

      def to_html
        "<script type='text/javascript' src='https://asciinema.org/a/#{match[:asciinema_id]}.js' id='asciicast-#{match[:asciinema_id]}' async></script>"
--- a/lib/onebox/engine/audio_com_onebox.rb
+++ b/lib/onebox/engine/audio_com_onebox.rb
@ -6,9 +6,9 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://audio\.com})
-      requires_iframe_origins "https://audio.com"
      always_https
+      requires_iframe_origins "https://audio.com"
+      matches_domain("audio.com")

      def to_html
        oembed = get_oembed
--- a/lib/onebox/engine/audioboom_onebox.rb
+++ b/lib/onebox/engine/audioboom_onebox.rb
@ -6,8 +6,12 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://audioboom\.com/posts/\d+})
      always_https
+      matches_domain("audioboom.com")
+
+      def self.matches_path(path)
+        path.match?(%r{^/posts/\d+$})
+      end

      def placeholder_html
        oembed = get_oembed
--- a/lib/onebox/engine/band_camp_onebox.rb
+++ b/lib/onebox/engine/band_camp_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://.*\.bandcamp\.com/(album|track)/})
+      matches_domain("bandcamp.com", allow_subdomains: true)
      always_https
      requires_iframe_origins "https://bandcamp.com"

+      def self.matches_path(path)
+        path.match?(%r{^/(album|track)/})
+      end
+
      def placeholder_html
        og = get_opengraph
        "<img src='#{og.image}' height='#{og.video_height}' #{og.title_attr}>"
--- a/lib/onebox/engine/cloud_app_onebox.rb
+++ b/lib/onebox/engine/cloud_app_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://cl\.ly})
+      matches_domain("cl.ly")
      always_https

      def to_html
--- a/lib/onebox/engine/coub_onebox.rb
+++ b/lib/onebox/engine/coub_onebox.rb
@ -6,9 +6,13 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://coub\.com/view/})
+      matches_domain("coub.com")
      always_https

+      def self.matches_path(path)
+        path.start_with?("/view/")
+      end
+
      def placeholder_html
        oembed = get_oembed
        "<img src='#{oembed.thumbnail_url}' height='#{oembed.thumbnail_height}' width='#{oembed.thumbnail_width}' #{oembed.title_attr}>"
--- a/lib/onebox/engine/facebook_media_onebox.rb
+++ b/lib/onebox/engine/facebook_media_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(?:www\.)?facebook\.com/(\w+)/(videos|\?).*})
+      matches_domain("facebook.com", "www.facebook.com")
      always_https
      requires_iframe_origins "https://www.facebook.com"

--- a/lib/onebox/engine/five_hundred_px_onebox.rb
+++ b/lib/onebox/engine/five_hundred_px_onebox.rb
@ -6,9 +6,13 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://500px\.com/photo/\d+/})
+      matches_domain("500px.com")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/photo/\d+/})
+      end
+
      def to_html
        og = get_opengraph
        "<img src='#{og.image}' width='#{og.image_width}' height='#{og.image_height}' class='onebox' #{og.title_attr}>"
--- a/lib/onebox/engine/flickr_onebox.rb
+++ b/lib/onebox/engine/flickr_onebox.rb
@ -8,9 +8,13 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://www\.flickr\.com/photos/})
+      matches_domain("www.flickr.com")
      always_https

+      def self.matches_path(path)
+        path.start_with?("/photos/")
+      end
+
      def to_html
        og = get_opengraph
        return album_html(og) if og.url =~ %r{/sets/}
--- a/lib/onebox/engine/flickr_shortened_onebox.rb
+++ b/lib/onebox/engine/flickr_shortened_onebox.rb
@ -9,8 +9,12 @@ module Onebox
      include StandardEmbed
      include OpengraphImage

-      matches_regexp(%r{^https?://flic\.kr/p/})
+      matches_domain("flic.kr")
      always_https
+
+      def self.matches_path(path)
+        path.start_with?("/p/")
+      end
    end
  end
 end
--- a/lib/onebox/engine/gfycat_onebox.rb
+++ b/lib/onebox/engine/gfycat_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include JSON

-      matches_regexp(%r{^https?://gfycat\.com/})
+      matches_domain("gfycat.com")
      always_https

      # This engine should have priority over AllowlistedGenericOnebox.
--- a/lib/onebox/engine/github_commit_onebox.rb
+++ b/lib/onebox/engine/github_commit_onebox.rb
@ -12,9 +12,13 @@ module Onebox
      include Onebox::Mixins::GithubBody
      include Onebox::Mixins::GithubAuthHeader

-      matches_regexp(%r{^https?://(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:/)?(?:.)*/commit/})
+      matches_domain("github.com", "www.github.com")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/[\w\-]+/[\w\-]+/commit/[a-f0-9]{40}$})
+      end
+
      def url
        "https://api.github.com/repos/#{match[:org]}/#{match[:repository]}/commits/#{match[:sha]}"
      end
--- a/lib/onebox/engine/github_folder_onebox.rb
+++ b/lib/onebox/engine/github_folder_onebox.rb
@ -7,9 +7,13 @@ module Onebox
      include StandardEmbed
      include LayoutSupport

-      matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}\/tree/)
+      matches_domain("github.com", "www.github.com")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/[\w\-]+/[\w\-]+/tree/})
+      end
+
      private

      def data
--- a/lib/onebox/engine/github_gist_onebox.rb
+++ b/lib/onebox/engine/github_gist_onebox.rb
@ -9,9 +9,13 @@ module Onebox

      MAX_FILES = 3

-      matches_regexp(%r{^http(?:s)?://gist\.(?:(?:\w)+\.)?(github)\.com(?:/)?})
+      matches_domain("gist.github.com")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/[\w\-]+/[a-f0-9]+(/|$)})
+      end
+
      def url
        "https://api.github.com/gists/#{match[:sha]}"
      end
--- a/lib/onebox/engine/github_pull_request_onebox.rb
+++ b/lib/onebox/engine/github_pull_request_onebox.rb
@ -12,9 +12,13 @@ module Onebox
      include Onebox::Mixins::GithubBody
      include Onebox::Mixins::GithubAuthHeader

-      matches_regexp(%r{^https?://(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:/)?(?:.)*/pull})
+      matches_domain("github.com", "www.github.com")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{.*/pull})
+      end
+
      def url
        "https://api.github.com/repos/#{match[:org]}/#{match[:repository]}/pulls/#{match[:number]}"
      end
--- a/lib/onebox/engine/github_repo_onebox.rb
+++ b/lib/onebox/engine/github_repo_onebox.rb
@ -13,9 +13,13 @@ module Onebox

      GITHUB_COMMENT_REGEX = /(<!--.*?-->\r\n)/m

-      matches_regexp(%r{^https?:\/\/(?:www\.)?(?!gist\.)[^\/]*github\.com\/[^\/]+\/[^\/]+\/?$})
+      matches_domain("github.com", "www.github.com")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/[^/]+/[^/]+/?$})
+      end
+
      def url
        "https://api.github.com/repos/#{match[:org]}/#{match[:repository]}"
      end
--- a/lib/onebox/engine/gitlab_blob_onebox.rb
+++ b/lib/onebox/engine/gitlab_blob_onebox.rb
@ -6,7 +6,7 @@ module Onebox
  module Engine
    class GitlabBlobOnebox
      def self.git_regexp
-        %r{^https?://(www\.)?gitlab\.com.*/blob/}
+        %r{^https?://(?:www\.)?gitlab\.com/.*/blob/.*}
      end

      def self.onebox_name
--- a/lib/onebox/engine/google_calendar_onebox.rb
+++ b/lib/onebox/engine/google_calendar_onebox.rb
@ -5,10 +5,14 @@ module Onebox
    class GoogleCalendarOnebox
      include Engine

-      matches_regexp(/^(https?:)?\/\/((www|calendar)\.google\.[\w.]{2,}|goo\.gl)\/calendar\/.+$/)
+      matches_domain("www.google.com", "google.com", "calendar.google.com", "goo.gl")
      always_https
      requires_iframe_origins "https://calendar.google.com"

+      def self.matches_path(path)
+        path.match?(%r{^/calendar/.*$})
+      end
+
      def to_html
        url = @url.split("&").first
        src = ::Onebox::Helpers.normalize_url_for_output(url)
--- a/lib/onebox/engine/google_photos_onebox.rb
+++ b/lib/onebox/engine/google_photos_onebox.rb
@ -6,9 +6,13 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(photos)\.(app\.goo\.gl|google\.com)})
+      matches_domain("photos.google.com", "photos.app.goo.gl")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/.*$})
+      end
+
      def to_html
        og = get_opengraph
        return video_html(og) if og.video_secure_url
--- a/lib/onebox/engine/hackernews_onebox.rb
+++ b/lib/onebox/engine/hackernews_onebox.rb
@ -10,6 +10,7 @@ module Onebox
      REGEX = %r{^https?://news\.ycombinator\.com/item\?id=(?<item_id>\d+)}

      matches_regexp(REGEX)
+      always_https

      # This is their official API: https://blog.ycombinator.com/hacker-news-api/
      def url
--- a/lib/onebox/engine/imgur_onebox.rb
+++ b/lib/onebox/engine/imgur_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(www\.)?imgur\.com})
+      matches_domain("imgur.com", "www.imgur.com")
      always_https

      def to_html
--- a/lib/onebox/engine/instagram_onebox.rb
+++ b/lib/onebox/engine/instagram_onebox.rb
@ -7,12 +7,14 @@ module Onebox
      include StandardEmbed
      include LayoutSupport

-      matches_regexp(
-        %r{^https?://(?:www\.)?(?:instagram\.com|instagr\.am)/?(?:.*)/(?:p|tv)/[a-zA-Z\d_-]+},
-      )
+      matches_domain("www.instagram.com", "instagram.com", "instagr.am")
      always_https
      requires_iframe_origins "https://www.instagram.com"

+      def self.matches_path(path)
+        path.match?(%r{^/(?:[\w\-]+/)?(?:p|tv)/[a-zA-Z\d_-]+/?(?:\?.*)?$})
+      end
+
      def clean_url
        url
          .scan(
--- a/lib/onebox/engine/kaltura_onebox.rb
+++ b/lib/onebox/engine/kaltura_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

+      matches_domain("kaltura.com", allow_subdomains: true)
      always_https
-      matches_regexp(%r{^https?://[a-z0-9]+\.kaltura\.com/id/[a-zA-Z0-9]+})
      requires_iframe_origins "https://*.kaltura.com"

+      def self.matches_path(path)
+        path.match?(%r{^/id/[a-zA-Z0-9]+$})
+      end
+
      def preview_html
        og = get_opengraph

--- a/lib/onebox/engine/loom_onebox.rb
+++ b/lib/onebox/engine/loom_onebox.rb
@ -6,9 +6,13 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(www\.)?loom\.com/share/\w+(/\w+)?/?})
-      requires_iframe_origins "https://www.loom.com"
+      matches_domain("loom.com", "www.loom.com")
      always_https
+      requires_iframe_origins "https://www.loom.com"
+
+      def self.matches_path(path)
+        path.match?(%r{^/share/\w+(/\w+)?/?$})
+      end

      def placeholder_html
        ::Onebox::Helpers.video_placeholder_html
--- a/lib/onebox/engine/mixcloud_onebox.rb
+++ b/lib/onebox/engine/mixcloud_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://www\.mixcloud\.com/})
+      matches_domain("www.mixcloud.com")
      always_https
      requires_iframe_origins "https://www.mixcloud.com"

--- a/lib/onebox/engine/motoko_onebox.rb
+++ b/lib/onebox/engine/motoko_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://embed\.(motoko|smartcontracts)\.org/?.*})
+      matches_domain("embed.motoko.org", "embed.smartcontracts.org")
      requires_iframe_origins("https://embed.motoko.org", "https://embed.smartcontracts.org")
      always_https

--- a/lib/onebox/engine/pastebin_onebox.rb
+++ b/lib/onebox/engine/pastebin_onebox.rb
@ -8,7 +8,8 @@ module Onebox

      MAX_LINES = 10

-      matches_regexp(%r{^http?://pastebin\.com})
+      matches_domain("pastebin.com")
+      always_https

      private

--- a/lib/onebox/engine/pubmed_onebox.rb
+++ b/lib/onebox/engine/pubmed_onebox.rb
@ -6,7 +6,11 @@ module Onebox
      include Engine
      include LayoutSupport

-      matches_regexp(%r{^https?://(?:(?:\w)+\.)?(www.ncbi.nlm.nih)\.gov(?:/)?/pubmed/\d+})
+      matches_domain("ncbi.nlm.nih.gov", allow_subdomains: true)
+
+      def self.matches_path(path)
+        path.match?(%r{^/pubmed/\d+$})
+      end

      private

--- a/lib/onebox/engine/reddit_media_onebox.rb
+++ b/lib/onebox/engine/reddit_media_onebox.rb
@ -6,7 +6,8 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(www\.)?reddit\.com})
+      always_https
+      matches_domain("reddit.com", "www.reddit.com")

      def to_html
        if raw[:type] == "image"
--- a/lib/onebox/engine/replit_onebox.rb
+++ b/lib/onebox/engine/replit_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(replit\.com|repl\.it)/.+})
+      matches_domain("replit.com", "repl.it")
      always_https

      def placeholder_html
--- a/lib/onebox/engine/simplecast_onebox.rb
+++ b/lib/onebox/engine/simplecast_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{https?://(.+)?simplecast.com/(episodes|s)/.*})
+      matches_domain("simplecast.com", allow_subdomains: true)
      always_https
      requires_iframe_origins("https://player.simplecast.com")

+      def self.matches_path(path)
+        path.match?(%r{^/(episodes|s)/.+})
+      end
+
      def to_html
        get_oembed.html
      end
--- a/lib/onebox/engine/sketch_fab_onebox.rb
+++ b/lib/onebox/engine/sketch_fab_onebox.rb
@ -6,12 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(
-        /^https?:\/\/sketchfab\.com\/(?:models\/|3d-models\/(?:[^\/\s]+-)?)([a-z0-9]{32})/,
-      )
+      matches_domain("sketchfab.com")
      always_https
      requires_iframe_origins("https://sketchfab.com")

+      def self.matches_path(path)
+        path.match?(%r{^/(models/|3d-models/[^/\s]+-)?[a-z0-9]{32}})
+      end
+
      def to_html
        og = get_opengraph
        src = og.video_url.gsub("autostart=1", "")
--- a/lib/onebox/engine/slides_onebox.rb
+++ b/lib/onebox/engine/slides_onebox.rb
@ -6,9 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(/^https?:\/\/slides\.com\/[\p{Alnum}_\-]+\/[\p{Alnum}_\-]+$/)
+      matches_domain("slides.com")
+      always_https
      requires_iframe_origins "https://slides.com"

+      def self.matches_path(path)
+        path.match?(%r{^/[\p{Alnum}_\-]+/[\p{Alnum}_\-]+$})
+      end
+
      def to_html
        <<-HTML
          <iframe
--- a/lib/onebox/engine/sound_cloud_onebox.rb
+++ b/lib/onebox/engine/sound_cloud_onebox.rb
@ -6,9 +6,9 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://soundcloud\.com})
-      requires_iframe_origins "https://w.soundcloud.com"
+      matches_domain("soundcloud.com", "www.soundcloud.com")
      always_https
+      requires_iframe_origins "https://w.soundcloud.com"

      def to_html
        oembed = get_oembed
--- a/lib/onebox/engine/spotify_onebox.rb
+++ b/lib/onebox/engine/spotify_onebox.rb
@ -6,7 +6,7 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://open\.spotify/\.com})
+      matches_domain("open.spotify.com")
      requires_iframe_origins "https://open.spotify.com"
      always_https

--- a/lib/onebox/engine/steam_store_onebox.rb
+++ b/lib/onebox/engine/steam_store_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

+      matches_domain("store.steampowered.com")
      always_https
-      matches_regexp(%r{^https?://store\.steampowered\.com/app/\d+})
      requires_iframe_origins "https://store.steampowered.com"

+      def self.matches_path(path)
+        path.match?(%r{^/app/\d+$})
+      end
+
      def placeholder_html
        og = get_opengraph
        <<-HTML
--- a/lib/onebox/engine/threads_status_onebox.rb
+++ b/lib/onebox/engine/threads_status_onebox.rb
@ -7,9 +7,13 @@ module Onebox
      include LayoutSupport
      include HTML

-      matches_regexp(%r{^https?://www\.threads\.net/t/(?<id>[\d\w_-]+)/?.*?$})
+      matches_domain("threads.net", "www.threads.net")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/t/[\d\w_-]+/?})
+      end
+
      def self.priority
        1
      end
--- a/lib/onebox/engine/tiktok_onebox.rb
+++ b/lib/onebox/engine/tiktok_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://((?:m|www)\.)?tiktok\.com(?:/@(.+)\/video/|/v/)\d+(/\w+)?/?})
+      matches_domain("tiktok.com", "www.tiktok.com", "m.tiktok.com")
      requires_iframe_origins "https://www.tiktok.com"
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^(/@.+/video/\d+|/v/\d+)(/\w+)?/?$})
+      end
+
      def placeholder_html
        <<-HTML
          <img
--- a/lib/onebox/engine/trello_onebox.rb
+++ b/lib/onebox/engine/trello_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https://trello\.com/[bc]/\W*})
+      matches_domain("trello.com")
      requires_iframe_origins "https://trello.com"
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/[bc]/\w*})
+      end
+
      def to_html
        src = "https://trello.com/#{match[:type]}/#{match[:key]}.html"
        height = match[:type] == "b" ? 400 : 200
--- a/lib/onebox/engine/twitter_status_onebox.rb
+++ b/lib/onebox/engine/twitter_status_onebox.rb
@ -8,11 +8,13 @@ module Onebox
      include HTML
      include ActionView::Helpers::NumberHelper

-      matches_regexp(
-        %r{^https?://(mobile\.|www\.)?(twitter\.com|x\.com)/.+?/status(es)?/\d+(/(video|photo)/\d?+)?+(/?\?.*)?/?$},
-      )
+      matches_domain("twitter.com", "www.twitter.com", "mobile.twitter.com", "x.com", "www.x.com")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/.+?/status(es)?/\d+(/(video|photo)/\d?)?(/?\?.*)?/?$})
+      end
+
      def http_params
        { "User-Agent" => "DiscourseBot/1.0" }
      end
--- a/lib/onebox/engine/typeform_onebox.rb
+++ b/lib/onebox/engine/typeform_onebox.rb
@ -5,10 +5,14 @@ module Onebox
    class TypeformOnebox
      include Engine

-      matches_regexp(%r{^https?://[a-z0-9\-_]+\.typeform\.com/to/[a-zA-Z0-9]+})
+      matches_domain("typeform.com", allow_subdomains: true)
      requires_iframe_origins "https://*.typeform.com"
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/to/[a-zA-Z0-9]+$})
+      end
+
      def to_html
        typeform_src = build_typeform_src

--- a/lib/onebox/engine/vimeo_onebox.rb
+++ b/lib/onebox/engine/vimeo_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(www\.)?vimeo\.com/\d+(/\w+)?/?})
+      matches_domain("vimeo.com", "www.vimeo.com")
      requires_iframe_origins "https://player.vimeo.com"
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/\d+(/\w+)?/?$})
+      end
+
      def placeholder_html
        ::Onebox::Helpers.video_placeholder_html
      end
--- a/lib/onebox/engine/wikimedia_onebox.rb
+++ b/lib/onebox/engine/wikimedia_onebox.rb
@ -7,9 +7,13 @@ module Onebox
      include LayoutSupport
      include JSON

-      matches_regexp(%r{^https?://commons\.wikimedia\.org/wiki/(File:.+)})
+      matches_domain("commons.wikimedia.org")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/wiki/File:.+})
+      end
+
      def self.priority
        # Wikimedia links end in an image extension.
        # E.g. https://commons.wikimedia.org/wiki/File:Stones_members_montage2.jpg
--- a/lib/onebox/engine/wikipedia_onebox.rb
+++ b/lib/onebox/engine/wikipedia_onebox.rb
@ -7,9 +7,13 @@ module Onebox
      include LayoutSupport
      include HTML

-      matches_regexp(%r{^https?://.*\.wikipedia\.(com|org)})
+      matches_domain("wikipedia.com", "wikipedia.org", allow_subdomains: true)
      always_https

+      # Path check for this engine: no restriction beyond the domain match.
+      def self.matches_path(path)
+        true # Matches any path under the specified domains
+      end
+
      private

      def data
--- a/lib/onebox/engine/wistia_onebox.rb
+++ b/lib/onebox/engine/wistia_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{https?://(.+)?(wistia.com|wi.st)/(medias|embed)/.*})
+      matches_domain("wistia.com", "wi.st", allow_subdomains: true)
      requires_iframe_origins("https://fast.wistia.com", "https://fast.wistia.net")
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/(medias|embed)/.*$})
+      end
+
      def to_html
        oembed = get_oembed
        extracted_url = oembed.html.match(/iframe\ src\=\"(.*?)\"/)
--- a/lib/onebox/engine/xkcd_onebox.rb
+++ b/lib/onebox/engine/xkcd_onebox.rb
@ -7,7 +7,12 @@ module Onebox
      include LayoutSupport
      include JSON

-      matches_regexp(%r{^https?://(www\.)?(m\.)?xkcd\.com/\d+})
+      matches_domain("xkcd.com", "www.xkcd.com", "m.xkcd.com")
+      always_https
+
+      def self.matches_path(path)
+        path.match?(%r{^/\d+$})
+      end

      def url
        "https://xkcd.com/#{match[:comic_id]}/info.0.json"
--- a/lib/onebox/engine/youku_onebox.rb
+++ b/lib/onebox/engine/youku_onebox.rb
@ -6,7 +6,9 @@ module Onebox
      include Engine
      include HTML

-      matches_regexp(%r{^(https?://)?([\da-z\.-]+)(youku.com/)(.)+/?$})
+      matches_domain("youku.com", "youku.tv", allow_subdomains: true)
+      always_https
+
      requires_iframe_origins "https://player.youku.com"

      # Try to get the video ID. Works for URLs of the form:
--- a/lib/onebox/engine/youtube_onebox.rb
+++ b/lib/onebox/engine/youtube_onebox.rb
@ -6,10 +6,14 @@ module Onebox
      include Engine
      include StandardEmbed

-      matches_regexp(%r{^https?://(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)/.+$})
+      matches_domain("youtube.com", "www.youtube.com", "m.youtube.com", "youtu.be")
      requires_iframe_origins "https://www.youtube.com"
      always_https

+      def self.matches_path(path)
+        path.match?(%r{^/.+$})
+      end
+
      WIDTH = 480
      HEIGHT = 360

--- a/spec/lib/onebox/engine/asciinema_onebox_spec.rb
+++ b/spec/lib/onebox/engine/asciinema_onebox_spec.rb
@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::AsciinemaOnebox do
+  describe ".===" do
+    it "matches valid Asciinema URL" do
+      valid_url = URI("https://asciinema.org/a/abc123")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match invalid domain" do
+      invalid_url = URI("https://asciinema.org.malicious.com/a/abc123")
+      expect(described_class === invalid_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("https://example.com/a/abc123")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/audio_com_onebox_spec.rb
+++ b/spec/lib/onebox/engine/audio_com_onebox_spec.rb
@ -20,4 +20,26 @@ RSpec.describe Onebox::Engine::AudioComOnebox do
      Onebox.preview("https://audio.com/agilov/collections/discourse-test-collection").to_s,
    ).to match(%r{<iframe src="https://audio\.com/embed/collection/1773124246389900})
  end
+
+  describe ".===" do
+    it "matches valid URL" do
+      valid_url = URI("https://audio.com/path/to/resource")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid URL without path" do
+      valid_url = URI("https://audio.com")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match invalid URL with subdomain" do
+      invalid_url = URI("https://sub.audio.com/path/to/resource")
+      expect(described_class === invalid_url).to eq(false)
+    end
+
+    it "does not match invalid URL with valid domain as part of another domain" do
+      malicious_url = URI("https://audio.com.malicious.com")
+      expect(described_class === malicious_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/audioboom_onebox_spec.rb
+++ b/spec/lib/onebox/engine/audioboom_onebox_spec.rb
@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::AudioboomOnebox do
+  describe ".===" do
+    it "matches valid Audioboom URL" do
+      valid_url = URI("https://audioboom.com/posts/12345")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match invalid domain" do
+      invalid_url = URI("https://audioboom.com.malicious.com/posts/12345")
+      expect(described_class === invalid_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_url = URI("https://audioboom.com/somethingelse/12345")
+      expect(described_class === invalid_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("https://example.com/posts/12345")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/bandcamp_onebox_spec.rb
+++ b/spec/lib/onebox/engine/bandcamp_onebox_spec.rb
@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::BandCampOnebox do
+  describe ".===" do
+    it "matches valid Bandcamp album URL" do
+      valid_url_album = URI("https://artist.bandcamp.com/album/some-album")
+      expect(described_class === valid_url_album).to eq(true)
+    end
+
+    it "matches valid Bandcamp track URL" do
+      valid_url_track = URI("https://artist.bandcamp.com/track/some-track")
+      expect(described_class === valid_url_track).to eq(true)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://artist.bandcamp.com/playlist/some-playlist")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/album/some-album")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/cloud_app_onebox_spec.rb
+++ b/spec/lib/onebox/engine/cloud_app_onebox_spec.rb
@ -35,4 +35,16 @@ RSpec.describe Onebox::Engine::CloudAppOnebox do
  it "links to other formats" do
    expect(Onebox.preview("https://cl.ly/1x1f2g253l03").to_s).to match(/<a/)
  end
+
+  describe ".===" do
+    it "matches valid cl.ly URL" do
+      valid_url = URI("https://cl.ly/path/to/resource")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://cl.ly.malicious.com/path")
+      expect(described_class === malicious_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/coub_onebox_spec.rb
+++ b/spec/lib/onebox/engine/coub_onebox_spec.rb
@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::CoubOnebox do
+  describe ".===" do
+    it "matches valid coub URL" do
+      valid_url = URI("https://coub.com/view/12345")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match malicious URL with valid domain as part of another domain" do
+      malicious_url = URI("https://coub.com.malicious.com/view/12345")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://coub.com/invalid/abc123")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/five_hundred_px_onebox_spec.rb
+++ b/spec/lib/onebox/engine/five_hundred_px_onebox_spec.rb
@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::FiveHundredPxOnebox do
+  describe ".===" do
+    it "matches valid 500px photo URL" do
+      valid_url = URI("https://500px.com/photo/123456/")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://500px.com.malicious.com/photo/123456/")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://500px.com/invalid/123456/")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/flickr_onebox_spec.rb
+++ b/spec/lib/onebox/engine/flickr_onebox_spec.rb
@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::FlickrOnebox do
+  describe ".===" do
+    it "matches valid Flickr photos URL" do
+      valid_url = URI("https://www.flickr.com/photos/username/123456/")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://www.flickr.com.malicious.com/photos/username/123456/")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://www.flickr.com/invalid/123456/")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/flickr_shortened_onebox_spec.rb
+++ b/spec/lib/onebox/engine/flickr_shortened_onebox_spec.rb
@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::FlickrShortenedOnebox do
+  describe ".===" do
+    it "matches valid Flickr shortened URL" do
+      valid_url = URI("https://flic.kr/p/123abc")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://flic.kr.malicious.com/p/123abc")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://flic.kr/invalid/123abc")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/gfycat_onebox_spec.rb
+++ b/spec/lib/onebox/engine/gfycat_onebox_spec.rb
@ -32,4 +32,16 @@ RSpec.describe Onebox::Engine::GfycatOnebox do
  it "has keywords" do
    expect(html).to include("<a href='https://gfycat.com/gifs/search/lego'>#lego</a>")
  end
+
+  describe ".===" do
+    it "matches valid Gfycat URL" do
+      valid_url = URI("https://gfycat.com/some-gif")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://gfycat.com.malicious.com/some-gif")
+      expect(described_class === malicious_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/github_commit_onebox_spec.rb
+++ b/spec/lib/onebox/engine/github_commit_onebox_spec.rb
@ -133,4 +133,39 @@ RSpec.describe Onebox::Engine::GithubCommitOnebox do
      end
    end
  end
+
+  describe ".===" do
+    # A well-formed 40-character hex SHA shared by every example, so each negative
+    # case differs from the accepted URL in exactly one respect (host, path or hash)
+    # and cannot pass merely because its hash happens to be malformed.
+    let(:valid_sha) { "9a9b9c9d9e9f9a9b9c9d9e9f9a9b9c9d9e9f9a9b" }
+
+    it "matches valid GitHub commit URL" do
+      valid_url = URI("https://github.com/owner/repo/commit/#{valid_sha}")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with subdomain" do
+      # Only the host changes; path and hash stay valid.
+      subdomain_url = URI("https://subdomain.github.com/owner/repo/commit/#{valid_sha}")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match URL with additional domain" do
+      malicious_url = URI("https://github.com.malicious.com/owner/repo/commit/#{valid_sha}")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with invalid path" do
+      # Only the "commit" segment changes; host and hash stay valid.
+      invalid_path_url = URI("https://github.com/owner/repo/invalid/#{valid_sha}")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match URL with invalid commit hash" do
+      # Host and path shape are valid here; only the hash is malformed.
+      invalid_hash_url = URI("https://github.com/owner/repo/commit/invalidhash")
+      expect(described_class === invalid_hash_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/github_folder_onebox_spec.rb
+++ b/spec/lib/onebox/engine/github_folder_onebox_spec.rb
@ -67,4 +67,26 @@ RSpec.describe Onebox::Engine::GithubFolderOnebox do
      )
    end
  end
+
+  describe ".===" do
+    it "matches valid GitHub tree URL" do
+      valid_url = URI("https://github.com/username/repository/tree/main")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid GitHub tree URL with www" do
+      valid_url_with_www = URI("https://www.github.com/username/repository/tree/main")
+      expect(described_class === valid_url_with_www).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://github.com.malicious.com/username/repository/tree/main")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://github.com/username/repository/invalid/main")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/github_gist_onebox_spec.rb
+++ b/spec/lib/onebox/engine/github_gist_onebox_spec.rb
@ -70,4 +70,31 @@ RSpec.describe Onebox::Engine::GithubGistOnebox do
      end
    end
  end
+
+  describe ".===" do
+    it "matches valid Gist URL" do
+      valid_url = URI("https://gist.github.com/username/123456")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Gist URL with trailing slash" do
+      valid_url_with_slash = URI("https://gist.github.com/username/123456/")
+      expect(described_class === valid_url_with_slash).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://gist.github.com.malicious.com/username/123456")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.gist.github.com/username/123456")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match URL with wrong domain" do
+      invalid_url = URI("https://gist.github.io/username/123456")
+      expect(described_class === invalid_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/github_pull_request_onebox_spec.rb
+++ b/spec/lib/onebox/engine/github_pull_request_onebox_spec.rb
@ -151,4 +151,26 @@ RSpec.describe Onebox::Engine::GithubPullRequestOnebox do
      )
    end
  end
+
+  describe ".===" do
+    it "matches valid GitHub Pull Request URL" do
+      valid_url = URI("https://github.com/username/repository/pull/123")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid GitHub Pull Request URL with www" do
+      valid_url_with_www = URI("https://www.github.com/username/repository/pull/123")
+      expect(described_class === valid_url_with_www).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://github.com.malicious.com/username/repository/pull/123")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do # only the "pull" segment is altered
+      invalid_path_url = URI("https://github.com/username/repository/invalid/123")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/github_repo_onebox_spec.rb
+++ b/spec/lib/onebox/engine/github_repo_onebox_spec.rb
@ -69,4 +69,36 @@ RSpec.describe Onebox::Engine::GithubRepoOnebox do
      )
    end
  end
+
+  describe ".===" do
+    it "matches valid GitHub repository URL" do
+      valid_url = URI("https://github.com/username/repository/")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid GitHub repository URL without trailing slash" do
+      valid_url_without_slash = URI("https://github.com/username/repository")
+      expect(described_class === valid_url_without_slash).to eq(true)
+    end
+
+    it "matches valid GitHub repository URL with www" do
+      valid_url_with_www = URI("https://www.github.com/username/repository/")
+      expect(described_class === valid_url_with_www).to eq(true)
+    end
+
+    it "does not match Gist URL" do
+      gist_url = URI("https://gist.github.com/username/123456")
+      expect(described_class === gist_url).to eq(false)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://github.com.malicious.com/username/repository/")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://github.com/username/repository/invalid")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/gitlab_blob_onebox_spec.rb
+++ b/spec/lib/onebox/engine/gitlab_blob_onebox_spec.rb
@ -23,4 +23,31 @@ RSpec.describe Onebox::Engine::GitlabBlobOnebox do
      expect(html).to include("module Onebox")
    end
  end
+
+  describe ".===" do
+    it "matches valid GitLab blob URL" do
+      valid_url = URI("https://gitlab.com/group/project/-/blob/main/file.txt")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid GitLab blob URL with www" do
+      valid_url_with_www = URI("https://www.gitlab.com/group/project/-/blob/main/file.txt")
+      expect(described_class === valid_url_with_www).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://gitlab.com.malicious.com/group/project/-/blob/main/file.txt")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.gitlab.com/group/project/-/blob/main/file.txt")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://gitlab.com/group/project/-/tree/main")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/google_calendar_onebox_spec.rb
+++ b/spec/lib/onebox/engine/google_calendar_onebox_spec.rb
@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::GoogleCalendarOnebox do
+  describe ".===" do
+    it "matches valid Google Calendar URL" do
+      valid_url = URI("https://calendar.google.com/calendar/u/0/r/eventedit")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid shortened URL" do
+      valid_shortened_url = URI("https://goo.gl/calendar/abcd1234")
+      expect(described_class === valid_shortened_url).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://calendar.google.com.malicious.com/calendar/u/0/r/eventedit")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.calendar.google.com/calendar/u/0/r/eventedit")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://calendar.google.com/someotherpath")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/google_photos_onebox_spec.rb
+++ b/spec/lib/onebox/engine/google_photos_onebox_spec.rb
@ -25,4 +25,31 @@ RSpec.describe Onebox::Engine::GooglePhotosOnebox do
      "https://lh3.googleusercontent.com/ZlYoleNnrVo8qdx0qEjKi_-_VXY7pqqCqIW-B88EMqJ0etibFw1kEu4bzo-T4jyOQ9Ey2ekADim_L3re4lT3aBmYJUwhjkEUb5Yk59YaCSy2R8AoME5Rx4wviDRgICllF8g6lsZnS8c=w600-h315-p-k",
    )
  end
+
+  describe ".===" do
+    it "matches valid Google Photos URL with google.com domain" do
+      valid_url = URI("https://photos.google.com/share/abcd1234")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Google Photos URL with app.goo.gl domain" do
+      valid_url_short = URI("https://photos.app.goo.gl/abcd1234")
+      expect(described_class === valid_url_short).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://photos.google.com.malicious.com/share/abcd1234")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.photos.google.com/share/abcd1234")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match URL with unsupported domain" do
+      invalid_url = URI("https://photos.otherdomain.com/share/abcd1234")
+      expect(described_class === invalid_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/hackernews_spec.rb
+++ b/spec/lib/onebox/engine/hackernews_spec.rb
@ -57,4 +57,31 @@ RSpec.describe Onebox::Engine::HackernewsOnebox do
      expect(html).to include("2013")
    end
  end
+
+  describe ".===" do
+    it "matches valid Hacker News item URL" do
+      valid_url = URI("https://news.ycombinator.com/item?id=12345")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://news.ycombinator.com.malicious.com/item?id=12345")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/item?id=12345")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://news.ycombinator.com/itemx?id=12345")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match invalid query string" do
+      invalid_query_url = URI("https://news.ycombinator.com/item?foo=bar")
+      expect(described_class === invalid_query_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/imgur_onebox_spec.rb
+++ b/spec/lib/onebox/engine/imgur_onebox_spec.rb
@ -11,4 +11,31 @@ RSpec.describe Onebox::Engine::ImgurOnebox do
    imgur.stubs(:is_album?).returns(true)
    expect(html).to include("<span class='album-title'>[Album] Did you miss me?</span>")
  end
+
+  describe ".===" do
+    it "matches valid Imgur URL" do
+      valid_url = URI("https://imgur.com/gallery/abcd1234")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Imgur URL with www" do
+      valid_url_with_www = URI("https://www.imgur.com/gallery/abcd1234")
+      expect(described_class === valid_url_with_www).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://imgur.com.malicious.com/gallery/abcd1234")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.imgur.com/gallery/abcd1234")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("https://example.com/gallery/abcd1234")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/instagram_onebox_spec.rb
+++ b/spec/lib/onebox/engine/instagram_onebox_spec.rb
@ -76,4 +76,46 @@ RSpec.describe Onebox::Engine::InstagramOnebox do
      expect(html).to include("<iframe")
    end
  end
+
+  describe ".===" do
+    it "matches valid Instagram post URL" do
+      valid_url = URI("https://www.instagram.com/p/abc123xyz/")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Instagram TV URL" do
+      valid_url_tv = URI("https://instagram.com/tv/abc123xyz")
+      expect(described_class === valid_url_tv).to eq(true)
+    end
+
+    it "matches valid short URL from instagr.am" do
+      valid_short_url = URI("https://instagr.am/p/abc123xyz/")
+      expect(described_class === valid_short_url).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://instagram.com.malicious.com/p/abc123xyz")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match short URL with extra domain" do
+      malicious_url = URI("https://instagr.am.malicious.com/p/abc123xyz")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.instagram.com/p/abc123xyz")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://instagram.com/invalid/abc123xyz")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("https://example.com/p/abc123xyz")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/kaltura_onebox_spec.rb
+++ b/spec/lib/onebox/engine/kaltura_onebox_spec.rb
@ -53,4 +53,21 @@ RSpec.describe Onebox::Engine::KalturaOnebox do
      expect(actual_thumbnail).to eq og_video_thumbnail
    end
  end
+
+  describe ".===" do
+    it "matches valid Kaltura URL" do
+      valid_url = URI("https://cdn.kaltura.com/id/abc123")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://cdn.kaltura.com.malicious.com/id/abc123")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://cdn.kaltura.com/id/abc123/extra")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/loom_onebox_spec.rb
+++ b/spec/lib/onebox/engine/loom_onebox_spec.rb
@ -13,4 +13,31 @@ RSpec.describe Onebox::Engine::LoomOnebox do
      '<iframe class="loom-onebox" src="https://www.loom.com/embed/c9695e5dc084496c80b7d7516d2a569a?sid=e1279914-ecaa-4faf-afa8-89cbab488240" frameborder="0" allowfullscreen="" seamless="seamless" sandbox="allow-same-origin allow-scripts allow-forms allow-popups allow-popups-to-escape-sandbox allow-presentation"></iframe>',
    )
  end
+
+  describe ".===" do
+    it "matches valid Loom share URL" do
+      valid_url = URI("https://www.loom.com/share/abc123")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Loom share URL with additional segment" do
+      valid_url_with_segment = URI("https://www.loom.com/share/abc123/xyz456")
+      expect(described_class === valid_url_with_segment).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://www.loom.com.malicious.com/share/abc123")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/share/abc123")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+
+    it "does not match invalid path" do
+      invalid_path_url = URI("https://www.loom.com/shares/abc123")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/mixcloud_onebox_spec.rb
+++ b/spec/lib/onebox/engine/mixcloud_onebox_spec.rb
@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::MixcloudOnebox do
+  describe ".===" do
+    it "matches valid MixCloud URL" do
+      valid_url = URI("https://www.mixcloud.com/user/show-name/")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid MixCloud root URL" do
+      valid_url_root = URI("https://www.mixcloud.com/")
+      expect(described_class === valid_url_root).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://www.mixcloud.com.malicious.com/user/show-name/")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/user/show-name/")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/pastebin_onebox_spec.rb
+++ b/spec/lib/onebox/engine/pastebin_onebox_spec.rb
@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::PastebinOnebox do
+  describe ".===" do
+    it "matches valid Pastebin URL" do
+      valid_url = URI("http://pastebin.com/abc123")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Pastebin URL with HTTPS" do
+      valid_https_url = URI("https://pastebin.com/abc123")
+      expect(described_class === valid_https_url).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("http://pastebin.com.malicious.com/abc123")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("http://sub.pastebin.com/abc123")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("http://example.com/pastebin.com/abc123")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/reddit_media_onebox_spec.rb
+++ b/spec/lib/onebox/engine/reddit_media_onebox_spec.rb
@ -23,4 +23,16 @@ RSpec.describe Onebox::Engine::RedditMediaOnebox do
  it "includes description" do
    expect(html).to include("Literally nothing black edition")
  end
+
+  describe ".===" do
+    it "matches valid Reddit URL" do
+      valid_url = URI(link)
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match invalid Reddit URL" do
+      invalid_url = URI("https://www.reddit.com.somedomain.com/r/colors/comments/b4d5xm/")
+      expect(described_class === invalid_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/replit_onebox_spec.rb
+++ b/spec/lib/onebox/engine/replit_onebox_spec.rb
@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::ReplitOnebox do
+  describe ".===" do
+    it "matches valid Replit URL" do
+      valid_url = URI("https://replit.com/@username/project-name")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid repl.it URL" do
+      valid_url_repl = URI("https://repl.it/@username/project-name")
+      expect(described_class === valid_url_repl).to eq(true)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://replit.com.malicious.com/@username/project-name")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/@username/project-name")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/simplecast_onebox_spec.rb
+++ b/spec/lib/onebox/engine/simplecast_onebox_spec.rb
@ -0,0 +1,42 @
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::SimplecastOnebox do
+  describe ".===" do
+    it "matches valid Simplecast episodes URL" do
+      valid_url = URI("https://simplecast.com/episodes/example-episode")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Simplecast short URL" do
+      valid_short_url = URI("https://simplecast.com/s/123abc")
+      expect(described_class === valid_short_url).to eq(true)
+    end
+
+    it "matches valid Simplecast subdomain URL" do
+      valid_subdomain_url = URI("https://subdomain.simplecast.com/episodes/example-episode")
+      expect(described_class === valid_subdomain_url).to eq(true)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://simplecast.com/invalid/123")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/episodes/example-episode")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+
+    # Subdomains are accepted by this engine (see the subdomain example above), so the two
+    # host shapes a loose suffix comparison would wrongly accept are pinned explicitly.
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://simplecast.com.malicious.com/episodes/example-episode")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match look-alike domain ending in the valid domain" do
+      lookalike_url = URI("https://notsimplecast.com/episodes/example-episode")
+      expect(described_class === lookalike_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/sketch_fab_onebox_spec.rb
+++ b/spec/lib/onebox/engine/sketch_fab_onebox_spec.rb
@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::SketchFabOnebox do
+  describe ".===" do
+    it "matches valid Sketchfab models URL" do
+      valid_url = URI("https://sketchfab.com/models/1234567890abcdef1234567890abcdef")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Sketchfab 3d-models URL with title" do
+      valid_url_with_title =
+        URI("https://sketchfab.com/3d-models/example-title-1234567890abcdef1234567890abcdef")
+      expect(described_class === valid_url_with_title).to eq(true)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://sketchfab.com/invalid/path")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/models/1234567890abcdef1234567890abcdef")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+
+    it "does not match Sketchfab URL with incorrect ID length" do
+      invalid_id_url = URI("https://sketchfab.com/models/12345")
+      expect(described_class === invalid_id_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/slides_onebox_spec.rb
+++ b/spec/lib/onebox/engine/slides_onebox_spec.rb
@ -1,7 +1,7 @@
 # frozen_string_literal: true

 RSpec.describe Onebox::Engine::SlidesOnebox do
-  let(:link) { "http://slides.com/drksephy/ecmascript-2015" }
+  let(:link) { "https://slides.com/drksephy/ecmascript-2015" }
  let(:html) { described_class.new(link).to_html }

  before { stub_request(:get, link).to_return(status: 200, body: onebox_response("slides")) }
@ -20,4 +20,26 @@ RSpec.describe Onebox::Engine::SlidesOnebox do
      expect(html).to include("iframe")
    end
  end
+
+  describe ".===" do
+    it "matches valid Slides URL" do
+      valid_url = URI("https://slides.com/drksephy/example-slide")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with missing slide name" do
+      invalid_url = URI("https://slides.com/drksephy/")
+      expect(described_class === invalid_url).to eq(false)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://slides.com.malicious.com/drksephy/example-slide")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/drksephy/example-slide")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/soundcloud_onebox_spec.rb
+++ b/spec/lib/onebox/engine/soundcloud_onebox_spec.rb
@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::SoundCloudOnebox do
+  describe ".===" do
+    it "matches valid SoundCloud URL" do
+      valid_url = URI("https://soundcloud.com/artist/track")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid SoundCloud URL with additional path" do
+      valid_url_with_path = URI("https://soundcloud.com/artist/track/more-info")
+      expect(described_class === valid_url_with_path).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://soundcloud.com.malicious.com/artist/track")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.soundcloud.com/artist/track")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("https://example.com/soundcloud.com/artist/track")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/spotify_onebox_spec.rb
+++ b/spec/lib/onebox/engine/spotify_onebox_spec.rb
@ -36,4 +36,36 @@ RSpec.describe Onebox::Engine::SpotifyOnebox do
      expect(html).to include('height="152"')
    end
  end
+
+  describe ".===" do
+    it "matches valid Spotify URL" do
+      valid_url = URI("https://open.spotify.com/playlist/12345")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Spotify track URL" do
+      valid_url = URI("https://open.spotify.com/track/5Hpwb8l7NHJkiCZOPRmfIK?si=24c8d91a5d114c62")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Spotify root URL" do
+      valid_root_url = URI("https://open.spotify.com/")
+      expect(described_class === valid_root_url).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://open.spotify.com.malicious.com/playlist/12345")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match URL with subdomain" do
+      subdomain_url = URI("https://sub.open.spotify.com/playlist/12345")
+      expect(described_class === subdomain_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("https://example.com/open.spotify.com/playlist/12345")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
 end
--- a/spec/lib/onebox/engine/steam_store_onebox_spec.rb
+++ b/spec/lib/onebox/engine/steam_store_onebox_spec.rb
@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::SteamStoreOnebox do
+  describe ".===" do
+    it "matches valid Steam Store app URL" do
+      valid_url = URI("https://store.steampowered.com/app/123456")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://store.steampowered.com/invalid/123456")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/app/123456")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://store.steampowered.com.malicious.com/app/123456")
+      expect(described_class === malicious_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/tiktok_onebox_spec.rb
+++ b/spec/lib/onebox/engine/tiktok_onebox_spec.rb
@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::TiktokOnebox do
+  describe ".===" do
+    it "matches valid TikTok user video URL" do
+      valid_user_video_url = URI("https://www.tiktok.com/@user123/video/1234567890")
+      expect(described_class === valid_user_video_url).to eq(true)
+    end
+
+    it "matches valid TikTok short video URL" do
+      valid_short_video_url = URI("https://www.tiktok.com/v/1234567890")
+      expect(described_class === valid_short_video_url).to eq(true)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://www.tiktok.com/@user123/invalid/1234567890")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/@user123/video/1234567890")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://www.tiktok.com.malicious.com/@user123/video/1234567890")
+      expect(described_class === malicious_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/vimeo_onebox_spec.rb
+++ b/spec/lib/onebox/engine/vimeo_onebox_spec.rb
@ -0,0 +1,30 @@
+#frozen_string_literal: true
+
+RSpec.describe Onebox::Engine::VimeoOnebox do
+  describe ".===" do
+    it "matches valid Vimeo video URL" do
+      valid_url = URI("https://vimeo.com/123456789")
+      expect(described_class === valid_url).to eq(true)
+    end
+
+    it "matches valid Vimeo video URL with additional segment" do
+      valid_url_with_segment = URI("https://vimeo.com/123456789/info")
+      expect(described_class === valid_url_with_segment).to eq(true)
+    end
+
+    it "does not match URL with invalid path" do
+      invalid_path_url = URI("https://vimeo.com/invalid/123456789")
+      expect(described_class === invalid_path_url).to eq(false)
+    end
+
+    it "does not match unrelated domain" do
+      unrelated_url = URI("https://example.com/123456789")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+
+    it "does not match URL with valid domain as part of another domain" do
+      malicious_url = URI("https://vimeo.com.malicious.com/123456789")
+      expect(described_class === malicious_url).to eq(false)
+    end
+  end
+end
--- a/spec/lib/onebox/engine/wikipedia_onebox_spec.rb
+++ b/spec/lib/onebox/engine/wikipedia_onebox_spec.rb
@ -49,4 +49,26 @@ RSpec.describe Onebox::Engine::WikipediaOnebox do
      expect(html).to include("Le terme est repris par")
    end
  end
+
+  describe ".===" do
+    it "matches valid Wikipedia URL with .org" do
+      valid_url_org = URI("https://en.wikipedia.org/wiki/Ruby_(programming_language)")
+      expect(described_class === valid_url_org).to eq(true)
+    end
+
+    it "matches valid Wikipedia URL with .com" do
+      valid_url_com = URI("https://en.wikipedia.com/wiki/Ruby_(programming_language)")
+      expect(described_class === valid_url_com).to eq(true)
+    end
+
+    it "does not match URL with extra domain" do
+      malicious_url = URI("https://en.wikipedia.org.malicious.com/wiki/Ruby_(programming_language)")
+      expect(described_class === malicious_url).to eq(false)
+    end
+
+    it "does not match unrelated URL" do
+      unrelated_url = URI("https://example.com/wiki/wikipedia.org")
+      expect(described_class === unrelated_url).to eq(false)
+    end
+  end
 end