mirror of
https://github.com/discourse/discourse.git
synced 2025-06-04 20:44:40 +08:00
FEATURE: Cache embed contents in the database (#25133)
* FEATURE: Cache embed contents in the database. This will be useful for features that rely on the semantic content of topics, such as the many AI features. Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com>
This commit is contained in:

committed by
GitHub

parent
ac4d90b3a6
commit
13735f35fb
@ -3,10 +3,13 @@
|
|||||||
class TopicEmbed < ActiveRecord::Base
|
class TopicEmbed < ActiveRecord::Base
|
||||||
include Trashable
|
include Trashable
|
||||||
|
|
||||||
|
EMBED_CONTENT_CACHE_MAX_LENGTH = 32_000
|
||||||
|
|
||||||
belongs_to :topic
|
belongs_to :topic
|
||||||
belongs_to :post
|
belongs_to :post
|
||||||
validates_presence_of :embed_url
|
validates_presence_of :embed_url
|
||||||
validates_uniqueness_of :embed_url
|
validates_uniqueness_of :embed_url
|
||||||
|
validates :embed_content_cache, length: { maximum: EMBED_CONTENT_CACHE_MAX_LENGTH }
|
||||||
|
|
||||||
before_validation(on: :create) do
|
before_validation(on: :create) do
|
||||||
unless (
|
unless (
|
||||||
@ -43,6 +46,7 @@ class TopicEmbed < ActiveRecord::Base
|
|||||||
def self.import(user, url, title, contents, category_id: nil, cook_method: nil, tags: nil)
|
def self.import(user, url, title, contents, category_id: nil, cook_method: nil, tags: nil)
|
||||||
return unless url =~ %r{\Ahttps?\://}
|
return unless url =~ %r{\Ahttps?\://}
|
||||||
|
|
||||||
|
original_contents = contents.dup.truncate(EMBED_CONTENT_CACHE_MAX_LENGTH)
|
||||||
contents = first_paragraph_from(contents) if SiteSetting.embed_truncate && cook_method.nil?
|
contents = first_paragraph_from(contents) if SiteSetting.embed_truncate && cook_method.nil?
|
||||||
contents ||= ""
|
contents ||= ""
|
||||||
contents = contents.dup << imported_from_html(url)
|
contents = contents.dup << imported_from_html(url)
|
||||||
@ -77,6 +81,7 @@ class TopicEmbed < ActiveRecord::Base
|
|||||||
}
|
}
|
||||||
|
|
||||||
post = PostCreator.create(user, create_args)
|
post = PostCreator.create(user, create_args)
|
||||||
|
post.topic.topic_embed.update!(embed_content_cache: original_contents)
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
absolutize_urls(url, contents)
|
absolutize_urls(url, contents)
|
||||||
@ -101,7 +106,7 @@ class TopicEmbed < ActiveRecord::Base
|
|||||||
changes[:title] = title if title.present?
|
changes[:title] = title if title.present?
|
||||||
|
|
||||||
post.revise(user, changes, skip_validations: true, bypass_rate_limiter: true)
|
post.revise(user, changes, skip_validations: true, bypass_rate_limiter: true)
|
||||||
embed.update!(content_sha1: content_sha1)
|
embed.update!(content_sha1: content_sha1, embed_content_cache: original_contents)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -296,6 +301,11 @@ class TopicEmbed < ActiveRecord::Base
|
|||||||
response = TopicEmbed.find_remote(url)
|
response = TopicEmbed.find_remote(url)
|
||||||
|
|
||||||
body = response.body
|
body = response.body
|
||||||
|
if post&.topic&.topic_embed && body.present?
|
||||||
|
post.topic.topic_embed.update!(
|
||||||
|
embed_content_cache: body.truncate(EMBED_CONTENT_CACHE_MAX_LENGTH),
|
||||||
|
)
|
||||||
|
end
|
||||||
body << TopicEmbed.imported_from_html(url)
|
body << TopicEmbed.imported_from_html(url)
|
||||||
body
|
body
|
||||||
end
|
end
|
||||||
@ -306,15 +316,16 @@ end
|
|||||||
#
|
#
|
||||||
# Table name: topic_embeds
|
# Table name: topic_embeds
|
||||||
#
|
#
|
||||||
# id :integer not null, primary key
|
# id :integer not null, primary key
|
||||||
# topic_id :integer not null
|
# topic_id :integer not null
|
||||||
# post_id :integer not null
|
# post_id :integer not null
|
||||||
# embed_url :string(1000) not null
|
# embed_url :string(1000) not null
|
||||||
# content_sha1 :string(40)
|
# content_sha1 :string(40)
|
||||||
# created_at :datetime not null
|
# created_at :datetime not null
|
||||||
# updated_at :datetime not null
|
# updated_at :datetime not null
|
||||||
# deleted_at :datetime
|
# deleted_at :datetime
|
||||||
# deleted_by_id :integer
|
# deleted_by_id :integer
|
||||||
|
# embed_content_cache :text
|
||||||
#
|
#
|
||||||
# Indexes
|
# Indexes
|
||||||
#
|
#
|
||||||
|
@ -0,0 +1,7 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Adds the `embed_content_cache` text column to the `topic_embeds` table.
class AddEmbedContentCacheToTopicEmbed < ActiveRecord::Migration[7.0]
  def change
    add_column(:topic_embeds, :embed_content_cache, :text)
  end
end
|
@ -73,6 +73,9 @@ RSpec.describe TopicEmbed do
|
|||||||
expect(post.cooked).to have_tag("a", with: { href: "http://eviltrout.com/hello" })
|
expect(post.cooked).to have_tag("a", with: { href: "http://eviltrout.com/hello" })
|
||||||
expect(post.cooked).to have_tag("img", with: { src: "http://eviltrout.com/images/wat.jpg" })
|
expect(post.cooked).to have_tag("img", with: { src: "http://eviltrout.com/images/wat.jpg" })
|
||||||
|
|
||||||
|
# It caches the embed content
|
||||||
|
expect(post.topic.topic_embed.embed_content_cache).to eq(contents)
|
||||||
|
|
||||||
# It converts relative URLs to absolute when expanded
|
# It converts relative URLs to absolute when expanded
|
||||||
stub_request(:get, url).to_return(status: 200, body: contents)
|
stub_request(:get, url).to_return(status: 200, body: contents)
|
||||||
expect(TopicEmbed.expanded_for(post)).to have_tag(
|
expect(TopicEmbed.expanded_for(post)).to have_tag(
|
||||||
@ -107,6 +110,13 @@ RSpec.describe TopicEmbed do
|
|||||||
expect(topic_embed.post.topic.user).to eq(new_user)
|
expect(topic_embed.post.topic.user).to eq(new_user)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "Supports updating the embed content cache" do
  # Importing with different contents must change the cache stored on topic_embed.
  expect { TopicEmbed.import(user, url, title, "new contents") }.to change {
    topic_embed.reload.embed_content_cache
  }

  # The cache now holds exactly the contents passed to the import.
  expect(topic_embed.embed_content_cache).to eq("new contents")
end
|
||||||
|
|
||||||
it "Should leave uppercase Feed Entry URL untouched in content" do
|
it "Should leave uppercase Feed Entry URL untouched in content" do
|
||||||
cased_url = "http://eviltrout.com/ABCD"
|
cased_url = "http://eviltrout.com/ABCD"
|
||||||
post = TopicEmbed.import(user, cased_url, title, "some random content")
|
post = TopicEmbed.import(user, cased_url, title, "some random content")
|
||||||
@ -559,4 +569,30 @@ RSpec.describe TopicEmbed do
|
|||||||
expect(html).to eq(expected_html)
|
expect(html).to eq(expected_html)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe ".expanded_for" do
  fab!(:user)
  fab!(:embeddable_host)
  fab!(:category)
  fab!(:tag)

  let(:title) { "How to turn a fish from good to evil in 30 seconds" }
  let(:url) { "http://eviltrout.com/123" }
  let(:contents) { "<p>hello world new post :D</p>" }

  it "returns embed content" do
    # The remote page serves the same HTML that was imported.
    stub_request(:get, url).to_return(status: 200, body: contents)

    imported_post = TopicEmbed.import(user, url, title, contents)
    expanded = TopicEmbed.expanded_for(imported_post)

    expect(expanded).to include(contents)
  end

  it "updates the embed content cache" do
    # The remote page serves a different body on a later request.
    initial_response = { status: 200, body: contents }
    changed_response = { status: 200, body: "contents changed" }
    stub_request(:get, url).to_return(initial_response).then.to_return(changed_response)

    imported_post = TopicEmbed.import(user, url, title, contents)
    TopicEmbed.expanded_for(imported_post)

    refreshed_cache = imported_post.topic.topic_embed.reload.embed_content_cache
    expect(refreshed_cache).to include("contents changed")
  end
end
|
||||||
end
|
end
|
||||||
|
Reference in New Issue
Block a user