DEV: make sure we don't load all data into memory when exporting chat messages (#22276)

This commit makes sure we don't load all data into memory when doing CSV exports. 
The most important change here made to the recently introduced export of chat 
messages (3ea31f4). We were loading all data into memory in the first version, with 
this commit it's not the case anymore.

Speaking of old exports. Some of them already use find_each, and it worked as 
expected, without loading all data into memory. And it will proceed working as 
expected after this commit.

In general, I made sure this change didn't break other CSV exports, first manually, and 
then by writing system specs for them. Sadly, I haven't managed yet to make those 
specs stable, they work fine locally, but flaky in GitHub actions, so I've disabled them 
for now.

I'll be making more changes to the CSV exports code soon, those system specs will be 
very helpful. I'll be running them locally, and I hope I'll manage to make them stable 
while doing that work.
This commit is contained in:
Andrei Prigorshnev
2023-07-12 18:52:18 +04:00
committed by GitHub
parent b7404373cf
commit fbe0e4c78c
8 changed files with 501 additions and 37 deletions

View File

@ -8,26 +8,29 @@ module Chat
Chat::Message
.unscoped
.where(created_at: 6.months.ago..Time.current)
.joins(:chat_channel)
.joins(:user)
.joins("INNER JOIN users last_editors ON chat_messages.last_editor_id = last_editors.id")
.order(:created_at)
.includes(:chat_channel)
.includes(:user)
.includes(:last_editor)
.limit(LIMIT)
.pluck(
"chat_messages.id",
"chat_channels.id",
"chat_channels.name",
"users.id",
"users.username",
"chat_messages.message",
"chat_messages.cooked",
"chat_messages.created_at",
"chat_messages.updated_at",
"chat_messages.deleted_at",
"chat_messages.in_reply_to_id",
"last_editors.id",
"last_editors.username",
)
.find_each do |chat_message|
yield(
[
chat_message.id,
chat_message.chat_channel.id,
chat_message.chat_channel.name,
chat_message.user.id,
chat_message.user.username,
chat_message.message,
chat_message.cooked,
chat_message.created_at,
chat_message.updated_at,
chat_message.deleted_at,
chat_message.in_reply_to&.id,
chat_message.last_editor&.id,
chat_message.last_editor&.username,
]
)
end
end
def get_header(entity)

View File

@ -22,7 +22,8 @@ describe Chat::MessagesExporter do
it "exports messages" do
exporter = Class.new.extend(Chat::MessagesExporter)
result = exporter.chat_message_export.to_a
result = []
exporter.chat_message_export { |data_row| result << data_row }
expect(result.length).to be(7)
assert_exported_message(result[0], public_channel_message_1)

View File

@ -0,0 +1,63 @@
# frozen_string_literal: true
RSpec.describe "Chat CSV exports", type: :system do
fab!(:admin) { Fabricate(:admin) }
let(:csv_export_pm_page) { PageObjects::Pages::CSVExportPM.new }
before do
Jobs.run_immediately!
sign_in(admin)
chat_system_bootstrap
end
xit "exports chat messages" do
message = Fabricate(:chat_message)
visit "/admin/plugins/chat"
click_button "Create export"
visit "/u/#{admin.username}/messages"
click_link "[Chat Message] Data export complete"
expect(csv_export_pm_page).to have_download_link
exported_data = csv_export_pm_page.download_and_extract
expect(exported_data.first).to eq(
%w[
id
chat_channel_id
chat_channel_name
user_id
username
message
cooked
created_at
updated_at
deleted_at
in_reply_to_id
last_editor_id
last_editor_username
],
)
time_format = "%Y-%m-%d %H:%M:%S UTC"
expect(exported_data.second).to eq(
[
message.id.to_s,
message.chat_channel.id.to_s,
message.chat_channel.name,
message.user.id.to_s,
message.user.username,
message.message,
message.cooked,
message.created_at.strftime(time_format),
message.updated_at.strftime(time_format),
nil,
nil,
message.last_editor.id.to_s,
message.last_editor.username,
],
)
ensure
csv_export_pm_page.clear_downloads
end
end