From cd6d2ffaa4321db9389ba2f84020d8aa13a4481a Mon Sep 17 00:00:00 2001
From: pgcd
Date: Thu, 14 Nov 2024 09:41:11 +0100
Subject: [PATCH] DEV: Adds personal messages to SMF2 importer (#29540)

* This commit ports the personal messages conversion step from smf1.rb into smf2.rb
* Improves error handling for skipped messages
* also adds a brief explanation for possible improvements to topic matching in PMs
---
 script/import_scripts/smf2.rb | 138 +++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 1 deletion(-)

diff --git a/script/import_scripts/smf2.rb b/script/import_scripts/smf2.rb
index e6359195069..b1e8b3998ea 100644
--- a/script/import_scripts/smf2.rb
+++ b/script/import_scripts/smf2.rb
@@ -11,6 +11,8 @@ require "etc"
 require "open3"
 
 class ImportScripts::Smf2 < ImportScripts::Base
+  BATCH_SIZE = 5000
+
   def self.run
     options = Options.new
     begin
@@ -65,6 +67,7 @@ class ImportScripts::Smf2 < ImportScripts::Base
     import_users
     import_categories
     import_posts
+    import_personal_posts
     postprocess_posts
     make_prettyurl_permalinks("/forum")
   end
@@ -266,11 +269,144 @@ class ImportScripts::Smf2 < ImportScripts::Base
             )
           end
       end
-      post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes)
+      begin
+        post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes)
+      rescue => e
+        puts "Failed to import message with ID #{post[:id]}"
+        puts e.message
+        puts e.backtrace.join("\n")
+        post[:raw] = "-- MESSAGE SKIPPED --"
+      end
       next post
     end
   end
 
+  # Imports SMF personal messages as Discourse private-message posts.
+  #
+  # Messages are fetched in id_pm order, BATCH_SIZE rows at a time, and are
+  # imported under the ids "pm-<id_pm>". A "Re:" message that matches a known
+  # topic with the same participants is appended to that topic; every other
+  # message starts a new private-message topic.
+  def import_personal_posts
+    puts "Loading pm mapping..."
+
+    @pm_mapping = {}
+
+    # Index the PM topics that already exist by participant list and title,
+    # so that replies imported below can be attached to them.
+    Topic
+      .joins(:topic_allowed_users)
+      .where(archetype: Archetype.private_message)
+      .where("title NOT ILIKE 'Re:%'")
+      .group(:id)
+      .order(:id)
+      .pluck(
+        "string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id",
+      )
+      .each do |users, title, topic_id|
+        @pm_mapping[users] ||= {}
+        @pm_mapping[users][title] ||= []
+        @pm_mapping[users][title] << topic_id
+      end
+
+    puts "", "Importing personal posts..."
+
+    last_post_id = -1
+    total =
+      query(
+        "SELECT COUNT(*) count FROM smf_personal_messages WHERE deleted_by_sender = 0",
+        as: :single,
+      )
+
+    batches(BATCH_SIZE) do |offset|
+      posts = query(<<~SQL, as: :array)
+        SELECT id_pm
+             , id_member_from
+             , msgtime
+             , subject
+             , body
+             , (SELECT GROUP_CONCAT(id_member) FROM smf_pm_recipients r WHERE r.id_pm = pm.id_pm) recipients
+          FROM smf_personal_messages pm
+         WHERE deleted_by_sender = 0
+           AND id_pm > #{last_post_id}
+         ORDER BY id_pm
+         LIMIT #{BATCH_SIZE}
+      SQL
+
+      break if posts.empty?
+
+      # The next batch resumes after the highest id_pm seen in this one.
+      last_post_id = posts[-1][:id_pm]
+      post_ids = posts.map { |p| "pm-#{p[:id_pm]}" }
+
+      next if all_records_exist?(:post, post_ids)
+
+      create_posts(posts, total: total, offset: offset) do |p|
+        next unless user_id = user_id_from_imported_user_id(p[:id_member_from])
+        next if p[:recipients].blank?
+        recipients =
+          p[:recipients].split(",").map { |id| user_id_from_imported_user_id(id) }.compact.uniq
+        next if recipients.empty?
+
+        id = "pm-#{p[:id_pm]}"
+        next if post_id_from_imported_post_id(id)
+
+        post = { id: id, created_at: Time.at(p[:msgtime]), user_id: user_id }
+        begin
+          post[:raw] = convert_message_body(p[:body])
+        rescue => e
+          puts "Failed to import personal message with ID #{post[:id]}"
+          puts e.message
+          puts e.backtrace.join("\n")
+          post[:raw] = "-- MESSAGE SKIPPED --"
+        end
+
+        users = (recipients + [user_id]).sort.uniq.join(",")
+        title = decode_entities(p[:subject])
+
+        if topic_id = find_pm_topic_id(users, title)
+          post[:topic_id] = topic_id
+        else
+          post[:archetype] = Archetype.private_message
+          post[:title] = title
+          post[:target_usernames] = User.where(id: recipients).pluck(:username)
+          # Remember the new topic so later replies in this run can find it.
+          post[:post_create_action] = proc do |action_post|
+            @pm_mapping[users] ||= {}
+            @pm_mapping[users][title] ||= []
+            @pm_mapping[users][title] << action_post.topic_id
+          end
+        end
+
+        post
+      end
+    end
+  end
+
+  # Looks up the existing PM topic that a reply should be appended to.
+  #
+  # @param users [String] comma-joined, sorted ids of every participant
+  # @param title [String] decoded subject of the message being imported
+  # @return [Integer, nil] id of the newest known topic for these participants
+  #   whose title equals the subject without its "Re:" prefixes, or nil when
+  #   the message is not a reply or no such topic is known
+  def find_pm_topic_id(users, title)
+    # Please note that this approach to topic matching is lifted straight from smf1.rb.
+    # With SMFv2 we could update this to use id_pm_head, which contains
+    # the id of the message this is a reply to, or the message's own id_pm
+    # if it's the first in the messages thread.
+    #
+    # Every leading "Re:" is matched case-insensitively together with the
+    # whitespace that follows it, so "Re: Re: Hello" resolves to "Hello".
+    reply_prefix = /\A(?:Re:\s*)+/i
+    return unless title.match?(reply_prefix)
+
+    topics_by_title = @pm_mapping[users]
+    return unless topics_by_title
+
+    topics_by_title[title.sub(reply_prefix, "")]&.last
+  end
+
   def import_attachment(post, attachment)
     path =
       find_smf_attachment_path(