diff --git a/script/import_scripts/higher_logic.rb b/script/import_scripts/higher_logic.rb new file mode 100644 index 00000000000..1f5b1d9599e --- /dev/null +++ b/script/import_scripts/higher_logic.rb @@ -0,0 +1,231 @@ +# frozen_string_literal: true + +require "mysql2" +require File.expand_path(File.dirname(__FILE__) + "/base.rb") + +class ImportScripts::HigherLogic < ImportScripts::Base + + HIGHERLOGIC_DB = "higherlogic" + BATCH_SIZE = 1000 + ATTACHMENT_DIR = "/shared/import/data/attachments" + + def initialize + super + + @client = Mysql2::Client.new( + host: "localhost", + username: "root", + database: HIGHERLOGIC_DB + ) + end + + def execute + import_groups + import_users + import_group_users + import_categories + import_posts + import_attachments + end + + def import_groups + puts '', 'importing groups' + + groups = mysql_query <<-SQL + SELECT CommunityKey, CommunityName + FROM Community + ORDER BY CommunityName + SQL + + create_groups(groups) do |group| + { + id: group['CommunityKey'], + name: group['CommunityName'] + } + end + end + + def import_users + puts '', 'importing users' + total_count = mysql_query("SELECT count(*) FROM Contact").first["count"] + + batches(BATCH_SIZE) do |offset| + results = mysql_query <<-SQL + SELECT ContactKey, FirstName, LastName, EmailAddress, HLAdminFlag, UserStatus, CreatedOn, Birthday, Bio + FROM Contact + LIMIT #{BATCH_SIZE} + OFFSET #{offset} + SQL + + break if results.size < 1 + + next if all_records_exist? :users, results.map { |u| u['ContactKey'] } + + create_users(results, total: total_count, offset: offset) do |user| + next if user['EmailAddress'].blank? + { + id: user['ContactKey'], + email: user['EmailAddress'], + name: "#{user['FirstName']} #{user['LastName']}", + created_at: user['CreatedOn'] == nil ? 0 : Time.zone.at(user['CreatedOn']), + bio_raw: user['Bio'], + active: user['UserStatus'] == "Active", + admin: user['HLAdminFlag'] == 1 + } + end + end + end + + def import_group_users + puts '', 'importing group users' + + group_users = mysql_query(<<-SQL + SELECT CommunityKey, ContactKey + FROM CommunityMember + SQL + ).to_a + + group_users.each do |row| + next unless user_id = user_id_from_imported_user_id(row['ContactKey']) + next unless group_id = group_id_from_imported_group_id(row['CommunityKey']) + puts '', '.' + + GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) + end + end + + def import_categories + puts '', 'importing categories' + + categories = mysql_query <<-SQL + SELECT DiscussionKey, DiscussionName + FROM Discussion + SQL + + create_categories(categories) do |category| + { + id: category['DiscussionKey'], + name: category['DiscussionName'] + } + end + end + + def import_posts + puts '', 'importing topics and posts' + total_count = mysql_query("SELECT count(*) FROM DiscussionPost").first["count"] + + batches(BATCH_SIZE) do |offset| + results = mysql_query <<-SQL + SELECT MessageKey, + ParentMessageKey, + Subject, + ContactKey, + DiscussionKey, + PinnedFlag, + Body, + CreatedOn + FROM DiscussionPost + WHERE CreatedOn > '2020-01-01 00:00:00' + LIMIT #{BATCH_SIZE} + OFFSET #{offset} + SQL + + break if results.size < 1 + next if all_records_exist? :posts, results.map { |p| p['MessageKey'] } + + create_posts(results, total: total_count, offset: offset) do |post| + raw = preprocess_raw(post['Body']) + mapped = { + id: post['MessageKey'], + user_id: user_id_from_imported_user_id(post['ContactKey']), + raw: raw, + created_at: Time.zone.at(post['CreatedOn']), + } + + if post['ParentMessageKey'].nil? + mapped[:category] = category_id_from_imported_category_id(post['DiscussionKey']).to_i + mapped[:title] = CGI.unescapeHTML(post['Subject']) + mapped[:pinned] = post['PinnedFlag'] == 1 + else + topic = topic_lookup_from_imported_post_id(post['ParentMessageKey']) + + if topic.present? + mapped[:topic_id] = topic[:topic_id] + else + puts "Parent post #{post['ParentMessageKey']} doesn't exist. Skipping." + next + end + end + + mapped + end + end + end + + def import_attachments + puts '', 'importing attachments' + + count = 0 + + total_attachments = mysql_query(<<-SQL + SELECT COUNT(*) count + FROM LibraryEntryFile l + JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey + WHERE p.CreatedOn > '2020-01-01 00:00:00' + SQL + ).first['count'] + + batches(BATCH_SIZE) do |offset| + attachments = mysql_query(<<-SQL + SELECT l.VersionName, + l.FileExtension, + p.MessageKey + FROM LibraryEntryFile l + LEFT JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey + WHERE p.CreatedOn > '2020-01-01 00:00:00' + LIMIT #{BATCH_SIZE} + OFFSET #{offset} + SQL + ).to_a + + break if attachments.empty? + + attachments.each do |a| + print_status(count += 1, total_attachments, get_start_time("attachments")) + original_filename = "#{a['VersionName']}.#{a['FileExtension']}" + path = File.join(ATTACHMENT_DIR, original_filename) + + if File.exists?(path) + if post = Post.find(post_id_from_imported_post_id(a['MessageKey'])) + filename = File.basename(original_filename) + upload = create_upload(post.user.id, path, filename) + + if upload&.persisted? + html = html_for_upload(upload, filename) + + post.raw << "\n\n" << html + post.save! + PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists? + end + end + end + end + end + end + + def preprocess_raw(body) + raw = body.dup + + # trim off any post text beyond ---- to remove email threading + raw = raw.slice(0..(raw.index('------'))) || raw + + raw = HtmlToMarkdown.new(raw).to_markdown + raw + end + + def mysql_query(sql) + @client.query(sql, cache_rows: false) + end +end + +ImportScripts::HigherLogic.new.perform