From 0daa17780583dbde918caac9cbbd91342aa23414 Mon Sep 17 00:00:00 2001 From: Quangbuu Le Date: Mon, 31 Jul 2017 15:56:57 +0700 Subject: [PATCH] Enhance bulk import scripts (#5010) * Enhance bulk import scripts * Fix: restore running statement of BulkImport::VBulletin --- script/bulk_import/base.rb | 22 ++++++++++++++++++---- script/bulk_import/vbulletin.rb | 13 +++++++++---- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb index 7ed396c2fa7..7db12adfb9f 100644 --- a/script/bulk_import/base.rb +++ b/script/bulk_import/base.rb @@ -176,7 +176,7 @@ class BulkImport::Base POST_COLUMNS ||= %i{ id user_id last_editor_id topic_id post_number sort_order reply_to_post_number - raw cooked hidden word_count created_at last_version_at updated_at + like_count raw cooked hidden word_count created_at last_version_at updated_at } TOPIC_ALLOWED_USER_COLUMNS ||= %i{ @@ -306,7 +306,8 @@ class BulkImport::Base end def process_category(category) - @categories[category[:imported_id].to_s] = category[:id] = @last_category_id += 1 + category[:id] ||= @last_category_id += 1 + @categories[category[:imported_id].to_s] ||= category[:id] category[:name] = category[:name][0...50].scrub.strip # TODO: unique name category[:name_lower] = category[:name].downcase @@ -347,6 +348,7 @@ class BulkImport::Base @topic_id_by_post_id[post[:id]] = post[:topic_id] post[:raw] = (post[:raw] || "").scrub.strip.presence || "" post[:raw] = process_raw post[:raw] + post[:like_count] ||= 0 post[:cooked] = pre_cook post[:raw] post[:hidden] ||= false post[:word_count] = post[:raw].scan(/[[:word:]]+/).size @@ -484,7 +486,8 @@ class BulkImport::Base mapped = yield(row) next unless mapped processed = send(process_method_name, mapped) - imported_ids << mapped[:imported_id] + imported_ids << mapped[:imported_id] unless mapped[:imported_id].nil? + imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil? @raw_connection.put_copy_data columns.map { |c| processed[c] } print "\r%7d - %6d/sec".freeze % [imported_ids.size, imported_ids.size.to_f / (Time.now - start)] if imported_ids.size % 5000 == 0 end @@ -538,7 +541,18 @@ class BulkImport::Base end def pre_cook(raw) - cooked = @markdown.render(raw).scrub.strip + cooked = raw + + # Convert YouTube URLs to lazyYT DOMs before being transformed into links + cooked.gsub!(/\nhttps\:\/\/www.youtube.com\/watch\?v=(\w+)\n/) do + video_id = $1 + result = <<-HTML +
+ HTML + result.strip + end + + cooked = @markdown.render(cooked).scrub.strip cooked.gsub!(/\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[\/QUOTE\]/im) do username, post_id, topic_id = $1, $2, $3 diff --git a/script/bulk_import/vbulletin.rb b/script/bulk_import/vbulletin.rb index 4de56ae5f93..20eec2398fd 100644 --- a/script/bulk_import/vbulletin.rb +++ b/script/bulk_import/vbulletin.rb @@ -1,4 +1,5 @@ require_relative "base" +require "set" require "mysql2" require "htmlentities" @@ -354,6 +355,8 @@ class BulkImport::VBulletin < BulkImport::Base posts = mysql_stream <<-SQL SELECT postid, post.threadid, parentid, userid, post.dateline, post.visible, pagetext + #{", post_thanks_amount" if @has_post_thanks} + FROM post JOIN thread ON thread.threadid = post.threadid WHERE postid > #{@last_imported_post_id} @@ -365,7 +368,7 @@ class BulkImport::VBulletin < BulkImport::Base replied_post_topic_id = topic_id_from_imported_post_id(row[2]) reply_to_post_number = topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil - { + post = { imported_id: row[0], topic_id: topic_id, reply_to_post_number: reply_to_post_number, @@ -374,6 +377,9 @@ class BulkImport::VBulletin < BulkImport::Base hidden: row[5] == 0, raw: normalize_text(row[6]), } + + post[:like_count] = row[7] if @has_post_thanks + post end end @@ -396,11 +402,10 @@ class BulkImport::VBulletin < BulkImport::Base next if @imported_topics.has_key?(key) @imported_topics[key] = row[0] + PRIVATE_OFFSET - { archetype: Archetype.private_message, imported_id: row[0] + PRIVATE_OFFSET, - title: normalize_text(title), + title: title, user_id: user_id_from_imported_id(row[2]), created_at: Time.zone.at(row[4]), } @@ -410,7 +415,7 @@ class BulkImport::VBulletin < BulkImport::Base def import_topic_allowed_users puts "Importing topic allowed users..." - allowed_users = [] + allowed_users = Set.new mysql_stream(<<-SQL SELECT pmtextid, touserarray