From 4f0bdec37084b5ab9d5439f7feebf31f59e302c9 Mon Sep 17 00:00:00 2001 From: discoursehosting Date: Tue, 7 Nov 2017 17:50:43 +0100 Subject: [PATCH] some improvements for importers (#5295) * decode html entities within code blocks * Only import users that actually participated in the bbpress part of Wordpress; import password hashes * create permalinks for topics * Better handling of [code] blocks --- script/import_scripts/bbpress.rb | 61 ++++++++++++++++--- .../phpbb3/support/text_processor.rb | 13 +++- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/script/import_scripts/bbpress.rb b/script/import_scripts/bbpress.rb index a9ecc0650b2..6d27e950eaa 100644 --- a/script/import_scripts/bbpress.rb +++ b/script/import_scripts/bbpress.rb @@ -22,6 +22,8 @@ class ImportScripts::Bbpress < ImportScripts::Base def initialize super + @he = HTMLEntities.new + @client = Mysql2::Client.new( host: BB_PRESS_HOST, username: BB_PRESS_USER, @@ -36,21 +38,32 @@ class ImportScripts::Bbpress < ImportScripts::Base import_categories import_topics_and_posts import_private_messages + create_permalinks end def import_users puts "", "importing users..." 
last_user_id = -1 - total_users = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}users WHERE user_email LIKE '%@%'").first["count"] + total_users = bbpress_query(<<-SQL + SELECT COUNT(DISTINCT(u.id)) AS cnt + FROM #{BB_PRESS_PREFIX}users u + LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id + WHERE p.post_type IN ('forum', 'reply', 'topic') + AND user_email LIKE '%@%' + SQL + ).first["cnt"] batches(BATCH_SIZE) do |offset| users = bbpress_query(<<-SQL - SELECT id, user_nicename, display_name, user_email, user_registered, user_url - FROM #{BB_PRESS_PREFIX}users + SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass + FROM #{BB_PRESS_PREFIX}users u + LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id WHERE user_email LIKE '%@%' - AND id > #{last_user_id} - ORDER BY id + AND p.post_type IN ('forum', 'reply', 'topic') + AND u.id > #{last_user_id} + GROUP BY u.id + ORDER BY u.id LIMIT #{BATCH_SIZE} SQL ).to_a @@ -86,6 +99,7 @@ class ImportScripts::Bbpress < ImportScripts::Base { id: u["id"].to_i, username: u["user_nicename"], + password: u["user_pass"], email: u["user_email"].downcase, name: u["display_name"].presence || u['user_nicename'], created_at: u["user_registered"], @@ -242,8 +256,7 @@ class ImportScripts::Bbpress < ImportScripts::Base } if post[:raw].present? - post[:raw].gsub!("
<pre><code>", "```\n")
-          post[:raw].gsub!("</code></pre>", "\n```") + post[:raw].gsub!(/\<pre\>\<code(=[a-z]*)?\>(.*?)\<\/code\>\<\/pre\>/im) { "```\n#{@he.decode($2)}\n```" } end if p["post_type"] == "topic" @@ -264,6 +277,40 @@ class ImportScripts::Bbpress < ImportScripts::Base end end + def create_permalinks + puts "", "creating permalinks..." + + last_topic_id = -1 + total_topics = bbpress_query(<<-SQL + SELECT COUNT(*) count + FROM #{BB_PRESS_PREFIX}posts + WHERE post_status <> 'spam' + AND post_type IN ('topic') + SQL + ).first["count"] + + batches(BATCH_SIZE) do |offset| + topics = bbpress_query(<<-SQL + SELECT id, + guid + FROM #{BB_PRESS_PREFIX}posts + WHERE post_status <> 'spam' + AND post_type IN ('topic') + AND id > #{last_topic_id} + ORDER BY id + LIMIT #{BATCH_SIZE} + SQL + ).to_a + break if topics.empty? + + topics.each do |t| + topic = topic_lookup_from_imported_post_id(t['id']) + Permalink.create( url: URI.parse(t['guid']).path.chomp('/'), topic_id: topic[:topic_id] ) rescue nil + end + last_topic_id = topics[-1]["id"].to_i + end + end + def import_private_messages puts "", "importing private messages..." 
diff --git a/script/import_scripts/phpbb3/support/text_processor.rb b/script/import_scripts/phpbb3/support/text_processor.rb index 3eceff3f753..561b2c2f69b 100644 --- a/script/import_scripts/phpbb3/support/text_processor.rb +++ b/script/import_scripts/phpbb3/support/text_processor.rb @@ -8,6 +8,7 @@ module ImportScripts::PhpBB3 @lookup = lookup @database = database @smiley_processor = smiley_processor + @he = HTMLEntities.new @settings = settings @new_site_prefix = settings.new_site_prefix @@ -25,7 +26,7 @@ module ImportScripts::PhpBB3 process_smilies(text) process_links(text) process_lists(text) - + process_code(text) text end @@ -48,6 +49,9 @@ module ImportScripts::PhpBB3 # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] text.gsub!(/:(?:\w{8})\]/, ']') + + # remove color tags + text.gsub!(/\[\/?color(=#[a-z0-9]*)?\]/i, "") end def bbcode_to_md(text) @@ -142,5 +146,12 @@ module ImportScripts::PhpBB3 @long_internal_link_regexp = Regexp.new(%Q||, Regexp::IGNORECASE) @short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE) end + + def process_code(text) + text.gsub!(//, "\n") + text + end end end