DEV: Apply syntax_tree formatting to script/*

David Taylor
2023-01-07 11:53:14 +00:00
parent ff508d1ae5
commit 436b3b392b
143 changed files with 8905 additions and 7353 deletions
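For context, syntax_tree is the Ruby formatter applied here. The sketch below shows the kind of formatting pass this commit performs, assuming the gem's public SyntaxTree.format API; the exact invocation used against the repository (for example a rake task or the stree CLI) may differ.

# Minimal sketch: re-format every Ruby file under script/ with syntax_tree.
# Assumes the syntax_tree gem is installed; SyntaxTree.format returns the
# source rewritten according to the formatter's layout rules.
require "syntax_tree"

Dir.glob("script/**/*.rb").each do |path|
  source = File.read(path)
  formatted = SyntaxTree.format(source)
  File.write(path, formatted) unless formatted == source
end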


@ -1,6 +1,6 @@
# frozen_string_literal: true
if ARGV.include?('bbcode-to-md')
if ARGV.include?("bbcode-to-md")
# Replace (most) bbcode with markdown before creating posts.
# This will dramatically clean up the final posts in Discourse.
#
@ -10,7 +10,7 @@ if ARGV.include?('bbcode-to-md')
# cd ruby-bbcode-to-md
# gem build ruby-bbcode-to-md.gemspec
# gem install ruby-bbcode-to-md-*.gem
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"
end
require "pg"
@ -20,12 +20,12 @@ require "htmlentities"
puts "Loading application..."
require_relative "../../config/environment"
require_relative '../import_scripts/base/uploader'
require_relative "../import_scripts/base/uploader"
module BulkImport; end
module BulkImport
end
class BulkImport::Base
NOW ||= "now()"
PRIVATE_OFFSET ||= 2**30
@ -33,41 +33,41 @@ class BulkImport::Base
CHARSET_MAP = {
"armscii8" => nil,
"ascii" => Encoding::US_ASCII,
"big5" => Encoding::Big5,
"binary" => Encoding::ASCII_8BIT,
"cp1250" => Encoding::Windows_1250,
"cp1251" => Encoding::Windows_1251,
"cp1256" => Encoding::Windows_1256,
"cp1257" => Encoding::Windows_1257,
"cp850" => Encoding::CP850,
"cp852" => Encoding::CP852,
"cp866" => Encoding::IBM866,
"cp932" => Encoding::Windows_31J,
"dec8" => nil,
"eucjpms" => Encoding::EucJP_ms,
"euckr" => Encoding::EUC_KR,
"gb2312" => Encoding::EUC_CN,
"gbk" => Encoding::GBK,
"geostd8" => nil,
"greek" => Encoding::ISO_8859_7,
"hebrew" => Encoding::ISO_8859_8,
"hp8" => nil,
"keybcs2" => nil,
"koi8r" => Encoding::KOI8_R,
"koi8u" => Encoding::KOI8_U,
"latin1" => Encoding::ISO_8859_1,
"latin2" => Encoding::ISO_8859_2,
"latin5" => Encoding::ISO_8859_9,
"latin7" => Encoding::ISO_8859_13,
"macce" => Encoding::MacCentEuro,
"ascii" => Encoding::US_ASCII,
"big5" => Encoding::Big5,
"binary" => Encoding::ASCII_8BIT,
"cp1250" => Encoding::Windows_1250,
"cp1251" => Encoding::Windows_1251,
"cp1256" => Encoding::Windows_1256,
"cp1257" => Encoding::Windows_1257,
"cp850" => Encoding::CP850,
"cp852" => Encoding::CP852,
"cp866" => Encoding::IBM866,
"cp932" => Encoding::Windows_31J,
"dec8" => nil,
"eucjpms" => Encoding::EucJP_ms,
"euckr" => Encoding::EUC_KR,
"gb2312" => Encoding::EUC_CN,
"gbk" => Encoding::GBK,
"geostd8" => nil,
"greek" => Encoding::ISO_8859_7,
"hebrew" => Encoding::ISO_8859_8,
"hp8" => nil,
"keybcs2" => nil,
"koi8r" => Encoding::KOI8_R,
"koi8u" => Encoding::KOI8_U,
"latin1" => Encoding::ISO_8859_1,
"latin2" => Encoding::ISO_8859_2,
"latin5" => Encoding::ISO_8859_9,
"latin7" => Encoding::ISO_8859_13,
"macce" => Encoding::MacCentEuro,
"macroman" => Encoding::MacRoman,
"sjis" => Encoding::SHIFT_JIS,
"swe7" => nil,
"tis620" => Encoding::TIS_620,
"ucs2" => Encoding::UTF_16BE,
"ujis" => Encoding::EucJP_ms,
"utf8" => Encoding::UTF_8,
"sjis" => Encoding::SHIFT_JIS,
"swe7" => nil,
"tis620" => Encoding::TIS_620,
"ucs2" => Encoding::UTF_16BE,
"ujis" => Encoding::EucJP_ms,
"utf8" => Encoding::UTF_8,
}
# rubocop:enable Layout/HashAlignment
@ -82,12 +82,13 @@ class BulkImport::Base
@encoding = CHARSET_MAP[charset]
@bbcode_to_md = true if use_bbcode_to_md?
@markdown = Redcarpet::Markdown.new(
Redcarpet::Render::HTML.new(hard_wrap: true),
no_intra_emphasis: true,
fenced_code_blocks: true,
autolink: true
)
@markdown =
Redcarpet::Markdown.new(
Redcarpet::Render::HTML.new(hard_wrap: true),
no_intra_emphasis: true,
fenced_code_blocks: true,
autolink: true,
)
end
def run
@ -132,7 +133,9 @@ class BulkImport::Base
map = []
ids = []
@raw_connection.send_query("SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'")
@raw_connection.send_query(
"SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'",
)
@raw_connection.set_single_row_mode
@raw_connection.get_result.stream_each do |row|
@ -163,12 +166,14 @@ class BulkImport::Base
puts "Loading imported topic ids..."
@topics, imported_topic_ids = imported_ids("topic")
@last_imported_topic_id = imported_topic_ids.select { |id| id < PRIVATE_OFFSET }.max || -1
@last_imported_private_topic_id = imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
@last_imported_private_topic_id =
imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
puts "Loading imported post ids..."
@posts, imported_post_ids = imported_ids("post")
@last_imported_post_id = imported_post_ids.select { |id| id < PRIVATE_OFFSET }.max || -1
@last_imported_private_post_id = imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
@last_imported_private_post_id =
imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
end
def last_id(klass)
@ -182,9 +187,7 @@ class BulkImport::Base
@raw_connection.send_query("SELECT id, #{column} FROM #{name}")
@raw_connection.set_single_row_mode
@raw_connection.get_result.stream_each do |row|
map[row["id"].to_i] = row[column].to_i
end
@raw_connection.get_result.stream_each { |row| map[row["id"].to_i] = row[column].to_i }
@raw_connection.get_result
@ -199,13 +202,24 @@ class BulkImport::Base
puts "Loading users indexes..."
@last_user_id = last_id(User)
@last_user_email_id = last_id(UserEmail)
@emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h
@emails =
User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h
@usernames_lower = User.unscoped.pluck(:username_lower).to_set
@mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h
@mapped_usernames =
UserCustomField
.joins(:user)
.where(name: "import_username")
.pluck("user_custom_fields.value", "users.username")
.to_h
puts "Loading categories indexes..."
@last_category_id = last_id(Category)
@category_names = Category.unscoped.pluck(:parent_category_id, :name).map { |pci, name| "#{pci}-#{name}" }.to_set
@category_names =
Category
.unscoped
.pluck(:parent_category_id, :name)
.map { |pci, name| "#{pci}-#{name}" }
.to_set
puts "Loading topics indexes..."
@last_topic_id = last_id(Topic)
@ -233,13 +247,27 @@ class BulkImport::Base
def fix_primary_keys
puts "Updating primary key sequences..."
@raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})") if @last_group_id > 0
@raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})") if @last_user_id > 0
@raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})") if @last_user_email_id > 0
@raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})") if @last_category_id > 0
@raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})") if @last_topic_id > 0
@raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})") if @last_post_id > 0
@raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})") if @last_post_action_id > 0
if @last_group_id > 0
@raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})")
end
if @last_user_id > 0
@raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})")
end
if @last_user_email_id > 0
@raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})")
end
if @last_category_id > 0
@raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})")
end
if @last_topic_id > 0
@raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})")
end
if @last_post_id > 0
@raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})")
end
if @last_post_action_id > 0
@raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})")
end
end
def group_id_from_imported_id(id)
@ -272,63 +300,124 @@ class BulkImport::Base
post_id && @topic_id_by_post_id[post_id]
end
GROUP_COLUMNS ||= %i{
id name title bio_raw bio_cooked created_at updated_at
}
GROUP_COLUMNS ||= %i[id name title bio_raw bio_cooked created_at updated_at]
USER_COLUMNS ||= %i{
id username username_lower name active trust_level admin moderator
date_of_birth ip_address registration_ip_address primary_group_id
suspended_at suspended_till last_emailed_at created_at updated_at
}
USER_COLUMNS ||= %i[
id
username
username_lower
name
active
trust_level
admin
moderator
date_of_birth
ip_address
registration_ip_address
primary_group_id
suspended_at
suspended_till
last_emailed_at
created_at
updated_at
]
USER_EMAIL_COLUMNS ||= %i{
id user_id email primary created_at updated_at
}
USER_EMAIL_COLUMNS ||= %i[id user_id email primary created_at updated_at]
USER_STAT_COLUMNS ||= %i{
user_id topics_entered time_read days_visited posts_read_count
likes_given likes_received new_since read_faq
first_post_created_at post_count topic_count bounce_score
reset_bounce_score_after digest_attempted_at
}
USER_STAT_COLUMNS ||= %i[
user_id
topics_entered
time_read
days_visited
posts_read_count
likes_given
likes_received
new_since
read_faq
first_post_created_at
post_count
topic_count
bounce_score
reset_bounce_score_after
digest_attempted_at
]
USER_PROFILE_COLUMNS ||= %i{
user_id location website bio_raw bio_cooked views
}
USER_PROFILE_COLUMNS ||= %i[user_id location website bio_raw bio_cooked views]
GROUP_USER_COLUMNS ||= %i{
group_id user_id created_at updated_at
}
GROUP_USER_COLUMNS ||= %i[group_id user_id created_at updated_at]
CATEGORY_COLUMNS ||= %i{
id name name_lower slug user_id description position parent_category_id
created_at updated_at
}
CATEGORY_COLUMNS ||= %i[
id
name
name_lower
slug
user_id
description
position
parent_category_id
created_at
updated_at
]
TOPIC_COLUMNS ||= %i{
id archetype title fancy_title slug user_id last_post_user_id category_id
visible closed pinned_at views created_at bumped_at updated_at
}
TOPIC_COLUMNS ||= %i[
id
archetype
title
fancy_title
slug
user_id
last_post_user_id
category_id
visible
closed
pinned_at
views
created_at
bumped_at
updated_at
]
POST_COLUMNS ||= %i{
id user_id last_editor_id topic_id post_number sort_order reply_to_post_number
like_count raw cooked hidden word_count created_at last_version_at updated_at
}
POST_COLUMNS ||= %i[
id
user_id
last_editor_id
topic_id
post_number
sort_order
reply_to_post_number
like_count
raw
cooked
hidden
word_count
created_at
last_version_at
updated_at
]
POST_ACTION_COLUMNS ||= %i{
id post_id user_id post_action_type_id deleted_at created_at updated_at
deleted_by_id related_post_id staff_took_action deferred_by_id targets_topic
agreed_at agreed_by_id deferred_at disagreed_at disagreed_by_id
}
POST_ACTION_COLUMNS ||= %i[
id
post_id
user_id
post_action_type_id
deleted_at
created_at
updated_at
deleted_by_id
related_post_id
staff_took_action
deferred_by_id
targets_topic
agreed_at
agreed_by_id
deferred_at
disagreed_at
disagreed_by_id
]
TOPIC_ALLOWED_USER_COLUMNS ||= %i{
topic_id user_id created_at updated_at
}
TOPIC_ALLOWED_USER_COLUMNS ||= %i[topic_id user_id created_at updated_at]
TOPIC_TAG_COLUMNS ||= %i{
topic_id tag_id created_at updated_at
}
TOPIC_TAG_COLUMNS ||= %i[topic_id tag_id created_at updated_at]
def create_groups(rows, &block)
create_records(rows, "group", GROUP_COLUMNS, &block)
@ -340,10 +429,7 @@ class BulkImport::Base
create_records(rows, "user", USER_COLUMNS, &block)
create_custom_fields("user", "username", @imported_usernames.keys) do |username|
{
record_id: @imported_usernames[username],
value: username,
}
{ record_id: @imported_usernames[username], value: username }
end
end
@ -389,8 +475,8 @@ class BulkImport::Base
group[:name] = group_name
end
group[:title] = group[:title].scrub.strip.presence if group[:title].present?
group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present?
group[:title] = group[:title].scrub.strip.presence if group[:title].present?
group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present?
group[:bio_cooked] = pre_cook(group[:bio_raw]) if group[:bio_raw].present?
group[:created_at] ||= NOW
group[:updated_at] ||= group[:created_at]
@ -456,7 +542,9 @@ class BulkImport::Base
user_email[:email] ||= random_email
user_email[:email].downcase!
# unique email
user_email[:email] = random_email until EmailAddressValidator.valid_value?(user_email[:email]) && !@emails.has_key?(user_email[:email])
user_email[:email] = random_email until EmailAddressValidator.valid_value?(
user_email[:email],
) && !@emails.has_key?(user_email[:email])
user_email
end
@ -539,7 +627,11 @@ class BulkImport::Base
post[:raw] = (post[:raw] || "").scrub.strip.presence || "<Empty imported post>"
post[:raw] = process_raw post[:raw]
if @bbcode_to_md
post[:raw] = post[:raw].bbcode_to_md(false, {}, :disable, :quote) rescue post[:raw]
post[:raw] = begin
post[:raw].bbcode_to_md(false, {}, :disable, :quote)
rescue StandardError
post[:raw]
end
end
post[:like_count] ||= 0
post[:cooked] = pre_cook post[:raw]
@ -580,22 +672,22 @@ class BulkImport::Base
# [HTML]...[/HTML]
raw.gsub!(/\[HTML\]/i, "\n\n```html\n")
raw.gsub!(/\[\/HTML\]/i, "\n```\n\n")
raw.gsub!(%r{\[/HTML\]}i, "\n```\n\n")
# [PHP]...[/PHP]
raw.gsub!(/\[PHP\]/i, "\n\n```php\n")
raw.gsub!(/\[\/PHP\]/i, "\n```\n\n")
raw.gsub!(%r{\[/PHP\]}i, "\n```\n\n")
# [HIGHLIGHT="..."]
raw.gsub!(/\[HIGHLIGHT="?(\w+)"?\]/i) { "\n\n```#{$1.downcase}\n" }
# [CODE]...[/CODE]
# [HIGHLIGHT]...[/HIGHLIGHT]
raw.gsub!(/\[\/?CODE\]/i, "\n\n```\n\n")
raw.gsub!(/\[\/?HIGHLIGHT\]/i, "\n\n```\n\n")
raw.gsub!(%r{\[/?CODE\]}i, "\n\n```\n\n")
raw.gsub!(%r{\[/?HIGHLIGHT\]}i, "\n\n```\n\n")
# [SAMP]...[/SAMP]
raw.gsub!(/\[\/?SAMP\]/i, "`")
raw.gsub!(%r{\[/?SAMP\]}i, "`")
# replace all chevrons with HTML entities
# /!\ must be done /!\
@ -609,61 +701,61 @@ class BulkImport::Base
raw.gsub!(">", "&gt;")
raw.gsub!("\u2603", ">")
raw.gsub!(/\[\/?I\]/i, "*")
raw.gsub!(/\[\/?B\]/i, "**")
raw.gsub!(/\[\/?U\]/i, "")
raw.gsub!(%r{\[/?I\]}i, "*")
raw.gsub!(%r{\[/?B\]}i, "**")
raw.gsub!(%r{\[/?U\]}i, "")
raw.gsub!(/\[\/?RED\]/i, "")
raw.gsub!(/\[\/?BLUE\]/i, "")
raw.gsub!(%r{\[/?RED\]}i, "")
raw.gsub!(%r{\[/?BLUE\]}i, "")
raw.gsub!(/\[AUTEUR\].+?\[\/AUTEUR\]/im, "")
raw.gsub!(/\[VOIRMSG\].+?\[\/VOIRMSG\]/im, "")
raw.gsub!(/\[PSEUDOID\].+?\[\/PSEUDOID\]/im, "")
raw.gsub!(%r{\[AUTEUR\].+?\[/AUTEUR\]}im, "")
raw.gsub!(%r{\[VOIRMSG\].+?\[/VOIRMSG\]}im, "")
raw.gsub!(%r{\[PSEUDOID\].+?\[/PSEUDOID\]}im, "")
# [IMG]...[/IMG]
raw.gsub!(/(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[\/IMG\]\s*)+/im) { "\n\n#{$1}\n\n" }
raw.gsub!(%r{(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[/IMG\]\s*)+}im) { "\n\n#{$1}\n\n" }
# [IMG=url]
raw.gsub!(/\[IMG=([^\]]*)\]/im) { "\n\n#{$1}\n\n" }
# [URL=...]...[/URL]
raw.gsub!(/\[URL="?(.+?)"?\](.+?)\[\/URL\]/im) { "[#{$2.strip}](#{$1})" }
raw.gsub!(%r{\[URL="?(.+?)"?\](.+?)\[/URL\]}im) { "[#{$2.strip}](#{$1})" }
# [URL]...[/URL]
# [MP3]...[/MP3]
# [EMAIL]...[/EMAIL]
# [LEFT]...[/LEFT]
raw.gsub!(/\[\/?URL\]/i, "")
raw.gsub!(/\[\/?MP3\]/i, "")
raw.gsub!(/\[\/?EMAIL\]/i, "")
raw.gsub!(/\[\/?LEFT\]/i, "")
raw.gsub!(%r{\[/?URL\]}i, "")
raw.gsub!(%r{\[/?MP3\]}i, "")
raw.gsub!(%r{\[/?EMAIL\]}i, "")
raw.gsub!(%r{\[/?LEFT\]}i, "")
# [FONT=blah] and [COLOR=blah]
raw.gsub!(/\[FONT=.*?\](.*?)\[\/FONT\]/im, "\\1")
raw.gsub!(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, "\\1")
raw.gsub!(%r{\[FONT=.*?\](.*?)\[/FONT\]}im, "\\1")
raw.gsub!(%r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, "\\1")
raw.gsub!(/\[SIZE=.*?\](.*?)\[\/SIZE\]/im, "\\1")
raw.gsub!(/\[H=.*?\](.*?)\[\/H\]/im, "\\1")
raw.gsub!(%r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, "\\1")
raw.gsub!(%r{\[H=.*?\](.*?)\[/H\]}im, "\\1")
# [CENTER]...[/CENTER]
raw.gsub!(/\[CENTER\](.*?)\[\/CENTER\]/im, "\\1")
raw.gsub!(%r{\[CENTER\](.*?)\[/CENTER\]}im, "\\1")
# [INDENT]...[/INDENT]
raw.gsub!(/\[INDENT\](.*?)\[\/INDENT\]/im, "\\1")
raw.gsub!(/\[TABLE\](.*?)\[\/TABLE\]/im, "\\1")
raw.gsub!(/\[TR\](.*?)\[\/TR\]/im, "\\1")
raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, "\\1")
raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, "\\1")
raw.gsub!(%r{\[INDENT\](.*?)\[/INDENT\]}im, "\\1")
raw.gsub!(%r{\[TABLE\](.*?)\[/TABLE\]}im, "\\1")
raw.gsub!(%r{\[TR\](.*?)\[/TR\]}im, "\\1")
raw.gsub!(%r{\[TD\](.*?)\[/TD\]}im, "\\1")
raw.gsub!(%r{\[TD="?.*?"?\](.*?)\[/TD\]}im, "\\1")
# [STRIKE]
raw.gsub!(/\[STRIKE\]/i, "<s>")
raw.gsub!(/\[\/STRIKE\]/i, "</s>")
raw.gsub!(%r{\[/STRIKE\]}i, "</s>")
# [QUOTE]...[/QUOTE]
raw.gsub!(/\[QUOTE="([^\]]+)"\]/i) { "[QUOTE=#{$1}]" }
# Nested Quotes
raw.gsub!(/(\[\/?QUOTE.*?\])/mi) { |q| "\n#{q}\n" }
raw.gsub!(%r{(\[/?QUOTE.*?\])}mi) { |q| "\n#{q}\n" }
# raw.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { |quote|
# quote.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { "\n#{$1}\n" }
@ -686,28 +778,36 @@ class BulkImport::Base
end
# [YOUTUBE]<id>[/YOUTUBE]
raw.gsub!(/\[YOUTUBE\](.+?)\[\/YOUTUBE\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[DAILYMOTION\](.+?)\[\/DAILYMOTION\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" }
raw.gsub!(%r{\[YOUTUBE\](.+?)\[/YOUTUBE\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(%r{\[DAILYMOTION\](.+?)\[/DAILYMOTION\]}i) do
"\nhttps://www.dailymotion.com/video/#{$1}\n"
end
# [VIDEO=youtube;<id>]...[/VIDEO]
raw.gsub!(/\[VIDEO=YOUTUBE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" }
raw.gsub!(%r{\[VIDEO=YOUTUBE;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
raw.gsub!(%r{\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.dailymotion.com/video/#{$1}\n"
end
# [SPOILER=Some hidden stuff]SPOILER HERE!![/SPOILER]
raw.gsub!(/\[SPOILER="?(.+?)"?\](.+?)\[\/SPOILER\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" }
raw.gsub!(%r{\[SPOILER="?(.+?)"?\](.+?)\[/SPOILER\]}im) do
"\n#{$1}\n[spoiler]#{$2}[/spoiler]\n"
end
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
# (https://meta.discourse.org/t/phpbb-3-importer-old/17397)
raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list\]/im, '[ol]\1[/ol]')
raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list\]}im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list:o\]}im, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
raw.gsub!(/\[\*\]\n/, '')
raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')
raw.gsub!(/\[\*\]\n/, "")
raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]')
raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
raw.gsub!(/\[\*=1\]/, '')
raw.gsub!(/\[\*=1\]/, "")
raw
end
@ -728,7 +828,9 @@ class BulkImport::Base
imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil?
@raw_connection.put_copy_data columns.map { |c| processed[c] } unless processed[:skip]
rows_created += 1
print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created % 100 == 0
if rows_created % 100 == 0
print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)]
end
rescue => e
puts "\n"
puts "ERROR: #{e.message}"
@ -737,15 +839,14 @@ class BulkImport::Base
end
end
print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created > 0
if rows_created > 0
print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)]
end
id_mapping_method_name = "#{name}_id_from_imported_id".freeze
return unless respond_to?(id_mapping_method_name)
create_custom_fields(name, "id", imported_ids) do |imported_id|
{
record_id: send(id_mapping_method_name, imported_id),
value: imported_id,
}
{ record_id: send(id_mapping_method_name, imported_id), value: imported_id }
end
rescue => e
# FIXME: errors catched here stop the rest of the COPY
@ -755,7 +856,8 @@ class BulkImport::Base
def create_custom_fields(table, name, rows)
name = "import_#{name}"
sql = "COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN"
sql =
"COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
rows.each do |row|
next unless cf = yield(row)
@ -797,7 +899,7 @@ class BulkImport::Base
cooked = raw
# Convert YouTube URLs to lazyYT DOMs before being transformed into links
cooked.gsub!(/\nhttps\:\/\/www.youtube.com\/watch\?v=(\w+)\n/) do
cooked.gsub!(%r{\nhttps\://www.youtube.com/watch\?v=(\w+)\n}) do
video_id = $1
result = <<-HTML
<div class="lazyYT" data-youtube-id="#{video_id}" data-width="480" data-height="270" data-parameters="feature=oembed&amp;wmode=opaque"></div>
@ -807,7 +909,7 @@ class BulkImport::Base
cooked = @markdown.render(cooked).scrub.strip
cooked.gsub!(/\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[\/QUOTE\]/im) do
cooked.gsub!(%r{\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[/QUOTE\]}im) do
username, post_id, topic_id, quote = $1, $2, $3, $4
quote = quote.scrub.strip
@ -860,5 +962,4 @@ class BulkImport::Base
return text if @encoding == Encoding::UTF_8
text && text.encode(@encoding).force_encoding(Encoding::UTF_8)
end
end

File diff suppressed because it is too large.


@ -3,17 +3,16 @@
require_relative "base"
require "pg"
require "htmlentities"
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"
class BulkImport::PhpBB < BulkImport::Base
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "phpbb_"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "phpbb_"
def initialize
super
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"
database = ENV["DB_NAME"] || "flightaware"
password = ENV["DB_PASSWORD"] || "discourse"
@ -57,7 +56,7 @@ class BulkImport::PhpBB < BulkImport::Base
{
imported_id: row["group_id"],
name: normalize_text(row["group_name"]),
bio_raw: normalize_text(row["group_desc"])
bio_raw: normalize_text(row["group_desc"]),
}
end
end
@ -85,15 +84,28 @@ class BulkImport::PhpBB < BulkImport::Base
username: normalize_text(row["username"]),
email: row["user_email"],
created_at: Time.zone.at(row["user_regdate"].to_i),
last_seen_at: row["user_lastvisit"] == 0 ? Time.zone.at(row["user_regdate"].to_i) : Time.zone.at(row["user_lastvisit"].to_i),
last_seen_at:
(
if row["user_lastvisit"] == 0
Time.zone.at(row["user_regdate"].to_i)
else
Time.zone.at(row["user_lastvisit"].to_i)
end
),
trust_level: row["user_posts"] == 0 ? TrustLevel[0] : TrustLevel[1],
date_of_birth: parse_birthday(row["user_birthday"]),
primary_group_id: group_id_from_imported_id(row["group_id"])
primary_group_id: group_id_from_imported_id(row["group_id"]),
}
u[:ip_address] = row["user_ip"][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row["user_ip"].present?
if row["ban_start"]
u[:suspended_at] = Time.zone.at(row["ban_start"].to_i)
u[:suspended_till] = row["ban_end"].to_i > 0 ? Time.zone.at(row["ban_end"].to_i) : SUSPENDED_TILL
u[:suspended_till] = (
if row["ban_end"].to_i > 0
Time.zone.at(row["ban_end"].to_i)
else
SUSPENDED_TILL
end
)
end
u
end
@ -114,7 +126,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["user_id"],
imported_user_id: row["user_id"],
email: row["user_email"],
created_at: Time.zone.at(row["user_regdate"].to_i)
created_at: Time.zone.at(row["user_regdate"].to_i),
}
end
end
@ -149,7 +161,14 @@ class BulkImport::PhpBB < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row["user_id"]),
website: (URI.parse(row["user_website"]).to_s rescue nil),
website:
(
begin
URI.parse(row["user_website"]).to_s
rescue StandardError
nil
end
),
location: row["user_from"],
}
end
@ -158,17 +177,16 @@ class BulkImport::PhpBB < BulkImport::Base
def import_categories
puts "Importing categories..."
categories = psql_query(<<-SQL
categories = psql_query(<<-SQL).to_a
SELECT forum_id, parent_id, forum_name, forum_desc
FROM #{TABLE_PREFIX}forums
WHERE forum_id > #{@last_imported_category_id}
ORDER BY parent_id, left_id
SQL
).to_a
return if categories.empty?
parent_categories = categories.select { |c| c["parent_id"].to_i == 0 }
parent_categories = categories.select { |c| c["parent_id"].to_i == 0 }
children_categories = categories.select { |c| c["parent_id"].to_i != 0 }
puts "Importing parent categories..."
@ -176,7 +194,7 @@ class BulkImport::PhpBB < BulkImport::Base
{
imported_id: row["forum_id"],
name: normalize_text(row["forum_name"]),
description: normalize_text(row["forum_desc"])
description: normalize_text(row["forum_desc"]),
}
end
@ -186,7 +204,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["forum_id"],
name: normalize_text(row["forum_name"]),
description: normalize_text(row["forum_desc"]),
parent_category_id: category_id_from_imported_id(row["parent_id"])
parent_category_id: category_id_from_imported_id(row["parent_id"]),
}
end
end
@ -209,7 +227,7 @@ class BulkImport::PhpBB < BulkImport::Base
category_id: category_id_from_imported_id(row["forum_id"]),
user_id: user_id_from_imported_id(row["topic_poster"]),
created_at: Time.zone.at(row["topic_time"].to_i),
views: row["topic_views"]
views: row["topic_views"],
}
end
end
@ -261,7 +279,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["msg_id"].to_i + PRIVATE_OFFSET,
title: normalize_text(title),
user_id: user_id_from_imported_id(row["author_id"].to_i),
created_at: Time.zone.at(row["message_time"].to_i)
created_at: Time.zone.at(row["message_time"].to_i),
}
end
end
@ -271,13 +289,12 @@ class BulkImport::PhpBB < BulkImport::Base
allowed_users = []
psql_query(<<-SQL
psql_query(<<-SQL).each do |row|
SELECT msg_id, author_id, to_address
FROM #{TABLE_PREFIX}privmsgs
WHERE msg_id > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
ORDER BY msg_id
SQL
).each do |row|
next unless topic_id = topic_id_from_imported_id(row["msg_id"].to_i + PRIVATE_OFFSET)
user_ids = get_message_recipients(row["author_id"], row["to_address"])
@ -287,12 +304,7 @@ class BulkImport::PhpBB < BulkImport::Base
end
end
create_topic_allowed_users(allowed_users) do |row|
{
topic_id: row[0],
user_id: row[1]
}
end
create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } }
end
def import_private_posts
@ -316,13 +328,13 @@ class BulkImport::PhpBB < BulkImport::Base
topic_id: topic_id,
user_id: user_id_from_imported_id(row["author_id"].to_i),
created_at: Time.zone.at(row["message_time"].to_i),
raw: process_raw_text(row["message_text"])
raw: process_raw_text(row["message_text"]),
}
end
end
def get_message_recipients(from, to)
user_ids = to.split(':')
user_ids = to.split(":")
user_ids.map! { |u| u[2..-1].to_i }
user_ids.push(from.to_i)
user_ids.uniq!
@ -332,15 +344,29 @@ class BulkImport::PhpBB < BulkImport::Base
def extract_pm_title(title)
pm_title = CGI.unescapeHTML(title)
pm_title = title.gsub(/^Re\s*:\s*/i, "") rescue nil
pm_title =
begin
title.gsub(/^Re\s*:\s*/i, "")
rescue StandardError
nil
end
pm_title
end
def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end
def psql_query(sql)
@ -352,34 +378,36 @@ class BulkImport::PhpBB < BulkImport::Base
text = raw.dup
text = CGI.unescapeHTML(text)
text.gsub!(/:(?:\w{8})\]/, ']')
text.gsub!(/:(?:\w{8})\]/, "]")
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
# phpBB shortens link text like this, which breaks our markdown processing:
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
# convert list tags to ul and list=1 tags to ol
# list=a is not supported, so handle it like list=1
# list=9 and list=x have the same result as list=1 and list=a
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]')
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]')
# [QUOTE="<username>"] -- add newline
text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }
# [/QUOTE] -- add newline
text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" }
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/) do
text.gsub!(
/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/,
) do
smiley = $1
@smiley_map.fetch(smiley) do
# upload_smiley(smiley, $2, $3, $4) || smiley_as_text(smiley)
@ -405,33 +433,30 @@ class BulkImport::PhpBB < BulkImport::Base
def add_default_smilies
{
[':D', ':-D', ':grin:'] => ':smiley:',
[':)', ':-)', ':smile:'] => ':slight_smile:',
[';)', ';-)', ':wink:'] => ':wink:',
[':(', ':-(', ':sad:'] => ':frowning:',
[':o', ':-o', ':eek:'] => ':astonished:',
[':shock:'] => ':open_mouth:',
[':?', ':-?', ':???:'] => ':confused:',
['8-)', ':cool:'] => ':sunglasses:',
[':lol:'] => ':laughing:',
[':x', ':-x', ':mad:'] => ':angry:',
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
[':oops:'] => ':blush:',
[':cry:'] => ':cry:',
[':evil:'] => ':imp:',
[':twisted:'] => ':smiling_imp:',
[':roll:'] => ':unamused:',
[':!:'] => ':exclamation:',
[':?:'] => ':question:',
[':idea:'] => ':bulb:',
[':arrow:'] => ':arrow_right:',
[':|', ':-|'] => ':neutral_face:',
[':geek:'] => ':nerd:'
}.each do |smilies, emoji|
smilies.each { |smiley| @smiley_map[smiley] = emoji }
end
%w[:D :-D :grin:] => ":smiley:",
%w[:) :-) :smile:] => ":slight_smile:",
%w[;) ;-) :wink:] => ":wink:",
%w[:( :-( :sad:] => ":frowning:",
%w[:o :-o :eek:] => ":astonished:",
[":shock:"] => ":open_mouth:",
%w[:? :-? :???:] => ":confused:",
%w[8-) :cool:] => ":sunglasses:",
[":lol:"] => ":laughing:",
%w[:x :-x :mad:] => ":angry:",
%w[:P :-P :razz:] => ":stuck_out_tongue:",
[":oops:"] => ":blush:",
[":cry:"] => ":cry:",
[":evil:"] => ":imp:",
[":twisted:"] => ":smiling_imp:",
[":roll:"] => ":unamused:",
[":!:"] => ":exclamation:",
[":?:"] => ":question:",
[":idea:"] => ":bulb:",
[":arrow:"] => ":arrow_right:",
%w[:| :-|] => ":neutral_face:",
[":geek:"] => ":nerd:",
}.each { |smilies, emoji| smilies.each { |smiley| @smiley_map[smiley] = emoji } }
end
end
BulkImport::PhpBB.new.run


@ -8,7 +8,6 @@ require "htmlentities"
# NOTE: this importer expects a MySQL DB to directly connect to
class BulkImport::Vanilla < BulkImport::Base
VANILLA_DB = "dbname"
TABLE_PREFIX = "GDN_"
ATTACHMENTS_BASE_DIR = "/my/absolute/path/to/from_vanilla/uploads"
@ -20,13 +19,14 @@ class BulkImport::Vanilla < BulkImport::Base
def initialize
super
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
database: VANILLA_DB,
password: "",
reconnect: true
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
database: VANILLA_DB,
password: "",
reconnect: true,
)
@import_tags = false
begin
@ -88,10 +88,10 @@ class BulkImport::Vanilla < BulkImport::Base
end
def import_users
puts '', "Importing users..."
puts "", "Importing users..."
username = nil
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first["count"]
users = mysql_stream <<-SQL
SELECT UserID, Name, Title, Location, Email,
@ -103,26 +103,32 @@ class BulkImport::Vanilla < BulkImport::Base
SQL
create_users(users) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?
if ip_address = row['InsertIPAddress']&.split(',').try(:[], 0)
ip_address = nil unless (IPAddr.new(ip_address) rescue false)
if ip_address = row["InsertIPAddress"]&.split(",").try(:[], 0)
ip_address = nil unless (
begin
IPAddr.new(ip_address)
rescue StandardError
false
end
)
end
u = {
imported_id: row['UserID'],
email: row['Email'],
username: row['Name'],
name: row['Name'],
created_at: row['DateInserted'] == nil ? 0 : Time.zone.at(row['DateInserted']),
imported_id: row["UserID"],
email: row["Email"],
username: row["Name"],
name: row["Name"],
created_at: row["DateInserted"] == nil ? 0 : Time.zone.at(row["DateInserted"]),
registration_ip_address: ip_address,
last_seen_at: row['DateLastActive'] == nil ? 0 : Time.zone.at(row['DateLastActive']),
location: row['Location'],
admin: row['Admin'] > 0
last_seen_at: row["DateLastActive"] == nil ? 0 : Time.zone.at(row["DateLastActive"]),
location: row["Location"],
admin: row["Admin"] > 0,
}
if row["Banned"] > 0
u[:suspended_at] = Time.zone.at(row['DateInserted'])
u[:suspended_at] = Time.zone.at(row["DateInserted"])
u[:suspended_till] = SUSPENDED_TILL
end
u
@ -130,7 +136,7 @@ class BulkImport::Vanilla < BulkImport::Base
end
def import_user_emails
puts '', 'Importing user emails...'
puts "", "Importing user emails..."
users = mysql_stream <<-SQL
SELECT UserID, Name, Email, DateInserted
@ -141,20 +147,20 @@ class BulkImport::Vanilla < BulkImport::Base
SQL
create_user_emails(users) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?
{
imported_id: row["UserID"],
imported_user_id: row["UserID"],
email: row["Email"],
created_at: Time.zone.at(row["DateInserted"])
created_at: Time.zone.at(row["DateInserted"]),
}
end
end
def import_user_profiles
puts '', 'Importing user profiles...'
puts "", "Importing user profiles..."
user_profiles = mysql_stream <<-SQL
SELECT UserID, Name, Email, Location, About
@ -165,19 +171,19 @@ class BulkImport::Vanilla < BulkImport::Base
SQL
create_user_profiles(user_profiles) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?
{
user_id: user_id_from_imported_id(row["UserID"]),
location: row["Location"],
bio_raw: row["About"]
bio_raw: row["About"],
}
end
end
def import_user_stats
puts '', "Importing user stats..."
puts "", "Importing user stats..."
users = mysql_stream <<-SQL
SELECT UserID, CountDiscussions, CountComments, DateInserted
@ -190,14 +196,14 @@ class BulkImport::Vanilla < BulkImport::Base
now = Time.zone.now
create_user_stats(users) do |row|
next unless @users[row['UserID'].to_i] # shouldn't need this but it can be NULL :<
next unless @users[row["UserID"].to_i] # shouldn't need this but it can be NULL :<
{
imported_id: row['UserID'],
imported_user_id: row['UserID'],
new_since: Time.zone.at(row['DateInserted'] || now),
post_count: row['CountComments'] || 0,
topic_count: row['CountDiscussions'] || 0
imported_id: row["UserID"],
imported_user_id: row["UserID"],
new_since: Time.zone.at(row["DateInserted"] || now),
post_count: row["CountComments"] || 0,
topic_count: row["CountDiscussions"] || 0,
}
end
end
@ -215,7 +221,10 @@ class BulkImport::Vanilla < BulkImport::Base
next unless u.custom_fields["import_id"]
r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first
r =
mysql_query(
"SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields["import_id"]};",
).first
next if r.nil?
photo = r["photo"]
next unless photo.present?
@ -229,9 +238,9 @@ class BulkImport::Vanilla < BulkImport::Base
photo_real_filename = nil
parts = photo.squeeze("/").split("/")
if parts[0] =~ /^[a-z0-9]{2}:/
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join("/")}".squeeze("/")
elsif parts[0] == "~cf"
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join("/")}".squeeze("/")
else
puts "UNKNOWN FORMAT: #{photo}"
next
@ -272,75 +281,86 @@ class BulkImport::Vanilla < BulkImport::Base
count = 0
# https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg
cdn_regex = /https:\/\/us.v-cdn.net\/1234567\/uploads\/(\S+\/(\w|-)+.\w+)/i
cdn_regex = %r{https://us.v-cdn.net/1234567/uploads/(\S+/(\w|-)+.\w+)}i
# [attachment=10109:Screen Shot 2012-04-01 at 3.47.35 AM.png]
attachment_regex = /\[attachment=(\d+):(.*?)\]/i
Post.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'").find_each do |post|
count += 1
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
new_raw = post.raw.dup
Post
.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'")
.find_each do |post|
count += 1
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
new_raw = post.raw.dup
new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s)
attachment_id = matches[1]
file_name = matches[2]
next unless attachment_id
new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s)
attachment_id = matches[1]
file_name = matches[2]
next unless attachment_id
r = mysql_query("SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};").first
next if r.nil?
path = r["Path"]
name = r["Name"]
next unless path.present?
r =
mysql_query(
"SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};",
).first
next if r.nil?
path = r["Path"]
name = r["Name"]
next unless path.present?
path.gsub!("s3://content/", "")
path.gsub!("s3://uploads/", "")
file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"
path.gsub!("s3://content/", "")
path.gsub!("s3://uploads/", "")
file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
# upload.url
filename = name || file_name || File.basename(file_path)
html_for_upload(upload, normalize_text(filename))
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
# upload.url
filename = name || file_name || File.basename(file_path)
html_for_upload(upload, normalize_text(filename))
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
end
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
else
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
end
new_raw.gsub!(cdn_regex) do |s|
matches = cdn_regex.match(s)
attachment_id = matches[1]
new_raw.gsub!(cdn_regex) do |s|
matches = cdn_regex.match(s)
attachment_id = matches[1]
file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"
file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
upload.url
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
upload.url
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
end
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
else
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
end
if new_raw != post.raw
begin
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, skip_revision: true, skip_validations: true, bypass_bump: true)
rescue
puts "PostRevisor error for #{post.id}"
post.raw = new_raw
post.save(validate: false)
if new_raw != post.raw
begin
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
skip_revision: true,
skip_validations: true,
bypass_bump: true,
)
rescue StandardError
puts "PostRevisor error for #{post.id}"
post.raw = new_raw
post.save(validate: false)
end
end
end
end
end
end
@ -352,7 +372,7 @@ class BulkImport::Vanilla < BulkImport::Base
# Otherwise, the file exists but with a prefix:
# The p prefix seems to be the full file, so try to find that one first.
['p', 't', 'n'].each do |prefix|
%w[p t n].each do |prefix|
full_guess = File.join(path, "#{prefix}#{base_guess}")
return full_guess if File.exist?(full_guess)
end
@ -364,26 +384,30 @@ class BulkImport::Vanilla < BulkImport::Base
def import_categories
puts "", "Importing categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT CategoryID, ParentCategoryID, Name, Description, Sort
FROM #{TABLE_PREFIX}Category
WHERE CategoryID > 0
ORDER BY Sort, CategoryID
").to_a
",
).to_a
# Throw the -1 level categories away since they contain no topics.
# Use the next level as root categories.
top_level_categories = categories.select { |c| c["ParentCategoryID"].blank? || c['ParentCategoryID'] == -1 }
top_level_categories =
categories.select { |c| c["ParentCategoryID"].blank? || c["ParentCategoryID"] == -1 }
# Depth = 2
create_categories(top_level_categories) do |category|
next if category_id_from_imported_id(category['CategoryID'])
next if category_id_from_imported_id(category["CategoryID"])
{
imported_id: category['CategoryID'],
name: CGI.unescapeHTML(category['Name']),
description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil,
position: category['Sort']
imported_id: category["CategoryID"],
name: CGI.unescapeHTML(category["Name"]),
description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil,
position: category["Sort"],
}
end
@ -393,39 +417,39 @@ class BulkImport::Vanilla < BulkImport::Base
# Depth = 3
create_categories(subcategories) do |category|
next if category_id_from_imported_id(category['CategoryID'])
next if category_id_from_imported_id(category["CategoryID"])
{
imported_id: category['CategoryID'],
parent_category_id: category_id_from_imported_id(category['ParentCategoryID']),
name: CGI.unescapeHTML(category['Name']),
description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil,
position: category['Sort']
imported_id: category["CategoryID"],
parent_category_id: category_id_from_imported_id(category["ParentCategoryID"]),
name: CGI.unescapeHTML(category["Name"]),
description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil,
position: category["Sort"],
}
end
subcategory_ids = Set.new(subcategories.map { |c| c['CategoryID'] })
subcategory_ids = Set.new(subcategories.map { |c| c["CategoryID"] })
# Depth 4 and 5 need to be tags
categories.each do |c|
next if c['ParentCategoryID'] == -1
next if top_level_category_ids.include?(c['CategoryID'])
next if subcategory_ids.include?(c['CategoryID'])
next if c["ParentCategoryID"] == -1
next if top_level_category_ids.include?(c["CategoryID"])
next if subcategory_ids.include?(c["CategoryID"])
# Find a depth 3 category for topics in this category
parent = c
while !parent.nil? && !subcategory_ids.include?(parent['CategoryID'])
parent = categories.find { |subcat| subcat['CategoryID'] == parent['ParentCategoryID'] }
while !parent.nil? && !subcategory_ids.include?(parent["CategoryID"])
parent = categories.find { |subcat| subcat["CategoryID"] == parent["ParentCategoryID"] }
end
if parent
tag_name = DiscourseTagging.clean_tag(c['Name'])
@category_mappings[c['CategoryID']] = {
category_id: category_id_from_imported_id(parent['CategoryID']),
tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
tag_name = DiscourseTagging.clean_tag(c["Name"])
@category_mappings[c["CategoryID"]] = {
category_id: category_id_from_imported_id(parent["CategoryID"]),
tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name),
}
else
puts '', "Couldn't find a category for #{c['CategoryID']} '#{c['Name']}'!"
puts "", "Couldn't find a category for #{c["CategoryID"]} '#{c["Name"]}'!"
end
end
end
@ -433,7 +457,8 @@ class BulkImport::Vanilla < BulkImport::Base
def import_topics
puts "", "Importing topics..."
topics_sql = "SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format
topics_sql =
"SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format
FROM #{TABLE_PREFIX}Discussion
WHERE DiscussionID > #{@last_imported_topic_id}
ORDER BY DiscussionID ASC"
@ -442,11 +467,12 @@ class BulkImport::Vanilla < BulkImport::Base
data = {
imported_id: row["DiscussionID"],
title: normalize_text(row["Name"]),
category_id: category_id_from_imported_id(row["CategoryID"]) ||
@category_mappings[row["CategoryID"]].try(:[], :category_id),
category_id:
category_id_from_imported_id(row["CategoryID"]) ||
@category_mappings[row["CategoryID"]].try(:[], :category_id),
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted']),
pinned_at: row['Announce'] == 0 ? nil : Time.zone.at(row['DateInserted'])
created_at: Time.zone.at(row["DateInserted"]),
pinned_at: row["Announce"] == 0 ? nil : Time.zone.at(row["DateInserted"]),
}
(data[:user_id].present? && data[:title].present?) ? data : false
end
@ -455,46 +481,45 @@ class BulkImport::Vanilla < BulkImport::Base
create_posts(mysql_stream(topics_sql)) do |row|
data = {
imported_id: "d-" + row['DiscussionID'].to_s,
topic_id: topic_id_from_imported_id(row['DiscussionID']),
imported_id: "d-" + row["DiscussionID"].to_s,
topic_id: topic_id_from_imported_id(row["DiscussionID"]),
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
data[:topic_id].present? ? data : false
end
puts '', 'converting deep categories to tags...'
puts "", "converting deep categories to tags..."
create_topic_tags(mysql_stream(topics_sql)) do |row|
next unless mapping = @category_mappings[row['CategoryID']]
next unless mapping = @category_mappings[row["CategoryID"]]
{
tag_id: mapping[:tag].id,
topic_id: topic_id_from_imported_id(row["DiscussionID"])
}
{ tag_id: mapping[:tag].id, topic_id: topic_id_from_imported_id(row["DiscussionID"]) }
end
end
def import_posts
puts "", "Importing posts..."
posts = mysql_stream(
"SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format
posts =
mysql_stream(
"SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format
FROM #{TABLE_PREFIX}Comment
WHERE CommentID > #{@last_imported_post_id}
ORDER BY CommentID ASC")
ORDER BY CommentID ASC",
)
create_posts(posts) do |row|
next unless topic_id = topic_id_from_imported_id(row['DiscussionID'])
next if row['Body'].blank?
next unless topic_id = topic_id_from_imported_id(row["DiscussionID"])
next if row["Body"].blank?
{
imported_id: row['CommentID'],
imported_id: row["CommentID"],
topic_id: topic_id,
user_id: user_id_from_imported_id(row['InsertUserID']),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
end
end
@ -505,31 +530,31 @@ class BulkImport::Vanilla < BulkImport::Base
tag_mapping = {}
mysql_query("SELECT TagID, Name FROM #{TABLE_PREFIX}Tag").each do |row|
tag_name = DiscourseTagging.clean_tag(row['Name'])
tag_name = DiscourseTagging.clean_tag(row["Name"])
tag = Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
tag_mapping[row['TagID']] = tag.id
tag_mapping[row["TagID"]] = tag.id
end
tags = mysql_query(
"SELECT TagID, DiscussionID
tags =
mysql_query(
"SELECT TagID, DiscussionID
FROM #{TABLE_PREFIX}TagDiscussion
WHERE DiscussionID > #{@last_imported_topic_id}
ORDER BY DateInserted")
ORDER BY DateInserted",
)
create_topic_tags(tags) do |row|
next unless topic_id = topic_id_from_imported_id(row['DiscussionID'])
next unless topic_id = topic_id_from_imported_id(row["DiscussionID"])
{
topic_id: topic_id,
tag_id: tag_mapping[row['TagID']]
}
{ topic_id: topic_id, tag_id: tag_mapping[row["TagID"]] }
end
end
def import_private_topics
puts "", "Importing private topics..."
topics_sql = "SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID
topics_sql =
"SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID
FROM #{TABLE_PREFIX}Conversation c, #{TABLE_PREFIX}ConversationMessage m
WHERE c.FirstMessageID = m.MessageID
AND c.ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET}
@ -539,9 +564,10 @@ class BulkImport::Vanilla < BulkImport::Base
{
archetype: Archetype.private_message,
imported_id: row["ConversationID"] + PRIVATE_OFFSET,
title: row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}",
title:
row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}",
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted'])
created_at: Time.zone.at(row["DateInserted"]),
}
end
end
@ -549,7 +575,8 @@ class BulkImport::Vanilla < BulkImport::Base
def import_topic_allowed_users
puts "", "importing topic_allowed_users..."
topic_allowed_users_sql = "
topic_allowed_users_sql =
"
SELECT ConversationID, UserID
FROM #{TABLE_PREFIX}UserConversation
WHERE Deleted = 0
@ -559,45 +586,43 @@ class BulkImport::Vanilla < BulkImport::Base
added = 0
create_topic_allowed_users(mysql_stream(topic_allowed_users_sql)) do |row|
next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET)
next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET)
next unless user_id = user_id_from_imported_id(row["UserID"])
added += 1
{
topic_id: topic_id,
user_id: user_id,
}
{ topic_id: topic_id, user_id: user_id }
end
puts '', "Added #{added} topic_allowed_users records."
puts "", "Added #{added} topic_allowed_users records."
end
def import_private_posts
puts "", "importing private replies..."
private_posts_sql = "
private_posts_sql =
"
SELECT ConversationID, MessageID, Body, InsertUserID, DateInserted, Format
FROM GDN_ConversationMessage
WHERE ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET}
ORDER BY ConversationID ASC, MessageID ASC"
create_posts(mysql_stream(private_posts_sql)) do |row|
next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET)
next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET)
{
imported_id: row['MessageID'] + PRIVATE_OFFSET,
imported_id: row["MessageID"] + PRIVATE_OFFSET,
topic_id: topic_id,
user_id: user_id_from_imported_id(row['InsertUserID']),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
end
end
# TODO: too slow
def create_permalinks
puts '', 'Creating permalinks...', ''
puts "", "Creating permalinks...", ""
puts ' User pages...'
puts " User pages..."
start = Time.now
count = 0
@ -606,21 +631,23 @@ class BulkImport::Vanilla < BulkImport::Base
sql = "COPY permalinks (url, created_at, updated_at, external_url) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
User.includes(:_custom_fields).find_each do |u|
count += 1
ucf = u.custom_fields
if ucf && ucf["import_id"]
vanilla_username = ucf["import_username"] || u.username
@raw_connection.put_copy_data(
["profile/#{vanilla_username}", now, now, "/users/#{u.username}"]
)
end
User
.includes(:_custom_fields)
.find_each do |u|
count += 1
ucf = u.custom_fields
if ucf && ucf["import_id"]
vanilla_username = ucf["import_username"] || u.username
@raw_connection.put_copy_data(
["profile/#{vanilla_username}", now, now, "/users/#{u.username}"],
)
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
end
puts '', '', ' Topics and posts...'
puts "", "", " Topics and posts..."
start = Time.now
count = 0
@ -628,38 +655,36 @@ class BulkImport::Vanilla < BulkImport::Base
sql = "COPY permalinks (url, topic_id, post_id, created_at, updated_at) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
Post.includes(:_custom_fields).find_each do |post|
count += 1
pcf = post.custom_fields
if pcf && pcf["import_id"]
topic = post.topic
if topic.present?
id = pcf["import_id"].split('-').last
if post.post_number == 1
slug = Slug.for(topic.title) # probably matches what vanilla would do...
@raw_connection.put_copy_data(
["discussion/#{id}/#{slug}", topic.id, nil, now, now]
)
else
@raw_connection.put_copy_data(
["discussion/comment/#{id}", nil, post.id, now, now]
)
Post
.includes(:_custom_fields)
.find_each do |post|
count += 1
pcf = post.custom_fields
if pcf && pcf["import_id"]
topic = post.topic
if topic.present?
id = pcf["import_id"].split("-").last
if post.post_number == 1
slug = Slug.for(topic.title) # probably matches what vanilla would do...
@raw_connection.put_copy_data(["discussion/#{id}/#{slug}", topic.id, nil, now, now])
else
@raw_connection.put_copy_data(["discussion/comment/#{id}", nil, post.id, now, now])
end
end
end
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
end
end
def clean_up(raw, format)
raw.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
raw.gsub!(/<(.+)>&nbsp;<\/\1>/, "\n\n")
raw.gsub!(%r{<(.+)>&nbsp;</\1>}, "\n\n")
html =
if format == 'Html'
if format == "Html"
raw
else
markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML, autolink: true, tables: true)
@ -668,29 +693,23 @@ class BulkImport::Vanilla < BulkImport::Base
doc = Nokogiri::HTML5.fragment(html)
doc.css("blockquote").each do |bq|
name = bq["rel"]
user = User.find_by(name: name)
bq.replace %{<br>[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]<br>}
end
doc
.css("blockquote")
.each do |bq|
name = bq["rel"]
user = User.find_by(name: name)
bq.replace %{<br>[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]<br>}
end
doc.css("font").reverse.each do |f|
f.replace f.inner_html
end
doc.css("font").reverse.each { |f| f.replace f.inner_html }
doc.css("span").reverse.each do |f|
f.replace f.inner_html
end
doc.css("span").reverse.each { |f| f.replace f.inner_html }
doc.css("sub").reverse.each do |f|
f.replace f.inner_html
end
doc.css("sub").reverse.each { |f| f.replace f.inner_html }
doc.css("u").reverse.each do |f|
f.replace f.inner_html
end
doc.css("u").reverse.each { |f| f.replace f.inner_html }
markdown = format == 'Html' ? ReverseMarkdown.convert(doc.to_html) : doc.to_html
markdown = format == "Html" ? ReverseMarkdown.convert(doc.to_html) : doc.to_html
markdown.gsub!(/\[QUOTE="([^;]+);c-(\d+)"\]/i) { "[QUOTE=#{$1};#{$2}]" }
markdown = process_raw_text(markdown)
@ -702,31 +721,31 @@ class BulkImport::Vanilla < BulkImport::Base
text = raw.dup
text = CGI.unescapeHTML(text)
text.gsub!(/:(?:\w{8})\]/, ']')
text.gsub!(/:(?:\w{8})\]/, "]")
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
# phpBB shortens link text like this, which breaks our markdown processing:
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
# convert list tags to ul and list=1 tags to ol
# list=a is not supported, so handle it like list=1
# list=9 and list=x have the same result as list=1 and list=a
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]')
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]')
# [QUOTE="<username>"] -- add newline
text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }
# [/QUOTE] -- add newline
text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" }
text
end
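A minimal before/after sketch of the list conversions above, on hypothetical phpBB markup:

text = "[list][*]one[/*:m][*]two[/*:m][/list:u]"
# after the gsub! calls above:
# => "[ul][li]one[/li][li]two[/li][/ul]"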
@ -742,7 +761,6 @@ class BulkImport::Vanilla < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end
end
BulkImport::Vanilla.new.start

View File
@ -7,43 +7,42 @@ require "htmlentities"
require "parallel"
class BulkImport::VBulletin < BulkImport::Base
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "vb_"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "vb_"
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/shared/import/data/attachments'
AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
ATTACHMENT_DIR ||= ENV["ATTACHMENT_DIR"] || "/shared/import/data/attachments"
AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars"
def initialize
super
host = ENV["DB_HOST"] || "localhost"
host = ENV["DB_HOST"] || "localhost"
username = ENV["DB_USERNAME"] || "root"
password = ENV["DB_PASSWORD"]
database = ENV["DB_NAME"] || "vbulletin"
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"
@html_entities = HTMLEntities.new
@encoding = CHARSET_MAP[charset]
@client = Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true
)
@client =
Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true,
)
@client.query_options.merge!(as: :array, cache_rows: false)
@has_post_thanks = mysql_query(<<-SQL
@has_post_thanks = mysql_query(<<-SQL).to_a.count > 0
SELECT `COLUMN_NAME`
FROM `INFORMATION_SCHEMA`.`COLUMNS`
WHERE `TABLE_SCHEMA`='#{database}'
AND `TABLE_NAME`='user'
AND `COLUMN_NAME` LIKE 'post_thanks_%'
SQL
).to_a.count > 0
@user_ids_by_email = {}
end
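The mysql_query(<<-SQL).to_a.count > 0 shape used above (and throughout this commit) relies on Ruby allowing a method chain directly on the heredoc opener; a minimal standalone example:

lines = <<-TEXT.split("\n").map(&:strip)
  first
  second
TEXT
# => ["first", "second"]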
@ -95,7 +94,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_groups
puts '', "Importing groups..."
puts "", "Importing groups..."
groups = mysql_stream <<-SQL
SELECT usergroupid, title, description, usertitle
@ -115,7 +114,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_users
puts '', "Importing users..."
puts "", "Importing users..."
users = mysql_stream <<-SQL
SELECT u.userid, username, email, joindate, birthday, ipaddress, u.usergroupid, bandate, liftdate
@ -145,7 +144,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_user_emails
puts '', "Importing user emails..."
puts "", "Importing user emails..."
users = mysql_stream <<-SQL
SELECT u.userid, email, joindate
@ -155,7 +154,7 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_user_emails(users) do |row|
user_id, email = row[0 .. 1]
user_id, email = row[0..1]
@user_ids_by_email[email.downcase] ||= []
user_ids = @user_ids_by_email[email.downcase] << user_id
@ -170,7 +169,7 @@ class BulkImport::VBulletin < BulkImport::Base
imported_id: user_id,
imported_user_id: user_id,
email: email,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
@ -179,7 +178,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_user_stats
puts '', "Importing user stats..."
puts "", "Importing user stats..."
users = mysql_stream <<-SQL
SELECT u.userid, joindate, posts, COUNT(t.threadid) AS threads, p.dateline
@ -199,7 +198,7 @@ class BulkImport::VBulletin < BulkImport::Base
new_since: Time.zone.at(row[1]),
post_count: row[2],
topic_count: row[3],
first_post_created_at: row[4] && Time.zone.at(row[4])
first_post_created_at: row[4] && Time.zone.at(row[4]),
}
if @has_post_thanks
@ -212,7 +211,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_group_users
puts '', "Importing group users..."
puts "", "Importing group users..."
group_users = mysql_stream <<-SQL
SELECT usergroupid, userid
@ -221,15 +220,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_group_users(group_users) do |row|
{
group_id: group_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
}
{ group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) }
end
end
def import_user_passwords
puts '', "Importing user passwords..."
puts "", "Importing user passwords..."
user_passwords = mysql_stream <<-SQL
SELECT userid, password
@ -239,15 +235,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_custom_fields("user", "password", user_passwords) do |row|
{
record_id: user_id_from_imported_id(row[0]),
value: row[1],
}
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
end
end
def import_user_salts
puts '', "Importing user salts..."
puts "", "Importing user salts..."
user_salts = mysql_stream <<-SQL
SELECT userid, salt
@ -258,15 +251,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_custom_fields("user", "salt", user_salts) do |row|
{
record_id: user_id_from_imported_id(row[0]),
value: row[1],
}
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
end
end
def import_user_profiles
puts '', "Importing user profiles..."
puts "", "Importing user profiles..."
user_profiles = mysql_stream <<-SQL
SELECT userid, homepage, profilevisits
@ -278,16 +268,23 @@ class BulkImport::VBulletin < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row[0]),
website: (URI.parse(row[1]).to_s rescue nil),
website:
(
begin
URI.parse(row[1]).to_s
rescue StandardError
nil
end
),
views: row[2],
}
end
end
def import_categories
puts '', "Importing categories..."
puts "", "Importing categories..."
categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
select
forumid,
parentid,
@ -311,23 +308,20 @@ class BulkImport::VBulletin < BulkImport::Base
from forum
order by forumid
SQL
).to_a
return if categories.empty?
parent_categories = categories.select { |c| c[1] == -1 }
parent_categories = categories.select { |c| c[1] == -1 }
children_categories = categories.select { |c| c[1] != -1 }
parent_category_ids = Set.new parent_categories.map { |c| c[0] }
# cut down the tree to only 2 levels of categories
children_categories.each do |cc|
until parent_category_ids.include?(cc[1])
cc[1] = categories.find { |c| c[0] == cc[1] }[1]
end
cc[1] = categories.find { |c| c[0] == cc[1] }[1] until parent_category_ids.include?(cc[1])
end
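# e.g. with forums 10 (root) <- 20 <- 30, the row for 30 starts as [30, 20, ...] and is
# walked up one level per iteration until its parentid (20 -> 10) is a root, ending as [30, 10, ...]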
puts '', "Importing parent categories..."
puts "", "Importing parent categories..."
create_categories(parent_categories) do |row|
{
imported_id: row[0],
@ -337,7 +331,7 @@ class BulkImport::VBulletin < BulkImport::Base
}
end
puts '', "Importing children categories..."
puts "", "Importing children categories..."
create_categories(children_categories) do |row|
{
imported_id: row[0],
@ -350,7 +344,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_topics
puts '', "Importing topics..."
puts "", "Importing topics..."
topics = mysql_stream <<-SQL
SELECT threadid, title, forumid, postuserid, open, dateline, views, visible, sticky
@ -381,7 +375,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_posts
puts '', "Importing posts..."
puts "", "Importing posts..."
posts = mysql_stream <<-SQL
SELECT postid, p.threadid, parentid, userid, p.dateline, p.visible, pagetext
@ -396,7 +390,8 @@ class BulkImport::VBulletin < BulkImport::Base
create_posts(posts) do |row|
topic_id = topic_id_from_imported_id(row[1])
replied_post_topic_id = topic_id_from_imported_post_id(row[2])
reply_to_post_number = topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
reply_to_post_number =
topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
post = {
imported_id: row[0],
@ -415,7 +410,7 @@ class BulkImport::VBulletin < BulkImport::Base
def import_likes
return unless @has_post_thanks
puts '', "Importing likes..."
puts "", "Importing likes..."
@imported_likes = Set.new
@last_imported_post_id = 0
@ -438,13 +433,13 @@ class BulkImport::VBulletin < BulkImport::Base
post_id: post_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
post_action_type_id: 2,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
def import_private_topics
puts '', "Importing private topics..."
puts "", "Importing private topics..."
@imported_topics = {}
@ -473,34 +468,31 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_topic_allowed_users
puts '', "Importing topic allowed users..."
puts "", "Importing topic allowed users..."
allowed_users = Set.new
mysql_stream(<<-SQL
mysql_stream(<<-SQL).each do |row|
SELECT pmtextid, touserarray
FROM #{TABLE_PREFIX}pmtext
WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
ORDER BY pmtextid
SQL
).each do |row|
next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET)
row[1].scan(/i:(\d+)/).flatten.each do |id|
next unless user_id = user_id_from_imported_id(id)
allowed_users << [topic_id, user_id]
end
row[1]
.scan(/i:(\d+)/)
.flatten
.each do |id|
next unless user_id = user_id_from_imported_id(id)
allowed_users << [topic_id, user_id]
end
end
create_topic_allowed_users(allowed_users) do |row|
{
topic_id: row[0],
user_id: row[1],
}
end
create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } }
end
def import_private_posts
puts '', "Importing private posts..."
puts "", "Importing private posts..."
posts = mysql_stream <<-SQL
SELECT pmtextid, title, fromuserid, touserarray, dateline, message
@ -527,7 +519,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def create_permalink_file
puts '', 'Creating Permalink File...', ''
puts "", "Creating Permalink File...", ""
total = Topic.listable_topics.count
start = Time.now
@ -538,9 +530,9 @@ class BulkImport::VBulletin < BulkImport::Base
i += 1
pcf = topic.posts.includes(:_custom_fields).where(post_number: 1).first.custom_fields
if pcf && pcf["import_id"]
id = pcf["import_id"].split('-').last
id = pcf["import_id"].split("-").last
f.print [ "XXX#{id} YYY#{topic.id}" ].to_csv
f.print ["XXX#{id} YYY#{topic.id}"].to_csv
print "\r%7d/%7d - %6d/sec" % [i, total, i.to_f / (Time.now - start)] if i % 5000 == 0
end
end
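Each line in the permalink file above pairs the imported thread id with the new topic id; for example, with hypothetical ids:

require "csv"
["XXX9876 YYY123"].to_csv # => "XXX9876 YYY123\n"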
@ -549,7 +541,8 @@ class BulkImport::VBulletin < BulkImport::Base
# find the uploaded file information from the db
def find_upload(post, attachment_id)
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename
sql =
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename
FROM #{TABLE_PREFIX}attachment a
WHERE a.attachmentid = #{attachment_id}"
results = mysql_query(sql)
@ -563,9 +556,10 @@ class BulkImport::VBulletin < BulkImport::Base
user_id = row[1]
db_filename = row[2]
filename = File.join(ATTACHMENT_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
filename =
File.join(ATTACHMENT_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach")
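# e.g. user_id 12345 and attachment 678 resolve to "<ATTACHMENT_DIR>/1/2/3/4/5/678.attach"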
real_filename = db_filename
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
unless File.exist?(filename)
puts "Attachment file #{row.inspect} doesn't exist"
@ -588,7 +582,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
RateLimiter.disable
current_count = 0
@ -596,7 +590,7 @@ class BulkImport::VBulletin < BulkImport::Base
success_count = 0
fail_count = 0
attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
Post.find_each do |post|
current_count += 1
@ -618,7 +612,12 @@ class BulkImport::VBulletin < BulkImport::Base
end
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin')
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from vBulletin",
)
end
success_count += 1
@ -639,7 +638,7 @@ class BulkImport::VBulletin < BulkImport::Base
Dir.foreach(AVATAR_DIR) do |item|
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
next if item == ('.') || item == ('..') || item == ('.DS_Store')
next if item == (".") || item == ("..") || item == (".DS_Store")
next unless item =~ /avatar(\d+)_(\d).gif/
scan = item.scan(/avatar(\d+)_(\d).gif/)
next unless scan[0][0].present?
@ -671,11 +670,10 @@ class BulkImport::VBulletin < BulkImport::Base
def import_signatures
puts "Importing user signatures..."
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first[0].to_i
SELECT COUNT(userid) count
FROM #{TABLE_PREFIX}sigparsed
SQL
).first[0].to_i
current_count = 0
user_signatures = mysql_stream <<-SQL
@ -695,13 +693,20 @@ class BulkImport::VBulletin < BulkImport::Base
next unless u.present?
# cannot hold dupes
UserCustomField.where(user_id: u.id, name: ["see_signatures", "signature_raw", "signature_cooked"]).destroy_all
UserCustomField.where(
user_id: u.id,
name: %w[see_signatures signature_raw signature_cooked],
).destroy_all
user_sig.gsub!(/\[\/?sigpic\]/i, "")
user_sig.gsub!(%r{\[/?sigpic\]}i, "")
UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true)
UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig)
UserCustomField.create!(user_id: u.id, name: "signature_cooked", value: PrettyText.cook(user_sig, omit_nofollow: false))
UserCustomField.create!(
user_id: u.id,
name: "signature_cooked",
value: PrettyText.cook(user_sig, omit_nofollow: false),
)
end
end
@ -710,15 +715,15 @@ class BulkImport::VBulletin < BulkImport::Base
total_count = 0
duplicated = {}
@user_ids_by_email.
select { |e, ids| ids.count > 1 }.
each_with_index do |(email, ids), i|
duplicated[email] = [ ids, i ]
@user_ids_by_email
.select { |e, ids| ids.count > 1 }
.each_with_index do |(email, ids), i|
duplicated[email] = [ids, i]
count += 1
total_count += ids.count
end
puts '', "Merging #{total_count} duplicated users across #{count} distinct emails..."
puts "", "Merging #{total_count} duplicated users across #{count} distinct emails..."
start = Time.now
@ -727,14 +732,15 @@ class BulkImport::VBulletin < BulkImport::Base
next unless email.presence
# queried one by one to ensure ordering
first, *rest = user_ids.map do |id|
UserCustomField.includes(:user).find_by!(name: 'import_id', value: id).user
end
first, *rest =
user_ids.map do |id|
UserCustomField.includes(:user).find_by!(name: "import_id", value: id).user
end
rest.each do |dup|
UserMerger.new(dup, first).merge!
first.reload
printf '.'
printf "."
end
print "\n%6d/%6d - %6d/sec" % [i, count, i.to_f / (Time.now - start)] if i % 10 == 0
@ -744,13 +750,11 @@ class BulkImport::VBulletin < BulkImport::Base
end
def save_duplicated_users
File.open('duplicated_users.json', 'w+') do |f|
f.puts @user_ids_by_email.to_json
end
File.open("duplicated_users.json", "w+") { |f| f.puts @user_ids_by_email.to_json }
end
def read_duplicated_users
@user_ids_by_email = JSON.parse File.read('duplicated_users.json')
@user_ids_by_email = JSON.parse File.read("duplicated_users.json")
end
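The file written and read above is simply the email-to-ids map serialized as JSON, e.g. with hypothetical data:

require "json"
{ "someone@example.com" => [3, 17] }.to_json # => "{\"someone@example.com\":[3,17]}"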
def extract_pm_title(title)
@ -759,17 +763,26 @@ class BulkImport::VBulletin < BulkImport::Base
def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end
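For illustration, the helper above parses vBulletin's "%m-%d-%Y" birthday strings and clamps years below 1904 (inputs are hypothetical):

parse_birthday("08-24-1985") # => Date 1985-08-24
parse_birthday("01-01-1890") # => Date 1904-01-01 (year clamped)
parse_birthday("")           # => nil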
def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
@ -782,7 +795,6 @@ class BulkImport::VBulletin < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end
end
BulkImport::VBulletin.new.run

View File
@ -5,47 +5,56 @@ require "cgi"
require "set"
require "mysql2"
require "htmlentities"
require 'ruby-bbcode-to-md'
require 'find'
require "ruby-bbcode-to-md"
require "find"
class BulkImport::VBulletin5 < BulkImport::Base
DB_PREFIX = ""
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
ATTACH_DIR ||= ENV['ATTACH_DIR'] || '/shared/import/data/attachments'
AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
ATTACH_DIR ||= ENV["ATTACH_DIR"] || "/shared/import/data/attachments"
AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars"
ROOT_NODE = 2
def initialize
super
host = ENV["DB_HOST"] || "localhost"
host = ENV["DB_HOST"] || "localhost"
username = ENV["DB_USERNAME"] || "root"
password = ENV["DB_PASSWORD"]
database = ENV["DB_NAME"] || "vbulletin"
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"
@html_entities = HTMLEntities.new
@encoding = CHARSET_MAP[charset]
@bbcode_to_md = true
@client = Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true
)
@client =
Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true,
)
@client.query_options.merge!(as: :array, cache_rows: false)
# TODO: Add `LIMIT 1` to the below queries
# ------
# be aware there may be other contenttypeids in use, such as poll, link, video, etc.
@forum_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][0]
@channel_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[0][0]
@text_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0]
@forum_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][
0
]
@channel_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[
0
][
0
]
@text_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0]
end
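One way the LIMIT 1 TODO above could be addressed, sketched with a hypothetical helper (not part of this commit):

def content_type_id(klass)
  mysql_query(
    "SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='#{klass}' LIMIT 1",
  ).to_a.dig(0, 0)
end
# e.g. @forum_typeid = content_type_id("Forum")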
def execute
@ -127,7 +136,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
date_of_birth: parse_birthday(row[3]),
primary_group_id: group_id_from_imported_id(row[5]),
admin: row[5] == 6,
moderator: row[5] == 7
moderator: row[5] == 7,
}
u[:ip_address] = row[4][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row[4].present?
if row[7]
@ -153,7 +162,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
imported_id: row[0],
imported_user_id: row[0],
email: random_email,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
@ -203,10 +212,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
SQL
create_group_users(group_users) do |row|
{
group_id: group_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
}
{ group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) }
end
# import secondary group memberships
@ -228,12 +234,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end
end
create_group_users(group_mapping) do |row|
{
group_id: row[0],
user_id: row[1]
}
end
create_group_users(group_mapping) { |row| { group_id: row[0], user_id: row[1] } }
end
def import_user_profiles
@ -249,7 +250,14 @@ class BulkImport::VBulletin5 < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row[0]),
website: (URI.parse(row[1]).to_s rescue nil),
website:
(
begin
URI.parse(row[1]).to_s
rescue StandardError
nil
end
),
views: row[2],
}
end
@ -258,7 +266,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
def import_categories
puts "Importing categories..."
categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
SELECT nodeid AS forumid, title, description, displayorder, parentid, urlident
FROM #{DB_PREFIX}node
WHERE parentid = #{ROOT_NODE}
@ -269,11 +277,10 @@ class BulkImport::VBulletin5 < BulkImport::Base
WHERE contenttypeid = #{@channel_typeid}
AND nodeid > #{@last_imported_category_id}
SQL
).to_a
return if categories.empty?
parent_categories = categories.select { |c| c[4] == ROOT_NODE }
parent_categories = categories.select { |c| c[4] == ROOT_NODE }
children_categories = categories.select { |c| c[4] != ROOT_NODE }
parent_category_ids = Set.new parent_categories.map { |c| c[0] }
@ -285,7 +292,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
name: normalize_text(row[1]),
description: normalize_text(row[2]),
position: row[3],
slug: row[5]
slug: row[5],
}
end
@ -297,7 +304,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
description: normalize_text(row[2]),
position: row[3],
parent_category_id: category_id_from_imported_id(row[4]),
slug: row[5]
slug: row[5],
}
end
end
@ -428,7 +435,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
post_id: post_id,
user_id: user_id,
post_action_type_id: 2,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
@ -455,7 +462,6 @@ class BulkImport::VBulletin5 < BulkImport::Base
user_id: user_id_from_imported_id(row[2]),
created_at: Time.zone.at(row[3]),
}
end
end
@ -475,17 +481,18 @@ class BulkImport::VBulletin5 < BulkImport::Base
users_added = Set.new
create_topic_allowed_users(mysql_stream(allowed_users_sql)) do |row|
next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) || topic_id_from_imported_id(row[2] + PRIVATE_OFFSET)
unless topic_id =
topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) ||
topic_id_from_imported_id(row[2] + PRIVATE_OFFSET)
next
end
next unless user_id = user_id_from_imported_id(row[1])
next if users_added.add?([topic_id, user_id]).nil?
added += 1
{
topic_id: topic_id,
user_id: user_id,
}
{ topic_id: topic_id, user_id: user_id }
end
puts '', "Added #{added} topic allowed users records."
puts "", "Added #{added} topic allowed users records."
end
def import_private_first_posts
@ -543,7 +550,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end
def create_permalinks
puts '', 'creating permalinks...', ''
puts "", "creating permalinks...", ""
# add permalink normalizations to site settings
# EVERYTHING: /.*\/([\w-]+)$/\1 -- selects the last segment of the URL
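# e.g. "forum/general/12345-some-thread" would normalize to "12345-some-thread" under that rule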
@ -580,21 +587,23 @@ class BulkImport::VBulletin5 < BulkImport::Base
return nil
end
tmpfile = 'attach_' + row[6].to_s
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'wb') { |f| f.write(row[5]) }
tmpfile = "attach_" + row[6].to_s
filename = File.join("/tmp/", tmpfile)
File.open(filename, "wb") { |f| f.write(row[5]) }
filename
end
def find_upload(post, opts = {})
if opts[:node_id].present?
sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
sql =
"SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
FROM #{DB_PREFIX}attach a
LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid
LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid
WHERE a.nodeid = #{opts[:node_id]}"
elsif opts[:attachment_id].present?
sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
sql =
"SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
FROM #{DB_PREFIX}attachment a
LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid
LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid
@ -612,9 +621,9 @@ class BulkImport::VBulletin5 < BulkImport::Base
user_id = row[3]
db_filename = row[2]
filename = File.join(ATTACH_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
filename = File.join(ATTACH_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach")
real_filename = db_filename
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
unless File.exist?(filename)
filename = check_database_for_attachment(row) if filename.blank?
@ -637,7 +646,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
# add extensions to authorized setting
#ext = mysql_query("SELECT GROUP_CONCAT(DISTINCT(extension)) exts FROM #{DB_PREFIX}filedata").first[0].split(',')
@ -655,8 +664,8 @@ class BulkImport::VBulletin5 < BulkImport::Base
# new style matches the nodeid in the attach table
# old style matches the filedataid in attach/filedata tables
# if the site is very old, there may be multiple different attachment syntaxes used in posts
attachment_regex = /\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[\/attach\]/i
attachment_regex_oldstyle = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[/attach\]}i
attachment_regex_oldstyle = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
Post.find_each do |post|
current_count += 1
@ -715,9 +724,18 @@ class BulkImport::VBulletin5 < BulkImport::Base
def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end
def preprocess_raw(raw)
@ -726,33 +744,37 @@ class BulkImport::VBulletin5 < BulkImport::Base
raw = raw.dup
# [PLAINTEXT]...[/PLAINTEXT]
raw.gsub!(/\[\/?PLAINTEXT\]/i, "\n\n```\n\n")
raw.gsub!(%r{\[/?PLAINTEXT\]}i, "\n\n```\n\n")
# [FONT=font]...[/FONT]
raw.gsub!(/\[FONT=\w*\]/im, "")
raw.gsub!(/\[\/FONT\]/im, "")
raw.gsub!(%r{\[/FONT\]}im, "")
# @[URL=<user_profile>]<username>[/URL]
# [USER=id]username[/USER]
# [MENTION=id]username[/MENTION]
raw.gsub!(/@\[URL=\"\S+\"\]([\w\s]+)\[\/URL\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(/\[USER=\"\d+\"\]([\S]+)\[\/USER\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(/\[MENTION=\d+\]([\S]+)\[\/MENTION\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{@\[URL=\"\S+\"\]([\w\s]+)\[/URL\]}i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{\[USER=\"\d+\"\]([\S]+)\[/USER\]}i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{\[MENTION=\d+\]([\S]+)\[/MENTION\]}i) { "@#{$1.gsub(" ", "_")}" }
# [IMG2=JSON]{..."src":"<url>"}[/IMG2]
raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) { "\n#{CGI::unescape($1)}\n" }
raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) do
"\n#{CGI.unescape($1)}\n"
end
# [TABLE]...[/TABLE]
raw.gsub!(/\[TABLE=\\"[\w:\-\s,]+\\"\]/i, "")
raw.gsub!(/\[\/TABLE\]/i, "")
raw.gsub!(%r{\[/TABLE\]}i, "")
# [HR]...[/HR]
raw.gsub(/\[HR\]\s*\[\/HR\]/im, "---")
raw.gsub(%r{\[HR\]\s*\[/HR\]}im, "---")
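# note: gsub (without !) returns a new string that is never assigned, so this [HR] replacement is effectively a no-op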
# [VIDEO=youtube_share;<id>]...[/VIDEO]
# [VIDEO=vimeo;<id>]...[/VIDEO]
raw.gsub!(/\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[VIDEO=VIMEO;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://vimeo.com/#{$1}\n" }
raw.gsub!(%r{\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
raw.gsub!(%r{\[VIDEO=VIMEO;([^\]]+)\].*?\[/VIDEO\]}i) { "\nhttps://vimeo.com/#{$1}\n" }
raw
end
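A before/after sketch of the YouTube conversion above (the video id is hypothetical):

raw = "[VIDEO=YOUTUBE_SHARE;dQw4w9WgXcQ]watch this[/VIDEO]"
raw.gsub(%r{\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[/VIDEO\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# => "\nhttps://www.youtube.com/watch?v=dQw4w9WgXcQ\n"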
@ -760,9 +782,9 @@ class BulkImport::VBulletin5 < BulkImport::Base
def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
@ -775,7 +797,6 @@ class BulkImport::VBulletin5 < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end
end
BulkImport::VBulletin5.new.run