DEV: Apply syntax_tree formatting to script/*

@@ -1,6 +1,6 @@
# frozen_string_literal: true

if ARGV.include?('bbcode-to-md')
if ARGV.include?("bbcode-to-md")
# Replace (most) bbcode with markdown before creating posts.
# This will dramatically clean up the final posts in Discourse.
#
@@ -10,7 +10,7 @@ if ARGV.include?('bbcode-to-md')
# cd ruby-bbcode-to-md
# gem build ruby-bbcode-to-md.gemspec
# gem install ruby-bbcode-to-md-*.gem
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"
end

require "pg"
@@ -20,12 +20,12 @@ require "htmlentities"

puts "Loading application..."
require_relative "../../config/environment"
require_relative '../import_scripts/base/uploader'
require_relative "../import_scripts/base/uploader"

module BulkImport; end
module BulkImport
end

class BulkImport::Base

NOW ||= "now()"
PRIVATE_OFFSET ||= 2**30

@@ -33,41 +33,41 @@ class BulkImport::Base

CHARSET_MAP = {
"armscii8" => nil,
"ascii" => Encoding::US_ASCII,
"big5" => Encoding::Big5,
"binary" => Encoding::ASCII_8BIT,
"cp1250" => Encoding::Windows_1250,
"cp1251" => Encoding::Windows_1251,
"cp1256" => Encoding::Windows_1256,
"cp1257" => Encoding::Windows_1257,
"cp850" => Encoding::CP850,
"cp852" => Encoding::CP852,
"cp866" => Encoding::IBM866,
"cp932" => Encoding::Windows_31J,
"dec8" => nil,
"eucjpms" => Encoding::EucJP_ms,
"euckr" => Encoding::EUC_KR,
"gb2312" => Encoding::EUC_CN,
"gbk" => Encoding::GBK,
"geostd8" => nil,
"greek" => Encoding::ISO_8859_7,
"hebrew" => Encoding::ISO_8859_8,
"hp8" => nil,
"keybcs2" => nil,
"koi8r" => Encoding::KOI8_R,
"koi8u" => Encoding::KOI8_U,
"latin1" => Encoding::ISO_8859_1,
"latin2" => Encoding::ISO_8859_2,
"latin5" => Encoding::ISO_8859_9,
"latin7" => Encoding::ISO_8859_13,
"macce" => Encoding::MacCentEuro,
"ascii" => Encoding::US_ASCII,
"big5" => Encoding::Big5,
"binary" => Encoding::ASCII_8BIT,
"cp1250" => Encoding::Windows_1250,
"cp1251" => Encoding::Windows_1251,
"cp1256" => Encoding::Windows_1256,
"cp1257" => Encoding::Windows_1257,
"cp850" => Encoding::CP850,
"cp852" => Encoding::CP852,
"cp866" => Encoding::IBM866,
"cp932" => Encoding::Windows_31J,
"dec8" => nil,
"eucjpms" => Encoding::EucJP_ms,
"euckr" => Encoding::EUC_KR,
"gb2312" => Encoding::EUC_CN,
"gbk" => Encoding::GBK,
"geostd8" => nil,
"greek" => Encoding::ISO_8859_7,
"hebrew" => Encoding::ISO_8859_8,
"hp8" => nil,
"keybcs2" => nil,
"koi8r" => Encoding::KOI8_R,
"koi8u" => Encoding::KOI8_U,
"latin1" => Encoding::ISO_8859_1,
"latin2" => Encoding::ISO_8859_2,
"latin5" => Encoding::ISO_8859_9,
"latin7" => Encoding::ISO_8859_13,
"macce" => Encoding::MacCentEuro,
"macroman" => Encoding::MacRoman,
"sjis" => Encoding::SHIFT_JIS,
"swe7" => nil,
"tis620" => Encoding::TIS_620,
"ucs2" => Encoding::UTF_16BE,
"ujis" => Encoding::EucJP_ms,
"utf8" => Encoding::UTF_8,
"sjis" => Encoding::SHIFT_JIS,
"swe7" => nil,
"tis620" => Encoding::TIS_620,
"ucs2" => Encoding::UTF_16BE,
"ujis" => Encoding::EucJP_ms,
"utf8" => Encoding::UTF_8,
}

# rubocop:enable Layout/HashAlignment
@@ -82,12 +82,13 @@ class BulkImport::Base
@encoding = CHARSET_MAP[charset]
@bbcode_to_md = true if use_bbcode_to_md?

@markdown = Redcarpet::Markdown.new(
Redcarpet::Render::HTML.new(hard_wrap: true),
no_intra_emphasis: true,
fenced_code_blocks: true,
autolink: true
)
@markdown =
Redcarpet::Markdown.new(
Redcarpet::Render::HTML.new(hard_wrap: true),
no_intra_emphasis: true,
fenced_code_blocks: true,
autolink: true,
)
end

def run
@@ -132,7 +133,9 @@ class BulkImport::Base
map = []
ids = []

@raw_connection.send_query("SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'")
@raw_connection.send_query(
"SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'",
)
@raw_connection.set_single_row_mode

@raw_connection.get_result.stream_each do |row|
@@ -163,12 +166,14 @@ class BulkImport::Base
puts "Loading imported topic ids..."
@topics, imported_topic_ids = imported_ids("topic")
@last_imported_topic_id = imported_topic_ids.select { |id| id < PRIVATE_OFFSET }.max || -1
@last_imported_private_topic_id = imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
@last_imported_private_topic_id =
imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)

puts "Loading imported post ids..."
@posts, imported_post_ids = imported_ids("post")
@last_imported_post_id = imported_post_ids.select { |id| id < PRIVATE_OFFSET }.max || -1
@last_imported_private_post_id = imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
@last_imported_private_post_id =
imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
end

def last_id(klass)
@@ -182,9 +187,7 @@ class BulkImport::Base
@raw_connection.send_query("SELECT id, #{column} FROM #{name}")
@raw_connection.set_single_row_mode

@raw_connection.get_result.stream_each do |row|
map[row["id"].to_i] = row[column].to_i
end
@raw_connection.get_result.stream_each { |row| map[row["id"].to_i] = row[column].to_i }

@raw_connection.get_result

@@ -199,13 +202,24 @@ class BulkImport::Base
puts "Loading users indexes..."
@last_user_id = last_id(User)
@last_user_email_id = last_id(UserEmail)
@emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h
@emails =
User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h
@usernames_lower = User.unscoped.pluck(:username_lower).to_set
@mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h
@mapped_usernames =
UserCustomField
.joins(:user)
.where(name: "import_username")
.pluck("user_custom_fields.value", "users.username")
.to_h

puts "Loading categories indexes..."
@last_category_id = last_id(Category)
@category_names = Category.unscoped.pluck(:parent_category_id, :name).map { |pci, name| "#{pci}-#{name}" }.to_set
@category_names =
Category
.unscoped
.pluck(:parent_category_id, :name)
.map { |pci, name| "#{pci}-#{name}" }
.to_set

puts "Loading topics indexes..."
@last_topic_id = last_id(Topic)
@@ -233,13 +247,27 @@ class BulkImport::Base

def fix_primary_keys
puts "Updating primary key sequences..."
@raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})") if @last_group_id > 0
@raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})") if @last_user_id > 0
@raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})") if @last_user_email_id > 0
@raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})") if @last_category_id > 0
@raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})") if @last_topic_id > 0
@raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})") if @last_post_id > 0
@raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})") if @last_post_action_id > 0
if @last_group_id > 0
@raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})")
end
if @last_user_id > 0
@raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})")
end
if @last_user_email_id > 0
@raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})")
end
if @last_category_id > 0
@raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})")
end
if @last_topic_id > 0
@raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})")
end
if @last_post_id > 0
@raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})")
end
if @last_post_action_id > 0
@raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})")
end
end

def group_id_from_imported_id(id)
@@ -272,63 +300,124 @@ class BulkImport::Base
post_id && @topic_id_by_post_id[post_id]
end

GROUP_COLUMNS ||= %i{
id name title bio_raw bio_cooked created_at updated_at
}
GROUP_COLUMNS ||= %i[id name title bio_raw bio_cooked created_at updated_at]

USER_COLUMNS ||= %i{
id username username_lower name active trust_level admin moderator
date_of_birth ip_address registration_ip_address primary_group_id
suspended_at suspended_till last_emailed_at created_at updated_at
}
USER_COLUMNS ||= %i[
id
username
username_lower
name
active
trust_level
admin
moderator
date_of_birth
ip_address
registration_ip_address
primary_group_id
suspended_at
suspended_till
last_emailed_at
created_at
updated_at
]

USER_EMAIL_COLUMNS ||= %i{
id user_id email primary created_at updated_at
}
USER_EMAIL_COLUMNS ||= %i[id user_id email primary created_at updated_at]

USER_STAT_COLUMNS ||= %i{
user_id topics_entered time_read days_visited posts_read_count
likes_given likes_received new_since read_faq
first_post_created_at post_count topic_count bounce_score
reset_bounce_score_after digest_attempted_at
}
USER_STAT_COLUMNS ||= %i[
user_id
topics_entered
time_read
days_visited
posts_read_count
likes_given
likes_received
new_since
read_faq
first_post_created_at
post_count
topic_count
bounce_score
reset_bounce_score_after
digest_attempted_at
]

USER_PROFILE_COLUMNS ||= %i{
user_id location website bio_raw bio_cooked views
}
USER_PROFILE_COLUMNS ||= %i[user_id location website bio_raw bio_cooked views]

GROUP_USER_COLUMNS ||= %i{
group_id user_id created_at updated_at
}
GROUP_USER_COLUMNS ||= %i[group_id user_id created_at updated_at]

CATEGORY_COLUMNS ||= %i{
id name name_lower slug user_id description position parent_category_id
created_at updated_at
}
CATEGORY_COLUMNS ||= %i[
id
name
name_lower
slug
user_id
description
position
parent_category_id
created_at
updated_at
]

TOPIC_COLUMNS ||= %i{
id archetype title fancy_title slug user_id last_post_user_id category_id
visible closed pinned_at views created_at bumped_at updated_at
}
TOPIC_COLUMNS ||= %i[
id
archetype
title
fancy_title
slug
user_id
last_post_user_id
category_id
visible
closed
pinned_at
views
created_at
bumped_at
updated_at
]

POST_COLUMNS ||= %i{
id user_id last_editor_id topic_id post_number sort_order reply_to_post_number
like_count raw cooked hidden word_count created_at last_version_at updated_at
}
POST_COLUMNS ||= %i[
id
user_id
last_editor_id
topic_id
post_number
sort_order
reply_to_post_number
like_count
raw
cooked
hidden
word_count
created_at
last_version_at
updated_at
]

POST_ACTION_COLUMNS ||= %i{
id post_id user_id post_action_type_id deleted_at created_at updated_at
deleted_by_id related_post_id staff_took_action deferred_by_id targets_topic
agreed_at agreed_by_id deferred_at disagreed_at disagreed_by_id
}
POST_ACTION_COLUMNS ||= %i[
id
post_id
user_id
post_action_type_id
deleted_at
created_at
updated_at
deleted_by_id
related_post_id
staff_took_action
deferred_by_id
targets_topic
agreed_at
agreed_by_id
deferred_at
disagreed_at
disagreed_by_id
]

TOPIC_ALLOWED_USER_COLUMNS ||= %i{
topic_id user_id created_at updated_at
}
TOPIC_ALLOWED_USER_COLUMNS ||= %i[topic_id user_id created_at updated_at]

TOPIC_TAG_COLUMNS ||= %i{
topic_id tag_id created_at updated_at
}
TOPIC_TAG_COLUMNS ||= %i[topic_id tag_id created_at updated_at]

def create_groups(rows, &block)
create_records(rows, "group", GROUP_COLUMNS, &block)
@@ -340,10 +429,7 @@ class BulkImport::Base
create_records(rows, "user", USER_COLUMNS, &block)

create_custom_fields("user", "username", @imported_usernames.keys) do |username|
{
record_id: @imported_usernames[username],
value: username,
}
{ record_id: @imported_usernames[username], value: username }
end
end

@@ -389,8 +475,8 @@ class BulkImport::Base
group[:name] = group_name
end

group[:title] = group[:title].scrub.strip.presence if group[:title].present?
group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present?
group[:title] = group[:title].scrub.strip.presence if group[:title].present?
group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present?
group[:bio_cooked] = pre_cook(group[:bio_raw]) if group[:bio_raw].present?
group[:created_at] ||= NOW
group[:updated_at] ||= group[:created_at]
@@ -456,7 +542,9 @@ class BulkImport::Base
user_email[:email] ||= random_email
user_email[:email].downcase!
# unique email
user_email[:email] = random_email until EmailAddressValidator.valid_value?(user_email[:email]) && !@emails.has_key?(user_email[:email])
user_email[:email] = random_email until EmailAddressValidator.valid_value?(
user_email[:email],
) && !@emails.has_key?(user_email[:email])

user_email
end
@@ -539,7 +627,11 @@ class BulkImport::Base
post[:raw] = (post[:raw] || "").scrub.strip.presence || "<Empty imported post>"
post[:raw] = process_raw post[:raw]
if @bbcode_to_md
post[:raw] = post[:raw].bbcode_to_md(false, {}, :disable, :quote) rescue post[:raw]
post[:raw] = begin
post[:raw].bbcode_to_md(false, {}, :disable, :quote)
rescue StandardError
post[:raw]
end
end
post[:like_count] ||= 0
post[:cooked] = pre_cook post[:raw]
@@ -580,22 +672,22 @@ class BulkImport::Base

# [HTML]...[/HTML]
raw.gsub!(/\[HTML\]/i, "\n\n```html\n")
raw.gsub!(/\[\/HTML\]/i, "\n```\n\n")
raw.gsub!(%r{\[/HTML\]}i, "\n```\n\n")

# [PHP]...[/PHP]
raw.gsub!(/\[PHP\]/i, "\n\n```php\n")
raw.gsub!(/\[\/PHP\]/i, "\n```\n\n")
raw.gsub!(%r{\[/PHP\]}i, "\n```\n\n")

# [HIGHLIGHT="..."]
raw.gsub!(/\[HIGHLIGHT="?(\w+)"?\]/i) { "\n\n```#{$1.downcase}\n" }

# [CODE]...[/CODE]
# [HIGHLIGHT]...[/HIGHLIGHT]
raw.gsub!(/\[\/?CODE\]/i, "\n\n```\n\n")
raw.gsub!(/\[\/?HIGHLIGHT\]/i, "\n\n```\n\n")
raw.gsub!(%r{\[/?CODE\]}i, "\n\n```\n\n")
raw.gsub!(%r{\[/?HIGHLIGHT\]}i, "\n\n```\n\n")

# [SAMP]...[/SAMP]
raw.gsub!(/\[\/?SAMP\]/i, "`")
raw.gsub!(%r{\[/?SAMP\]}i, "`")

# replace all chevrons with HTML entities
# /!\ must be done /!\
@@ -609,61 +701,61 @@ class BulkImport::Base
raw.gsub!(">", "&gt;")
raw.gsub!("\u2603", ">")

raw.gsub!(/\[\/?I\]/i, "*")
raw.gsub!(/\[\/?B\]/i, "**")
raw.gsub!(/\[\/?U\]/i, "")
raw.gsub!(%r{\[/?I\]}i, "*")
raw.gsub!(%r{\[/?B\]}i, "**")
raw.gsub!(%r{\[/?U\]}i, "")

raw.gsub!(/\[\/?RED\]/i, "")
raw.gsub!(/\[\/?BLUE\]/i, "")
raw.gsub!(%r{\[/?RED\]}i, "")
raw.gsub!(%r{\[/?BLUE\]}i, "")

raw.gsub!(/\[AUTEUR\].+?\[\/AUTEUR\]/im, "")
raw.gsub!(/\[VOIRMSG\].+?\[\/VOIRMSG\]/im, "")
raw.gsub!(/\[PSEUDOID\].+?\[\/PSEUDOID\]/im, "")
raw.gsub!(%r{\[AUTEUR\].+?\[/AUTEUR\]}im, "")
raw.gsub!(%r{\[VOIRMSG\].+?\[/VOIRMSG\]}im, "")
raw.gsub!(%r{\[PSEUDOID\].+?\[/PSEUDOID\]}im, "")

# [IMG]...[/IMG]
raw.gsub!(/(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[\/IMG\]\s*)+/im) { "\n\n#{$1}\n\n" }
raw.gsub!(%r{(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[/IMG\]\s*)+}im) { "\n\n#{$1}\n\n" }

# [IMG=url]
raw.gsub!(/\[IMG=([^\]]*)\]/im) { "\n\n#{$1}\n\n" }

# [URL=...]...[/URL]
raw.gsub!(/\[URL="?(.+?)"?\](.+?)\[\/URL\]/im) { "[#{$2.strip}](#{$1})" }
raw.gsub!(%r{\[URL="?(.+?)"?\](.+?)\[/URL\]}im) { "[#{$2.strip}](#{$1})" }

# [URL]...[/URL]
# [MP3]...[/MP3]
# [EMAIL]...[/EMAIL]
# [LEFT]...[/LEFT]
raw.gsub!(/\[\/?URL\]/i, "")
raw.gsub!(/\[\/?MP3\]/i, "")
raw.gsub!(/\[\/?EMAIL\]/i, "")
raw.gsub!(/\[\/?LEFT\]/i, "")
raw.gsub!(%r{\[/?URL\]}i, "")
raw.gsub!(%r{\[/?MP3\]}i, "")
raw.gsub!(%r{\[/?EMAIL\]}i, "")
raw.gsub!(%r{\[/?LEFT\]}i, "")

# [FONT=blah] and [COLOR=blah]
raw.gsub!(/\[FONT=.*?\](.*?)\[\/FONT\]/im, "\\1")
raw.gsub!(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, "\\1")
raw.gsub!(%r{\[FONT=.*?\](.*?)\[/FONT\]}im, "\\1")
raw.gsub!(%r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, "\\1")

raw.gsub!(/\[SIZE=.*?\](.*?)\[\/SIZE\]/im, "\\1")
raw.gsub!(/\[H=.*?\](.*?)\[\/H\]/im, "\\1")
raw.gsub!(%r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, "\\1")
raw.gsub!(%r{\[H=.*?\](.*?)\[/H\]}im, "\\1")

# [CENTER]...[/CENTER]
raw.gsub!(/\[CENTER\](.*?)\[\/CENTER\]/im, "\\1")
raw.gsub!(%r{\[CENTER\](.*?)\[/CENTER\]}im, "\\1")

# [INDENT]...[/INDENT]
raw.gsub!(/\[INDENT\](.*?)\[\/INDENT\]/im, "\\1")
raw.gsub!(/\[TABLE\](.*?)\[\/TABLE\]/im, "\\1")
raw.gsub!(/\[TR\](.*?)\[\/TR\]/im, "\\1")
raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, "\\1")
raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, "\\1")
raw.gsub!(%r{\[INDENT\](.*?)\[/INDENT\]}im, "\\1")
raw.gsub!(%r{\[TABLE\](.*?)\[/TABLE\]}im, "\\1")
raw.gsub!(%r{\[TR\](.*?)\[/TR\]}im, "\\1")
raw.gsub!(%r{\[TD\](.*?)\[/TD\]}im, "\\1")
raw.gsub!(%r{\[TD="?.*?"?\](.*?)\[/TD\]}im, "\\1")

# [STRIKE]
raw.gsub!(/\[STRIKE\]/i, "<s>")
raw.gsub!(/\[\/STRIKE\]/i, "</s>")
raw.gsub!(%r{\[/STRIKE\]}i, "</s>")

# [QUOTE]...[/QUOTE]
raw.gsub!(/\[QUOTE="([^\]]+)"\]/i) { "[QUOTE=#{$1}]" }

# Nested Quotes
raw.gsub!(/(\[\/?QUOTE.*?\])/mi) { |q| "\n#{q}\n" }
raw.gsub!(%r{(\[/?QUOTE.*?\])}mi) { |q| "\n#{q}\n" }

# raw.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { |quote|
# quote.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { "\n#{$1}\n" }
@@ -686,28 +778,36 @@ class BulkImport::Base
end

# [YOUTUBE]<id>[/YOUTUBE]
raw.gsub!(/\[YOUTUBE\](.+?)\[\/YOUTUBE\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[DAILYMOTION\](.+?)\[\/DAILYMOTION\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" }
raw.gsub!(%r{\[YOUTUBE\](.+?)\[/YOUTUBE\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(%r{\[DAILYMOTION\](.+?)\[/DAILYMOTION\]}i) do
"\nhttps://www.dailymotion.com/video/#{$1}\n"
end

# [VIDEO=youtube;<id>]...[/VIDEO]
raw.gsub!(/\[VIDEO=YOUTUBE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" }
raw.gsub!(%r{\[VIDEO=YOUTUBE;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
raw.gsub!(%r{\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.dailymotion.com/video/#{$1}\n"
end

# [SPOILER=Some hidden stuff]SPOILER HERE!![/SPOILER]
raw.gsub!(/\[SPOILER="?(.+?)"?\](.+?)\[\/SPOILER\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" }
raw.gsub!(%r{\[SPOILER="?(.+?)"?\](.+?)\[/SPOILER\]}im) do
"\n#{$1}\n[spoiler]#{$2}[/spoiler]\n"
end

# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
# (https://meta.discourse.org/t/phpbb-3-importer-old/17397)
raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list\]/im, '[ol]\1[/ol]')
raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list\]}im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list:o\]}im, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
raw.gsub!(/\[\*\]\n/, '')
raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')
raw.gsub!(/\[\*\]\n/, "")
raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]')
raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
raw.gsub!(/\[\*=1\]/, '')
raw.gsub!(/\[\*=1\]/, "")

raw
end
@@ -728,7 +828,9 @@ class BulkImport::Base
imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil?
@raw_connection.put_copy_data columns.map { |c| processed[c] } unless processed[:skip]
rows_created += 1
print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created % 100 == 0
if rows_created % 100 == 0
print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)]
end
rescue => e
puts "\n"
puts "ERROR: #{e.message}"
@@ -737,15 +839,14 @@ class BulkImport::Base
end
end

print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created > 0
if rows_created > 0
print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)]
end

id_mapping_method_name = "#{name}_id_from_imported_id".freeze
return unless respond_to?(id_mapping_method_name)
create_custom_fields(name, "id", imported_ids) do |imported_id|
{
record_id: send(id_mapping_method_name, imported_id),
value: imported_id,
}
{ record_id: send(id_mapping_method_name, imported_id), value: imported_id }
end
rescue => e
# FIXME: errors catched here stop the rest of the COPY
@@ -755,7 +856,8 @@ class BulkImport::Base

def create_custom_fields(table, name, rows)
name = "import_#{name}"
sql = "COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN"
sql =
"COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
rows.each do |row|
next unless cf = yield(row)
@@ -797,7 +899,7 @@ class BulkImport::Base
cooked = raw

# Convert YouTube URLs to lazyYT DOMs before being transformed into links
cooked.gsub!(/\nhttps\:\/\/www.youtube.com\/watch\?v=(\w+)\n/) do
cooked.gsub!(%r{\nhttps\://www.youtube.com/watch\?v=(\w+)\n}) do
video_id = $1
result = <<-HTML
<div class="lazyYT" data-youtube-id="#{video_id}" data-width="480" data-height="270" data-parameters="feature=oembed&wmode=opaque"></div>
@@ -807,7 +909,7 @@ class BulkImport::Base

cooked = @markdown.render(cooked).scrub.strip

cooked.gsub!(/\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[\/QUOTE\]/im) do
cooked.gsub!(%r{\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[/QUOTE\]}im) do
username, post_id, topic_id, quote = $1, $2, $3, $4

quote = quote.scrub.strip
@@ -860,5 +962,4 @@ class BulkImport::Base
return text if @encoding == Encoding::UTF_8
text && text.encode(@encoding).force_encoding(Encoding::UTF_8)
end

end

File diff suppressed because it is too large

@@ -3,17 +3,16 @@
require_relative "base"
require "pg"
require "htmlentities"
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"

class BulkImport::PhpBB < BulkImport::Base

SUSPENDED_TILL ||= Date.new(3000, 1, 1)
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "phpbb_"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "phpbb_"

def initialize
super

charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"
database = ENV["DB_NAME"] || "flightaware"
password = ENV["DB_PASSWORD"] || "discourse"

@@ -57,7 +56,7 @@ class BulkImport::PhpBB < BulkImport::Base
{
imported_id: row["group_id"],
name: normalize_text(row["group_name"]),
bio_raw: normalize_text(row["group_desc"])
bio_raw: normalize_text(row["group_desc"]),
}
end
end
@@ -85,15 +84,28 @@ class BulkImport::PhpBB < BulkImport::Base
username: normalize_text(row["username"]),
email: row["user_email"],
created_at: Time.zone.at(row["user_regdate"].to_i),
last_seen_at: row["user_lastvisit"] == 0 ? Time.zone.at(row["user_regdate"].to_i) : Time.zone.at(row["user_lastvisit"].to_i),
last_seen_at:
(
if row["user_lastvisit"] == 0
Time.zone.at(row["user_regdate"].to_i)
else
Time.zone.at(row["user_lastvisit"].to_i)
end
),
trust_level: row["user_posts"] == 0 ? TrustLevel[0] : TrustLevel[1],
date_of_birth: parse_birthday(row["user_birthday"]),
primary_group_id: group_id_from_imported_id(row["group_id"])
primary_group_id: group_id_from_imported_id(row["group_id"]),
}
u[:ip_address] = row["user_ip"][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row["user_ip"].present?
if row["ban_start"]
u[:suspended_at] = Time.zone.at(row["ban_start"].to_i)
u[:suspended_till] = row["ban_end"].to_i > 0 ? Time.zone.at(row["ban_end"].to_i) : SUSPENDED_TILL
u[:suspended_till] = (
if row["ban_end"].to_i > 0
Time.zone.at(row["ban_end"].to_i)
else
SUSPENDED_TILL
end
)
end
u
end
@@ -114,7 +126,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["user_id"],
imported_user_id: row["user_id"],
email: row["user_email"],
created_at: Time.zone.at(row["user_regdate"].to_i)
created_at: Time.zone.at(row["user_regdate"].to_i),
}
end
end
@@ -149,7 +161,14 @@ class BulkImport::PhpBB < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row["user_id"]),
website: (URI.parse(row["user_website"]).to_s rescue nil),
website:
(
begin
URI.parse(row["user_website"]).to_s
rescue StandardError
nil
end
),
location: row["user_from"],
}
end
@@ -158,17 +177,16 @@ class BulkImport::PhpBB < BulkImport::Base
def import_categories
puts "Importing categories..."

categories = psql_query(<<-SQL
categories = psql_query(<<-SQL).to_a
SELECT forum_id, parent_id, forum_name, forum_desc
FROM #{TABLE_PREFIX}forums
WHERE forum_id > #{@last_imported_category_id}
ORDER BY parent_id, left_id
SQL
).to_a

return if categories.empty?

parent_categories = categories.select { |c| c["parent_id"].to_i == 0 }
parent_categories = categories.select { |c| c["parent_id"].to_i == 0 }
children_categories = categories.select { |c| c["parent_id"].to_i != 0 }

puts "Importing parent categories..."
@@ -176,7 +194,7 @@ class BulkImport::PhpBB < BulkImport::Base
{
imported_id: row["forum_id"],
name: normalize_text(row["forum_name"]),
description: normalize_text(row["forum_desc"])
description: normalize_text(row["forum_desc"]),
}
end

@@ -186,7 +204,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["forum_id"],
name: normalize_text(row["forum_name"]),
description: normalize_text(row["forum_desc"]),
parent_category_id: category_id_from_imported_id(row["parent_id"])
parent_category_id: category_id_from_imported_id(row["parent_id"]),
}
end
end
@@ -209,7 +227,7 @@ class BulkImport::PhpBB < BulkImport::Base
category_id: category_id_from_imported_id(row["forum_id"]),
user_id: user_id_from_imported_id(row["topic_poster"]),
created_at: Time.zone.at(row["topic_time"].to_i),
views: row["topic_views"]
views: row["topic_views"],
}
end
end
@@ -261,7 +279,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["msg_id"].to_i + PRIVATE_OFFSET,
title: normalize_text(title),
user_id: user_id_from_imported_id(row["author_id"].to_i),
created_at: Time.zone.at(row["message_time"].to_i)
created_at: Time.zone.at(row["message_time"].to_i),
}
end
end
@@ -271,13 +289,12 @@ class BulkImport::PhpBB < BulkImport::Base

allowed_users = []

psql_query(<<-SQL
psql_query(<<-SQL).each do |row|
SELECT msg_id, author_id, to_address
FROM #{TABLE_PREFIX}privmsgs
WHERE msg_id > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
ORDER BY msg_id
SQL
).each do |row|
next unless topic_id = topic_id_from_imported_id(row["msg_id"].to_i + PRIVATE_OFFSET)

user_ids = get_message_recipients(row["author_id"], row["to_address"])
@@ -287,12 +304,7 @@ class BulkImport::PhpBB < BulkImport::Base
end
end

create_topic_allowed_users(allowed_users) do |row|
{
topic_id: row[0],
user_id: row[1]
}
end
create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } }
end

def import_private_posts
@@ -316,13 +328,13 @@ class BulkImport::PhpBB < BulkImport::Base
topic_id: topic_id,
user_id: user_id_from_imported_id(row["author_id"].to_i),
created_at: Time.zone.at(row["message_time"].to_i),
raw: process_raw_text(row["message_text"])
raw: process_raw_text(row["message_text"]),
}
end
end

def get_message_recipients(from, to)
user_ids = to.split(':')
user_ids = to.split(":")
user_ids.map! { |u| u[2..-1].to_i }
user_ids.push(from.to_i)
user_ids.uniq!
@@ -332,15 +344,29 @@ class BulkImport::PhpBB < BulkImport::Base

def extract_pm_title(title)
pm_title = CGI.unescapeHTML(title)
pm_title = title.gsub(/^Re\s*:\s*/i, "") rescue nil
pm_title =
begin
title.gsub(/^Re\s*:\s*/i, "")
rescue StandardError
nil
end
pm_title
end

def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end

def psql_query(sql)
@@ -352,34 +378,36 @@ class BulkImport::PhpBB < BulkImport::Base
text = raw.dup
text = CGI.unescapeHTML(text)

text.gsub!(/:(?:\w{8})\]/, ']')
text.gsub!(/:(?:\w{8})\]/, "]")

# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')

# phpBB shortens link text like this, which breaks our markdown processing:
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")

# convert list tags to ul and list=1 tags to ol
# list=a is not supported, so handle it like list=1
# list=9 and list=x have the same result as list=1 and list=a
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]')
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]')

# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]')

# [QUOTE="<username>"] -- add newline
text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }

# [/QUOTE] -- add newline
text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" }

# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/) do
text.gsub!(
/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/,
) do
smiley = $1
@smiley_map.fetch(smiley) do
# upload_smiley(smiley, $2, $3, $4) || smiley_as_text(smiley)
@@ -405,33 +433,30 @@ class BulkImport::PhpBB < BulkImport::Base

def add_default_smilies
{
[':D', ':-D', ':grin:'] => ':smiley:',
[':)', ':-)', ':smile:'] => ':slight_smile:',
[';)', ';-)', ':wink:'] => ':wink:',
[':(', ':-(', ':sad:'] => ':frowning:',
[':o', ':-o', ':eek:'] => ':astonished:',
[':shock:'] => ':open_mouth:',
[':?', ':-?', ':???:'] => ':confused:',
['8-)', ':cool:'] => ':sunglasses:',
[':lol:'] => ':laughing:',
[':x', ':-x', ':mad:'] => ':angry:',
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
[':oops:'] => ':blush:',
[':cry:'] => ':cry:',
[':evil:'] => ':imp:',
[':twisted:'] => ':smiling_imp:',
[':roll:'] => ':unamused:',
[':!:'] => ':exclamation:',
[':?:'] => ':question:',
[':idea:'] => ':bulb:',
[':arrow:'] => ':arrow_right:',
[':|', ':-|'] => ':neutral_face:',
[':geek:'] => ':nerd:'
}.each do |smilies, emoji|
smilies.each { |smiley| @smiley_map[smiley] = emoji }
end
%w[:D :-D :grin:] => ":smiley:",
%w[:) :-) :smile:] => ":slight_smile:",
%w[;) ;-) :wink:] => ":wink:",
%w[:( :-( :sad:] => ":frowning:",
%w[:o :-o :eek:] => ":astonished:",
[":shock:"] => ":open_mouth:",
%w[:? :-? :???:] => ":confused:",
%w[8-) :cool:] => ":sunglasses:",
[":lol:"] => ":laughing:",
%w[:x :-x :mad:] => ":angry:",
%w[:P :-P :razz:] => ":stuck_out_tongue:",
[":oops:"] => ":blush:",
[":cry:"] => ":cry:",
[":evil:"] => ":imp:",
[":twisted:"] => ":smiling_imp:",
[":roll:"] => ":unamused:",
[":!:"] => ":exclamation:",
[":?:"] => ":question:",
[":idea:"] => ":bulb:",
[":arrow:"] => ":arrow_right:",
%w[:| :-|] => ":neutral_face:",
[":geek:"] => ":nerd:",
}.each { |smilies, emoji| smilies.each { |smiley| @smiley_map[smiley] = emoji } }
end

end

BulkImport::PhpBB.new.run

@@ -8,7 +8,6 @@ require "htmlentities"
# NOTE: this importer expects a MySQL DB to directly connect to

class BulkImport::Vanilla < BulkImport::Base

VANILLA_DB = "dbname"
TABLE_PREFIX = "GDN_"
ATTACHMENTS_BASE_DIR = "/my/absolute/path/to/from_vanilla/uploads"
@@ -20,13 +19,14 @@ class BulkImport::Vanilla < BulkImport::Base
def initialize
super
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
database: VANILLA_DB,
password: "",
reconnect: true
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
database: VANILLA_DB,
password: "",
reconnect: true,
)

@import_tags = false
begin
@@ -88,10 +88,10 @@ class BulkImport::Vanilla < BulkImport::Base
end

def import_users
puts '', "Importing users..."
puts "", "Importing users..."

username = nil
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first["count"]

users = mysql_stream <<-SQL
SELECT UserID, Name, Title, Location, Email,
@@ -103,26 +103,32 @@ class BulkImport::Vanilla < BulkImport::Base
SQL

create_users(users) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?

if ip_address = row['InsertIPAddress']&.split(',').try(:[], 0)
ip_address = nil unless (IPAddr.new(ip_address) rescue false)
if ip_address = row["InsertIPAddress"]&.split(",").try(:[], 0)
ip_address = nil unless (
begin
IPAddr.new(ip_address)
rescue StandardError
false
end
)
end

u = {
imported_id: row['UserID'],
email: row['Email'],
username: row['Name'],
name: row['Name'],
created_at: row['DateInserted'] == nil ? 0 : Time.zone.at(row['DateInserted']),
imported_id: row["UserID"],
email: row["Email"],
username: row["Name"],
name: row["Name"],
created_at: row["DateInserted"] == nil ? 0 : Time.zone.at(row["DateInserted"]),
registration_ip_address: ip_address,
last_seen_at: row['DateLastActive'] == nil ? 0 : Time.zone.at(row['DateLastActive']),
location: row['Location'],
admin: row['Admin'] > 0
last_seen_at: row["DateLastActive"] == nil ? 0 : Time.zone.at(row["DateLastActive"]),
location: row["Location"],
admin: row["Admin"] > 0,
}
if row["Banned"] > 0
u[:suspended_at] = Time.zone.at(row['DateInserted'])
u[:suspended_at] = Time.zone.at(row["DateInserted"])
u[:suspended_till] = SUSPENDED_TILL
end
u
@@ -130,7 +136,7 @@ class BulkImport::Vanilla < BulkImport::Base
end

def import_user_emails
puts '', 'Importing user emails...'
puts "", "Importing user emails..."

users = mysql_stream <<-SQL
SELECT UserID, Name, Email, DateInserted
@@ -141,20 +147,20 @@ class BulkImport::Vanilla < BulkImport::Base
SQL

create_user_emails(users) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?

{
imported_id: row["UserID"],
imported_user_id: row["UserID"],
email: row["Email"],
created_at: Time.zone.at(row["DateInserted"])
created_at: Time.zone.at(row["DateInserted"]),
}
end
end

def import_user_profiles
puts '', 'Importing user profiles...'
puts "", "Importing user profiles..."

user_profiles = mysql_stream <<-SQL
SELECT UserID, Name, Email, Location, About
@@ -165,19 +171,19 @@ class BulkImport::Vanilla < BulkImport::Base
SQL

create_user_profiles(user_profiles) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?

{
user_id: user_id_from_imported_id(row["UserID"]),
location: row["Location"],
bio_raw: row["About"]
bio_raw: row["About"],
}
end
end

def import_user_stats
puts '', "Importing user stats..."
puts "", "Importing user stats..."

users = mysql_stream <<-SQL
SELECT UserID, CountDiscussions, CountComments, DateInserted
@@ -190,14 +196,14 @@ class BulkImport::Vanilla < BulkImport::Base
now = Time.zone.now

create_user_stats(users) do |row|
next unless @users[row['UserID'].to_i] # shouldn't need this but it can be NULL :<
next unless @users[row["UserID"].to_i] # shouldn't need this but it can be NULL :<

{
imported_id: row['UserID'],
imported_user_id: row['UserID'],
new_since: Time.zone.at(row['DateInserted'] || now),
post_count: row['CountComments'] || 0,
topic_count: row['CountDiscussions'] || 0
imported_id: row["UserID"],
imported_user_id: row["UserID"],
new_since: Time.zone.at(row["DateInserted"] || now),
post_count: row["CountComments"] || 0,
topic_count: row["CountDiscussions"] || 0,
}
end
end
@@ -215,7 +221,10 @@ class BulkImport::Vanilla < BulkImport::Base

next unless u.custom_fields["import_id"]

r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first
r =
mysql_query(
"SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields["import_id"]};",
).first
next if r.nil?
photo = r["photo"]
next unless photo.present?
@@ -229,9 +238,9 @@ class BulkImport::Vanilla < BulkImport::Base
photo_real_filename = nil
parts = photo.squeeze("/").split("/")
if parts[0] =~ /^[a-z0-9]{2}:/
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join("/")}".squeeze("/")
elsif parts[0] == "~cf"
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join("/")}".squeeze("/")
else
puts "UNKNOWN FORMAT: #{photo}"
next
@@ -272,75 +281,86 @@ class BulkImport::Vanilla < BulkImport::Base
count = 0

# https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg
cdn_regex = /https:\/\/us.v-cdn.net\/1234567\/uploads\/(\S+\/(\w|-)+.\w+)/i
cdn_regex = %r{https://us.v-cdn.net/1234567/uploads/(\S+/(\w|-)+.\w+)}i
# [attachment=10109:Screen Shot 2012-04-01 at 3.47.35 AM.png]
attachment_regex = /\[attachment=(\d+):(.*?)\]/i

Post.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'").find_each do |post|
count += 1
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
new_raw = post.raw.dup
Post
.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'")
.find_each do |post|
count += 1
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
new_raw = post.raw.dup

new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s)
attachment_id = matches[1]
file_name = matches[2]
next unless attachment_id
new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s)
attachment_id = matches[1]
file_name = matches[2]
next unless attachment_id

r = mysql_query("SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};").first
next if r.nil?
path = r["Path"]
name = r["Name"]
next unless path.present?
r =
mysql_query(
"SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};",
).first
next if r.nil?
path = r["Path"]
name = r["Name"]
next unless path.present?

path.gsub!("s3://content/", "")
path.gsub!("s3://uploads/", "")
file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"
path.gsub!("s3://content/", "")
path.gsub!("s3://uploads/", "")
file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"

if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
# upload.url
filename = name || file_name || File.basename(file_path)
html_for_upload(upload, normalize_text(filename))
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
# upload.url
filename = name || file_name || File.basename(file_path)
html_for_upload(upload, normalize_text(filename))
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
end
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
else
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
end

new_raw.gsub!(cdn_regex) do |s|
matches = cdn_regex.match(s)
attachment_id = matches[1]
new_raw.gsub!(cdn_regex) do |s|
matches = cdn_regex.match(s)
attachment_id = matches[1]

file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"
file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"

if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
upload.url
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
upload.url
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
end
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
else
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
end

if new_raw != post.raw
begin
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, skip_revision: true, skip_validations: true, bypass_bump: true)
rescue
puts "PostRevisor error for #{post.id}"
post.raw = new_raw
post.save(validate: false)
if new_raw != post.raw
begin
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
skip_revision: true,
skip_validations: true,
bypass_bump: true,
)
rescue StandardError
puts "PostRevisor error for #{post.id}"
post.raw = new_raw
post.save(validate: false)
end
end
end
end
end
end

@@ -352,7 +372,7 @@ class BulkImport::Vanilla < BulkImport::Base

# Otherwise, the file exists but with a prefix:
# The p prefix seems to be the full file, so try to find that one first.
['p', 't', 'n'].each do |prefix|
%w[p t n].each do |prefix|
full_guess = File.join(path, "#{prefix}#{base_guess}")
return full_guess if File.exist?(full_guess)
end
@@ -364,26 +384,30 @@ class BulkImport::Vanilla < BulkImport::Base
def import_categories
puts "", "Importing categories..."

categories = mysql_query("
categories =
mysql_query(
"
SELECT CategoryID, ParentCategoryID, Name, Description, Sort
FROM #{TABLE_PREFIX}Category
WHERE CategoryID > 0
ORDER BY Sort, CategoryID
").to_a
",
).to_a

# Throw the -1 level categories away since they contain no topics.
# Use the next level as root categories.

top_level_categories = categories.select { |c| c["ParentCategoryID"].blank? || c['ParentCategoryID'] == -1 }
top_level_categories =
categories.select { |c| c["ParentCategoryID"].blank? || c["ParentCategoryID"] == -1 }

# Depth = 2
create_categories(top_level_categories) do |category|
next if category_id_from_imported_id(category['CategoryID'])
next if category_id_from_imported_id(category["CategoryID"])
{
imported_id: category['CategoryID'],
name: CGI.unescapeHTML(category['Name']),
description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil,
position: category['Sort']
imported_id: category["CategoryID"],
name: CGI.unescapeHTML(category["Name"]),
description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil,
position: category["Sort"],
}
end

@@ -393,39 +417,39 @@ class BulkImport::Vanilla < BulkImport::Base

# Depth = 3
create_categories(subcategories) do |category|
next if category_id_from_imported_id(category['CategoryID'])
next if category_id_from_imported_id(category["CategoryID"])
{
imported_id: category['CategoryID'],
parent_category_id: category_id_from_imported_id(category['ParentCategoryID']),
name: CGI.unescapeHTML(category['Name']),
description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil,
position: category['Sort']
imported_id: category["CategoryID"],
parent_category_id: category_id_from_imported_id(category["ParentCategoryID"]),
name: CGI.unescapeHTML(category["Name"]),
description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil,
position: category["Sort"],
}
end

subcategory_ids = Set.new(subcategories.map { |c| c['CategoryID'] })
subcategory_ids = Set.new(subcategories.map { |c| c["CategoryID"] })

# Depth 4 and 5 need to be tags

categories.each do |c|
next if c['ParentCategoryID'] == -1
next if top_level_category_ids.include?(c['CategoryID'])
next if subcategory_ids.include?(c['CategoryID'])
next if c["ParentCategoryID"] == -1
next if top_level_category_ids.include?(c["CategoryID"])
next if subcategory_ids.include?(c["CategoryID"])

# Find a depth 3 category for topics in this category
parent = c
while !parent.nil? && !subcategory_ids.include?(parent['CategoryID'])
parent = categories.find { |subcat| subcat['CategoryID'] == parent['ParentCategoryID'] }
while !parent.nil? && !subcategory_ids.include?(parent["CategoryID"])
parent = categories.find { |subcat| subcat["CategoryID"] == parent["ParentCategoryID"] }
end

if parent
tag_name = DiscourseTagging.clean_tag(c['Name'])
@category_mappings[c['CategoryID']] = {
category_id: category_id_from_imported_id(parent['CategoryID']),
tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
tag_name = DiscourseTagging.clean_tag(c["Name"])
@category_mappings[c["CategoryID"]] = {
category_id: category_id_from_imported_id(parent["CategoryID"]),
tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name),
}
else
puts '', "Couldn't find a category for #{c['CategoryID']} '#{c['Name']}'!"
puts "", "Couldn't find a category for #{c["CategoryID"]} '#{c["Name"]}'!"
end
end
end
@@ -433,7 +457,8 @@ class BulkImport::Vanilla < BulkImport::Base
def import_topics
puts "", "Importing topics..."

topics_sql = "SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format
topics_sql =
"SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format
FROM #{TABLE_PREFIX}Discussion
WHERE DiscussionID > #{@last_imported_topic_id}
ORDER BY DiscussionID ASC"
@@ -442,11 +467,12 @@ class BulkImport::Vanilla < BulkImport::Base
data = {
imported_id: row["DiscussionID"],
title: normalize_text(row["Name"]),
category_id: category_id_from_imported_id(row["CategoryID"]) ||
@category_mappings[row["CategoryID"]].try(:[], :category_id),
category_id:
category_id_from_imported_id(row["CategoryID"]) ||
@category_mappings[row["CategoryID"]].try(:[], :category_id),
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted']),
pinned_at: row['Announce'] == 0 ? nil : Time.zone.at(row['DateInserted'])
created_at: Time.zone.at(row["DateInserted"]),
pinned_at: row["Announce"] == 0 ? nil : Time.zone.at(row["DateInserted"]),
}
(data[:user_id].present? && data[:title].present?) ? data : false
end
@@ -455,46 +481,45 @@ class BulkImport::Vanilla < BulkImport::Base

create_posts(mysql_stream(topics_sql)) do |row|
data = {
imported_id: "d-" + row['DiscussionID'].to_s,
topic_id: topic_id_from_imported_id(row['DiscussionID']),
imported_id: "d-" + row["DiscussionID"].to_s,
topic_id: topic_id_from_imported_id(row["DiscussionID"]),
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
data[:topic_id].present? ? data : false
end

puts '', 'converting deep categories to tags...'
puts "", "converting deep categories to tags..."

create_topic_tags(mysql_stream(topics_sql)) do |row|
next unless mapping = @category_mappings[row['CategoryID']]
next unless mapping = @category_mappings[row["CategoryID"]]

{
tag_id: mapping[:tag].id,
topic_id: topic_id_from_imported_id(row["DiscussionID"])
}
{ tag_id: mapping[:tag].id, topic_id: topic_id_from_imported_id(row["DiscussionID"]) }
end
end

def import_posts
puts "", "Importing posts..."

posts = mysql_stream(
"SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format
posts =
mysql_stream(
"SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format
FROM #{TABLE_PREFIX}Comment
WHERE CommentID > #{@last_imported_post_id}
ORDER BY CommentID ASC")
ORDER BY CommentID ASC",
)

create_posts(posts) do |row|
next unless topic_id = topic_id_from_imported_id(row['DiscussionID'])
next if row['Body'].blank?
next unless topic_id = topic_id_from_imported_id(row["DiscussionID"])
next if row["Body"].blank?

{
imported_id: row['CommentID'],
imported_id: row["CommentID"],
topic_id: topic_id,
user_id: user_id_from_imported_id(row['InsertUserID']),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
end
end
@@ -505,31 +530,31 @@ class BulkImport::Vanilla < BulkImport::Base
tag_mapping = {}
|
||||
|
||||
mysql_query("SELECT TagID, Name FROM #{TABLE_PREFIX}Tag").each do |row|
|
||||
tag_name = DiscourseTagging.clean_tag(row['Name'])
|
||||
tag_name = DiscourseTagging.clean_tag(row["Name"])
|
||||
tag = Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
|
||||
tag_mapping[row['TagID']] = tag.id
|
||||
tag_mapping[row["TagID"]] = tag.id
|
||||
end
|
||||
|
||||
tags = mysql_query(
|
||||
"SELECT TagID, DiscussionID
|
||||
tags =
|
||||
mysql_query(
|
||||
"SELECT TagID, DiscussionID
|
||||
FROM #{TABLE_PREFIX}TagDiscussion
|
||||
WHERE DiscussionID > #{@last_imported_topic_id}
|
||||
ORDER BY DateInserted")
|
||||
ORDER BY DateInserted",
|
||||
)
|
||||
|
||||
create_topic_tags(tags) do |row|
|
||||
next unless topic_id = topic_id_from_imported_id(row['DiscussionID'])
|
||||
next unless topic_id = topic_id_from_imported_id(row["DiscussionID"])
|
||||
|
||||
{
|
||||
topic_id: topic_id,
|
||||
tag_id: tag_mapping[row['TagID']]
|
||||
}
|
||||
{ topic_id: topic_id, tag_id: tag_mapping[row["TagID"]] }
|
||||
end
|
||||
end
|
||||
|
||||
def import_private_topics
|
||||
puts "", "Importing private topics..."
|
||||
|
||||
topics_sql = "SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID
|
||||
topics_sql =
|
||||
"SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID
|
||||
FROM #{TABLE_PREFIX}Conversation c, #{TABLE_PREFIX}ConversationMessage m
|
||||
WHERE c.FirstMessageID = m.MessageID
|
||||
AND c.ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET}
|
||||
@ -539,9 +564,10 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
{
|
||||
archetype: Archetype.private_message,
|
||||
imported_id: row["ConversationID"] + PRIVATE_OFFSET,
|
||||
title: row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}",
|
||||
title:
|
||||
row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}",
|
||||
user_id: user_id_from_imported_id(row["InsertUserID"]),
|
||||
created_at: Time.zone.at(row['DateInserted'])
|
||||
created_at: Time.zone.at(row["DateInserted"]),
|
||||
}
|
||||
end
|
||||
end
|
||||
@ -549,7 +575,8 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
def import_topic_allowed_users
|
||||
puts "", "importing topic_allowed_users..."
|
||||
|
||||
topic_allowed_users_sql = "
|
||||
topic_allowed_users_sql =
|
||||
"
|
||||
SELECT ConversationID, UserID
|
||||
FROM #{TABLE_PREFIX}UserConversation
|
||||
WHERE Deleted = 0
|
||||
@ -559,45 +586,43 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
added = 0
|
||||
|
||||
create_topic_allowed_users(mysql_stream(topic_allowed_users_sql)) do |row|
|
||||
next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET)
|
||||
next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET)
|
||||
next unless user_id = user_id_from_imported_id(row["UserID"])
|
||||
added += 1
|
||||
{
|
||||
topic_id: topic_id,
|
||||
user_id: user_id,
|
||||
}
|
||||
{ topic_id: topic_id, user_id: user_id }
|
||||
end
|
||||
|
||||
puts '', "Added #{added} topic_allowed_users records."
|
||||
puts "", "Added #{added} topic_allowed_users records."
|
||||
end
|
||||
|
||||
def import_private_posts
|
||||
puts "", "importing private replies..."
|
||||
|
||||
private_posts_sql = "
|
||||
private_posts_sql =
|
||||
"
|
||||
SELECT ConversationID, MessageID, Body, InsertUserID, DateInserted, Format
|
||||
FROM GDN_ConversationMessage
|
||||
WHERE ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET}
|
||||
ORDER BY ConversationID ASC, MessageID ASC"
|
||||
|
||||
create_posts(mysql_stream(private_posts_sql)) do |row|
|
||||
next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET)
|
||||
next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET)
|
||||
|
||||
{
|
||||
imported_id: row['MessageID'] + PRIVATE_OFFSET,
|
||||
imported_id: row["MessageID"] + PRIVATE_OFFSET,
|
||||
topic_id: topic_id,
|
||||
user_id: user_id_from_imported_id(row['InsertUserID']),
|
||||
created_at: Time.zone.at(row['DateInserted']),
|
||||
raw: clean_up(row['Body'], row['Format'])
|
||||
user_id: user_id_from_imported_id(row["InsertUserID"]),
|
||||
created_at: Time.zone.at(row["DateInserted"]),
|
||||
raw: clean_up(row["Body"], row["Format"]),
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
# TODO: too slow
|
||||
def create_permalinks
|
||||
puts '', 'Creating permalinks...', ''
|
||||
puts "", "Creating permalinks...", ""
|
||||
|
||||
puts ' User pages...'
|
||||
puts " User pages..."
|
||||
|
||||
start = Time.now
|
||||
count = 0
|
||||
@ -606,21 +631,23 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
sql = "COPY permalinks (url, created_at, updated_at, external_url) FROM STDIN"
|
||||
|
||||
@raw_connection.copy_data(sql, @encoder) do
|
||||
User.includes(:_custom_fields).find_each do |u|
|
||||
count += 1
|
||||
ucf = u.custom_fields
|
||||
if ucf && ucf["import_id"]
|
||||
vanilla_username = ucf["import_username"] || u.username
|
||||
@raw_connection.put_copy_data(
|
||||
["profile/#{vanilla_username}", now, now, "/users/#{u.username}"]
|
||||
)
|
||||
end
|
||||
User
|
||||
.includes(:_custom_fields)
|
||||
.find_each do |u|
|
||||
count += 1
|
||||
ucf = u.custom_fields
|
||||
if ucf && ucf["import_id"]
|
||||
vanilla_username = ucf["import_username"] || u.username
|
||||
@raw_connection.put_copy_data(
|
||||
["profile/#{vanilla_username}", now, now, "/users/#{u.username}"],
|
||||
)
|
||||
end
|
||||
|
||||
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
|
||||
end
|
||||
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
|
||||
end
|
||||
end
|
||||
|
||||
puts '', '', ' Topics and posts...'
|
||||
puts "", "", " Topics and posts..."
|
||||
|
||||
start = Time.now
|
||||
count = 0
|
||||
@ -628,38 +655,36 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
sql = "COPY permalinks (url, topic_id, post_id, created_at, updated_at) FROM STDIN"
|
||||
|
||||
@raw_connection.copy_data(sql, @encoder) do
|
||||
Post.includes(:_custom_fields).find_each do |post|
|
||||
count += 1
|
||||
pcf = post.custom_fields
|
||||
if pcf && pcf["import_id"]
|
||||
topic = post.topic
|
||||
if topic.present?
|
||||
id = pcf["import_id"].split('-').last
|
||||
if post.post_number == 1
|
||||
slug = Slug.for(topic.title) # probably matches what vanilla would do...
|
||||
@raw_connection.put_copy_data(
|
||||
["discussion/#{id}/#{slug}", topic.id, nil, now, now]
|
||||
)
|
||||
else
|
||||
@raw_connection.put_copy_data(
|
||||
["discussion/comment/#{id}", nil, post.id, now, now]
|
||||
)
|
||||
Post
|
||||
.includes(:_custom_fields)
|
||||
.find_each do |post|
|
||||
count += 1
|
||||
pcf = post.custom_fields
|
||||
if pcf && pcf["import_id"]
|
||||
topic = post.topic
|
||||
if topic.present?
|
||||
id = pcf["import_id"].split("-").last
|
||||
if post.post_number == 1
|
||||
slug = Slug.for(topic.title) # probably matches what vanilla would do...
|
||||
@raw_connection.put_copy_data(["discussion/#{id}/#{slug}", topic.id, nil, now, now])
|
||||
else
|
||||
@raw_connection.put_copy_data(["discussion/comment/#{id}", nil, post.id, now, now])
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
|
||||
end
|
||||
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def clean_up(raw, format)
|
||||
raw.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
|
||||
|
||||
raw.gsub!(/<(.+)> <\/\1>/, "\n\n")
|
||||
raw.gsub!(%r{<(.+)> </\1>}, "\n\n")
|
||||
|
||||
html =
|
||||
if format == 'Html'
|
||||
if format == "Html"
|
||||
raw
|
||||
else
|
||||
markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML, autolink: true, tables: true)
|
||||
@ -668,29 +693,23 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
|
||||
doc = Nokogiri::HTML5.fragment(html)
|
||||
|
||||
doc.css("blockquote").each do |bq|
|
||||
name = bq["rel"]
|
||||
user = User.find_by(name: name)
|
||||
bq.replace %{<br>[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]<br>}
|
||||
end
|
||||
doc
|
||||
.css("blockquote")
|
||||
.each do |bq|
|
||||
name = bq["rel"]
|
||||
user = User.find_by(name: name)
|
||||
bq.replace %{<br>[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]<br>}
|
||||
end
|
||||
|
||||
doc.css("font").reverse.each do |f|
|
||||
f.replace f.inner_html
|
||||
end
|
||||
doc.css("font").reverse.each { |f| f.replace f.inner_html }
|
||||
|
||||
doc.css("span").reverse.each do |f|
|
||||
f.replace f.inner_html
|
||||
end
|
||||
doc.css("span").reverse.each { |f| f.replace f.inner_html }
|
||||
|
||||
doc.css("sub").reverse.each do |f|
|
||||
f.replace f.inner_html
|
||||
end
|
||||
doc.css("sub").reverse.each { |f| f.replace f.inner_html }
|
||||
|
||||
doc.css("u").reverse.each do |f|
|
||||
f.replace f.inner_html
|
||||
end
|
||||
doc.css("u").reverse.each { |f| f.replace f.inner_html }
|
||||
|
||||
markdown = format == 'Html' ? ReverseMarkdown.convert(doc.to_html) : doc.to_html
|
||||
markdown = format == "Html" ? ReverseMarkdown.convert(doc.to_html) : doc.to_html
|
||||
markdown.gsub!(/\[QUOTE="([^;]+);c-(\d+)"\]/i) { "[QUOTE=#{$1};#{$2}]" }
|
||||
|
||||
markdown = process_raw_text(markdown)
|
||||
@ -702,31 +721,31 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
text = raw.dup
|
||||
text = CGI.unescapeHTML(text)
|
||||
|
||||
text.gsub!(/:(?:\w{8})\]/, ']')
|
||||
text.gsub!(/:(?:\w{8})\]/, "]")
|
||||
|
||||
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
|
||||
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
|
||||
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
|
||||
|
||||
# phpBB shortens link text like this, which breaks our markdown processing:
|
||||
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
|
||||
#
|
||||
# Work around it for now:
|
||||
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
|
||||
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
|
||||
|
||||
# convert list tags to ul and list=1 tags to ol
|
||||
# list=a is not supported, so handle it like list=1
|
||||
# list=9 and list=x have the same result as list=1 and list=a
|
||||
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
|
||||
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
|
||||
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]')
|
||||
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]')
|
||||
|
||||
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
|
||||
text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
|
||||
text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]')
|
||||
|
||||
# [QUOTE="<username>"] -- add newline
|
||||
text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }
|
||||
|
||||
# [/QUOTE] -- add newline
|
||||
text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
|
||||
text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" }
|
||||
|
||||
text
|
||||
end
|
||||
@ -742,7 +761,6 @@ class BulkImport::Vanilla < BulkImport::Base
|
||||
def mysql_query(sql)
|
||||
@client.query(sql)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
BulkImport::Vanilla.new.start
|
||||
|
@ -7,43 +7,42 @@ require "htmlentities"
require "parallel"

class BulkImport::VBulletin < BulkImport::Base

TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "vb_"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "vb_"
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/shared/import/data/attachments'
AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
ATTACHMENT_DIR ||= ENV["ATTACHMENT_DIR"] || "/shared/import/data/attachments"
AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars"

def initialize
super

host = ENV["DB_HOST"] || "localhost"
host = ENV["DB_HOST"] || "localhost"
username = ENV["DB_USERNAME"] || "root"
password = ENV["DB_PASSWORD"]
database = ENV["DB_NAME"] || "vbulletin"
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"

@html_entities = HTMLEntities.new
@encoding = CHARSET_MAP[charset]

@client = Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true
)
@client =
Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true,
)

@client.query_options.merge!(as: :array, cache_rows: false)

@has_post_thanks = mysql_query(<<-SQL
@has_post_thanks = mysql_query(<<-SQL).to_a.count > 0
SELECT `COLUMN_NAME`
FROM `INFORMATION_SCHEMA`.`COLUMNS`
WHERE `TABLE_SCHEMA`='#{database}'
AND `TABLE_NAME`='user'
AND `COLUMN_NAME` LIKE 'post_thanks_%'
SQL
).to_a.count > 0

@user_ids_by_email = {}
end
@ -95,7 +94,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_groups
puts '', "Importing groups..."
puts "", "Importing groups..."

groups = mysql_stream <<-SQL
SELECT usergroupid, title, description, usertitle
@ -115,7 +114,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_users
puts '', "Importing users..."
puts "", "Importing users..."

users = mysql_stream <<-SQL
SELECT u.userid, username, email, joindate, birthday, ipaddress, u.usergroupid, bandate, liftdate
@ -145,7 +144,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_user_emails
puts '', "Importing user emails..."
puts "", "Importing user emails..."

users = mysql_stream <<-SQL
SELECT u.userid, email, joindate
@ -155,7 +154,7 @@ class BulkImport::VBulletin < BulkImport::Base
SQL

create_user_emails(users) do |row|
user_id, email = row[0 .. 1]
user_id, email = row[0..1]

@user_ids_by_email[email.downcase] ||= []
user_ids = @user_ids_by_email[email.downcase] << user_id
@ -170,7 +169,7 @@ class BulkImport::VBulletin < BulkImport::Base
imported_id: user_id,
imported_user_id: user_id,
email: email,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end

@ -179,7 +178,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_user_stats
puts '', "Importing user stats..."
puts "", "Importing user stats..."

users = mysql_stream <<-SQL
SELECT u.userid, joindate, posts, COUNT(t.threadid) AS threads, p.dateline
@ -199,7 +198,7 @@ class BulkImport::VBulletin < BulkImport::Base
new_since: Time.zone.at(row[1]),
post_count: row[2],
topic_count: row[3],
first_post_created_at: row[4] && Time.zone.at(row[4])
first_post_created_at: row[4] && Time.zone.at(row[4]),
}

if @has_post_thanks
@ -212,7 +211,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_group_users
puts '', "Importing group users..."
puts "", "Importing group users..."

group_users = mysql_stream <<-SQL
SELECT usergroupid, userid
@ -221,15 +220,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL

create_group_users(group_users) do |row|
{
group_id: group_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
}
{ group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) }
end
end

def import_user_passwords
puts '', "Importing user passwords..."
puts "", "Importing user passwords..."

user_passwords = mysql_stream <<-SQL
SELECT userid, password
@ -239,15 +235,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL

create_custom_fields("user", "password", user_passwords) do |row|
{
record_id: user_id_from_imported_id(row[0]),
value: row[1],
}
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
end
end

def import_user_salts
puts '', "Importing user salts..."
puts "", "Importing user salts..."

user_salts = mysql_stream <<-SQL
SELECT userid, salt
@ -258,15 +251,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL

create_custom_fields("user", "salt", user_salts) do |row|
{
record_id: user_id_from_imported_id(row[0]),
value: row[1],
}
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
end
end

def import_user_profiles
puts '', "Importing user profiles..."
puts "", "Importing user profiles..."

user_profiles = mysql_stream <<-SQL
SELECT userid, homepage, profilevisits
@ -278,16 +268,23 @@ class BulkImport::VBulletin < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row[0]),
website: (URI.parse(row[1]).to_s rescue nil),
website:
(
begin
URI.parse(row[1]).to_s
rescue StandardError
nil
end
),
views: row[2],
}
end
end

def import_categories
puts '', "Importing categories..."
puts "", "Importing categories..."

categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
select
forumid,
parentid,
@ -311,23 +308,20 @@ class BulkImport::VBulletin < BulkImport::Base
from forum
order by forumid
SQL
).to_a

return if categories.empty?

parent_categories = categories.select { |c| c[1] == -1 }
parent_categories = categories.select { |c| c[1] == -1 }
children_categories = categories.select { |c| c[1] != -1 }

parent_category_ids = Set.new parent_categories.map { |c| c[0] }

# cut down the tree to only 2 levels of categories
children_categories.each do |cc|
until parent_category_ids.include?(cc[1])
cc[1] = categories.find { |c| c[0] == cc[1] }[1]
end
cc[1] = categories.find { |c| c[0] == cc[1] }[1] until parent_category_ids.include?(cc[1])
end

puts '', "Importing parent categories..."
puts "", "Importing parent categories..."
create_categories(parent_categories) do |row|
{
imported_id: row[0],
@ -337,7 +331,7 @@ class BulkImport::VBulletin < BulkImport::Base
}
end

puts '', "Importing children categories..."
puts "", "Importing children categories..."
create_categories(children_categories) do |row|
{
imported_id: row[0],
@ -350,7 +344,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_topics
puts '', "Importing topics..."
puts "", "Importing topics..."

topics = mysql_stream <<-SQL
SELECT threadid, title, forumid, postuserid, open, dateline, views, visible, sticky
@ -381,7 +375,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_posts
puts '', "Importing posts..."
puts "", "Importing posts..."

posts = mysql_stream <<-SQL
SELECT postid, p.threadid, parentid, userid, p.dateline, p.visible, pagetext
@ -396,7 +390,8 @@ class BulkImport::VBulletin < BulkImport::Base
create_posts(posts) do |row|
topic_id = topic_id_from_imported_id(row[1])
replied_post_topic_id = topic_id_from_imported_post_id(row[2])
reply_to_post_number = topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
reply_to_post_number =
topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil

post = {
imported_id: row[0],
@ -415,7 +410,7 @@ class BulkImport::VBulletin < BulkImport::Base

def import_likes
return unless @has_post_thanks
puts '', "Importing likes..."
puts "", "Importing likes..."

@imported_likes = Set.new
@last_imported_post_id = 0
@ -438,13 +433,13 @@ class BulkImport::VBulletin < BulkImport::Base
post_id: post_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
post_action_type_id: 2,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end

def import_private_topics
puts '', "Importing private topics..."
puts "", "Importing private topics..."

@imported_topics = {}

@ -473,34 +468,31 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_topic_allowed_users
puts '', "Importing topic allowed users..."
puts "", "Importing topic allowed users..."

allowed_users = Set.new

mysql_stream(<<-SQL
mysql_stream(<<-SQL).each do |row|
SELECT pmtextid, touserarray
FROM #{TABLE_PREFIX}pmtext
WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
ORDER BY pmtextid
SQL
).each do |row|
next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET)
row[1].scan(/i:(\d+)/).flatten.each do |id|
next unless user_id = user_id_from_imported_id(id)
allowed_users << [topic_id, user_id]
end
row[1]
.scan(/i:(\d+)/)
.flatten
.each do |id|
next unless user_id = user_id_from_imported_id(id)
allowed_users << [topic_id, user_id]
end
end

create_topic_allowed_users(allowed_users) do |row|
{
topic_id: row[0],
user_id: row[1],
}
end
create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } }
end

def import_private_posts
puts '', "Importing private posts..."
puts "", "Importing private posts..."

posts = mysql_stream <<-SQL
SELECT pmtextid, title, fromuserid, touserarray, dateline, message
@ -527,7 +519,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def create_permalink_file
puts '', 'Creating Permalink File...', ''
puts "", "Creating Permalink File...", ""

total = Topic.listable_topics.count
start = Time.now
@ -538,9 +530,9 @@ class BulkImport::VBulletin < BulkImport::Base
i += 1
pcf = topic.posts.includes(:_custom_fields).where(post_number: 1).first.custom_fields
if pcf && pcf["import_id"]
id = pcf["import_id"].split('-').last
id = pcf["import_id"].split("-").last

f.print [ "XXX#{id} YYY#{topic.id}" ].to_csv
f.print ["XXX#{id} YYY#{topic.id}"].to_csv
print "\r%7d/%7d - %6d/sec" % [i, total, i.to_f / (Time.now - start)] if i % 5000 == 0
end
end
@ -549,7 +541,8 @@ class BulkImport::VBulletin < BulkImport::Base

# find the uploaded file information from the db
def find_upload(post, attachment_id)
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename
sql =
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename
FROM #{TABLE_PREFIX}attachment a
WHERE a.attachmentid = #{attachment_id}"
results = mysql_query(sql)
@ -563,9 +556,10 @@ class BulkImport::VBulletin < BulkImport::Base
user_id = row[1]
db_filename = row[2]

filename = File.join(ATTACHMENT_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
filename =
File.join(ATTACHMENT_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach")
real_filename = db_filename
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."

unless File.exist?(filename)
puts "Attachment file #{row.inspect} doesn't exist"
@ -588,7 +582,7 @@ class BulkImport::VBulletin < BulkImport::Base
end

def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."

RateLimiter.disable
current_count = 0
@ -596,7 +590,7 @@ class BulkImport::VBulletin < BulkImport::Base
success_count = 0
fail_count = 0

attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i

Post.find_each do |post|
current_count += 1
@ -618,7 +612,12 @@ class BulkImport::VBulletin < BulkImport::Base
end

if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin')
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from vBulletin",
)
end

success_count += 1
@ -639,7 +638,7 @@ class BulkImport::VBulletin < BulkImport::Base
Dir.foreach(AVATAR_DIR) do |item|
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]

next if item == ('.') || item == ('..') || item == ('.DS_Store')
next if item == (".") || item == ("..") || item == (".DS_Store")
next unless item =~ /avatar(\d+)_(\d).gif/
scan = item.scan(/avatar(\d+)_(\d).gif/)
next unless scan[0][0].present?
@ -671,11 +670,10 @@ class BulkImport::VBulletin < BulkImport::Base
def import_signatures
puts "Importing user signatures..."

total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first[0].to_i
SELECT COUNT(userid) count
FROM #{TABLE_PREFIX}sigparsed
SQL
).first[0].to_i
current_count = 0

user_signatures = mysql_stream <<-SQL
@ -695,13 +693,20 @@ class BulkImport::VBulletin < BulkImport::Base
next unless u.present?

# can not hold dupes
UserCustomField.where(user_id: u.id, name: ["see_signatures", "signature_raw", "signature_cooked"]).destroy_all
UserCustomField.where(
user_id: u.id,
name: %w[see_signatures signature_raw signature_cooked],
).destroy_all

user_sig.gsub!(/\[\/?sigpic\]/i, "")
user_sig.gsub!(%r{\[/?sigpic\]}i, "")

UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true)
UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig)
UserCustomField.create!(user_id: u.id, name: "signature_cooked", value: PrettyText.cook(user_sig, omit_nofollow: false))
UserCustomField.create!(
user_id: u.id,
name: "signature_cooked",
value: PrettyText.cook(user_sig, omit_nofollow: false),
)
end
end

@ -710,15 +715,15 @@ class BulkImport::VBulletin < BulkImport::Base
total_count = 0

duplicated = {}
@user_ids_by_email.
select { |e, ids| ids.count > 1 }.
each_with_index do |(email, ids), i|
duplicated[email] = [ ids, i ]
@user_ids_by_email
.select { |e, ids| ids.count > 1 }
.each_with_index do |(email, ids), i|
duplicated[email] = [ids, i]
count += 1
total_count += ids.count
end

puts '', "Merging #{total_count} duplicated users across #{count} distinct emails..."
puts "", "Merging #{total_count} duplicated users across #{count} distinct emails..."

start = Time.now

@ -727,14 +732,15 @@ class BulkImport::VBulletin < BulkImport::Base
next unless email.presence

# queried one by one to ensure ordering
first, *rest = user_ids.map do |id|
UserCustomField.includes(:user).find_by!(name: 'import_id', value: id).user
end
first, *rest =
user_ids.map do |id|
UserCustomField.includes(:user).find_by!(name: "import_id", value: id).user
end

rest.each do |dup|
UserMerger.new(dup, first).merge!
first.reload
printf '.'
printf "."
end

print "\n%6d/%6d - %6d/sec" % [i, count, i.to_f / (Time.now - start)] if i % 10 == 0
@ -744,13 +750,11 @@ class BulkImport::VBulletin < BulkImport::Base
end

def save_duplicated_users
File.open('duplicated_users.json', 'w+') do |f|
f.puts @user_ids_by_email.to_json
end
File.open("duplicated_users.json", "w+") { |f| f.puts @user_ids_by_email.to_json }
end

def read_duplicated_users
@user_ids_by_email = JSON.parse File.read('duplicated_users.json')
@user_ids_by_email = JSON.parse File.read("duplicated_users.json")
end

def extract_pm_title(title)
@ -759,17 +763,26 @@ class BulkImport::VBulletin < BulkImport::Base

def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end

def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end

print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
@ -782,7 +795,6 @@ class BulkImport::VBulletin < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end

end

BulkImport::VBulletin.new.run

@ -5,47 +5,56 @@ require "cgi"
require "set"
require "mysql2"
require "htmlentities"
require 'ruby-bbcode-to-md'
require 'find'
require "ruby-bbcode-to-md"
require "find"

class BulkImport::VBulletin5 < BulkImport::Base

DB_PREFIX = ""
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
ATTACH_DIR ||= ENV['ATTACH_DIR'] || '/shared/import/data/attachments'
AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
ATTACH_DIR ||= ENV["ATTACH_DIR"] || "/shared/import/data/attachments"
AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars"
ROOT_NODE = 2

def initialize
super

host = ENV["DB_HOST"] || "localhost"
host = ENV["DB_HOST"] || "localhost"
username = ENV["DB_USERNAME"] || "root"
password = ENV["DB_PASSWORD"]
database = ENV["DB_NAME"] || "vbulletin"
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"

@html_entities = HTMLEntities.new
@encoding = CHARSET_MAP[charset]
@bbcode_to_md = true

@client = Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true
)
@client =
Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true,
)

@client.query_options.merge!(as: :array, cache_rows: false)

# TODO: Add `LIMIT 1` to the below queries
# ------
# be aware there may be other contenttypeid's in use, such as poll, link, video, etc.
@forum_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][0]
@channel_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[0][0]
@text_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0]
@forum_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][
0
]
@channel_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[
0
][
0
]
@text_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0]
end

def execute
@ -127,7 +136,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
date_of_birth: parse_birthday(row[3]),
primary_group_id: group_id_from_imported_id(row[5]),
admin: row[5] == 6,
moderator: row[5] == 7
moderator: row[5] == 7,
}
u[:ip_address] = row[4][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row[4].present?
if row[7]
@ -153,7 +162,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
imported_id: row[0],
imported_user_id: row[0],
email: random_email,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
@ -203,10 +212,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
SQL

create_group_users(group_users) do |row|
{
group_id: group_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
}
{ group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) }
end

# import secondary group memberships
@ -228,12 +234,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end
end

create_group_users(group_mapping) do |row|
{
group_id: row[0],
user_id: row[1]
}
end
create_group_users(group_mapping) { |row| { group_id: row[0], user_id: row[1] } }
end

def import_user_profiles
@ -249,7 +250,14 @@ class BulkImport::VBulletin5 < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row[0]),
website: (URI.parse(row[1]).to_s rescue nil),
website:
(
begin
URI.parse(row[1]).to_s
rescue StandardError
nil
end
),
views: row[2],
}
end
@ -258,7 +266,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
def import_categories
puts "Importing categories..."

categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
SELECT nodeid AS forumid, title, description, displayorder, parentid, urlident
FROM #{DB_PREFIX}node
WHERE parentid = #{ROOT_NODE}
@ -269,11 +277,10 @@ class BulkImport::VBulletin5 < BulkImport::Base
WHERE contenttypeid = #{@channel_typeid}
AND nodeid > #{@last_imported_category_id}
SQL
).to_a

return if categories.empty?

parent_categories = categories.select { |c| c[4] == ROOT_NODE }
parent_categories = categories.select { |c| c[4] == ROOT_NODE }
children_categories = categories.select { |c| c[4] != ROOT_NODE }

parent_category_ids = Set.new parent_categories.map { |c| c[0] }
@ -285,7 +292,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
name: normalize_text(row[1]),
description: normalize_text(row[2]),
position: row[3],
slug: row[5]
slug: row[5],
}
end

@ -297,7 +304,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
description: normalize_text(row[2]),
position: row[3],
parent_category_id: category_id_from_imported_id(row[4]),
slug: row[5]
slug: row[5],
}
end
end
@ -428,7 +435,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
post_id: post_id,
user_id: user_id,
post_action_type_id: 2,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
@ -455,7 +462,6 @@ class BulkImport::VBulletin5 < BulkImport::Base
user_id: user_id_from_imported_id(row[2]),
created_at: Time.zone.at(row[3]),
}

end
end

@ -475,17 +481,18 @@ class BulkImport::VBulletin5 < BulkImport::Base
users_added = Set.new

create_topic_allowed_users(mysql_stream(allowed_users_sql)) do |row|
next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) || topic_id_from_imported_id(row[2] + PRIVATE_OFFSET)
unless topic_id =
topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) ||
topic_id_from_imported_id(row[2] + PRIVATE_OFFSET)
next
end
next unless user_id = user_id_from_imported_id(row[1])
next if users_added.add?([topic_id, user_id]).nil?
added += 1
{
topic_id: topic_id,
user_id: user_id,
}
{ topic_id: topic_id, user_id: user_id }
end

puts '', "Added #{added} topic allowed users records."
puts "", "Added #{added} topic allowed users records."
end

def import_private_first_posts
@ -543,7 +550,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end

def create_permalinks
puts '', 'creating permalinks...', ''
puts "", "creating permalinks...", ""

# add permalink normalizations to site settings
# EVERYTHING: /.*\/([\w-]+)$/\1 -- selects the last segment of the URL
@ -580,21 +587,23 @@ class BulkImport::VBulletin5 < BulkImport::Base
return nil
end

tmpfile = 'attach_' + row[6].to_s
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'wb') { |f| f.write(row[5]) }
tmpfile = "attach_" + row[6].to_s
filename = File.join("/tmp/", tmpfile)
File.open(filename, "wb") { |f| f.write(row[5]) }
filename
end

def find_upload(post, opts = {})
if opts[:node_id].present?
sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
sql =
"SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
FROM #{DB_PREFIX}attach a
LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid
LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid
WHERE a.nodeid = #{opts[:node_id]}"
elsif opts[:attachment_id].present?
sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
sql =
"SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
FROM #{DB_PREFIX}attachment a
LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid
LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid
@ -612,9 +621,9 @@ class BulkImport::VBulletin5 < BulkImport::Base
user_id = row[3]
db_filename = row[2]

filename = File.join(ATTACH_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
filename = File.join(ATTACH_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach")
real_filename = db_filename
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."

unless File.exist?(filename)
filename = check_database_for_attachment(row) if filename.blank?
@ -637,7 +646,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end

def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."

# add extensions to authorized setting
#ext = mysql_query("SELECT GROUP_CONCAT(DISTINCT(extension)) exts FROM #{DB_PREFIX}filedata").first[0].split(',')
@ -655,8 +664,8 @@ class BulkImport::VBulletin5 < BulkImport::Base
# new style matches the nodeid in the attach table
# old style matches the filedataid in attach/filedata tables
# if the site is very old, there may be multiple different attachment syntaxes used in posts
attachment_regex = /\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[\/attach\]/i
attachment_regex_oldstyle = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[/attach\]}i
attachment_regex_oldstyle = %r{\[attach[^\]]*\](\d+)\[/attach\]}i

Post.find_each do |post|
current_count += 1
@ -715,9 +724,18 @@ class BulkImport::VBulletin5 < BulkImport::Base

def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end

def preprocess_raw(raw)
@ -726,33 +744,37 @@ class BulkImport::VBulletin5 < BulkImport::Base
raw = raw.dup

# [PLAINTEXT]...[/PLAINTEXT]
raw.gsub!(/\[\/?PLAINTEXT\]/i, "\n\n```\n\n")
raw.gsub!(%r{\[/?PLAINTEXT\]}i, "\n\n```\n\n")

# [FONT=font]...[/FONT]
raw.gsub!(/\[FONT=\w*\]/im, "")
raw.gsub!(/\[\/FONT\]/im, "")
raw.gsub!(%r{\[/FONT\]}im, "")

# @[URL=<user_profile>]<username>[/URL]
# [USER=id]username[/USER]
# [MENTION=id]username[/MENTION]
raw.gsub!(/@\[URL=\"\S+\"\]([\w\s]+)\[\/URL\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(/\[USER=\"\d+\"\]([\S]+)\[\/USER\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(/\[MENTION=\d+\]([\S]+)\[\/MENTION\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{@\[URL=\"\S+\"\]([\w\s]+)\[/URL\]}i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{\[USER=\"\d+\"\]([\S]+)\[/USER\]}i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{\[MENTION=\d+\]([\S]+)\[/MENTION\]}i) { "@#{$1.gsub(" ", "_")}" }

# [IMG2=JSON]{..."src":"<url>"}[/IMG2]
raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) { "\n#{CGI::unescape($1)}\n" }
raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) do
"\n#{CGI.unescape($1)}\n"
end

# [TABLE]...[/TABLE]
raw.gsub!(/\[TABLE=\\"[\w:\-\s,]+\\"\]/i, "")
raw.gsub!(/\[\/TABLE\]/i, "")
raw.gsub!(%r{\[/TABLE\]}i, "")

# [HR]...[/HR]
raw.gsub(/\[HR\]\s*\[\/HR\]/im, "---")
raw.gsub(%r{\[HR\]\s*\[/HR\]}im, "---")

# [VIDEO=youtube_share;<id>]...[/VIDEO]
# [VIDEO=vimeo;<id>]...[/VIDEO]
raw.gsub!(/\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[VIDEO=VIMEO;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://vimeo.com/#{$1}\n" }
raw.gsub!(%r{\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
raw.gsub!(%r{\[VIDEO=VIMEO;([^\]]+)\].*?\[/VIDEO\]}i) { "\nhttps://vimeo.com/#{$1}\n" }

raw
end
@ -760,9 +782,9 @@ class BulkImport::VBulletin5 < BulkImport::Base
def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end

print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
@ -775,7 +797,6 @@ class BulkImport::VBulletin5 < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end

end

BulkImport::VBulletin5.new.run