From 436b3b392b9c917510d4ff0d73a5167cd3eb936c Mon Sep 17 00:00:00 2001 From: David Taylor Date: Sat, 7 Jan 2023 11:53:14 +0000 Subject: [PATCH] DEV: Apply syntax_tree formatting to `script/*` --- .streerc | 1 - script/analyse_message_bus.rb | 41 +- script/analyze_sidekiq_queues.rb | 11 +- script/bench.rb | 195 ++-- script/benchmarks/cache/bench.rb | 5 +- script/benchmarks/markdown/bench.rb | 10 +- script/benchmarks/middleware/test.rb | 18 +- script/benchmarks/site_setting/bench.rb | 33 +- script/benchmarks/site_setting/profile.rb | 24 +- script/biggest_objects.rb | 34 +- script/boot_mem.rb | 34 +- script/bulk_import/base.rb | 437 +++++---- script/bulk_import/discourse_merger.rb | 757 ++++++++------- script/bulk_import/phpbb_postgresql.rb | 153 +-- script/bulk_import/vanilla.rb | 474 +++++----- script/bulk_import/vbulletin.rb | 216 +++-- script/bulk_import/vbulletin5.rb | 161 ++-- script/check_forking.rb | 22 +- script/db_timestamps_mover.rb | 32 +- script/diff_heaps.rb | 40 +- script/docker_test.rb | 6 +- script/i18n_lint.rb | 42 +- script/import_scripts/answerbase.rb | 167 ++-- script/import_scripts/answerhub.rb | 211 ++--- script/import_scripts/askbot.rb | 114 ++- script/import_scripts/base.rb | 171 ++-- script/import_scripts/base/csv_helper.rb | 78 +- .../import_scripts/base/generic_database.rb | 38 +- .../import_scripts/base/lookup_container.rb | 42 +- script/import_scripts/base/uploader.rb | 20 +- script/import_scripts/bbpress.rb | 230 ++--- script/import_scripts/bespoke_1.rb | 131 ++- script/import_scripts/csv_importer.rb | 117 ++- .../csv_restore_staged_users.rb | 50 +- script/import_scripts/discuz_x.rb | 877 +++++++++++------- script/import_scripts/disqus.rb | 68 +- script/import_scripts/drupal-6.rb | 150 +-- script/import_scripts/drupal.rb | 225 +++-- script/import_scripts/drupal_json.rb | 9 +- script/import_scripts/drupal_qa.rb | 157 ++-- script/import_scripts/elgg.rb | 106 ++- script/import_scripts/flarum_import.rb | 102 +- script/import_scripts/fluxbb.rb | 159 ++-- script/import_scripts/friendsmegplus.rb | 197 ++-- script/import_scripts/getsatisfaction.rb | 96 +- script/import_scripts/google_groups.rb | 148 +-- script/import_scripts/higher_logic.rb | 96 +- script/import_scripts/ipboard.rb | 734 ++++++++------- script/import_scripts/ipboard3.rb | 121 +-- script/import_scripts/jforum.rb | 91 +- script/import_scripts/jive.rb | 224 +++-- script/import_scripts/jive_api.rb | 265 ++++-- script/import_scripts/json_generic.rb | 62 +- script/import_scripts/kunena.rb | 101 +- script/import_scripts/kunena3.rb | 122 ++- script/import_scripts/lithium.rb | 563 ++++++----- script/import_scripts/mbox.rb | 7 +- script/import_scripts/mbox/importer.rb | 86 +- .../import_scripts/mbox/support/database.rb | 33 +- script/import_scripts/mbox/support/indexer.rb | 55 +- .../import_scripts/mbox/support/settings.rb | 34 +- script/import_scripts/modx.rb | 212 +++-- script/import_scripts/muut.rb | 67 +- script/import_scripts/mybb.rb | 194 ++-- script/import_scripts/mybbru.rb | 62 +- script/import_scripts/mylittleforum.rb | 313 ++++--- script/import_scripts/nabble.rb | 99 +- script/import_scripts/ning.rb | 184 ++-- script/import_scripts/nodebb/mongo.rb | 22 +- script/import_scripts/nodebb/nodebb.rb | 196 ++-- script/import_scripts/nodebb/redis.rb | 18 +- script/import_scripts/phorum.rb | 153 +-- script/import_scripts/phpbb3.rb | 18 +- .../phpbb3/database/database.rb | 12 +- .../phpbb3/database/database_3_0.rb | 4 +- .../phpbb3/database/database_3_1.rb | 19 +- .../phpbb3/database/database_base.rb | 4 +- 
script/import_scripts/phpbb3/importer.rb | 75 +- .../phpbb3/importers/avatar_importer.rb | 29 +- .../phpbb3/importers/bookmark_importer.rb | 2 +- .../phpbb3/importers/category_importer.rb | 23 +- .../phpbb3/importers/importer_factory.rb | 34 +- .../phpbb3/importers/message_importer.rb | 43 +- .../phpbb3/importers/permalink_importer.rb | 12 +- .../phpbb3/importers/poll_importer.rb | 24 +- .../phpbb3/importers/post_importer.rb | 21 +- .../phpbb3/importers/user_importer.rb | 81 +- .../phpbb3/support/bbcode/markdown_node.rb | 6 +- .../phpbb3/support/bbcode/xml_to_markdown.rb | 107 ++- .../phpbb3/support/constants.rb | 10 +- .../import_scripts/phpbb3/support/settings.rb | 87 +- .../phpbb3/support/smiley_processor.rb | 60 +- .../phpbb3/support/text_processor.rb | 103 +- script/import_scripts/punbb.rb | 121 +-- script/import_scripts/quandora/export.rb | 20 +- script/import_scripts/quandora/import.rb | 25 +- .../import_scripts/quandora/quandora_api.rb | 11 +- .../quandora/quandora_question.rb | 77 +- .../import_scripts/quandora/test/test_data.rb | 11 +- .../quandora/test/test_quandora_api.rb | 59 +- .../quandora/test/test_quandora_question.rb | 115 ++- script/import_scripts/question2answer.rb | 216 +++-- script/import_scripts/sfn.rb | 43 +- script/import_scripts/simplepress.rb | 116 +-- script/import_scripts/smf1.rb | 290 +++--- script/import_scripts/smf2.rb | 448 +++++---- .../import_scripts/socialcast/create_title.rb | 9 +- script/import_scripts/socialcast/export.rb | 30 +- script/import_scripts/socialcast/import.rb | 30 +- .../socialcast/socialcast_api.rb | 9 +- .../socialcast/socialcast_message.rb | 53 +- .../socialcast/socialcast_user.rb | 18 +- .../socialcast/test/test_create_title.rb | 42 +- .../socialcast/test/test_data.rb | 9 +- .../socialcast/test/test_socialcast_api.rb | 45 +- script/import_scripts/socialcast/title.rb | 12 +- script/import_scripts/sourceforge.rb | 45 +- script/import_scripts/stack_overflow.rb | 73 +- .../support/convert_mysql_xml_to_mysql.rb | 19 +- script/import_scripts/telligent.rb | 237 ++--- script/import_scripts/vanilla.rb | 93 +- script/import_scripts/vanilla_body_parser.rb | 92 +- script/import_scripts/vanilla_mysql.rb | 474 ++++++---- script/import_scripts/vbulletin.rb | 368 ++++---- script/import_scripts/vbulletin5.rb | 421 +++++---- script/import_scripts/xenforo.rb | 296 +++--- script/import_scripts/yahoogroup.rb | 73 +- script/import_scripts/zendesk.rb | 113 +-- script/import_scripts/zendesk_api.rb | 282 +++--- script/import_scripts/zoho.rb | 135 ++- script/measure.rb | 39 +- script/memstats.rb | 46 +- script/micro_bench.rb | 26 +- script/profile_db_generator.rb | 79 +- script/redis_memory.rb | 11 +- script/require_profiler.rb | 46 +- script/spawn_backup_restore.rb | 7 +- script/test_email_settings.rb | 24 +- script/test_mem.rb | 23 +- script/test_memory_leak.rb | 88 +- script/test_pretty_text.rb | 2 +- script/thread_detective.rb | 9 +- script/user_simulator.rb | 31 +- 143 files changed, 8905 insertions(+), 7353 deletions(-) diff --git a/.streerc b/.streerc index 8a004597732..612e4b246dc 100644 --- a/.streerc +++ b/.streerc @@ -5,5 +5,4 @@ --ignore-files=config/* --ignore-files=db/* --ignore-files=lib/* ---ignore-files=script/* --ignore-files=spec/* diff --git a/script/analyse_message_bus.rb b/script/analyse_message_bus.rb index a65bd53f02f..ef3e0eacb33 100644 --- a/script/analyse_message_bus.rb +++ b/script/analyse_message_bus.rb @@ -9,23 +9,24 @@ wait_seconds = ARGV[0]&.to_i || 10 puts "Counting messages for #{wait_seconds} seconds..." 
-print 'Seen 0 messages' -t = Thread.new do - MessageBus.backend_instance.global_subscribe do |m| - channel = m.channel - if channel.start_with?("/distributed_hash") - payload = JSON.parse(m.data)["data"] - info = payload["hash_key"] - # info += ".#{payload["key"]}" # Uncomment if you need more granular info - channel += " (#{info})" +print "Seen 0 messages" +t = + Thread.new do + MessageBus.backend_instance.global_subscribe do |m| + channel = m.channel + if channel.start_with?("/distributed_hash") + payload = JSON.parse(m.data)["data"] + info = payload["hash_key"] + # info += ".#{payload["key"]}" # Uncomment if you need more granular info + channel += " (#{info})" + end + + channel_counters[channel] += 1 + messages_seen += 1 + + print "\rSeen #{messages_seen} messages from #{channel_counters.size} channels" end - - channel_counters[channel] += 1 - messages_seen += 1 - - print "\rSeen #{messages_seen} messages from #{channel_counters.size} channels" end -end sleep wait_seconds @@ -53,10 +54,12 @@ puts "| #{"channel".ljust(max_channel_name_length)} | #{"message count".rjust(ma puts "|#{"-" * (max_channel_name_length + 2)}|#{"-" * (max_count_length + 2)}|" result_count = 10 -sorted_results.first(result_count).each do |name, value| - name = "`#{name}`" - puts "| #{name.ljust(max_channel_name_length)} | #{value.to_s.rjust(max_count_length)} |" -end +sorted_results + .first(result_count) + .each do |name, value| + name = "`#{name}`" + puts "| #{name.ljust(max_channel_name_length)} | #{value.to_s.rjust(max_count_length)} |" + end other_count = messages_seen - sorted_results.first(result_count).sum { |k, v| v } puts "| #{"(other)".ljust(max_channel_name_length)} | #{other_count.to_s.rjust(max_count_length)} |" puts "|#{" " * (max_channel_name_length + 2)}|#{" " * (max_count_length + 2)}|" diff --git a/script/analyze_sidekiq_queues.rb b/script/analyze_sidekiq_queues.rb index fcb736d7624..d5e9c4fd34e 100644 --- a/script/analyze_sidekiq_queues.rb +++ b/script/analyze_sidekiq_queues.rb @@ -2,17 +2,14 @@ require File.expand_path("../../config/environment", __FILE__) -queues = %w{default low ultra_low critical}.map { |name| Sidekiq::Queue.new(name) }.lazy.flat_map(&:lazy) +queues = + %w[default low ultra_low critical].map { |name| Sidekiq::Queue.new(name) }.lazy.flat_map(&:lazy) stats = Hash.new(0) -queues.each do |j| - stats[j.klass] += 1 -end +queues.each { |j| stats[j.klass] += 1 } -stats.sort_by { |a, b| -b }.each do |name, count| - puts "#{name}: #{count}" -end +stats.sort_by { |a, b| -b }.each { |name, count| puts "#{name}: #{count}" } dupes = Hash.new([]) queues.each do |j| diff --git a/script/bench.rb b/script/bench.rb index e114cdd2cbf..6e10dcd1125 100644 --- a/script/bench.rb +++ b/script/bench.rb @@ -19,46 +19,43 @@ require "uri" @skip_asset_bundle = false @unicorn_workers = 3 -opts = OptionParser.new do |o| - o.banner = "Usage: ruby bench.rb [options]" +opts = + OptionParser.new do |o| + o.banner = "Usage: ruby bench.rb [options]" - o.on("-n", "--with_default_env", "Include recommended Discourse env") do - @include_env = true - end - o.on("-o", "--output [FILE]", "Output results to this file") do |f| - @result_file = f - end - o.on("-i", "--iterations [ITERATIONS]", "Number of iterations to run the bench for") do |i| - @iterations = i.to_i - end - o.on("-b", "--best_of [NUM]", "Number of times to run the bench taking best as result") do |i| - @best_of = i.to_i - end - o.on("-d", "--heap_dump") do - @dump_heap = true - # We need an env var for config/boot.rb to enable allocation tracing 
prior to framework init - ENV['DISCOURSE_DUMP_HEAP'] = "1" - end - o.on("-m", "--memory_stats") do - @mem_stats = true - end - o.on("-u", "--unicorn", "Use unicorn to serve pages as opposed to puma") do - @unicorn = true - end - o.on("-c", "--concurrency [NUM]", "Run benchmark with this number of concurrent requests (default: 1)") do |i| - @concurrency = i.to_i - end - o.on("-w", "--unicorn_workers [NUM]", "Run benchmark with this number of unicorn workers (default: 3)") do |i| - @unicorn_workers = i.to_i - end - o.on("-s", "--skip-bundle-assets", "Skip bundling assets") do - @skip_asset_bundle = true - end + o.on("-n", "--with_default_env", "Include recommended Discourse env") { @include_env = true } + o.on("-o", "--output [FILE]", "Output results to this file") { |f| @result_file = f } + o.on("-i", "--iterations [ITERATIONS]", "Number of iterations to run the bench for") do |i| + @iterations = i.to_i + end + o.on("-b", "--best_of [NUM]", "Number of times to run the bench taking best as result") do |i| + @best_of = i.to_i + end + o.on("-d", "--heap_dump") do + @dump_heap = true + # We need an env var for config/boot.rb to enable allocation tracing prior to framework init + ENV["DISCOURSE_DUMP_HEAP"] = "1" + end + o.on("-m", "--memory_stats") { @mem_stats = true } + o.on("-u", "--unicorn", "Use unicorn to serve pages as opposed to puma") { @unicorn = true } + o.on( + "-c", + "--concurrency [NUM]", + "Run benchmark with this number of concurrent requests (default: 1)", + ) { |i| @concurrency = i.to_i } + o.on( + "-w", + "--unicorn_workers [NUM]", + "Run benchmark with this number of unicorn workers (default: 3)", + ) { |i| @unicorn_workers = i.to_i } + o.on("-s", "--skip-bundle-assets", "Skip bundling assets") { @skip_asset_bundle = true } - o.on("-t", "--tests [STRING]", "List of tests to run. Example: '--tests topic,categories')") do |i| - @tests = i.split(",") + o.on( + "-t", + "--tests [STRING]", + "List of tests to run. Example: '--tests topic,categories')", + ) { |i| @tests = i.split(",") } end -end opts.parse! def run(command, opt = nil) @@ -73,7 +70,7 @@ def run(command, opt = nil) end begin - require 'facter' + require "facter" raise LoadError if Gem::Version.new(Facter.version) < Gem::Version.new("4.0") rescue LoadError run "gem install facter" @@ -113,7 +110,7 @@ end puts "Ensuring config is setup" -%x{which ab > /dev/null 2>&1} +`which ab > /dev/null 2>&1` unless $? == 0 abort "Apache Bench is not installed. 
Try: apt-get install apache2-utils or brew install ab" end @@ -125,7 +122,7 @@ end ENV["RAILS_ENV"] = "profile" -discourse_env_vars = %w( +discourse_env_vars = %w[ DISCOURSE_DUMP_HEAP RUBY_GC_HEAP_INIT_SLOTS RUBY_GC_HEAP_FREE_SLOTS @@ -140,27 +137,22 @@ discourse_env_vars = %w( RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR RUBY_GLOBAL_METHOD_CACHE_SIZE LD_PRELOAD -) +] if @include_env puts "Running with tuned environment" - discourse_env_vars.each do |v| - ENV.delete v - end - - ENV['RUBY_GLOBAL_METHOD_CACHE_SIZE'] = '131072' - ENV['RUBY_GC_HEAP_GROWTH_MAX_SLOTS'] = '40000' - ENV['RUBY_GC_HEAP_INIT_SLOTS'] = '400000' - ENV['RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR'] = '1.5' + discourse_env_vars.each { |v| ENV.delete v } + ENV["RUBY_GLOBAL_METHOD_CACHE_SIZE"] = "131072" + ENV["RUBY_GC_HEAP_GROWTH_MAX_SLOTS"] = "40000" + ENV["RUBY_GC_HEAP_INIT_SLOTS"] = "400000" + ENV["RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR"] = "1.5" else # clean env puts "Running with the following custom environment" end -discourse_env_vars.each do |w| - puts "#{w}: #{ENV[w]}" if ENV[w].to_s.length > 0 -end +discourse_env_vars.each { |w| puts "#{w}: #{ENV[w]}" if ENV[w].to_s.length > 0 } def port_available?(port) server = TCPServer.open("0.0.0.0", port) @@ -170,20 +162,16 @@ rescue Errno::EADDRINUSE false end -@port = 60079 +@port = 60_079 -while !port_available? @port - @port += 1 -end +@port += 1 while !port_available? @port puts "Ensuring profiling DB exists and is migrated" puts `bundle exec rake db:create` `bundle exec rake db:migrate` puts "Timing loading Rails" -measure("load_rails") do - `bundle exec rake middleware` -end +measure("load_rails") { `bundle exec rake middleware` } puts "Populating Profile DB" run("bundle exec ruby script/profile_db_generator.rb") @@ -223,16 +211,21 @@ begin pid = if @unicorn - ENV['UNICORN_PORT'] = @port.to_s - ENV['UNICORN_WORKERS'] = @unicorn_workers.to_s - FileUtils.mkdir_p(File.join('tmp', 'pids')) + ENV["UNICORN_PORT"] = @port.to_s + ENV["UNICORN_WORKERS"] = @unicorn_workers.to_s + FileUtils.mkdir_p(File.join("tmp", "pids")) unicorn_pid = spawn("bundle exec unicorn -c config/unicorn.conf.rb") - while (unicorn_master_pid = `ps aux | grep "unicorn master" | grep -v "grep" | awk '{print $2}'`.strip.to_i) == 0 + while ( + unicorn_master_pid = + `ps aux | grep "unicorn master" | grep -v "grep" | awk '{print $2}'`.strip.to_i + ) == 0 sleep 1 end - while `ps -f --ppid #{unicorn_master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i).size != @unicorn_workers.to_i + while `ps -f --ppid #{unicorn_master_pid} | grep worker | awk '{ print $2 }'`.split("\n") + .map(&:to_i) + .size != @unicorn_workers.to_i sleep 1 end @@ -241,48 +234,38 @@ begin spawn("bundle exec puma -p #{@port} -e production") end - while port_available? @port - sleep 1 - end + sleep 1 while port_available? @port puts "Starting benchmark..." 
- admin_headers = { - 'Api-Key' => admin_api_key, - 'Api-Username' => "admin1" - } + admin_headers = { "Api-Key" => admin_api_key, "Api-Username" => "admin1" } - user_headers = { - 'User-Api-Key' => user_api_key - } + user_headers = { "User-Api-Key" => user_api_key } # asset precompilation is a dog, wget to force it run "curl -s -o /dev/null http://127.0.0.1:#{@port}/" redirect_response = `curl -s -I "http://127.0.0.1:#{@port}/t/i-am-a-topic-used-for-perf-tests"` - if redirect_response !~ /301 Moved Permanently/ - raise "Unable to locate topic for perf tests" - end + raise "Unable to locate topic for perf tests" if redirect_response !~ /301 Moved Permanently/ - topic_url = redirect_response.match(/^location: .+(\/t\/i-am-a-topic-used-for-perf-tests\/.+)$/i)[1].strip + topic_url = + redirect_response.match(%r{^location: .+(/t/i-am-a-topic-used-for-perf-tests/.+)$}i)[1].strip all_tests = [ - ["categories", "/categories"], - ["home", "/"], + %w[categories /categories], + %w[home /], ["topic", topic_url], ["topic.json", "#{topic_url}.json"], ["user activity", "/u/admin1/activity"], ] - @tests ||= %w{categories home topic} + @tests ||= %w[categories home topic] - tests_to_run = all_tests.select do |test_name, path| - @tests.include?(test_name) - end + tests_to_run = all_tests.select { |test_name, path| @tests.include?(test_name) } tests_to_run.concat( tests_to_run.map { |k, url| ["#{k} user", "#{url}", user_headers] }, - tests_to_run.map { |k, url| ["#{k} admin", "#{url}", admin_headers] } + tests_to_run.map { |k, url| ["#{k} admin", "#{url}", admin_headers] }, ) tests_to_run.each do |test_name, path, headers_for_path| @@ -290,15 +273,11 @@ begin http = Net::HTTP.new(uri.host, uri.port) request = Net::HTTP::Get.new(uri.request_uri) - headers_for_path&.each do |key, value| - request[key] = value - end + headers_for_path&.each { |key, value| request[key] = value } response = http.request(request) - if response.code != "200" - raise "#{test_name} #{path} returned non 200 response code" - end + raise "#{test_name} #{path} returned non 200 response code" if response.code != "200" end # NOTE: we run the most expensive page first in the bench @@ -335,11 +314,17 @@ begin Facter.reset facts = Facter.to_hash - facts.delete_if { |k, v| - !["operatingsystem", "architecture", "kernelversion", - "memorysize", "physicalprocessorcount", "processor0", - "virtual"].include?(k) - } + facts.delete_if do |k, v| + !%w[ + operatingsystem + architecture + kernelversion + memorysize + physicalprocessorcount + processor0 + virtual + ].include?(k) + end run("RAILS_ENV=profile bundle exec rake assets:clean") @@ -349,10 +334,13 @@ begin mem = get_mem(pid) - results = results.merge("timings" => @timings, - "ruby-version" => "#{RUBY_DESCRIPTION}", - "rss_kb" => mem["rss_kb"], - "pss_kb" => mem["pss_kb"]).merge(facts) + results = + results.merge( + "timings" => @timings, + "ruby-version" => "#{RUBY_DESCRIPTION}", + "rss_kb" => mem["rss_kb"], + "pss_kb" => mem["pss_kb"], + ).merge(facts) if @unicorn child_pids = `ps --ppid #{pid} | awk '{ print $1; }' | grep -v PID`.split("\n") @@ -375,12 +363,7 @@ begin puts open("http://127.0.0.1:#{@port}/admin/dump_heap", headers).read end - if @result_file - File.open(@result_file, "wb") do |f| - f.write(results) - end - end - + File.open(@result_file, "wb") { |f| f.write(results) } if @result_file ensure Process.kill "KILL", pid end diff --git a/script/benchmarks/cache/bench.rb b/script/benchmarks/cache/bench.rb index fad1b73607a..87d4a1e4d30 100644 --- a/script/benchmarks/cache/bench.rb 
+++ b/script/benchmarks/cache/bench.rb @@ -1,10 +1,9 @@ # frozen_string_literal: true -require 'benchmark/ips' -require File.expand_path('../../../../config/environment', __FILE__) +require "benchmark/ips" +require File.expand_path("../../../../config/environment", __FILE__) Benchmark.ips do |x| - x.report("redis setex string") do |times| while times > 0 Discourse.redis.setex("test_key", 60, "test") diff --git a/script/benchmarks/markdown/bench.rb b/script/benchmarks/markdown/bench.rb index 00bd7573d8d..5e54d61755b 100644 --- a/script/benchmarks/markdown/bench.rb +++ b/script/benchmarks/markdown/bench.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -require 'benchmark/ips' -require File.expand_path('../../../../config/environment', __FILE__) +require "benchmark/ips" +require File.expand_path("../../../../config/environment", __FILE__) # set any flags here # MiniRacer::Platform.set_flags! :noturbo @@ -10,7 +10,7 @@ tests = [ ["tiny post", "**hello**"], ["giant post", File.read("giant_post.md")], ["most features", File.read("most_features.md")], - ["lots of mentions", File.read("lots_of_mentions.md")] + ["lots of mentions", File.read("lots_of_mentions.md")], ] PrettyText.cook("") @@ -31,9 +31,7 @@ PrettyText.v8.eval("window.commonmark = window.markdownit('commonmark')") Benchmark.ips do |x| [true, false].each do |sanitize| tests.each do |test, text| - x.report("#{test} sanitize: #{sanitize}") do - PrettyText.markdown(text, sanitize: sanitize) - end + x.report("#{test} sanitize: #{sanitize}") { PrettyText.markdown(text, sanitize: sanitize) } end end diff --git a/script/benchmarks/middleware/test.rb b/script/benchmarks/middleware/test.rb index 1432b9227e2..8b0cca5f6ea 100644 --- a/script/benchmarks/middleware/test.rb +++ b/script/benchmarks/middleware/test.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -require 'memory_profiler' -require 'benchmark/ips' +require "memory_profiler" +require "benchmark/ips" ENV["RAILS_ENV"] = "production" @@ -14,12 +14,10 @@ def req "timings[1]" => "1001", "timings[2]" => "1001", "timings[3]" => "1001", - "topic_id" => "490310" + "topic_id" => "490310", } - data = data.map do |k, v| - "#{CGI.escape(k)}=#{v}" - end.join("&") + data = data.map { |k, v| "#{CGI.escape(k)}=#{v}" }.join("&") { "REQUEST_METHOD" => "POST", @@ -33,7 +31,7 @@ def req "HTTP_COOKIE" => "_t=#{_t}", "rack.input" => StringIO.new(data), "rack.version" => [1, 2], - "rack.url_scheme" => "http" + "rack.url_scheme" => "http", } end @@ -45,11 +43,7 @@ end exit # # -StackProf.run(mode: :wall, out: 'report.dump') do - 1000.times do - Rails.application.call(req) - end -end +StackProf.run(mode: :wall, out: "report.dump") { 1000.times { Rails.application.call(req) } } # # MemoryProfiler.start # Rails.application.call(req) diff --git a/script/benchmarks/site_setting/bench.rb b/script/benchmarks/site_setting/bench.rb index 5790b41ffa8..02676e8570d 100644 --- a/script/benchmarks/site_setting/bench.rb +++ b/script/benchmarks/site_setting/bench.rb @@ -1,37 +1,32 @@ # frozen_string_literal: true -require 'benchmark/ips' -require File.expand_path('../../../../config/environment', __FILE__) +require "benchmark/ips" +require File.expand_path("../../../../config/environment", __FILE__) # Put pre conditions here # Used db but it's OK in the most cases # build the cache SiteSetting.title = SecureRandom.hex -SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en' +SiteSetting.default_locale = SiteSetting.default_locale == "en" ? "zh_CN" : "en" SiteSetting.refresh! 
tests = [ - ["current cache", lambda do - SiteSetting.title - SiteSetting.enable_discourse_connect - end + [ + "current cache", + lambda do + SiteSetting.title + SiteSetting.enable_discourse_connect + end, ], - ["change default locale with current cache refreshed", lambda do - SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en' - end - ], - ["change site setting", lambda do - SiteSetting.title = SecureRandom.hex - end + [ + "change default locale with current cache refreshed", + lambda { SiteSetting.default_locale = SiteSetting.default_locale == "en" ? "zh_CN" : "en" }, ], + ["change site setting", lambda { SiteSetting.title = SecureRandom.hex }], ] -Benchmark.ips do |x| - tests.each do |test, proc| - x.report(test, proc) - end -end +Benchmark.ips { |x| tests.each { |test, proc| x.report(test, proc) } } # 2017-08-02 - Erick's Site Setting change diff --git a/script/benchmarks/site_setting/profile.rb b/script/benchmarks/site_setting/profile.rb index a849a18a363..fea7977b866 100644 --- a/script/benchmarks/site_setting/profile.rb +++ b/script/benchmarks/site_setting/profile.rb @@ -1,34 +1,26 @@ # frozen_string_literal: true -require 'ruby-prof' +require "ruby-prof" def profile(&blk) result = RubyProf.profile(&blk) printer = RubyProf::GraphHtmlPrinter.new(result) printer.print(STDOUT) end -profile { '' } # loading profiler dependency +profile { "" } # loading profiler dependency -require File.expand_path('../../../../config/environment', __FILE__) +require File.expand_path("../../../../config/environment", __FILE__) # warming up SiteSetting.title SiteSetting.enable_discourse_connect -SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en' +SiteSetting.default_locale = SiteSetting.default_locale == "en" ? "zh_CN" : "en" SiteSetting.title = SecureRandom.hex -profile do - SiteSetting.title -end +profile { SiteSetting.title } -profile do - SiteSetting.enable_discourse_connect -end +profile { SiteSetting.enable_discourse_connect } -profile do - SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en' -end +profile { SiteSetting.default_locale = SiteSetting.default_locale == "en" ? 
"zh_CN" : "en" } -profile do - SiteSetting.title = SecureRandom.hex -end +profile { SiteSetting.title = SecureRandom.hex } diff --git a/script/biggest_objects.rb b/script/biggest_objects.rb index 3f99cf109b4..220fff3c00d 100644 --- a/script/biggest_objects.rb +++ b/script/biggest_objects.rb @@ -2,35 +2,41 @@ # simple script to measure largest objects in memory post boot -if ENV['RAILS_ENV'] != "production" - exec "RAILS_ENV=production ruby #{__FILE__}" -end +exec "RAILS_ENV=production ruby #{__FILE__}" if ENV["RAILS_ENV"] != "production" -require 'objspace' +require "objspace" ObjectSpace.trace_object_allocations do - require File.expand_path("../../config/environment", __FILE__) - Rails.application.routes.recognize_path('abc') rescue nil + begin + Rails.application.routes.recognize_path("abc") + rescue StandardError + nil + end # load up the yaml for the localization bits, in master process I18n.t(:posts) RailsMultisite::ConnectionManagement.each_connection do (ActiveRecord::Base.connection.tables - %w[schema_migrations versions]).each do |table| - table.classify.constantize.first rescue nil + begin + table.classify.constantize.first + rescue StandardError + nil + end end end - end -5.times do - GC.start(full_mark: true, immediate_sweep: true) -end +5.times { GC.start(full_mark: true, immediate_sweep: true) } [String, Array, Hash].each do |klass| - ObjectSpace.each_object(klass).sort { |a, b| b.length <=> a.length }.first(50).each do |obj| - puts "#{klass} size: #{obj.length} #{ObjectSpace.allocation_sourcefile(obj)} #{ObjectSpace.allocation_sourceline(obj)}" - end + ObjectSpace + .each_object(klass) + .sort { |a, b| b.length <=> a.length } + .first(50) + .each do |obj| + puts "#{klass} size: #{obj.length} #{ObjectSpace.allocation_sourcefile(obj)} #{ObjectSpace.allocation_sourceline(obj)}" + end end diff --git a/script/boot_mem.rb b/script/boot_mem.rb index 5780ae6058d..44fb2b84074 100644 --- a/script/boot_mem.rb +++ b/script/boot_mem.rb @@ -2,22 +2,30 @@ # simple script to measure memory at boot -if ENV['RAILS_ENV'] != "production" - exec "RAILS_ENV=production ruby #{__FILE__}" -end +exec "RAILS_ENV=production ruby #{__FILE__}" if ENV["RAILS_ENV"] != "production" -require 'memory_profiler' +require "memory_profiler" -MemoryProfiler.report do - require File.expand_path("../../config/environment", __FILE__) +MemoryProfiler + .report do + require File.expand_path("../../config/environment", __FILE__) - Rails.application.routes.recognize_path('abc') rescue nil + begin + Rails.application.routes.recognize_path("abc") + rescue StandardError + nil + end - # load up the yaml for the localization bits, in master process - I18n.t(:posts) + # load up the yaml for the localization bits, in master process + I18n.t(:posts) - # load up all models and schema - (ActiveRecord::Base.connection.tables - %w[schema_migrations versions]).each do |table| - table.classify.constantize.first rescue nil + # load up all models and schema + (ActiveRecord::Base.connection.tables - %w[schema_migrations versions]).each do |table| + begin + table.classify.constantize.first + rescue StandardError + nil + end + end end -end.pretty_print + .pretty_print diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb index 767469a83bf..d92878b705b 100644 --- a/script/bulk_import/base.rb +++ b/script/bulk_import/base.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -if ARGV.include?('bbcode-to-md') +if ARGV.include?("bbcode-to-md") # Replace (most) bbcode with markdown before creating posts. 
# This will dramatically clean up the final posts in Discourse. # @@ -10,7 +10,7 @@ if ARGV.include?('bbcode-to-md') # cd ruby-bbcode-to-md # gem build ruby-bbcode-to-md.gemspec # gem install ruby-bbcode-to-md-*.gem - require 'ruby-bbcode-to-md' + require "ruby-bbcode-to-md" end require "pg" @@ -20,12 +20,12 @@ require "htmlentities" puts "Loading application..." require_relative "../../config/environment" -require_relative '../import_scripts/base/uploader' +require_relative "../import_scripts/base/uploader" -module BulkImport; end +module BulkImport +end class BulkImport::Base - NOW ||= "now()" PRIVATE_OFFSET ||= 2**30 @@ -33,41 +33,41 @@ class BulkImport::Base CHARSET_MAP = { "armscii8" => nil, - "ascii" => Encoding::US_ASCII, - "big5" => Encoding::Big5, - "binary" => Encoding::ASCII_8BIT, - "cp1250" => Encoding::Windows_1250, - "cp1251" => Encoding::Windows_1251, - "cp1256" => Encoding::Windows_1256, - "cp1257" => Encoding::Windows_1257, - "cp850" => Encoding::CP850, - "cp852" => Encoding::CP852, - "cp866" => Encoding::IBM866, - "cp932" => Encoding::Windows_31J, - "dec8" => nil, - "eucjpms" => Encoding::EucJP_ms, - "euckr" => Encoding::EUC_KR, - "gb2312" => Encoding::EUC_CN, - "gbk" => Encoding::GBK, - "geostd8" => nil, - "greek" => Encoding::ISO_8859_7, - "hebrew" => Encoding::ISO_8859_8, - "hp8" => nil, - "keybcs2" => nil, - "koi8r" => Encoding::KOI8_R, - "koi8u" => Encoding::KOI8_U, - "latin1" => Encoding::ISO_8859_1, - "latin2" => Encoding::ISO_8859_2, - "latin5" => Encoding::ISO_8859_9, - "latin7" => Encoding::ISO_8859_13, - "macce" => Encoding::MacCentEuro, + "ascii" => Encoding::US_ASCII, + "big5" => Encoding::Big5, + "binary" => Encoding::ASCII_8BIT, + "cp1250" => Encoding::Windows_1250, + "cp1251" => Encoding::Windows_1251, + "cp1256" => Encoding::Windows_1256, + "cp1257" => Encoding::Windows_1257, + "cp850" => Encoding::CP850, + "cp852" => Encoding::CP852, + "cp866" => Encoding::IBM866, + "cp932" => Encoding::Windows_31J, + "dec8" => nil, + "eucjpms" => Encoding::EucJP_ms, + "euckr" => Encoding::EUC_KR, + "gb2312" => Encoding::EUC_CN, + "gbk" => Encoding::GBK, + "geostd8" => nil, + "greek" => Encoding::ISO_8859_7, + "hebrew" => Encoding::ISO_8859_8, + "hp8" => nil, + "keybcs2" => nil, + "koi8r" => Encoding::KOI8_R, + "koi8u" => Encoding::KOI8_U, + "latin1" => Encoding::ISO_8859_1, + "latin2" => Encoding::ISO_8859_2, + "latin5" => Encoding::ISO_8859_9, + "latin7" => Encoding::ISO_8859_13, + "macce" => Encoding::MacCentEuro, "macroman" => Encoding::MacRoman, - "sjis" => Encoding::SHIFT_JIS, - "swe7" => nil, - "tis620" => Encoding::TIS_620, - "ucs2" => Encoding::UTF_16BE, - "ujis" => Encoding::EucJP_ms, - "utf8" => Encoding::UTF_8, + "sjis" => Encoding::SHIFT_JIS, + "swe7" => nil, + "tis620" => Encoding::TIS_620, + "ucs2" => Encoding::UTF_16BE, + "ujis" => Encoding::EucJP_ms, + "utf8" => Encoding::UTF_8, } # rubocop:enable Layout/HashAlignment @@ -82,12 +82,13 @@ class BulkImport::Base @encoding = CHARSET_MAP[charset] @bbcode_to_md = true if use_bbcode_to_md? 
- @markdown = Redcarpet::Markdown.new( - Redcarpet::Render::HTML.new(hard_wrap: true), - no_intra_emphasis: true, - fenced_code_blocks: true, - autolink: true - ) + @markdown = + Redcarpet::Markdown.new( + Redcarpet::Render::HTML.new(hard_wrap: true), + no_intra_emphasis: true, + fenced_code_blocks: true, + autolink: true, + ) end def run @@ -132,7 +133,9 @@ class BulkImport::Base map = [] ids = [] - @raw_connection.send_query("SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'") + @raw_connection.send_query( + "SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'", + ) @raw_connection.set_single_row_mode @raw_connection.get_result.stream_each do |row| @@ -163,12 +166,14 @@ class BulkImport::Base puts "Loading imported topic ids..." @topics, imported_topic_ids = imported_ids("topic") @last_imported_topic_id = imported_topic_ids.select { |id| id < PRIVATE_OFFSET }.max || -1 - @last_imported_private_topic_id = imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1) + @last_imported_private_topic_id = + imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1) puts "Loading imported post ids..." @posts, imported_post_ids = imported_ids("post") @last_imported_post_id = imported_post_ids.select { |id| id < PRIVATE_OFFSET }.max || -1 - @last_imported_private_post_id = imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1) + @last_imported_private_post_id = + imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1) end def last_id(klass) @@ -182,9 +187,7 @@ class BulkImport::Base @raw_connection.send_query("SELECT id, #{column} FROM #{name}") @raw_connection.set_single_row_mode - @raw_connection.get_result.stream_each do |row| - map[row["id"].to_i] = row[column].to_i - end + @raw_connection.get_result.stream_each { |row| map[row["id"].to_i] = row[column].to_i } @raw_connection.get_result @@ -199,13 +202,24 @@ class BulkImport::Base puts "Loading users indexes..." @last_user_id = last_id(User) @last_user_email_id = last_id(UserEmail) - @emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h + @emails = + User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h @usernames_lower = User.unscoped.pluck(:username_lower).to_set - @mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h + @mapped_usernames = + UserCustomField + .joins(:user) + .where(name: "import_username") + .pluck("user_custom_fields.value", "users.username") + .to_h puts "Loading categories indexes..." @last_category_id = last_id(Category) - @category_names = Category.unscoped.pluck(:parent_category_id, :name).map { |pci, name| "#{pci}-#{name}" }.to_set + @category_names = + Category + .unscoped + .pluck(:parent_category_id, :name) + .map { |pci, name| "#{pci}-#{name}" } + .to_set puts "Loading topics indexes..." @last_topic_id = last_id(Topic) @@ -233,13 +247,27 @@ class BulkImport::Base def fix_primary_keys puts "Updating primary key sequences..." 
- @raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})") if @last_group_id > 0 - @raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})") if @last_user_id > 0 - @raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})") if @last_user_email_id > 0 - @raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})") if @last_category_id > 0 - @raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})") if @last_topic_id > 0 - @raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})") if @last_post_id > 0 - @raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})") if @last_post_action_id > 0 + if @last_group_id > 0 + @raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})") + end + if @last_user_id > 0 + @raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})") + end + if @last_user_email_id > 0 + @raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})") + end + if @last_category_id > 0 + @raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})") + end + if @last_topic_id > 0 + @raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})") + end + if @last_post_id > 0 + @raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})") + end + if @last_post_action_id > 0 + @raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})") + end end def group_id_from_imported_id(id) @@ -272,63 +300,124 @@ class BulkImport::Base post_id && @topic_id_by_post_id[post_id] end - GROUP_COLUMNS ||= %i{ - id name title bio_raw bio_cooked created_at updated_at - } + GROUP_COLUMNS ||= %i[id name title bio_raw bio_cooked created_at updated_at] - USER_COLUMNS ||= %i{ - id username username_lower name active trust_level admin moderator - date_of_birth ip_address registration_ip_address primary_group_id - suspended_at suspended_till last_emailed_at created_at updated_at - } + USER_COLUMNS ||= %i[ + id + username + username_lower + name + active + trust_level + admin + moderator + date_of_birth + ip_address + registration_ip_address + primary_group_id + suspended_at + suspended_till + last_emailed_at + created_at + updated_at + ] - USER_EMAIL_COLUMNS ||= %i{ - id user_id email primary created_at updated_at - } + USER_EMAIL_COLUMNS ||= %i[id user_id email primary created_at updated_at] - USER_STAT_COLUMNS ||= %i{ - user_id topics_entered time_read days_visited posts_read_count - likes_given likes_received new_since read_faq - first_post_created_at post_count topic_count bounce_score - reset_bounce_score_after digest_attempted_at - } + USER_STAT_COLUMNS ||= %i[ + user_id + topics_entered + time_read + days_visited + posts_read_count + likes_given + likes_received + new_since + read_faq + first_post_created_at + post_count + topic_count + bounce_score + reset_bounce_score_after + digest_attempted_at + ] - USER_PROFILE_COLUMNS ||= %i{ - user_id location website bio_raw bio_cooked views - } + USER_PROFILE_COLUMNS ||= %i[user_id location website bio_raw bio_cooked views] - GROUP_USER_COLUMNS ||= %i{ - group_id user_id created_at updated_at - } + GROUP_USER_COLUMNS ||= %i[group_id user_id created_at updated_at] - CATEGORY_COLUMNS ||= %i{ - id name name_lower slug user_id description position parent_category_id - created_at updated_at - } + CATEGORY_COLUMNS ||= 
%i[ + id + name + name_lower + slug + user_id + description + position + parent_category_id + created_at + updated_at + ] - TOPIC_COLUMNS ||= %i{ - id archetype title fancy_title slug user_id last_post_user_id category_id - visible closed pinned_at views created_at bumped_at updated_at - } + TOPIC_COLUMNS ||= %i[ + id + archetype + title + fancy_title + slug + user_id + last_post_user_id + category_id + visible + closed + pinned_at + views + created_at + bumped_at + updated_at + ] - POST_COLUMNS ||= %i{ - id user_id last_editor_id topic_id post_number sort_order reply_to_post_number - like_count raw cooked hidden word_count created_at last_version_at updated_at - } + POST_COLUMNS ||= %i[ + id + user_id + last_editor_id + topic_id + post_number + sort_order + reply_to_post_number + like_count + raw + cooked + hidden + word_count + created_at + last_version_at + updated_at + ] - POST_ACTION_COLUMNS ||= %i{ - id post_id user_id post_action_type_id deleted_at created_at updated_at - deleted_by_id related_post_id staff_took_action deferred_by_id targets_topic - agreed_at agreed_by_id deferred_at disagreed_at disagreed_by_id - } + POST_ACTION_COLUMNS ||= %i[ + id + post_id + user_id + post_action_type_id + deleted_at + created_at + updated_at + deleted_by_id + related_post_id + staff_took_action + deferred_by_id + targets_topic + agreed_at + agreed_by_id + deferred_at + disagreed_at + disagreed_by_id + ] - TOPIC_ALLOWED_USER_COLUMNS ||= %i{ - topic_id user_id created_at updated_at - } + TOPIC_ALLOWED_USER_COLUMNS ||= %i[topic_id user_id created_at updated_at] - TOPIC_TAG_COLUMNS ||= %i{ - topic_id tag_id created_at updated_at - } + TOPIC_TAG_COLUMNS ||= %i[topic_id tag_id created_at updated_at] def create_groups(rows, &block) create_records(rows, "group", GROUP_COLUMNS, &block) @@ -340,10 +429,7 @@ class BulkImport::Base create_records(rows, "user", USER_COLUMNS, &block) create_custom_fields("user", "username", @imported_usernames.keys) do |username| - { - record_id: @imported_usernames[username], - value: username, - } + { record_id: @imported_usernames[username], value: username } end end @@ -389,8 +475,8 @@ class BulkImport::Base group[:name] = group_name end - group[:title] = group[:title].scrub.strip.presence if group[:title].present? - group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present? + group[:title] = group[:title].scrub.strip.presence if group[:title].present? + group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present? group[:bio_cooked] = pre_cook(group[:bio_raw]) if group[:bio_raw].present? group[:created_at] ||= NOW group[:updated_at] ||= group[:created_at] @@ -456,7 +542,9 @@ class BulkImport::Base user_email[:email] ||= random_email user_email[:email].downcase! 
# unique email - user_email[:email] = random_email until EmailAddressValidator.valid_value?(user_email[:email]) && !@emails.has_key?(user_email[:email]) + user_email[:email] = random_email until EmailAddressValidator.valid_value?( + user_email[:email], + ) && !@emails.has_key?(user_email[:email]) user_email end @@ -539,7 +627,11 @@ class BulkImport::Base post[:raw] = (post[:raw] || "").scrub.strip.presence || "" post[:raw] = process_raw post[:raw] if @bbcode_to_md - post[:raw] = post[:raw].bbcode_to_md(false, {}, :disable, :quote) rescue post[:raw] + post[:raw] = begin + post[:raw].bbcode_to_md(false, {}, :disable, :quote) + rescue StandardError + post[:raw] + end end post[:like_count] ||= 0 post[:cooked] = pre_cook post[:raw] @@ -580,22 +672,22 @@ class BulkImport::Base # [HTML]...[/HTML] raw.gsub!(/\[HTML\]/i, "\n\n```html\n") - raw.gsub!(/\[\/HTML\]/i, "\n```\n\n") + raw.gsub!(%r{\[/HTML\]}i, "\n```\n\n") # [PHP]...[/PHP] raw.gsub!(/\[PHP\]/i, "\n\n```php\n") - raw.gsub!(/\[\/PHP\]/i, "\n```\n\n") + raw.gsub!(%r{\[/PHP\]}i, "\n```\n\n") # [HIGHLIGHT="..."] raw.gsub!(/\[HIGHLIGHT="?(\w+)"?\]/i) { "\n\n```#{$1.downcase}\n" } # [CODE]...[/CODE] # [HIGHLIGHT]...[/HIGHLIGHT] - raw.gsub!(/\[\/?CODE\]/i, "\n\n```\n\n") - raw.gsub!(/\[\/?HIGHLIGHT\]/i, "\n\n```\n\n") + raw.gsub!(%r{\[/?CODE\]}i, "\n\n```\n\n") + raw.gsub!(%r{\[/?HIGHLIGHT\]}i, "\n\n```\n\n") # [SAMP]...[/SAMP] - raw.gsub!(/\[\/?SAMP\]/i, "`") + raw.gsub!(%r{\[/?SAMP\]}i, "`") # replace all chevrons with HTML entities # /!\ must be done /!\ @@ -609,61 +701,61 @@ class BulkImport::Base raw.gsub!(">", ">") raw.gsub!("\u2603", ">") - raw.gsub!(/\[\/?I\]/i, "*") - raw.gsub!(/\[\/?B\]/i, "**") - raw.gsub!(/\[\/?U\]/i, "") + raw.gsub!(%r{\[/?I\]}i, "*") + raw.gsub!(%r{\[/?B\]}i, "**") + raw.gsub!(%r{\[/?U\]}i, "") - raw.gsub!(/\[\/?RED\]/i, "") - raw.gsub!(/\[\/?BLUE\]/i, "") + raw.gsub!(%r{\[/?RED\]}i, "") + raw.gsub!(%r{\[/?BLUE\]}i, "") - raw.gsub!(/\[AUTEUR\].+?\[\/AUTEUR\]/im, "") - raw.gsub!(/\[VOIRMSG\].+?\[\/VOIRMSG\]/im, "") - raw.gsub!(/\[PSEUDOID\].+?\[\/PSEUDOID\]/im, "") + raw.gsub!(%r{\[AUTEUR\].+?\[/AUTEUR\]}im, "") + raw.gsub!(%r{\[VOIRMSG\].+?\[/VOIRMSG\]}im, "") + raw.gsub!(%r{\[PSEUDOID\].+?\[/PSEUDOID\]}im, "") # [IMG]...[/IMG] - raw.gsub!(/(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[\/IMG\]\s*)+/im) { "\n\n#{$1}\n\n" } + raw.gsub!(%r{(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[/IMG\]\s*)+}im) { "\n\n#{$1}\n\n" } # [IMG=url] raw.gsub!(/\[IMG=([^\]]*)\]/im) { "\n\n#{$1}\n\n" } # [URL=...]...[/URL] - raw.gsub!(/\[URL="?(.+?)"?\](.+?)\[\/URL\]/im) { "[#{$2.strip}](#{$1})" } + raw.gsub!(%r{\[URL="?(.+?)"?\](.+?)\[/URL\]}im) { "[#{$2.strip}](#{$1})" } # [URL]...[/URL] # [MP3]...[/MP3] # [EMAIL]...[/EMAIL] # [LEFT]...[/LEFT] - raw.gsub!(/\[\/?URL\]/i, "") - raw.gsub!(/\[\/?MP3\]/i, "") - raw.gsub!(/\[\/?EMAIL\]/i, "") - raw.gsub!(/\[\/?LEFT\]/i, "") + raw.gsub!(%r{\[/?URL\]}i, "") + raw.gsub!(%r{\[/?MP3\]}i, "") + raw.gsub!(%r{\[/?EMAIL\]}i, "") + raw.gsub!(%r{\[/?LEFT\]}i, "") # [FONT=blah] and [COLOR=blah] - raw.gsub!(/\[FONT=.*?\](.*?)\[\/FONT\]/im, "\\1") - raw.gsub!(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, "\\1") + raw.gsub!(%r{\[FONT=.*?\](.*?)\[/FONT\]}im, "\\1") + raw.gsub!(%r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, "\\1") - raw.gsub!(/\[SIZE=.*?\](.*?)\[\/SIZE\]/im, "\\1") - raw.gsub!(/\[H=.*?\](.*?)\[\/H\]/im, "\\1") + raw.gsub!(%r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, "\\1") + raw.gsub!(%r{\[H=.*?\](.*?)\[/H\]}im, "\\1") # [CENTER]...[/CENTER] - raw.gsub!(/\[CENTER\](.*?)\[\/CENTER\]/im, "\\1") + raw.gsub!(%r{\[CENTER\](.*?)\[/CENTER\]}im, "\\1") # 
[INDENT]...[/INDENT] - raw.gsub!(/\[INDENT\](.*?)\[\/INDENT\]/im, "\\1") - raw.gsub!(/\[TABLE\](.*?)\[\/TABLE\]/im, "\\1") - raw.gsub!(/\[TR\](.*?)\[\/TR\]/im, "\\1") - raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, "\\1") - raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, "\\1") + raw.gsub!(%r{\[INDENT\](.*?)\[/INDENT\]}im, "\\1") + raw.gsub!(%r{\[TABLE\](.*?)\[/TABLE\]}im, "\\1") + raw.gsub!(%r{\[TR\](.*?)\[/TR\]}im, "\\1") + raw.gsub!(%r{\[TD\](.*?)\[/TD\]}im, "\\1") + raw.gsub!(%r{\[TD="?.*?"?\](.*?)\[/TD\]}im, "\\1") # [STRIKE] raw.gsub!(/\[STRIKE\]/i, "") - raw.gsub!(/\[\/STRIKE\]/i, "") + raw.gsub!(%r{\[/STRIKE\]}i, "") # [QUOTE]...[/QUOTE] raw.gsub!(/\[QUOTE="([^\]]+)"\]/i) { "[QUOTE=#{$1}]" } # Nested Quotes - raw.gsub!(/(\[\/?QUOTE.*?\])/mi) { |q| "\n#{q}\n" } + raw.gsub!(%r{(\[/?QUOTE.*?\])}mi) { |q| "\n#{q}\n" } # raw.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { |quote| # quote.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { "\n#{$1}\n" } @@ -686,28 +778,36 @@ class BulkImport::Base end # [YOUTUBE][/YOUTUBE] - raw.gsub!(/\[YOUTUBE\](.+?)\[\/YOUTUBE\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } - raw.gsub!(/\[DAILYMOTION\](.+?)\[\/DAILYMOTION\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" } + raw.gsub!(%r{\[YOUTUBE\](.+?)\[/YOUTUBE\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + raw.gsub!(%r{\[DAILYMOTION\](.+?)\[/DAILYMOTION\]}i) do + "\nhttps://www.dailymotion.com/video/#{$1}\n" + end # [VIDEO=youtube;]...[/VIDEO] - raw.gsub!(/\[VIDEO=YOUTUBE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } - raw.gsub!(/\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" } + raw.gsub!(%r{\[VIDEO=YOUTUBE;([^\]]+)\].*?\[/VIDEO\]}i) do + "\nhttps://www.youtube.com/watch?v=#{$1}\n" + end + raw.gsub!(%r{\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[/VIDEO\]}i) do + "\nhttps://www.dailymotion.com/video/#{$1}\n" + end # [SPOILER=Some hidden stuff]SPOILER HERE!![/SPOILER] - raw.gsub!(/\[SPOILER="?(.+?)"?\](.+?)\[\/SPOILER\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" } + raw.gsub!(%r{\[SPOILER="?(.+?)"?\](.+?)\[/SPOILER\]}im) do + "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" + end # convert list tags to ul and list=1 tags to ol # (basically, we're only missing list=a here...) # (https://meta.discourse.org/t/phpbb-3-importer-old/17397) - raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]') - raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list\]/im, '[ol]\1[/ol]') - raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]') - raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]') + raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]') + raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list\]}im, '[ol]\1[/ol]') + raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]') + raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list:o\]}im, '[ol]\1[/ol]') # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - raw.gsub!(/\[\*\]\n/, '') - raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') + raw.gsub!(/\[\*\]\n/, "") + raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]') raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]') - raw.gsub!(/\[\*=1\]/, '') + raw.gsub!(/\[\*=1\]/, "") raw end @@ -728,7 +828,9 @@ class BulkImport::Base imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil? 
@raw_connection.put_copy_data columns.map { |c| processed[c] } unless processed[:skip] rows_created += 1 - print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created % 100 == 0 + if rows_created % 100 == 0 + print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)] + end rescue => e puts "\n" puts "ERROR: #{e.message}" @@ -737,15 +839,14 @@ class BulkImport::Base end end - print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created > 0 + if rows_created > 0 + print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)] + end id_mapping_method_name = "#{name}_id_from_imported_id".freeze return unless respond_to?(id_mapping_method_name) create_custom_fields(name, "id", imported_ids) do |imported_id| - { - record_id: send(id_mapping_method_name, imported_id), - value: imported_id, - } + { record_id: send(id_mapping_method_name, imported_id), value: imported_id } end rescue => e # FIXME: errors catched here stop the rest of the COPY @@ -755,7 +856,8 @@ class BulkImport::Base def create_custom_fields(table, name, rows) name = "import_#{name}" - sql = "COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN" + sql = + "COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN" @raw_connection.copy_data(sql, @encoder) do rows.each do |row| next unless cf = yield(row) @@ -797,7 +899,7 @@ class BulkImport::Base cooked = raw # Convert YouTube URLs to lazyYT DOMs before being transformed into links - cooked.gsub!(/\nhttps\:\/\/www.youtube.com\/watch\?v=(\w+)\n/) do + cooked.gsub!(%r{\nhttps\://www.youtube.com/watch\?v=(\w+)\n}) do video_id = $1 result = <<-HTML
@@ -807,7 +909,7 @@ class BulkImport::Base cooked = @markdown.render(cooked).scrub.strip - cooked.gsub!(/\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[\/QUOTE\]/im) do + cooked.gsub!(%r{\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[/QUOTE\]}im) do username, post_id, topic_id, quote = $1, $2, $3, $4 quote = quote.scrub.strip @@ -860,5 +962,4 @@ class BulkImport::Base return text if @encoding == Encoding::UTF_8 text && text.encode(@encoding).force_encoding(Encoding::UTF_8) end - end diff --git a/script/bulk_import/discourse_merger.rb b/script/bulk_import/discourse_merger.rb index dc555850b85..61106e6bf35 100644 --- a/script/bulk_import/discourse_merger.rb +++ b/script/bulk_import/discourse_merger.rb @@ -3,9 +3,8 @@ require_relative "base" class BulkImport::DiscourseMerger < BulkImport::Base - NOW ||= "now()" - CUSTOM_FIELDS = ['category', 'group', 'post', 'topic', 'user'] + CUSTOM_FIELDS = %w[category group post topic user] # DB_NAME: name of database being merged into the current local db # DB_HOST: hostname of database being merged @@ -17,31 +16,36 @@ class BulkImport::DiscourseMerger < BulkImport::Base # e.g. https://discourse-cdn-sjc1.com/business4 def initialize - db_password = ENV["DB_PASS"] || 'import_password' + db_password = ENV["DB_PASS"] || "import_password" local_db = ActiveRecord::Base.connection_db_config.configuration_hash - @raw_connection = PG.connect(dbname: local_db[:database], host: 'localhost', port: local_db[:port], user: 'postgres', password: db_password) + @raw_connection = + PG.connect( + dbname: local_db[:database], + host: "localhost", + port: local_db[:port], + user: "postgres", + password: db_password, + ) @source_db_config = { - dbname: ENV["DB_NAME"] || 'dd_demo', - host: ENV["DB_HOST"] || 'localhost', - user: 'postgres', - password: db_password + dbname: ENV["DB_NAME"] || "dd_demo", + host: ENV["DB_HOST"] || "localhost", + user: "postgres", + password: db_password, } - raise "SOURCE_BASE_URL missing!" unless ENV['SOURCE_BASE_URL'] + raise "SOURCE_BASE_URL missing!" unless ENV["SOURCE_BASE_URL"] @source_base_url = ENV["SOURCE_BASE_URL"] - @uploads_path = ENV['UPLOADS_PATH'] + @uploads_path = ENV["UPLOADS_PATH"] @uploader = ImportScripts::Uploader.new - if ENV['SOURCE_CDN'] - @source_cdn = ENV['SOURCE_CDN'] - end + @source_cdn = ENV["SOURCE_CDN"] if ENV["SOURCE_CDN"] local_version = @raw_connection.exec("select max(version) from schema_migrations") - local_version = local_version.first['max'] + local_version = local_version.first["max"] source_version = source_raw_connection.exec("select max(version) from schema_migrations") - source_version = source_version.first['max'] + source_version = source_version.first["max"] if local_version != source_version raise "DB schema mismatch. Databases must be at the same migration version. Local is #{local_version}, other is #{source_version}" @@ -62,7 +66,7 @@ class BulkImport::DiscourseMerger < BulkImport::Base @auto_group_ids = Group::AUTO_GROUPS.values # add your authorized extensions here: - SiteSetting.authorized_extensions = ['jpg', 'jpeg', 'png', 'gif'].join('|') + SiteSetting.authorized_extensions = %w[jpg jpeg png gif].join("|") @sequences = {} end @@ -99,7 +103,7 @@ class BulkImport::DiscourseMerger < BulkImport::Base end def copy_users - puts '', "merging users..." + puts "", "merging users..." 
imported_ids = [] @@ -109,34 +113,38 @@ class BulkImport::DiscourseMerger < BulkImport::Base sql = "COPY users (#{columns.map { |c| "\"#{c}\"" }.join(",")}) FROM STDIN" @raw_connection.copy_data(sql, @encoder) do - source_raw_connection.exec("SELECT #{columns.map { |c| "u.\"#{c}\"" }.join(",")}, e.email FROM users u INNER JOIN user_emails e ON (u.id = e.user_id AND e.primary = TRUE) WHERE u.id > 0").each do |row| - old_user_id = row['id']&.to_i - if existing = UserEmail.where(email: row.delete('email')).first&.user - # Merge these users - @users[old_user_id] = existing.id - @merged_user_ids << old_user_id - next - else - # New user - unless usernames_lower.add?(row['username_lower']) - username = row['username'] + "_1" - username.next! until usernames_lower.add?(username.downcase) - row['username'] = username - row['username_lower'] = row['username'].downcase + source_raw_connection + .exec( + "SELECT #{columns.map { |c| "u.\"#{c}\"" }.join(",")}, e.email FROM users u INNER JOIN user_emails e ON (u.id = e.user_id AND e.primary = TRUE) WHERE u.id > 0", + ) + .each do |row| + old_user_id = row["id"]&.to_i + if existing = UserEmail.where(email: row.delete("email")).first&.user + # Merge these users + @users[old_user_id] = existing.id + @merged_user_ids << old_user_id + next + else + # New user + unless usernames_lower.add?(row["username_lower"]) + username = row["username"] + "_1" + username.next! until usernames_lower.add?(username.downcase) + row["username"] = username + row["username_lower"] = row["username"].downcase + end + + row["id"] = (@last_user_id += 1) + @users[old_user_id] = row["id"] + + @raw_connection.put_copy_data row.values end - - row['id'] = (@last_user_id += 1) - @users[old_user_id] = row['id'] - - @raw_connection.put_copy_data row.values + imported_ids << old_user_id end - imported_ids << old_user_id - end end @sequences[User.sequence_name] = @last_user_id + 1 if @last_user_id - create_custom_fields('user', 'id', imported_ids) do |old_user_id| + create_custom_fields("user", "id", imported_ids) do |old_user_id| { value: old_user_id, record_id: user_id_from_imported_id(old_user_id) } end end @@ -147,28 +155,32 @@ class BulkImport::DiscourseMerger < BulkImport::Base skip_if_merged: true, is_a_user_model: true, skip_processing: true, - mapping: @email_tokens + mapping: @email_tokens, ) [ - UserEmail, UserStat, UserOption, UserProfile, - UserVisit, UserSearchData, GivenDailyLike, UserSecondFactor - ].each do |c| - copy_model(c, skip_if_merged: true, is_a_user_model: true, skip_processing: true) - end + UserEmail, + UserStat, + UserOption, + UserProfile, + UserVisit, + UserSearchData, + GivenDailyLike, + UserSecondFactor, + ].each { |c| copy_model(c, skip_if_merged: true, is_a_user_model: true, skip_processing: true) } - [UserAssociatedAccount, Oauth2UserInfo, - SingleSignOnRecord, EmailChangeRequest - ].each do |c| + [UserAssociatedAccount, Oauth2UserInfo, SingleSignOnRecord, EmailChangeRequest].each do |c| copy_model(c, skip_if_merged: true, is_a_user_model: true) end end def copy_groups - copy_model(Group, + copy_model( + Group, mapping: @groups, skip_processing: true, - select_sql: "SELECT #{Group.columns.map { |c| "\"#{c.name}\"" }.join(', ')} FROM groups WHERE automatic = false" + select_sql: + "SELECT #{Group.columns.map { |c| "\"#{c.name}\"" }.join(", ")} FROM groups WHERE automatic = false", ) copy_model(GroupUser, skip_if_merged: true) @@ -181,11 +193,12 @@ class BulkImport::DiscourseMerger < BulkImport::Base imported_ids = [] last_id = Category.unscoped.maximum(:id) 
|| 1 - sql = "COPY categories (#{columns.map { |c| "\"#{c}\"" }.join(', ')}) FROM STDIN" + sql = "COPY categories (#{columns.map { |c| "\"#{c}\"" }.join(", ")}) FROM STDIN" @raw_connection.copy_data(sql, @encoder) do - source_raw_connection.exec( + source_raw_connection + .exec( "SELECT concat('/c/', x.parent_slug, '/', x.slug) as path, - #{columns.map { |c| "c.\"#{c}\"" }.join(', ')} + #{columns.map { |c| "c.\"#{c}\"" }.join(", ")} FROM categories c INNER JOIN ( SELECT c1.id AS id, @@ -194,61 +207,55 @@ class BulkImport::DiscourseMerger < BulkImport::Base FROM categories c1 LEFT OUTER JOIN categories c2 ON c1.parent_category_id = c2.id ) x ON c.id = x.id - ORDER BY c.id" - ).each do |row| + ORDER BY c.id", + ) + .each do |row| + # using ORDER BY id to import categories in order of creation. + # this assumes parent categories were created prior to child categories + # and have a lower category id. + # + # without this definition, categories import in different orders in subsequent imports + # and can potentially mess up parent/child structure - # using ORDER BY id to import categories in order of creation. - # this assumes parent categories were created prior to child categories - # and have a lower category id. - # - # without this definition, categories import in different orders in subsequent imports - # and can potentially mess up parent/child structure + source_category_path = row.delete("path")&.squeeze("/") - source_category_path = row.delete('path')&.squeeze('/') + existing = Category.where(slug: row["slug"]).first + parent_slug = existing&.parent_category&.slug + if existing && source_category_path == "/c/#{parent_slug}/#{existing.slug}".squeeze("/") + @categories[row["id"].to_i] = existing.id + next + elsif existing + # if not the exact path as the source, + # we still need to avoid a unique index conflict on the slug when importing + # if that's the case, we'll append the imported id + row["slug"] = "#{row["slug"]}-#{row["id"]}" + end - existing = Category.where(slug: row['slug']).first - parent_slug = existing&.parent_category&.slug - if existing && - source_category_path == "/c/#{parent_slug}/#{existing.slug}".squeeze('/') - @categories[row['id'].to_i] = existing.id - next - elsif existing - # if not the exact path as the source, - # we still need to avoid a unique index conflict on the slug when importing - # if that's the case, we'll append the imported id - row['slug'] = "#{row['slug']}-#{row['id']}" + old_user_id = row["user_id"].to_i + row["user_id"] = user_id_from_imported_id(old_user_id) || -1 if old_user_id >= 1 + + if row["parent_category_id"] + row["parent_category_id"] = category_id_from_imported_id(row["parent_category_id"]) + end + + old_id = row["id"].to_i + row["id"] = (last_id += 1) + imported_ids << old_id + @categories[old_id] = row["id"] + + @raw_connection.put_copy_data(row.values) end - - old_user_id = row['user_id'].to_i - if old_user_id >= 1 - row['user_id'] = user_id_from_imported_id(old_user_id) || -1 - end - - if row['parent_category_id'] - row['parent_category_id'] = category_id_from_imported_id(row['parent_category_id']) - end - - old_id = row['id'].to_i - row['id'] = (last_id += 1) - imported_ids << old_id - @categories[old_id] = row['id'] - - @raw_connection.put_copy_data(row.values) - end end @sequences[Category.sequence_name] = last_id + 1 - create_custom_fields('category', 'id', imported_ids) do |imported_id| - { - record_id: category_id_from_imported_id(imported_id), - value: imported_id, - } + create_custom_fields("category", "id", 
imported_ids) do |imported_id| + { record_id: category_id_from_imported_id(imported_id), value: imported_id } end end def fix_category_descriptions - puts 'updating category description topic ids...' + puts "updating category description topic ids..." @categories.each do |old_id, new_id| category = Category.find(new_id) if new_id.present? @@ -261,19 +268,21 @@ class BulkImport::DiscourseMerger < BulkImport::Base def copy_topics copy_model(Topic, mapping: @topics) - [TopicAllowedGroup, TopicAllowedUser, TopicEmbed, TopicSearchData, - TopicTimer, TopicUser, TopicViewItem - ].each do |k| - copy_model(k, skip_processing: true) - end + [ + TopicAllowedGroup, + TopicAllowedUser, + TopicEmbed, + TopicSearchData, + TopicTimer, + TopicUser, + TopicViewItem, + ].each { |k| copy_model(k, skip_processing: true) } end def copy_posts copy_model(Post, skip_processing: true, mapping: @posts) copy_model(PostAction, mapping: @post_actions) - [PostReply, TopicLink, UserAction, QuotedPost].each do |k| - copy_model(k) - end + [PostReply, TopicLink, UserAction, QuotedPost].each { |k| copy_model(k) } [PostStat, IncomingEmail, PostDetail, PostRevision].each do |k| copy_model(k, skip_processing: true) end @@ -286,99 +295,101 @@ class BulkImport::DiscourseMerger < BulkImport::Base imported_ids = [] last_id = Tag.unscoped.maximum(:id) || 1 - sql = "COPY tags (#{columns.map { |c| "\"#{c}\"" }.join(', ')}) FROM STDIN" + sql = "COPY tags (#{columns.map { |c| "\"#{c}\"" }.join(", ")}) FROM STDIN" @raw_connection.copy_data(sql, @encoder) do - source_raw_connection.exec("SELECT #{columns.map { |c| "\"#{c}\"" }.join(', ')} FROM tags").each do |row| + source_raw_connection + .exec("SELECT #{columns.map { |c| "\"#{c}\"" }.join(", ")} FROM tags") + .each do |row| + if existing = Tag.where_name(row["name"]).first + @tags[row["id"]] = existing.id + next + end - if existing = Tag.where_name(row['name']).first - @tags[row['id']] = existing.id - next + old_id = row["id"] + row["id"] = (last_id += 1) + @tags[old_id.to_s] = row["id"] + + @raw_connection.put_copy_data(row.values) end - - old_id = row['id'] - row['id'] = (last_id += 1) - @tags[old_id.to_s] = row['id'] - - @raw_connection.put_copy_data(row.values) - end end @sequences[Tag.sequence_name] = last_id + 1 - [TagUser, TopicTag, CategoryTag, CategoryTagStat].each do |k| - copy_model(k) - end + [TagUser, TopicTag, CategoryTag, CategoryTagStat].each { |k| copy_model(k) } copy_model(TagGroup, mapping: @tag_groups) - [TagGroupMembership, CategoryTagGroup].each do |k| - copy_model(k, skip_processing: true) - end + [TagGroupMembership, CategoryTagGroup].each { |k| copy_model(k, skip_processing: true) } - col_list = TagGroupPermission.columns.map { |c| "\"#{c.name}\"" }.join(', ') - copy_model(TagGroupPermission, + col_list = TagGroupPermission.columns.map { |c| "\"#{c.name}\"" }.join(", ") + copy_model( + TagGroupPermission, skip_processing: true, - select_sql: "SELECT #{col_list} FROM tag_group_permissions WHERE group_id NOT IN (#{@auto_group_ids.join(', ')})" + select_sql: + "SELECT #{col_list} FROM tag_group_permissions WHERE group_id NOT IN (#{@auto_group_ids.join(", ")})", ) end def copy_uploads - puts '' + puts "" print "copying uploads..." 
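      # A minimal sketch of the COPY-streaming pattern the copy_* methods in this
      # script rely on, assuming @raw_connection is a PG::Connection and @encoder
      # a PG::TextEncoder::CopyRow (the connection details below are illustrative
      # only, not part of the patch):
      #
      #   require "pg"
      #
      #   conn = PG.connect(dbname: "discourse") # assumed database name
      #   encoder = PG::TextEncoder::CopyRow.new
      #   conn.copy_data("COPY uploads (id, url) FROM STDIN", encoder) do
      #     rows.each { |row| conn.put_copy_data(row) } # one array of values per row
      #   end
      #
      # copy_data opens COPY mode and winds it down cleanly even if the block
      # raises, which is why each importer wraps its whole row loop in that block.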
FileUtils.cp_r( - File.join(@uploads_path, '.'), - File.join(Rails.root, 'public', 'uploads', 'default') + File.join(@uploads_path, "."), + File.join(Rails.root, "public", "uploads", "default"), ) columns = Upload.columns.map(&:name) last_id = Upload.unscoped.maximum(:id) || 1 - sql = "COPY uploads (#{columns.map { |c| "\"#{c}\"" }.join(', ')}) FROM STDIN" + sql = "COPY uploads (#{columns.map { |c| "\"#{c}\"" }.join(", ")}) FROM STDIN" @raw_connection.copy_data(sql, @encoder) do - source_raw_connection.exec("SELECT #{columns.map { |c| "\"#{c}\"" }.join(', ')} FROM uploads").each do |row| + source_raw_connection + .exec("SELECT #{columns.map { |c| "\"#{c}\"" }.join(", ")} FROM uploads") + .each do |row| + next if Upload.where(sha1: row["sha1"]).exists? - next if Upload.where(sha1: row['sha1']).exists? + # make sure to get a backup with uploads then convert them to local. + # when the backup is restored to a site with s3 uploads, it will upload the items + # to the bucket + rel_filename = row["url"].gsub(%r{^/uploads/[^/]+/}, "") + # assumes if coming from amazonaws.com that we want to remove everything + # but the text after the last `/`, which should leave us the filename + rel_filename = rel_filename.gsub(%r{^//[^/]+\.amazonaws\.com/\S+/}, "") + absolute_filename = File.join(@uploads_path, rel_filename) - # make sure to get a backup with uploads then convert them to local. - # when the backup is restored to a site with s3 uploads, it will upload the items - # to the bucket - rel_filename = row['url'].gsub(/^\/uploads\/[^\/]+\//, '') - # assumes if coming from amazonaws.com that we want to remove everything - # but the text after the last `/`, which should leave us the filename - rel_filename = rel_filename.gsub(/^\/\/[^\/]+\.amazonaws\.com\/\S+\//, '') - absolute_filename = File.join(@uploads_path, rel_filename) + old_id = row["id"] + if old_id && last_id + row["id"] = (last_id += 1) + @uploads[old_id.to_s] = row["id"] + end - old_id = row['id'] - if old_id && last_id - row['id'] = (last_id += 1) - @uploads[old_id.to_s] = row['id'] + old_user_id = row["user_id"].to_i + if old_user_id >= 1 + row["user_id"] = user_id_from_imported_id(old_user_id) + next if row["user_id"].nil? + end + + row["url"] = "/uploads/default/#{rel_filename}" if File.exist?(absolute_filename) + + @raw_connection.put_copy_data(row.values) end - - old_user_id = row['user_id'].to_i - if old_user_id >= 1 - row['user_id'] = user_id_from_imported_id(old_user_id) - next if row['user_id'].nil? - end - - row['url'] = "/uploads/default/#{rel_filename}" if File.exist?(absolute_filename) - - @raw_connection.put_copy_data(row.values) - end end @sequences[Upload.sequence_name] = last_id + 1 - puts '' + puts "" copy_model(PostUpload) copy_model(UserAvatar) # Users have a column "uploaded_avatar_id" which needs to be mapped now. - User.where("id >= ?", @first_new_user_id).find_each do |u| - if u.uploaded_avatar_id - u.uploaded_avatar_id = upload_id_from_imported_id(u.uploaded_avatar_id) - u.save! unless u.uploaded_avatar_id.nil? + User + .where("id >= ?", @first_new_user_id) + .find_each do |u| + if u.uploaded_avatar_id + u.uploaded_avatar_id = upload_id_from_imported_id(u.uploaded_avatar_id) + u.save! unless u.uploaded_avatar_id.nil? 
+ end end - end end def copy_everything_else @@ -386,16 +397,16 @@ class BulkImport::DiscourseMerger < BulkImport::Base copy_model(k, skip_processing: true) end - [UserHistory, UserWarning, GroupArchivedMessage].each do |k| - copy_model(k) - end + [UserHistory, UserWarning, GroupArchivedMessage].each { |k| copy_model(k) } copy_model(Notification, mapping: @notifications) [CategoryGroup, GroupHistory].each do |k| - col_list = k.columns.map { |c| "\"#{c.name}\"" }.join(', ') - copy_model(k, - select_sql: "SELECT #{col_list} FROM #{k.table_name} WHERE group_id NOT IN (#{@auto_group_ids.join(', ')})" + col_list = k.columns.map { |c| "\"#{c.name}\"" }.join(", ") + copy_model( + k, + select_sql: + "SELECT #{col_list} FROM #{k.table_name} WHERE group_id NOT IN (#{@auto_group_ids.join(", ")})", ) end end @@ -408,23 +419,26 @@ class BulkImport::DiscourseMerger < BulkImport::Base imported_ids = [] last_id = Badge.unscoped.maximum(:id) || 1 - sql = "COPY badges (#{columns.map { |c| "\"#{c}\"" }.join(', ')}) FROM STDIN" + sql = "COPY badges (#{columns.map { |c| "\"#{c}\"" }.join(", ")}) FROM STDIN" @raw_connection.copy_data(sql, @encoder) do - source_raw_connection.exec("SELECT #{columns.map { |c| "\"#{c}\"" }.join(', ')} FROM badges").each do |row| + source_raw_connection + .exec("SELECT #{columns.map { |c| "\"#{c}\"" }.join(", ")} FROM badges") + .each do |row| + if existing = Badge.where(name: row["name"]).first + @badges[row["id"]] = existing.id + next + end - if existing = Badge.where(name: row['name']).first - @badges[row['id']] = existing.id - next + old_id = row["id"] + row["id"] = (last_id += 1) + @badges[old_id.to_s] = row["id"] + + row["badge_grouping_id"] = @badge_groupings[row["badge_grouping_id"]] if row[ + "badge_grouping_id" + ] + + @raw_connection.put_copy_data(row.values) end - - old_id = row['id'] - row['id'] = (last_id += 1) - @badges[old_id.to_s] = row['id'] - - row['badge_grouping_id'] = @badge_groupings[row['badge_grouping_id']] if row['badge_grouping_id'] - - @raw_connection.put_copy_data(row.values) - end end @sequences[Badge.sequence_name] = last_id + 1 @@ -432,72 +446,94 @@ class BulkImport::DiscourseMerger < BulkImport::Base copy_model(UserBadge, is_a_user_model: true) end - def copy_model(klass, skip_if_merged: false, is_a_user_model: false, skip_processing: false, mapping: nil, select_sql: nil) - + def copy_model( + klass, + skip_if_merged: false, + is_a_user_model: false, + skip_processing: false, + mapping: nil, + select_sql: nil + ) puts "copying #{klass.table_name}..." columns = klass.columns.map(&:name) has_custom_fields = CUSTOM_FIELDS.include?(klass.name.downcase) imported_ids = [] - last_id = columns.include?('id') ? (klass.unscoped.maximum(:id) || 1) : nil + last_id = columns.include?("id") ? 
(klass.unscoped.maximum(:id) || 1) : nil - sql = "COPY #{klass.table_name} (#{columns.map { |c| "\"#{c}\"" }.join(', ')}) FROM STDIN" + sql = "COPY #{klass.table_name} (#{columns.map { |c| "\"#{c}\"" }.join(", ")}) FROM STDIN" @raw_connection.copy_data(sql, @encoder) do - source_raw_connection.exec(select_sql || "SELECT #{columns.map { |c| "\"#{c}\"" }.join(', ')} FROM #{klass.table_name}").each do |row| - if row['user_id'] - old_user_id = row['user_id'].to_i + source_raw_connection + .exec( + select_sql || + "SELECT #{columns.map { |c| "\"#{c}\"" }.join(", ")} FROM #{klass.table_name}", + ) + .each do |row| + if row["user_id"] + old_user_id = row["user_id"].to_i - next if skip_if_merged && @merged_user_ids.include?(old_user_id) + next if skip_if_merged && @merged_user_ids.include?(old_user_id) - if is_a_user_model - next if old_user_id < 1 - next if user_id_from_imported_id(old_user_id).nil? - end - - if old_user_id >= 1 - row['user_id'] = user_id_from_imported_id(old_user_id) - if is_a_user_model && row['user_id'].nil? - raise "user_id nil for user id '#{old_user_id}'" + if is_a_user_model + next if old_user_id < 1 + next if user_id_from_imported_id(old_user_id).nil? end - next if row['user_id'].nil? # associated record for a deleted user + + if old_user_id >= 1 + row["user_id"] = user_id_from_imported_id(old_user_id) + if is_a_user_model && row["user_id"].nil? + raise "user_id nil for user id '#{old_user_id}'" + end + next if row["user_id"].nil? # associated record for a deleted user + end + end + + row["group_id"] = group_id_from_imported_id(row["group_id"]) if row["group_id"] + row["category_id"] = category_id_from_imported_id(row["category_id"]) if row[ + "category_id" + ] + if row["topic_id"] && klass != Category + row["topic_id"] = topic_id_from_imported_id(row["topic_id"]) + next if row["topic_id"].nil? + end + if row["post_id"] + row["post_id"] = post_id_from_imported_id(row["post_id"]) + next if row["post_id"].nil? + end + row["tag_id"] = tag_id_from_imported_id(row["tag_id"]) if row["tag_id"] + row["tag_group_id"] = tag_group_id_from_imported_id(row["tag_group_id"]) if row[ + "tag_group_id" + ] + row["upload_id"] = upload_id_from_imported_id(row["upload_id"]) if row["upload_id"] + row["deleted_by_id"] = user_id_from_imported_id(row["deleted_by_id"]) if row[ + "deleted_by_id" + ] + row["badge_id"] = badge_id_from_imported_id(row["badge_id"]) if row["badge_id"] + + old_id = row["id"].to_i + if old_id && last_id + row["id"] = (last_id += 1) + imported_ids << old_id if has_custom_fields + mapping[old_id] = row["id"] if mapping + end + + if skip_processing + @raw_connection.put_copy_data(row.values) + else + process_method_name = "process_#{klass.name.underscore}" + + processed = + ( + if respond_to?(process_method_name) + send(process_method_name, HashWithIndifferentAccess.new(row)) + else + row + end + ) + + @raw_connection.put_copy_data columns.map { |c| processed[c] } if processed end end - - row['group_id'] = group_id_from_imported_id(row['group_id']) if row['group_id'] - row['category_id'] = category_id_from_imported_id(row['category_id']) if row['category_id'] - if row['topic_id'] && klass != Category - row['topic_id'] = topic_id_from_imported_id(row['topic_id']) - next if row['topic_id'].nil? - end - if row['post_id'] - row['post_id'] = post_id_from_imported_id(row['post_id']) - next if row['post_id'].nil? 
- end - row['tag_id'] = tag_id_from_imported_id(row['tag_id']) if row['tag_id'] - row['tag_group_id'] = tag_group_id_from_imported_id(row['tag_group_id']) if row['tag_group_id'] - row['upload_id'] = upload_id_from_imported_id(row['upload_id']) if row['upload_id'] - row['deleted_by_id'] = user_id_from_imported_id(row['deleted_by_id']) if row['deleted_by_id'] - row['badge_id'] = badge_id_from_imported_id(row['badge_id']) if row['badge_id'] - - old_id = row['id'].to_i - if old_id && last_id - row['id'] = (last_id += 1) - imported_ids << old_id if has_custom_fields - mapping[old_id] = row['id'] if mapping - end - - if skip_processing - @raw_connection.put_copy_data(row.values) - else - process_method_name = "process_#{klass.name.underscore}" - - processed = respond_to?(process_method_name) ? send(process_method_name, HashWithIndifferentAccess.new(row)) : row - - if processed - @raw_connection.put_copy_data columns.map { |c| processed[c] } - end - end - end end @sequences[klass.sequence_name] = last_id + 1 if last_id @@ -506,192 +542,248 @@ class BulkImport::DiscourseMerger < BulkImport::Base id_mapping_method_name = "#{klass.name.downcase}_id_from_imported_id".freeze return unless respond_to?(id_mapping_method_name) create_custom_fields(klass.name.downcase, "id", imported_ids) do |imported_id| - { - record_id: send(id_mapping_method_name, imported_id), - value: imported_id, - } + { record_id: send(id_mapping_method_name, imported_id), value: imported_id } end end end def process_topic(topic) - return nil if topic['category_id'].nil? && topic['archetype'] != Archetype.private_message - topic['last_post_user_id'] = user_id_from_imported_id(topic['last_post_user_id']) || -1 - topic['featured_user1_id'] = user_id_from_imported_id(topic['featured_user1_id']) || -1 - topic['featured_user2_id'] = user_id_from_imported_id(topic['featured_user2_id']) || -1 - topic['featured_user3_id'] = user_id_from_imported_id(topic['featured_user3_id']) || -1 - topic['featured_user4_id'] = user_id_from_imported_id(topic['featured_user4_id']) || -1 + return nil if topic["category_id"].nil? 
&& topic["archetype"] != Archetype.private_message + topic["last_post_user_id"] = user_id_from_imported_id(topic["last_post_user_id"]) || -1 + topic["featured_user1_id"] = user_id_from_imported_id(topic["featured_user1_id"]) || -1 + topic["featured_user2_id"] = user_id_from_imported_id(topic["featured_user2_id"]) || -1 + topic["featured_user3_id"] = user_id_from_imported_id(topic["featured_user3_id"]) || -1 + topic["featured_user4_id"] = user_id_from_imported_id(topic["featured_user4_id"]) || -1 topic end def process_post(post) - post['last_editor_id'] = user_id_from_imported_id(post['last_editor_id']) || -1 - post['reply_to_user_id'] = user_id_from_imported_id(post['reply_to_user_id']) || -1 - post['locked_by_id'] = user_id_from_imported_id(post['locked_by_id']) || -1 + post["last_editor_id"] = user_id_from_imported_id(post["last_editor_id"]) || -1 + post["reply_to_user_id"] = user_id_from_imported_id(post["reply_to_user_id"]) || -1 + post["locked_by_id"] = user_id_from_imported_id(post["locked_by_id"]) || -1 @topic_id_by_post_id[post[:id]] = post[:topic_id] post end def process_post_reply(post_reply) - post_reply['reply_post_id'] = post_id_from_imported_id(post_reply['reply_post_id']) if post_reply['reply_post_id'] + post_reply["reply_post_id"] = post_id_from_imported_id( + post_reply["reply_post_id"], + ) if post_reply["reply_post_id"] post_reply end def process_quoted_post(quoted_post) - quoted_post['quoted_post_id'] = post_id_from_imported_id(quoted_post['quoted_post_id']) if quoted_post['quoted_post_id'] - return nil if quoted_post['quoted_post_id'].nil? + quoted_post["quoted_post_id"] = post_id_from_imported_id( + quoted_post["quoted_post_id"], + ) if quoted_post["quoted_post_id"] + return nil if quoted_post["quoted_post_id"].nil? quoted_post end def process_topic_link(topic_link) - old_topic_id = topic_link['link_topic_id'] - topic_link['link_topic_id'] = topic_id_from_imported_id(topic_link['link_topic_id']) if topic_link['link_topic_id'] - topic_link['link_post_id'] = post_id_from_imported_id(topic_link['link_post_id']) if topic_link['link_post_id'] - return nil if topic_link['link_topic_id'].nil? + old_topic_id = topic_link["link_topic_id"] + topic_link["link_topic_id"] = topic_id_from_imported_id( + topic_link["link_topic_id"], + ) if topic_link["link_topic_id"] + topic_link["link_post_id"] = post_id_from_imported_id(topic_link["link_post_id"]) if topic_link[ + "link_post_id" + ] + return nil if topic_link["link_topic_id"].nil? r = Regexp.new("^#{@source_base_url}/t/([^\/]+)/#{old_topic_id}(.*)") - if m = r.match(topic_link['url']) - topic_link['url'] = "#{@source_base_url}/t/#{m[1]}/#{topic_link['link_topic_id']}#{m[2]}" + if m = r.match(topic_link["url"]) + topic_link["url"] = "#{@source_base_url}/t/#{m[1]}/#{topic_link["link_topic_id"]}#{m[2]}" end topic_link end def process_post_action(post_action) - return nil unless post_action['post_id'].present? - post_action['related_post_id'] = post_id_from_imported_id(post_action['related_post_id']) - post_action['deferred_by_id'] = user_id_from_imported_id(post_action['deferred_by_id']) - post_action['agreed_by_id'] = user_id_from_imported_id(post_action['agreed_by_id']) - post_action['disagreed_by_id'] = user_id_from_imported_id(post_action['disagreed_by_id']) + return nil unless post_action["post_id"].present? 
+ post_action["related_post_id"] = post_id_from_imported_id(post_action["related_post_id"]) + post_action["deferred_by_id"] = user_id_from_imported_id(post_action["deferred_by_id"]) + post_action["agreed_by_id"] = user_id_from_imported_id(post_action["agreed_by_id"]) + post_action["disagreed_by_id"] = user_id_from_imported_id(post_action["disagreed_by_id"]) post_action end def process_user_action(user_action) - user_action['target_topic_id'] = topic_id_from_imported_id(user_action['target_topic_id']) if user_action['target_topic_id'] - user_action['target_post_id'] = post_id_from_imported_id(user_action['target_post_id']) if user_action['target_post_id'] - user_action['target_user_id'] = user_id_from_imported_id(user_action['target_user_id']) if user_action['target_user_id'] - user_action['acting_user_id'] = user_id_from_imported_id(user_action['acting_user_id']) if user_action['acting_user_id'] - user_action['queued_post_id'] = post_id_from_imported_id(user_action['queued_post_id']) if user_action['queued_post_id'] + user_action["target_topic_id"] = topic_id_from_imported_id( + user_action["target_topic_id"], + ) if user_action["target_topic_id"] + user_action["target_post_id"] = post_id_from_imported_id( + user_action["target_post_id"], + ) if user_action["target_post_id"] + user_action["target_user_id"] = user_id_from_imported_id( + user_action["target_user_id"], + ) if user_action["target_user_id"] + user_action["acting_user_id"] = user_id_from_imported_id( + user_action["acting_user_id"], + ) if user_action["acting_user_id"] + user_action["queued_post_id"] = post_id_from_imported_id( + user_action["queued_post_id"], + ) if user_action["queued_post_id"] user_action end def process_tag_group(tag_group) - tag_group['parent_tag_id'] = tag_id_from_imported_id(tag_group['parent_tag_id']) if tag_group['parent_tag_id'] + tag_group["parent_tag_id"] = tag_id_from_imported_id(tag_group["parent_tag_id"]) if tag_group[ + "parent_tag_id" + ] tag_group end def process_category_group(category_group) - return nil if category_group['category_id'].nil? || category_group['group_id'].nil? + return nil if category_group["category_id"].nil? || category_group["group_id"].nil? category_group end def process_group_user(group_user) - if @auto_group_ids.include?(group_user['group_id'].to_i) && - @merged_user_ids.include?(group_user['user_id'].to_i) + if @auto_group_ids.include?(group_user["group_id"].to_i) && + @merged_user_ids.include?(group_user["user_id"].to_i) return nil end - return nil if group_user['user_id'].to_i < 1 + return nil if group_user["user_id"].to_i < 1 group_user end def process_group_history(group_history) - group_history['acting_user_id'] = user_id_from_imported_id(group_history['acting_user_id']) if group_history['acting_user_id'] - group_history['target_user_id'] = user_id_from_imported_id(group_history['target_user_id']) if group_history['target_user_id'] + group_history["acting_user_id"] = user_id_from_imported_id( + group_history["acting_user_id"], + ) if group_history["acting_user_id"] + group_history["target_user_id"] = user_id_from_imported_id( + group_history["target_user_id"], + ) if group_history["target_user_id"] group_history end def process_group_archived_message(gam) - return nil unless gam['topic_id'].present? && gam['group_id'].present? + return nil unless gam["topic_id"].present? && gam["group_id"].present? 
gam end def process_topic_link(topic_link) - topic_link['link_topic_id'] = topic_id_from_imported_id(topic_link['link_topic_id']) if topic_link['link_topic_id'] - topic_link['link_post_id'] = post_id_from_imported_id(topic_link['link_post_id']) if topic_link['link_post_id'] + topic_link["link_topic_id"] = topic_id_from_imported_id( + topic_link["link_topic_id"], + ) if topic_link["link_topic_id"] + topic_link["link_post_id"] = post_id_from_imported_id(topic_link["link_post_id"]) if topic_link[ + "link_post_id" + ] topic_link end def process_user_avatar(user_avatar) - user_avatar['custom_upload_id'] = upload_id_from_imported_id(user_avatar['custom_upload_id']) if user_avatar['custom_upload_id'] - user_avatar['gravatar_upload_id'] = upload_id_from_imported_id(user_avatar['gravatar_upload_id']) if user_avatar['gravatar_upload_id'] - return nil unless user_avatar['custom_upload_id'].present? || user_avatar['gravatar_upload_id'].present? + user_avatar["custom_upload_id"] = upload_id_from_imported_id( + user_avatar["custom_upload_id"], + ) if user_avatar["custom_upload_id"] + user_avatar["gravatar_upload_id"] = upload_id_from_imported_id( + user_avatar["gravatar_upload_id"], + ) if user_avatar["gravatar_upload_id"] + unless user_avatar["custom_upload_id"].present? || user_avatar["gravatar_upload_id"].present? + return nil + end user_avatar end def process_user_history(user_history) - user_history['acting_user_id'] = user_id_from_imported_id(user_history['acting_user_id']) if user_history['acting_user_id'] - user_history['target_user_id'] = user_id_from_imported_id(user_history['target_user_id']) if user_history['target_user_id'] + user_history["acting_user_id"] = user_id_from_imported_id( + user_history["acting_user_id"], + ) if user_history["acting_user_id"] + user_history["target_user_id"] = user_id_from_imported_id( + user_history["target_user_id"], + ) if user_history["target_user_id"] user_history end def process_user_warning(user_warning) - user_warning['created_by_id'] = user_id_from_imported_id(user_warning['created_by_id']) if user_warning['created_by_id'] + user_warning["created_by_id"] = user_id_from_imported_id( + user_warning["created_by_id"], + ) if user_warning["created_by_id"] user_warning end def process_post_upload(post_upload) - return nil unless post_upload['upload_id'].present? + return nil unless post_upload["upload_id"].present? @imported_post_uploads ||= {} - return nil if @imported_post_uploads[post_upload['post_id']]&.include?(post_upload['upload_id']) - @imported_post_uploads[post_upload['post_id']] ||= [] - @imported_post_uploads[post_upload['post_id']] << post_upload['upload_id'] + return nil if @imported_post_uploads[post_upload["post_id"]]&.include?(post_upload["upload_id"]) + @imported_post_uploads[post_upload["post_id"]] ||= [] + @imported_post_uploads[post_upload["post_id"]] << post_upload["upload_id"] - return nil if PostUpload.where(post_id: post_upload['post_id'], upload_id: post_upload['upload_id']).exists? + if PostUpload.where( + post_id: post_upload["post_id"], + upload_id: post_upload["upload_id"], + ).exists? 
+ return nil + end post_upload end def process_notification(notification) - notification['post_action_id'] = post_action_id_from_imported_id(notification['post_action_id']) if notification['post_action_id'] + notification["post_action_id"] = post_action_id_from_imported_id( + notification["post_action_id"], + ) if notification["post_action_id"] notification end def process_oauth2_user_info(r) - return nil if Oauth2UserInfo.where(uid: r['uid'], provider: r['provider']).exists? + return nil if Oauth2UserInfo.where(uid: r["uid"], provider: r["provider"]).exists? r end def process_user_associated_account(r) - return nil if UserAssociatedAccount.where(provider_uid: r['uid'], provider_name: r['provider']).exists? + if UserAssociatedAccount.where(provider_uid: r["uid"], provider_name: r["provider"]).exists? + return nil + end r end def process_single_sign_on_record(r) - return nil if SingleSignOnRecord.where(external_id: r['external_id']).exists? + return nil if SingleSignOnRecord.where(external_id: r["external_id"]).exists? r end def process_user_badge(user_badge) - user_badge['granted_by_id'] = user_id_from_imported_id(user_badge['granted_by_id']) if user_badge['granted_by_id'] - user_badge['notification_id'] = notification_id_from_imported_id(user_badge['notification_id']) if user_badge['notification_id'] - return nil if UserBadge.where(user_id: user_badge['user_id'], badge_id: user_badge['badge_id']).exists? + user_badge["granted_by_id"] = user_id_from_imported_id( + user_badge["granted_by_id"], + ) if user_badge["granted_by_id"] + user_badge["notification_id"] = notification_id_from_imported_id( + user_badge["notification_id"], + ) if user_badge["notification_id"] + if UserBadge.where(user_id: user_badge["user_id"], badge_id: user_badge["badge_id"]).exists? + return nil + end user_badge end def process_email_change_request(ecr) - ecr['old_email_token_id'] = email_token_id_from_imported_id(ecr['old_email_token_id']) if ecr['old_email_token_id'] - ecr['new_email_token_id'] = email_token_id_from_imported_id(ecr['new_email_token_id']) if ecr['new_email_token_id'] + ecr["old_email_token_id"] = email_token_id_from_imported_id(ecr["old_email_token_id"]) if ecr[ + "old_email_token_id" + ] + ecr["new_email_token_id"] = email_token_id_from_imported_id(ecr["new_email_token_id"]) if ecr[ + "new_email_token_id" + ] ecr end def process_tag_user(x) - return nil if TagUser.where(tag_id: x['tag_id'], user_id: x['user_id']).exists? + return nil if TagUser.where(tag_id: x["tag_id"], user_id: x["user_id"]).exists? x end def process_topic_tag(x) - return nil if TopicTag.where(topic_id: x['topic_id'], tag_id: x['tag_id']).exists? + return nil if TopicTag.where(topic_id: x["topic_id"], tag_id: x["tag_id"]).exists? x end def process_category_tag(x) - return nil if CategoryTag.where(category_id: x['category_id'], tag_id: x['tag_id']).exists? + return nil if CategoryTag.where(category_id: x["category_id"], tag_id: x["tag_id"]).exists? x end def process_category_tag_stat(x) - return nil if CategoryTagStat.where(category_id: x['category_id'], tag_id: x['tag_id']).exists? + return nil if CategoryTagStat.where(category_id: x["category_id"], tag_id: x["tag_id"]).exists? x end @@ -744,27 +836,29 @@ class BulkImport::DiscourseMerger < BulkImport::Base def fix_user_columns puts "updating foreign keys in the users table..." 
- User.where('id >= ?', @first_new_user_id).find_each do |u| - arr = [] - sql = "UPDATE users SET".dup + User + .where("id >= ?", @first_new_user_id) + .find_each do |u| + arr = [] + sql = "UPDATE users SET".dup - if new_approved_by_id = user_id_from_imported_id(u.approved_by_id) - arr << " approved_by_id = #{new_approved_by_id}" + if new_approved_by_id = user_id_from_imported_id(u.approved_by_id) + arr << " approved_by_id = #{new_approved_by_id}" + end + if new_primary_group_id = group_id_from_imported_id(u.primary_group_id) + arr << " primary_group_id = #{new_primary_group_id}" + end + if new_notification_id = notification_id_from_imported_id(u.seen_notification_id) + arr << " seen_notification_id = #{new_notification_id}" + end + + next if arr.empty? + + sql << arr.join(", ") + sql << " WHERE id = #{u.id}" + + @raw_connection.exec(sql) end - if new_primary_group_id = group_id_from_imported_id(u.primary_group_id) - arr << " primary_group_id = #{new_primary_group_id}" - end - if new_notification_id = notification_id_from_imported_id(u.seen_notification_id) - arr << " seen_notification_id = #{new_notification_id}" - end - - next if arr.empty? - - sql << arr.join(', ') - sql << " WHERE id = #{u.id}" - - @raw_connection.exec(sql) - end end def fix_topic_links @@ -777,33 +871,37 @@ class BulkImport::DiscourseMerger < BulkImport::Base @topics.each do |old_topic_id, new_topic_id| current += 1 percent = (current * 100) / total - puts "#{current} (#{percent}\%) completed. #{update_count} rows updated." if current % 200 == 0 + if current % 200 == 0 + puts "#{current} (#{percent}\%) completed. #{update_count} rows updated." + end if topic = Topic.find_by_id(new_topic_id) replace_arg = [ "#{@source_base_url}/t/#{topic.slug}/#{old_topic_id}", - "#{@source_base_url}/t/#{topic.slug}/#{new_topic_id}" + "#{@source_base_url}/t/#{topic.slug}/#{new_topic_id}", ] - r = @raw_connection.async_exec( - "UPDATE posts + r = + @raw_connection.async_exec( + "UPDATE posts SET raw = replace(raw, $1, $2) WHERE NOT raw IS NULL AND topic_id >= #{@first_new_topic_id} AND raw <> replace(raw, $1, $2)", - replace_arg - ) + replace_arg, + ) update_count += r.cmd_tuples - r = @raw_connection.async_exec( - "UPDATE posts + r = + @raw_connection.async_exec( + "UPDATE posts SET cooked = replace(cooked, $1, $2) WHERE NOT cooked IS NULL AND topic_id >= #{@first_new_topic_id} AND cooked <> replace(cooked, $1, $2)", - replace_arg - ) + replace_arg, + ) update_count += r.cmd_tuples end @@ -811,7 +909,6 @@ class BulkImport::DiscourseMerger < BulkImport::Base puts "updated #{update_count} rows" end - end BulkImport::DiscourseMerger.new.start diff --git a/script/bulk_import/phpbb_postgresql.rb b/script/bulk_import/phpbb_postgresql.rb index cd5fa626fd5..70def55fb83 100644 --- a/script/bulk_import/phpbb_postgresql.rb +++ b/script/bulk_import/phpbb_postgresql.rb @@ -3,17 +3,16 @@ require_relative "base" require "pg" require "htmlentities" -require 'ruby-bbcode-to-md' +require "ruby-bbcode-to-md" class BulkImport::PhpBB < BulkImport::Base - SUSPENDED_TILL ||= Date.new(3000, 1, 1) - TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "phpbb_" + TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "phpbb_" def initialize super - charset = ENV["DB_CHARSET"] || "utf8" + charset = ENV["DB_CHARSET"] || "utf8" database = ENV["DB_NAME"] || "flightaware" password = ENV["DB_PASSWORD"] || "discourse" @@ -57,7 +56,7 @@ class BulkImport::PhpBB < BulkImport::Base { imported_id: row["group_id"], name: normalize_text(row["group_name"]), - bio_raw: normalize_text(row["group_desc"]) + 
bio_raw: normalize_text(row["group_desc"]), } end end @@ -85,15 +84,28 @@ class BulkImport::PhpBB < BulkImport::Base username: normalize_text(row["username"]), email: row["user_email"], created_at: Time.zone.at(row["user_regdate"].to_i), - last_seen_at: row["user_lastvisit"] == 0 ? Time.zone.at(row["user_regdate"].to_i) : Time.zone.at(row["user_lastvisit"].to_i), + last_seen_at: + ( + if row["user_lastvisit"] == 0 + Time.zone.at(row["user_regdate"].to_i) + else + Time.zone.at(row["user_lastvisit"].to_i) + end + ), trust_level: row["user_posts"] == 0 ? TrustLevel[0] : TrustLevel[1], date_of_birth: parse_birthday(row["user_birthday"]), - primary_group_id: group_id_from_imported_id(row["group_id"]) + primary_group_id: group_id_from_imported_id(row["group_id"]), } u[:ip_address] = row["user_ip"][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row["user_ip"].present? if row["ban_start"] u[:suspended_at] = Time.zone.at(row["ban_start"].to_i) - u[:suspended_till] = row["ban_end"].to_i > 0 ? Time.zone.at(row["ban_end"].to_i) : SUSPENDED_TILL + u[:suspended_till] = ( + if row["ban_end"].to_i > 0 + Time.zone.at(row["ban_end"].to_i) + else + SUSPENDED_TILL + end + ) end u end @@ -114,7 +126,7 @@ class BulkImport::PhpBB < BulkImport::Base imported_id: row["user_id"], imported_user_id: row["user_id"], email: row["user_email"], - created_at: Time.zone.at(row["user_regdate"].to_i) + created_at: Time.zone.at(row["user_regdate"].to_i), } end end @@ -149,7 +161,14 @@ class BulkImport::PhpBB < BulkImport::Base create_user_profiles(user_profiles) do |row| { user_id: user_id_from_imported_id(row["user_id"]), - website: (URI.parse(row["user_website"]).to_s rescue nil), + website: + ( + begin + URI.parse(row["user_website"]).to_s + rescue StandardError + nil + end + ), location: row["user_from"], } end @@ -158,17 +177,16 @@ class BulkImport::PhpBB < BulkImport::Base def import_categories puts "Importing categories..." - categories = psql_query(<<-SQL + categories = psql_query(<<-SQL).to_a SELECT forum_id, parent_id, forum_name, forum_desc FROM #{TABLE_PREFIX}forums WHERE forum_id > #{@last_imported_category_id} ORDER BY parent_id, left_id SQL - ).to_a return if categories.empty? - parent_categories = categories.select { |c| c["parent_id"].to_i == 0 } + parent_categories = categories.select { |c| c["parent_id"].to_i == 0 } children_categories = categories.select { |c| c["parent_id"].to_i != 0 } puts "Importing parent categories..." 
@@ -176,7 +194,7 @@ class BulkImport::PhpBB < BulkImport::Base { imported_id: row["forum_id"], name: normalize_text(row["forum_name"]), - description: normalize_text(row["forum_desc"]) + description: normalize_text(row["forum_desc"]), } end @@ -186,7 +204,7 @@ class BulkImport::PhpBB < BulkImport::Base imported_id: row["forum_id"], name: normalize_text(row["forum_name"]), description: normalize_text(row["forum_desc"]), - parent_category_id: category_id_from_imported_id(row["parent_id"]) + parent_category_id: category_id_from_imported_id(row["parent_id"]), } end end @@ -209,7 +227,7 @@ class BulkImport::PhpBB < BulkImport::Base category_id: category_id_from_imported_id(row["forum_id"]), user_id: user_id_from_imported_id(row["topic_poster"]), created_at: Time.zone.at(row["topic_time"].to_i), - views: row["topic_views"] + views: row["topic_views"], } end end @@ -261,7 +279,7 @@ class BulkImport::PhpBB < BulkImport::Base imported_id: row["msg_id"].to_i + PRIVATE_OFFSET, title: normalize_text(title), user_id: user_id_from_imported_id(row["author_id"].to_i), - created_at: Time.zone.at(row["message_time"].to_i) + created_at: Time.zone.at(row["message_time"].to_i), } end end @@ -271,13 +289,12 @@ class BulkImport::PhpBB < BulkImport::Base allowed_users = [] - psql_query(<<-SQL + psql_query(<<-SQL).each do |row| SELECT msg_id, author_id, to_address FROM #{TABLE_PREFIX}privmsgs WHERE msg_id > (#{@last_imported_private_topic_id - PRIVATE_OFFSET}) ORDER BY msg_id SQL - ).each do |row| next unless topic_id = topic_id_from_imported_id(row["msg_id"].to_i + PRIVATE_OFFSET) user_ids = get_message_recipients(row["author_id"], row["to_address"]) @@ -287,12 +304,7 @@ class BulkImport::PhpBB < BulkImport::Base end end - create_topic_allowed_users(allowed_users) do |row| - { - topic_id: row[0], - user_id: row[1] - } - end + create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } } end def import_private_posts @@ -316,13 +328,13 @@ class BulkImport::PhpBB < BulkImport::Base topic_id: topic_id, user_id: user_id_from_imported_id(row["author_id"].to_i), created_at: Time.zone.at(row["message_time"].to_i), - raw: process_raw_text(row["message_text"]) + raw: process_raw_text(row["message_text"]), } end end def get_message_recipients(from, to) - user_ids = to.split(':') + user_ids = to.split(":") user_ids.map! { |u| u[2..-1].to_i } user_ids.push(from.to_i) user_ids.uniq! @@ -332,15 +344,29 @@ class BulkImport::PhpBB < BulkImport::Base def extract_pm_title(title) pm_title = CGI.unescapeHTML(title) - pm_title = title.gsub(/^Re\s*:\s*/i, "") rescue nil + pm_title = + begin + title.gsub(/^Re\s*:\s*/i, "") + rescue StandardError + nil + end pm_title end def parse_birthday(birthday) return if birthday.blank? - date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil + date_of_birth = + begin + Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") + rescue StandardError + nil + end return if date_of_birth.nil? - date_of_birth.year < 1904 ? 
Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
+    if date_of_birth.year < 1904
+      Date.new(1904, date_of_birth.month, date_of_birth.day)
+    else
+      date_of_birth
+    end
   end
 
   def psql_query(sql)
@@ -352,34 +378,36 @@ class BulkImport::PhpBB < BulkImport::Base
     text = raw.dup
     text = CGI.unescapeHTML(text)
 
-    text.gsub!(/:(?:\w{8})\]/, ']')
+    text.gsub!(/:(?:\w{8})\]/, "]")
 
     # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
-    text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
+    text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
 
     # phpBB shortens link text like this, which breaks our markdown processing:
     #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
     #
     # Work around it for now:
-    text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
+    text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
 
     # convert list tags to ul and list=1 tags to ol
     # list=a is not supported, so handle it like list=1
     # list=9 and list=x have the same result as list=1 and list=a
-    text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
-    text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
+    text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]')
+    text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]')
 
     # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
-    text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
+    text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]')
 
     # [QUOTE="<username>"] -- add newline
     text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }
 
     # [/QUOTE] -- add newline
-    text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
+    text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" }
 
     # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
-    text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/) do
+    text.gsub!(
+      /<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/,
+    ) do
       smiley = $1
       @smiley_map.fetch(smiley) do
         # upload_smiley(smiley, $2, $3, $4) || smiley_as_text(smiley)
@@ -405,33 +433,30 @@ class BulkImport::PhpBB < BulkImport::Base
 
   def add_default_smilies
     {
-      [':D', ':-D', ':grin:'] => ':smiley:',
-      [':)', ':-)', ':smile:'] => ':slight_smile:',
-      [';)', ';-)', ':wink:'] => ':wink:',
-      [':(', ':-(', ':sad:'] => ':frowning:',
-      [':o', ':-o', ':eek:'] => ':astonished:',
-      [':shock:'] => ':open_mouth:',
-      [':?', ':-?', ':???:'] => ':confused:',
-      ['8-)', ':cool:'] => ':sunglasses:',
-      [':lol:'] => ':laughing:',
-      [':x', ':-x', ':mad:'] => ':angry:',
-      [':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
-      [':oops:'] => ':blush:',
-      [':cry:'] => ':cry:',
-      [':evil:'] => ':imp:',
-      [':twisted:'] => ':smiling_imp:',
-      [':roll:'] => ':unamused:',
-      [':!:'] => ':exclamation:',
-      [':?:'] => ':question:',
-      [':idea:'] => ':bulb:',
-      [':arrow:'] => ':arrow_right:',
-      [':|', ':-|'] => ':neutral_face:',
-      [':geek:'] => ':nerd:'
-    }.each do |smilies, emoji|
-      smilies.each { |smiley| @smiley_map[smiley] = emoji }
-    end
+      %w[:D :-D :grin:] => ":smiley:",
+      %w[:) :-) :smile:] => ":slight_smile:",
+      %w[;) ;-) :wink:] => ":wink:",
+      %w[:( :-( :sad:] => ":frowning:",
+      %w[:o :-o :eek:] => ":astonished:",
+      [":shock:"] => ":open_mouth:",
+      %w[:? :-?
:???:] => ":confused:", + %w[8-) :cool:] => ":sunglasses:", + [":lol:"] => ":laughing:", + %w[:x :-x :mad:] => ":angry:", + %w[:P :-P :razz:] => ":stuck_out_tongue:", + [":oops:"] => ":blush:", + [":cry:"] => ":cry:", + [":evil:"] => ":imp:", + [":twisted:"] => ":smiling_imp:", + [":roll:"] => ":unamused:", + [":!:"] => ":exclamation:", + [":?:"] => ":question:", + [":idea:"] => ":bulb:", + [":arrow:"] => ":arrow_right:", + %w[:| :-|] => ":neutral_face:", + [":geek:"] => ":nerd:", + }.each { |smilies, emoji| smilies.each { |smiley| @smiley_map[smiley] = emoji } } end - end BulkImport::PhpBB.new.run diff --git a/script/bulk_import/vanilla.rb b/script/bulk_import/vanilla.rb index d01eed3af03..827f57c3fd9 100644 --- a/script/bulk_import/vanilla.rb +++ b/script/bulk_import/vanilla.rb @@ -8,7 +8,6 @@ require "htmlentities" # NOTE: this importer expects a MySQL DB to directly connect to class BulkImport::Vanilla < BulkImport::Base - VANILLA_DB = "dbname" TABLE_PREFIX = "GDN_" ATTACHMENTS_BASE_DIR = "/my/absolute/path/to/from_vanilla/uploads" @@ -20,13 +19,14 @@ class BulkImport::Vanilla < BulkImport::Base def initialize super @htmlentities = HTMLEntities.new - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - database: VANILLA_DB, - password: "", - reconnect: true - ) + @client = + Mysql2::Client.new( + host: "localhost", + username: "root", + database: VANILLA_DB, + password: "", + reconnect: true, + ) @import_tags = false begin @@ -88,10 +88,10 @@ class BulkImport::Vanilla < BulkImport::Base end def import_users - puts '', "Importing users..." + puts "", "Importing users..." username = nil - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count'] + total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first["count"] users = mysql_stream <<-SQL SELECT UserID, Name, Title, Location, Email, @@ -103,26 +103,32 @@ class BulkImport::Vanilla < BulkImport::Base SQL create_users(users) do |row| - next if row['Email'].blank? - next if row['Name'].blank? + next if row["Email"].blank? + next if row["Name"].blank? - if ip_address = row['InsertIPAddress']&.split(',').try(:[], 0) - ip_address = nil unless (IPAddr.new(ip_address) rescue false) + if ip_address = row["InsertIPAddress"]&.split(",").try(:[], 0) + ip_address = nil unless ( + begin + IPAddr.new(ip_address) + rescue StandardError + false + end + ) end u = { - imported_id: row['UserID'], - email: row['Email'], - username: row['Name'], - name: row['Name'], - created_at: row['DateInserted'] == nil ? 0 : Time.zone.at(row['DateInserted']), + imported_id: row["UserID"], + email: row["Email"], + username: row["Name"], + name: row["Name"], + created_at: row["DateInserted"] == nil ? 0 : Time.zone.at(row["DateInserted"]), registration_ip_address: ip_address, - last_seen_at: row['DateLastActive'] == nil ? 0 : Time.zone.at(row['DateLastActive']), - location: row['Location'], - admin: row['Admin'] > 0 + last_seen_at: row["DateLastActive"] == nil ? 0 : Time.zone.at(row["DateLastActive"]), + location: row["Location"], + admin: row["Admin"] > 0, } if row["Banned"] > 0 - u[:suspended_at] = Time.zone.at(row['DateInserted']) + u[:suspended_at] = Time.zone.at(row["DateInserted"]) u[:suspended_till] = SUSPENDED_TILL end u @@ -130,7 +136,7 @@ class BulkImport::Vanilla < BulkImport::Base end def import_user_emails - puts '', 'Importing user emails...' + puts "", "Importing user emails..." 
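    # mysql_stream is defined elsewhere in this script; a sketch of what it
    # presumably wraps, assuming the Mysql2 client configured in initialize
    # (the exact options are an assumption):
    #
    #   def mysql_stream(sql)
    #     @client.query(sql, stream: true, cache_rows: false)
    #   end
    #
    # Streaming keeps the result set out of memory, so the create_* helpers can
    # iterate over millions of source rows without loading them all at once.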
users = mysql_stream <<-SQL SELECT UserID, Name, Email, DateInserted @@ -141,20 +147,20 @@ class BulkImport::Vanilla < BulkImport::Base SQL create_user_emails(users) do |row| - next if row['Email'].blank? - next if row['Name'].blank? + next if row["Email"].blank? + next if row["Name"].blank? { imported_id: row["UserID"], imported_user_id: row["UserID"], email: row["Email"], - created_at: Time.zone.at(row["DateInserted"]) + created_at: Time.zone.at(row["DateInserted"]), } end end def import_user_profiles - puts '', 'Importing user profiles...' + puts "", "Importing user profiles..." user_profiles = mysql_stream <<-SQL SELECT UserID, Name, Email, Location, About @@ -165,19 +171,19 @@ class BulkImport::Vanilla < BulkImport::Base SQL create_user_profiles(user_profiles) do |row| - next if row['Email'].blank? - next if row['Name'].blank? + next if row["Email"].blank? + next if row["Name"].blank? { user_id: user_id_from_imported_id(row["UserID"]), location: row["Location"], - bio_raw: row["About"] + bio_raw: row["About"], } end end def import_user_stats - puts '', "Importing user stats..." + puts "", "Importing user stats..." users = mysql_stream <<-SQL SELECT UserID, CountDiscussions, CountComments, DateInserted @@ -190,14 +196,14 @@ class BulkImport::Vanilla < BulkImport::Base now = Time.zone.now create_user_stats(users) do |row| - next unless @users[row['UserID'].to_i] # shouldn't need this but it can be NULL :< + next unless @users[row["UserID"].to_i] # shouldn't need this but it can be NULL :< { - imported_id: row['UserID'], - imported_user_id: row['UserID'], - new_since: Time.zone.at(row['DateInserted'] || now), - post_count: row['CountComments'] || 0, - topic_count: row['CountDiscussions'] || 0 + imported_id: row["UserID"], + imported_user_id: row["UserID"], + new_since: Time.zone.at(row["DateInserted"] || now), + post_count: row["CountComments"] || 0, + topic_count: row["CountDiscussions"] || 0, } end end @@ -215,7 +221,10 @@ class BulkImport::Vanilla < BulkImport::Base next unless u.custom_fields["import_id"] - r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first + r = + mysql_query( + "SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields["import_id"]};", + ).first next if r.nil? photo = r["photo"] next unless photo.present? 
@@ -229,9 +238,9 @@ class BulkImport::Vanilla < BulkImport::Base photo_real_filename = nil parts = photo.squeeze("/").split("/") if parts[0] =~ /^[a-z0-9]{2}:/ - photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/") + photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join("/")}".squeeze("/") elsif parts[0] == "~cf" - photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/") + photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join("/")}".squeeze("/") else puts "UNKNOWN FORMAT: #{photo}" next @@ -272,75 +281,86 @@ class BulkImport::Vanilla < BulkImport::Base count = 0 # https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg - cdn_regex = /https:\/\/us.v-cdn.net\/1234567\/uploads\/(\S+\/(\w|-)+.\w+)/i + cdn_regex = %r{https://us.v-cdn.net/1234567/uploads/(\S+/(\w|-)+.\w+)}i # [attachment=10109:Screen Shot 2012-04-01 at 3.47.35 AM.png] attachment_regex = /\[attachment=(\d+):(.*?)\]/i - Post.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'").find_each do |post| - count += 1 - print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] - new_raw = post.raw.dup + Post + .where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'") + .find_each do |post| + count += 1 + print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] + new_raw = post.raw.dup - new_raw.gsub!(attachment_regex) do |s| - matches = attachment_regex.match(s) - attachment_id = matches[1] - file_name = matches[2] - next unless attachment_id + new_raw.gsub!(attachment_regex) do |s| + matches = attachment_regex.match(s) + attachment_id = matches[1] + file_name = matches[2] + next unless attachment_id - r = mysql_query("SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};").first - next if r.nil? - path = r["Path"] - name = r["Name"] - next unless path.present? + r = + mysql_query( + "SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};", + ).first + next if r.nil? + path = r["Path"] + name = r["Name"] + next unless path.present? - path.gsub!("s3://content/", "") - path.gsub!("s3://uploads/", "") - file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}" + path.gsub!("s3://content/", "") + path.gsub!("s3://uploads/", "") + file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}" - if File.exist?(file_path) - upload = create_upload(post.user.id, file_path, File.basename(file_path)) - if upload && upload.errors.empty? - # upload.url - filename = name || file_name || File.basename(file_path) - html_for_upload(upload, normalize_text(filename)) + if File.exist?(file_path) + upload = create_upload(post.user.id, file_path, File.basename(file_path)) + if upload && upload.errors.empty? + # upload.url + filename = name || file_name || File.basename(file_path) + html_for_upload(upload, normalize_text(filename)) + else + puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + end else - puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + puts "Couldn't find file for #{attachment_id}. Skipping." + next end - else - puts "Couldn't find file for #{attachment_id}. Skipping." 
- next end - end - new_raw.gsub!(cdn_regex) do |s| - matches = cdn_regex.match(s) - attachment_id = matches[1] + new_raw.gsub!(cdn_regex) do |s| + matches = cdn_regex.match(s) + attachment_id = matches[1] - file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}" + file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}" - if File.exist?(file_path) - upload = create_upload(post.user.id, file_path, File.basename(file_path)) - if upload && upload.errors.empty? - upload.url + if File.exist?(file_path) + upload = create_upload(post.user.id, file_path, File.basename(file_path)) + if upload && upload.errors.empty? + upload.url + else + puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + end else - puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + puts "Couldn't find file for #{attachment_id}. Skipping." + next end - else - puts "Couldn't find file for #{attachment_id}. Skipping." - next end - end - if new_raw != post.raw - begin - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, skip_revision: true, skip_validations: true, bypass_bump: true) - rescue - puts "PostRevisor error for #{post.id}" - post.raw = new_raw - post.save(validate: false) + if new_raw != post.raw + begin + PostRevisor.new(post).revise!( + post.user, + { raw: new_raw }, + skip_revision: true, + skip_validations: true, + bypass_bump: true, + ) + rescue StandardError + puts "PostRevisor error for #{post.id}" + post.raw = new_raw + post.save(validate: false) + end end end - end end end @@ -352,7 +372,7 @@ class BulkImport::Vanilla < BulkImport::Base # Otherwise, the file exists but with a prefix: # The p prefix seems to be the full file, so try to find that one first. - ['p', 't', 'n'].each do |prefix| + %w[p t n].each do |prefix| full_guess = File.join(path, "#{prefix}#{base_guess}") return full_guess if File.exist?(full_guess) end @@ -364,26 +384,30 @@ class BulkImport::Vanilla < BulkImport::Base def import_categories puts "", "Importing categories..." - categories = mysql_query(" + categories = + mysql_query( + " SELECT CategoryID, ParentCategoryID, Name, Description, Sort FROM #{TABLE_PREFIX}Category WHERE CategoryID > 0 ORDER BY Sort, CategoryID - ").to_a + ", + ).to_a # Throw the -1 level categories away since they contain no topics. # Use the next level as root categories. - top_level_categories = categories.select { |c| c["ParentCategoryID"].blank? || c['ParentCategoryID'] == -1 } + top_level_categories = + categories.select { |c| c["ParentCategoryID"].blank? || c["ParentCategoryID"] == -1 } # Depth = 2 create_categories(top_level_categories) do |category| - next if category_id_from_imported_id(category['CategoryID']) + next if category_id_from_imported_id(category["CategoryID"]) { - imported_id: category['CategoryID'], - name: CGI.unescapeHTML(category['Name']), - description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil, - position: category['Sort'] + imported_id: category["CategoryID"], + name: CGI.unescapeHTML(category["Name"]), + description: category["Description"] ? 
CGI.unescapeHTML(category["Description"]) : nil, + position: category["Sort"], } end @@ -393,39 +417,39 @@ class BulkImport::Vanilla < BulkImport::Base # Depth = 3 create_categories(subcategories) do |category| - next if category_id_from_imported_id(category['CategoryID']) + next if category_id_from_imported_id(category["CategoryID"]) { - imported_id: category['CategoryID'], - parent_category_id: category_id_from_imported_id(category['ParentCategoryID']), - name: CGI.unescapeHTML(category['Name']), - description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil, - position: category['Sort'] + imported_id: category["CategoryID"], + parent_category_id: category_id_from_imported_id(category["ParentCategoryID"]), + name: CGI.unescapeHTML(category["Name"]), + description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil, + position: category["Sort"], } end - subcategory_ids = Set.new(subcategories.map { |c| c['CategoryID'] }) + subcategory_ids = Set.new(subcategories.map { |c| c["CategoryID"] }) # Depth 4 and 5 need to be tags categories.each do |c| - next if c['ParentCategoryID'] == -1 - next if top_level_category_ids.include?(c['CategoryID']) - next if subcategory_ids.include?(c['CategoryID']) + next if c["ParentCategoryID"] == -1 + next if top_level_category_ids.include?(c["CategoryID"]) + next if subcategory_ids.include?(c["CategoryID"]) # Find a depth 3 category for topics in this category parent = c - while !parent.nil? && !subcategory_ids.include?(parent['CategoryID']) - parent = categories.find { |subcat| subcat['CategoryID'] == parent['ParentCategoryID'] } + while !parent.nil? && !subcategory_ids.include?(parent["CategoryID"]) + parent = categories.find { |subcat| subcat["CategoryID"] == parent["ParentCategoryID"] } end if parent - tag_name = DiscourseTagging.clean_tag(c['Name']) - @category_mappings[c['CategoryID']] = { - category_id: category_id_from_imported_id(parent['CategoryID']), - tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name) + tag_name = DiscourseTagging.clean_tag(c["Name"]) + @category_mappings[c["CategoryID"]] = { + category_id: category_id_from_imported_id(parent["CategoryID"]), + tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name), } else - puts '', "Couldn't find a category for #{c['CategoryID']} '#{c['Name']}'!" + puts "", "Couldn't find a category for #{c["CategoryID"]} '#{c["Name"]}'!" end end end @@ -433,7 +457,8 @@ class BulkImport::Vanilla < BulkImport::Base def import_topics puts "", "Importing topics..." - topics_sql = "SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format + topics_sql = + "SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format FROM #{TABLE_PREFIX}Discussion WHERE DiscussionID > #{@last_imported_topic_id} ORDER BY DiscussionID ASC" @@ -442,11 +467,12 @@ class BulkImport::Vanilla < BulkImport::Base data = { imported_id: row["DiscussionID"], title: normalize_text(row["Name"]), - category_id: category_id_from_imported_id(row["CategoryID"]) || - @category_mappings[row["CategoryID"]].try(:[], :category_id), + category_id: + category_id_from_imported_id(row["CategoryID"]) || + @category_mappings[row["CategoryID"]].try(:[], :category_id), user_id: user_id_from_imported_id(row["InsertUserID"]), - created_at: Time.zone.at(row['DateInserted']), - pinned_at: row['Announce'] == 0 ? nil : Time.zone.at(row['DateInserted']) + created_at: Time.zone.at(row["DateInserted"]), + pinned_at: row["Announce"] == 0 ? 
nil : Time.zone.at(row["DateInserted"]), } (data[:user_id].present? && data[:title].present?) ? data : false end @@ -455,46 +481,45 @@ class BulkImport::Vanilla < BulkImport::Base create_posts(mysql_stream(topics_sql)) do |row| data = { - imported_id: "d-" + row['DiscussionID'].to_s, - topic_id: topic_id_from_imported_id(row['DiscussionID']), + imported_id: "d-" + row["DiscussionID"].to_s, + topic_id: topic_id_from_imported_id(row["DiscussionID"]), user_id: user_id_from_imported_id(row["InsertUserID"]), - created_at: Time.zone.at(row['DateInserted']), - raw: clean_up(row['Body'], row['Format']) + created_at: Time.zone.at(row["DateInserted"]), + raw: clean_up(row["Body"], row["Format"]), } data[:topic_id].present? ? data : false end - puts '', 'converting deep categories to tags...' + puts "", "converting deep categories to tags..." create_topic_tags(mysql_stream(topics_sql)) do |row| - next unless mapping = @category_mappings[row['CategoryID']] + next unless mapping = @category_mappings[row["CategoryID"]] - { - tag_id: mapping[:tag].id, - topic_id: topic_id_from_imported_id(row["DiscussionID"]) - } + { tag_id: mapping[:tag].id, topic_id: topic_id_from_imported_id(row["DiscussionID"]) } end end def import_posts puts "", "Importing posts..." - posts = mysql_stream( - "SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format + posts = + mysql_stream( + "SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format FROM #{TABLE_PREFIX}Comment WHERE CommentID > #{@last_imported_post_id} - ORDER BY CommentID ASC") + ORDER BY CommentID ASC", + ) create_posts(posts) do |row| - next unless topic_id = topic_id_from_imported_id(row['DiscussionID']) - next if row['Body'].blank? + next unless topic_id = topic_id_from_imported_id(row["DiscussionID"]) + next if row["Body"].blank? { - imported_id: row['CommentID'], + imported_id: row["CommentID"], topic_id: topic_id, - user_id: user_id_from_imported_id(row['InsertUserID']), - created_at: Time.zone.at(row['DateInserted']), - raw: clean_up(row['Body'], row['Format']) + user_id: user_id_from_imported_id(row["InsertUserID"]), + created_at: Time.zone.at(row["DateInserted"]), + raw: clean_up(row["Body"], row["Format"]), } end end @@ -505,31 +530,31 @@ class BulkImport::Vanilla < BulkImport::Base tag_mapping = {} mysql_query("SELECT TagID, Name FROM #{TABLE_PREFIX}Tag").each do |row| - tag_name = DiscourseTagging.clean_tag(row['Name']) + tag_name = DiscourseTagging.clean_tag(row["Name"]) tag = Tag.find_by_name(tag_name) || Tag.create(name: tag_name) - tag_mapping[row['TagID']] = tag.id + tag_mapping[row["TagID"]] = tag.id end - tags = mysql_query( - "SELECT TagID, DiscussionID + tags = + mysql_query( + "SELECT TagID, DiscussionID FROM #{TABLE_PREFIX}TagDiscussion WHERE DiscussionID > #{@last_imported_topic_id} - ORDER BY DateInserted") + ORDER BY DateInserted", + ) create_topic_tags(tags) do |row| - next unless topic_id = topic_id_from_imported_id(row['DiscussionID']) + next unless topic_id = topic_id_from_imported_id(row["DiscussionID"]) - { - topic_id: topic_id, - tag_id: tag_mapping[row['TagID']] - } + { topic_id: topic_id, tag_id: tag_mapping[row["TagID"]] } end end def import_private_topics puts "", "Importing private topics..." 
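    # The "+ PRIVATE_OFFSET" arithmetic used below keeps ConversationID keys from
    # colliding with DiscussionID keys in the shared imported-id lookup tables.
    # A sketch of the idea (the constant lives in bulk_import/base.rb; its exact
    # value here is an assumption):
    #
    #   PRIVATE_OFFSET = 2**30
    #   key = row["ConversationID"] + PRIVATE_OFFSET # private topics only
    #   topic_id = topic_id_from_imported_id(key)    # same map as regular topics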
- topics_sql = "SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID + topics_sql = + "SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID FROM #{TABLE_PREFIX}Conversation c, #{TABLE_PREFIX}ConversationMessage m WHERE c.FirstMessageID = m.MessageID AND c.ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET} @@ -539,9 +564,10 @@ class BulkImport::Vanilla < BulkImport::Base { archetype: Archetype.private_message, imported_id: row["ConversationID"] + PRIVATE_OFFSET, - title: row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}", + title: + row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}", user_id: user_id_from_imported_id(row["InsertUserID"]), - created_at: Time.zone.at(row['DateInserted']) + created_at: Time.zone.at(row["DateInserted"]), } end end @@ -549,7 +575,8 @@ class BulkImport::Vanilla < BulkImport::Base def import_topic_allowed_users puts "", "importing topic_allowed_users..." - topic_allowed_users_sql = " + topic_allowed_users_sql = + " SELECT ConversationID, UserID FROM #{TABLE_PREFIX}UserConversation WHERE Deleted = 0 @@ -559,45 +586,43 @@ class BulkImport::Vanilla < BulkImport::Base added = 0 create_topic_allowed_users(mysql_stream(topic_allowed_users_sql)) do |row| - next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET) + next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET) next unless user_id = user_id_from_imported_id(row["UserID"]) added += 1 - { - topic_id: topic_id, - user_id: user_id, - } + { topic_id: topic_id, user_id: user_id } end - puts '', "Added #{added} topic_allowed_users records." + puts "", "Added #{added} topic_allowed_users records." end def import_private_posts puts "", "importing private replies..." - private_posts_sql = " + private_posts_sql = + " SELECT ConversationID, MessageID, Body, InsertUserID, DateInserted, Format FROM GDN_ConversationMessage WHERE ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET} ORDER BY ConversationID ASC, MessageID ASC" create_posts(mysql_stream(private_posts_sql)) do |row| - next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET) + next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET) { - imported_id: row['MessageID'] + PRIVATE_OFFSET, + imported_id: row["MessageID"] + PRIVATE_OFFSET, topic_id: topic_id, - user_id: user_id_from_imported_id(row['InsertUserID']), - created_at: Time.zone.at(row['DateInserted']), - raw: clean_up(row['Body'], row['Format']) + user_id: user_id_from_imported_id(row["InsertUserID"]), + created_at: Time.zone.at(row["DateInserted"]), + raw: clean_up(row["Body"], row["Format"]), } end end # TODO: too slow def create_permalinks - puts '', 'Creating permalinks...', '' + puts "", "Creating permalinks...", "" - puts ' User pages...' + puts " User pages..." 
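The backfill below streams permalink rows into Postgres with `COPY ... FROM STDIN` rather than issuing one `INSERT` per row, which is what makes the user/topic/post passes tolerable on large imports. A standalone sketch of the same pg gem pattern, with assumed connection settings (the importer itself reuses `@raw_connection` and `@encoder` from `BulkImport::Base`):

```ruby
require "pg"

conn = PG.connect(dbname: "discourse_development") # assumed database name
encoder = PG::TextEncoder::CopyRow.new
now = Time.now

# Same COPY statement as the "User pages" pass; one put_copy_data call per row.
conn.copy_data("COPY permalinks (url, created_at, updated_at, external_url) FROM STDIN", encoder) do
  conn.put_copy_data(["profile/old_username", now, now, "/users/new_username"])
end
```
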
start = Time.now
     count = 0

@@ -606,21 +631,23 @@ class BulkImport::Vanilla < BulkImport::Base
     sql = "COPY permalinks (url, created_at, updated_at, external_url) FROM STDIN"
     @raw_connection.copy_data(sql, @encoder) do
-      User.includes(:_custom_fields).find_each do |u|
-        count += 1
-        ucf = u.custom_fields
-        if ucf && ucf["import_id"]
-          vanilla_username = ucf["import_username"] || u.username
-          @raw_connection.put_copy_data(
-            ["profile/#{vanilla_username}", now, now, "/users/#{u.username}"]
-          )
-        end
+      User
+        .includes(:_custom_fields)
+        .find_each do |u|
+          count += 1
+          ucf = u.custom_fields
+          if ucf && ucf["import_id"]
+            vanilla_username = ucf["import_username"] || u.username
+            @raw_connection.put_copy_data(
+              ["profile/#{vanilla_username}", now, now, "/users/#{u.username}"],
+            )
+          end

-        print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
-      end
+          print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
+        end
     end

-    puts '', '', ' Topics and posts...'
+    puts "", "", " Topics and posts..."

     start = Time.now
     count = 0
@@ -628,38 +655,36 @@ class BulkImport::Vanilla < BulkImport::Base
     sql = "COPY permalinks (url, topic_id, post_id, created_at, updated_at) FROM STDIN"
     @raw_connection.copy_data(sql, @encoder) do
-      Post.includes(:_custom_fields).find_each do |post|
-        count += 1
-        pcf = post.custom_fields
-        if pcf && pcf["import_id"]
-          topic = post.topic
-          if topic.present?
-            id = pcf["import_id"].split('-').last
-            if post.post_number == 1
-              slug = Slug.for(topic.title) # probably matches what vanilla would do...
-              @raw_connection.put_copy_data(
-                ["discussion/#{id}/#{slug}", topic.id, nil, now, now]
-              )
-            else
-              @raw_connection.put_copy_data(
-                ["discussion/comment/#{id}", nil, post.id, now, now]
-              )
+      Post
+        .includes(:_custom_fields)
+        .find_each do |post|
+          count += 1
+          pcf = post.custom_fields
+          if pcf && pcf["import_id"]
+            topic = post.topic
+            if topic.present?
+              id = pcf["import_id"].split("-").last
+              if post.post_number == 1
+                slug = Slug.for(topic.title) # probably matches what vanilla would do...
+                @raw_connection.put_copy_data(["discussion/#{id}/#{slug}", topic.id, nil, now, now])
+              else
+                @raw_connection.put_copy_data(["discussion/comment/#{id}", nil, post.id, now, now])
+              end
             end
           end
-        end

-        print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
-      end
+          print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
+        end
     end
   end

   def clean_up(raw, format)
     raw.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")

-    raw.gsub!(/<(.+)> <\/\1>/, "\n\n")
+    raw.gsub!(%r{<(.+)> </\1>}, "\n\n")

     html =
-      if format == 'Html'
+      if format == "Html"
         raw
       else
         markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML, autolink: true, tables: true)
@@ -668,29 +693,23 @@ class BulkImport::Vanilla < BulkImport::Base

     doc = Nokogiri::HTML5.fragment(html)

-    doc.css("blockquote").each do |bq|
-      name = bq["rel"]
-      user = User.find_by(name: name)
-      bq.replace %{
[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]
} - end + doc + .css("blockquote") + .each do |bq| + name = bq["rel"] + user = User.find_by(name: name) + bq.replace %{
[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]
} + end - doc.css("font").reverse.each do |f| - f.replace f.inner_html - end + doc.css("font").reverse.each { |f| f.replace f.inner_html } - doc.css("span").reverse.each do |f| - f.replace f.inner_html - end + doc.css("span").reverse.each { |f| f.replace f.inner_html } - doc.css("sub").reverse.each do |f| - f.replace f.inner_html - end + doc.css("sub").reverse.each { |f| f.replace f.inner_html } - doc.css("u").reverse.each do |f| - f.replace f.inner_html - end + doc.css("u").reverse.each { |f| f.replace f.inner_html } - markdown = format == 'Html' ? ReverseMarkdown.convert(doc.to_html) : doc.to_html + markdown = format == "Html" ? ReverseMarkdown.convert(doc.to_html) : doc.to_html markdown.gsub!(/\[QUOTE="([^;]+);c-(\d+)"\]/i) { "[QUOTE=#{$1};#{$2}]" } markdown = process_raw_text(markdown) @@ -702,31 +721,31 @@ class BulkImport::Vanilla < BulkImport::Base text = raw.dup text = CGI.unescapeHTML(text) - text.gsub!(/:(?:\w{8})\]/, ']') + text.gsub!(/:(?:\w{8})\]/, "]") # Some links look like this: http://www.onegameamonth.com - text.gsub!(/(.+)<\/a>/i, '[\2](\1)') + text.gsub!(%r{(.+)}i, '[\2](\1)') # phpBB shortens link text like this, which breaks our markdown processing: # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) # # Work around it for now: - text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[') + text.gsub!(%r{\[http(s)?://(www\.)?}i, "[") # convert list tags to ul and list=1 tags to ol # list=a is not supported, so handle it like list=1 # list=9 and list=x have the same result as list=1 and list=a - text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]') - text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]') + text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]') + text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]') # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]') + text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]') # [QUOTE=""] -- add newline text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" } # [/QUOTE] -- add newline - text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" } + text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" } text end @@ -742,7 +761,6 @@ class BulkImport::Vanilla < BulkImport::Base def mysql_query(sql) @client.query(sql) end - end BulkImport::Vanilla.new.start diff --git a/script/bulk_import/vbulletin.rb b/script/bulk_import/vbulletin.rb index fde0fbe7d72..b836338a516 100644 --- a/script/bulk_import/vbulletin.rb +++ b/script/bulk_import/vbulletin.rb @@ -7,43 +7,42 @@ require "htmlentities" require "parallel" class BulkImport::VBulletin < BulkImport::Base - - TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "vb_" + TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "vb_" SUSPENDED_TILL ||= Date.new(3000, 1, 1) - ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/shared/import/data/attachments' - AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars' + ATTACHMENT_DIR ||= ENV["ATTACHMENT_DIR"] || "/shared/import/data/attachments" + AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars" def initialize super - host = ENV["DB_HOST"] || "localhost" + host = ENV["DB_HOST"] || "localhost" username = ENV["DB_USERNAME"] || "root" password = ENV["DB_PASSWORD"] database = ENV["DB_NAME"] || "vbulletin" - charset = ENV["DB_CHARSET"] || "utf8" + charset = ENV["DB_CHARSET"] || "utf8" @html_entities = HTMLEntities.new @encoding = CHARSET_MAP[charset] - @client = Mysql2::Client.new( - host: host, - username: 
username, - password: password, - database: database, - encoding: charset, - reconnect: true - ) + @client = + Mysql2::Client.new( + host: host, + username: username, + password: password, + database: database, + encoding: charset, + reconnect: true, + ) @client.query_options.merge!(as: :array, cache_rows: false) - @has_post_thanks = mysql_query(<<-SQL + @has_post_thanks = mysql_query(<<-SQL).to_a.count > 0 SELECT `COLUMN_NAME` FROM `INFORMATION_SCHEMA`.`COLUMNS` WHERE `TABLE_SCHEMA`='#{database}' AND `TABLE_NAME`='user' AND `COLUMN_NAME` LIKE 'post_thanks_%' SQL - ).to_a.count > 0 @user_ids_by_email = {} end @@ -95,7 +94,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_groups - puts '', "Importing groups..." + puts "", "Importing groups..." groups = mysql_stream <<-SQL SELECT usergroupid, title, description, usertitle @@ -115,7 +114,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_users - puts '', "Importing users..." + puts "", "Importing users..." users = mysql_stream <<-SQL SELECT u.userid, username, email, joindate, birthday, ipaddress, u.usergroupid, bandate, liftdate @@ -145,7 +144,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_user_emails - puts '', "Importing user emails..." + puts "", "Importing user emails..." users = mysql_stream <<-SQL SELECT u.userid, email, joindate @@ -155,7 +154,7 @@ class BulkImport::VBulletin < BulkImport::Base SQL create_user_emails(users) do |row| - user_id, email = row[0 .. 1] + user_id, email = row[0..1] @user_ids_by_email[email.downcase] ||= [] user_ids = @user_ids_by_email[email.downcase] << user_id @@ -170,7 +169,7 @@ class BulkImport::VBulletin < BulkImport::Base imported_id: user_id, imported_user_id: user_id, email: email, - created_at: Time.zone.at(row[2]) + created_at: Time.zone.at(row[2]), } end @@ -179,7 +178,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_user_stats - puts '', "Importing user stats..." + puts "", "Importing user stats..." users = mysql_stream <<-SQL SELECT u.userid, joindate, posts, COUNT(t.threadid) AS threads, p.dateline @@ -199,7 +198,7 @@ class BulkImport::VBulletin < BulkImport::Base new_since: Time.zone.at(row[1]), post_count: row[2], topic_count: row[3], - first_post_created_at: row[4] && Time.zone.at(row[4]) + first_post_created_at: row[4] && Time.zone.at(row[4]), } if @has_post_thanks @@ -212,7 +211,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_group_users - puts '', "Importing group users..." + puts "", "Importing group users..." group_users = mysql_stream <<-SQL SELECT usergroupid, userid @@ -221,15 +220,12 @@ class BulkImport::VBulletin < BulkImport::Base SQL create_group_users(group_users) do |row| - { - group_id: group_id_from_imported_id(row[0]), - user_id: user_id_from_imported_id(row[1]), - } + { group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) } end end def import_user_passwords - puts '', "Importing user passwords..." + puts "", "Importing user passwords..." user_passwords = mysql_stream <<-SQL SELECT userid, password @@ -239,15 +235,12 @@ class BulkImport::VBulletin < BulkImport::Base SQL create_custom_fields("user", "password", user_passwords) do |row| - { - record_id: user_id_from_imported_id(row[0]), - value: row[1], - } + { record_id: user_id_from_imported_id(row[0]), value: row[1] } end end def import_user_salts - puts '', "Importing user salts..." + puts "", "Importing user salts..." 
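The `import_user_emails` hunk above records every imported user id under its lowercased email; any email that ends up with more than one id is resolved later by `merge_duplicated_users` via `UserMerger`. A toy version of that bookkeeping with made-up rows:

```ruby
# Track imported ids per lowercased email; duplicates are merged afterwards.
user_ids_by_email = Hash.new { |hash, email| hash[email] = [] }

[[1, "sam@example.com"], [2, "SAM@example.com"], [3, "kim@example.com"]].each do |id, email|
  user_ids_by_email[email.downcase] << id
end

duplicated = user_ids_by_email.select { |_email, ids| ids.count > 1 }
p duplicated # => {"sam@example.com"=>[1, 2]}
```
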
user_salts = mysql_stream <<-SQL SELECT userid, salt @@ -258,15 +251,12 @@ class BulkImport::VBulletin < BulkImport::Base SQL create_custom_fields("user", "salt", user_salts) do |row| - { - record_id: user_id_from_imported_id(row[0]), - value: row[1], - } + { record_id: user_id_from_imported_id(row[0]), value: row[1] } end end def import_user_profiles - puts '', "Importing user profiles..." + puts "", "Importing user profiles..." user_profiles = mysql_stream <<-SQL SELECT userid, homepage, profilevisits @@ -278,16 +268,23 @@ class BulkImport::VBulletin < BulkImport::Base create_user_profiles(user_profiles) do |row| { user_id: user_id_from_imported_id(row[0]), - website: (URI.parse(row[1]).to_s rescue nil), + website: + ( + begin + URI.parse(row[1]).to_s + rescue StandardError + nil + end + ), views: row[2], } end end def import_categories - puts '', "Importing categories..." + puts "", "Importing categories..." - categories = mysql_query(<<-SQL + categories = mysql_query(<<-SQL).to_a select forumid, parentid, @@ -311,23 +308,20 @@ class BulkImport::VBulletin < BulkImport::Base from forum order by forumid SQL - ).to_a return if categories.empty? - parent_categories = categories.select { |c| c[1] == -1 } + parent_categories = categories.select { |c| c[1] == -1 } children_categories = categories.select { |c| c[1] != -1 } parent_category_ids = Set.new parent_categories.map { |c| c[0] } # cut down the tree to only 2 levels of categories children_categories.each do |cc| - until parent_category_ids.include?(cc[1]) - cc[1] = categories.find { |c| c[0] == cc[1] }[1] - end + cc[1] = categories.find { |c| c[0] == cc[1] }[1] until parent_category_ids.include?(cc[1]) end - puts '', "Importing parent categories..." + puts "", "Importing parent categories..." create_categories(parent_categories) do |row| { imported_id: row[0], @@ -337,7 +331,7 @@ class BulkImport::VBulletin < BulkImport::Base } end - puts '', "Importing children categories..." + puts "", "Importing children categories..." create_categories(children_categories) do |row| { imported_id: row[0], @@ -350,7 +344,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_topics - puts '', "Importing topics..." + puts "", "Importing topics..." topics = mysql_stream <<-SQL SELECT threadid, title, forumid, postuserid, open, dateline, views, visible, sticky @@ -381,7 +375,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_posts - puts '', "Importing posts..." + puts "", "Importing posts..." posts = mysql_stream <<-SQL SELECT postid, p.threadid, parentid, userid, p.dateline, p.visible, pagetext @@ -396,7 +390,8 @@ class BulkImport::VBulletin < BulkImport::Base create_posts(posts) do |row| topic_id = topic_id_from_imported_id(row[1]) replied_post_topic_id = topic_id_from_imported_post_id(row[2]) - reply_to_post_number = topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil + reply_to_post_number = + topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil post = { imported_id: row[0], @@ -415,7 +410,7 @@ class BulkImport::VBulletin < BulkImport::Base def import_likes return unless @has_post_thanks - puts '', "Importing likes..." + puts "", "Importing likes..." 
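The `import_categories` hunk above flattens arbitrarily deep vBulletin forum trees to two levels: each child repeatedly adopts its grandparent until its parent is a top-level forum. The same walk, run standalone on hypothetical `[forumid, parentid]` rows:

```ruby
require "set"

categories = [[1, -1], [2, 1], [3, 2], [4, 3]] # [forumid, parentid]; -1 marks top level
parent_category_ids = Set.new(categories.select { |c| c[1] == -1 }.map { |c| c[0] })

children_categories = categories.select { |c| c[1] != -1 }
children_categories.each do |cc|
  cc[1] = categories.find { |c| c[0] == cc[1] }[1] until parent_category_ids.include?(cc[1])
end

p children_categories # => [[2, 1], [3, 1], [4, 1]], everything now hangs off forum 1
```
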
@imported_likes = Set.new @last_imported_post_id = 0 @@ -438,13 +433,13 @@ class BulkImport::VBulletin < BulkImport::Base post_id: post_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]), post_action_type_id: 2, - created_at: Time.zone.at(row[2]) + created_at: Time.zone.at(row[2]), } end end def import_private_topics - puts '', "Importing private topics..." + puts "", "Importing private topics..." @imported_topics = {} @@ -473,34 +468,31 @@ class BulkImport::VBulletin < BulkImport::Base end def import_topic_allowed_users - puts '', "Importing topic allowed users..." + puts "", "Importing topic allowed users..." allowed_users = Set.new - mysql_stream(<<-SQL + mysql_stream(<<-SQL).each do |row| SELECT pmtextid, touserarray FROM #{TABLE_PREFIX}pmtext WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET}) ORDER BY pmtextid SQL - ).each do |row| next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) - row[1].scan(/i:(\d+)/).flatten.each do |id| - next unless user_id = user_id_from_imported_id(id) - allowed_users << [topic_id, user_id] - end + row[1] + .scan(/i:(\d+)/) + .flatten + .each do |id| + next unless user_id = user_id_from_imported_id(id) + allowed_users << [topic_id, user_id] + end end - create_topic_allowed_users(allowed_users) do |row| - { - topic_id: row[0], - user_id: row[1], - } - end + create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } } end def import_private_posts - puts '', "Importing private posts..." + puts "", "Importing private posts..." posts = mysql_stream <<-SQL SELECT pmtextid, title, fromuserid, touserarray, dateline, message @@ -527,7 +519,7 @@ class BulkImport::VBulletin < BulkImport::Base end def create_permalink_file - puts '', 'Creating Permalink File...', '' + puts "", "Creating Permalink File...", "" total = Topic.listable_topics.count start = Time.now @@ -538,9 +530,9 @@ class BulkImport::VBulletin < BulkImport::Base i += 1 pcf = topic.posts.includes(:_custom_fields).where(post_number: 1).first.custom_fields if pcf && pcf["import_id"] - id = pcf["import_id"].split('-').last + id = pcf["import_id"].split("-").last - f.print [ "XXX#{id} YYY#{topic.id}" ].to_csv + f.print ["XXX#{id} YYY#{topic.id}"].to_csv print "\r%7d/%7d - %6d/sec" % [i, total, i.to_f / (Time.now - start)] if i % 5000 == 0 end end @@ -549,7 +541,8 @@ class BulkImport::VBulletin < BulkImport::Base # find the uploaded file information from the db def find_upload(post, attachment_id) - sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename + sql = + "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename FROM #{TABLE_PREFIX}attachment a WHERE a.attachmentid = #{attachment_id}" results = mysql_query(sql) @@ -563,9 +556,10 @@ class BulkImport::VBulletin < BulkImport::Base user_id = row[1] db_filename = row[2] - filename = File.join(ATTACHMENT_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach") + filename = + File.join(ATTACHMENT_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach") real_filename = db_filename - real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + real_filename.prepend SecureRandom.hex if real_filename[0] == "." unless File.exist?(filename) puts "Attachment file #{row.inspect} doesn't exist" @@ -588,7 +582,7 @@ class BulkImport::VBulletin < BulkImport::Base end def import_attachments - puts '', 'importing attachments...' + puts "", "importing attachments..." 
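In `import_topic_allowed_users` above, `pmtext.touserarray` holds a PHP-serialized hash, and rather than deserializing it the importer scans the string for its integer keys. With a hypothetical sample value:

```ruby
# Shaped like vBulletin's serialized touserarray (sample data only).
touserarray = 'a:2:{i:7;s:5:"alice";i:42;s:3:"bob";}'

user_ids = touserarray.scan(/i:(\d+)/).flatten
p user_ids # => ["7", "42"], still strings; they then go through user_id_from_imported_id
```
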
RateLimiter.disable current_count = 0 @@ -596,7 +590,7 @@ class BulkImport::VBulletin < BulkImport::Base success_count = 0 fail_count = 0 - attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i + attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i Post.find_each do |post| current_count += 1 @@ -618,7 +612,12 @@ class BulkImport::VBulletin < BulkImport::Base end if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin') + PostRevisor.new(post).revise!( + post.user, + { raw: new_raw }, + bypass_bump: true, + edit_reason: "Import attachments from vBulletin", + ) end success_count += 1 @@ -639,7 +638,7 @@ class BulkImport::VBulletin < BulkImport::Base Dir.foreach(AVATAR_DIR) do |item| print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] - next if item == ('.') || item == ('..') || item == ('.DS_Store') + next if item == (".") || item == ("..") || item == (".DS_Store") next unless item =~ /avatar(\d+)_(\d).gif/ scan = item.scan(/avatar(\d+)_(\d).gif/) next unless scan[0][0].present? @@ -671,11 +670,10 @@ class BulkImport::VBulletin < BulkImport::Base def import_signatures puts "Importing user signatures..." - total_count = mysql_query(<<-SQL + total_count = mysql_query(<<-SQL).first[0].to_i SELECT COUNT(userid) count FROM #{TABLE_PREFIX}sigparsed SQL - ).first[0].to_i current_count = 0 user_signatures = mysql_stream <<-SQL @@ -695,13 +693,20 @@ class BulkImport::VBulletin < BulkImport::Base next unless u.present? # can not hold dupes - UserCustomField.where(user_id: u.id, name: ["see_signatures", "signature_raw", "signature_cooked"]).destroy_all + UserCustomField.where( + user_id: u.id, + name: %w[see_signatures signature_raw signature_cooked], + ).destroy_all - user_sig.gsub!(/\[\/?sigpic\]/i, "") + user_sig.gsub!(%r{\[/?sigpic\]}i, "") UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true) UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig) - UserCustomField.create!(user_id: u.id, name: "signature_cooked", value: PrettyText.cook(user_sig, omit_nofollow: false)) + UserCustomField.create!( + user_id: u.id, + name: "signature_cooked", + value: PrettyText.cook(user_sig, omit_nofollow: false), + ) end end @@ -710,15 +715,15 @@ class BulkImport::VBulletin < BulkImport::Base total_count = 0 duplicated = {} - @user_ids_by_email. - select { |e, ids| ids.count > 1 }. - each_with_index do |(email, ids), i| - duplicated[email] = [ ids, i ] + @user_ids_by_email + .select { |e, ids| ids.count > 1 } + .each_with_index do |(email, ids), i| + duplicated[email] = [ids, i] count += 1 total_count += ids.count end - puts '', "Merging #{total_count} duplicated users across #{count} distinct emails..." + puts "", "Merging #{total_count} duplicated users across #{count} distinct emails..." start = Time.now @@ -727,14 +732,15 @@ class BulkImport::VBulletin < BulkImport::Base next unless email.presence # queried one by one to ensure ordering - first, *rest = user_ids.map do |id| - UserCustomField.includes(:user).find_by!(name: 'import_id', value: id).user - end + first, *rest = + user_ids.map do |id| + UserCustomField.includes(:user).find_by!(name: "import_id", value: id).user + end rest.each do |dup| UserMerger.new(dup, first).merge! first.reload - printf '.' + printf "." 
end print "\n%6d/%6d - %6d/sec" % [i, count, i.to_f / (Time.now - start)] if i % 10 == 0 @@ -744,13 +750,11 @@ class BulkImport::VBulletin < BulkImport::Base end def save_duplicated_users - File.open('duplicated_users.json', 'w+') do |f| - f.puts @user_ids_by_email.to_json - end + File.open("duplicated_users.json", "w+") { |f| f.puts @user_ids_by_email.to_json } end def read_duplicated_users - @user_ids_by_email = JSON.parse File.read('duplicated_users.json') + @user_ids_by_email = JSON.parse File.read("duplicated_users.json") end def extract_pm_title(title) @@ -759,17 +763,26 @@ class BulkImport::VBulletin < BulkImport::Base def parse_birthday(birthday) return if birthday.blank? - date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil + date_of_birth = + begin + Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") + rescue StandardError + nil + end return if date_of_birth.nil? - date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth + if date_of_birth.year < 1904 + Date.new(1904, date_of_birth.month, date_of_birth.day) + else + date_of_birth + end end def print_status(current, max, start_time = nil) if start_time.present? elapsed_seconds = Time.now - start_time - elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60] + elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60] else - elements_per_minute = '' + elements_per_minute = "" end print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute] @@ -782,7 +795,6 @@ class BulkImport::VBulletin < BulkImport::Base def mysql_query(sql) @client.query(sql) end - end BulkImport::VBulletin.new.run diff --git a/script/bulk_import/vbulletin5.rb b/script/bulk_import/vbulletin5.rb index 9be967c25c4..e952ab5d764 100644 --- a/script/bulk_import/vbulletin5.rb +++ b/script/bulk_import/vbulletin5.rb @@ -5,47 +5,56 @@ require "cgi" require "set" require "mysql2" require "htmlentities" -require 'ruby-bbcode-to-md' -require 'find' +require "ruby-bbcode-to-md" +require "find" class BulkImport::VBulletin5 < BulkImport::Base - DB_PREFIX = "" SUSPENDED_TILL ||= Date.new(3000, 1, 1) - ATTACH_DIR ||= ENV['ATTACH_DIR'] || '/shared/import/data/attachments' - AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars' + ATTACH_DIR ||= ENV["ATTACH_DIR"] || "/shared/import/data/attachments" + AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars" ROOT_NODE = 2 def initialize super - host = ENV["DB_HOST"] || "localhost" + host = ENV["DB_HOST"] || "localhost" username = ENV["DB_USERNAME"] || "root" password = ENV["DB_PASSWORD"] database = ENV["DB_NAME"] || "vbulletin" - charset = ENV["DB_CHARSET"] || "utf8" + charset = ENV["DB_CHARSET"] || "utf8" @html_entities = HTMLEntities.new @encoding = CHARSET_MAP[charset] @bbcode_to_md = true - @client = Mysql2::Client.new( - host: host, - username: username, - password: password, - database: database, - encoding: charset, - reconnect: true - ) + @client = + Mysql2::Client.new( + host: host, + username: username, + password: password, + database: database, + encoding: charset, + reconnect: true, + ) @client.query_options.merge!(as: :array, cache_rows: false) # TODO: Add `LIMIT 1` to the below queries # ------ # be aware there may be other contenttypeid's in use, such as poll, link, video, etc. 
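A sketch of what the TODO above asks for: each of these lookups needs a single scalar, so a `LIMIT 1` lets MySQL stop scanning `contenttype` after the first hit. Standalone, with assumed connection parameters:

```ruby
require "mysql2"

client = Mysql2::Client.new(host: "localhost", username: "root", database: "vbulletin")

# One scalar per content class; LIMIT 1 short-circuits the scan.
forum_typeid =
  client
    .query("SELECT contenttypeid FROM contenttype WHERE class='Forum' LIMIT 1", as: :array)
    .first
    &.first
```
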
- @forum_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][0] - @channel_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[0][0] - @text_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0] + @forum_typeid = + mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][ + 0 + ] + @channel_typeid = + mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[ + 0 + ][ + 0 + ] + @text_typeid = + mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0] end def execute @@ -127,7 +136,7 @@ class BulkImport::VBulletin5 < BulkImport::Base date_of_birth: parse_birthday(row[3]), primary_group_id: group_id_from_imported_id(row[5]), admin: row[5] == 6, - moderator: row[5] == 7 + moderator: row[5] == 7, } u[:ip_address] = row[4][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row[4].present? if row[7] @@ -153,7 +162,7 @@ class BulkImport::VBulletin5 < BulkImport::Base imported_id: row[0], imported_user_id: row[0], email: random_email, - created_at: Time.zone.at(row[2]) + created_at: Time.zone.at(row[2]), } end end @@ -203,10 +212,7 @@ class BulkImport::VBulletin5 < BulkImport::Base SQL create_group_users(group_users) do |row| - { - group_id: group_id_from_imported_id(row[0]), - user_id: user_id_from_imported_id(row[1]), - } + { group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) } end # import secondary group memberships @@ -228,12 +234,7 @@ class BulkImport::VBulletin5 < BulkImport::Base end end - create_group_users(group_mapping) do |row| - { - group_id: row[0], - user_id: row[1] - } - end + create_group_users(group_mapping) { |row| { group_id: row[0], user_id: row[1] } } end def import_user_profiles @@ -249,7 +250,14 @@ class BulkImport::VBulletin5 < BulkImport::Base create_user_profiles(user_profiles) do |row| { user_id: user_id_from_imported_id(row[0]), - website: (URI.parse(row[1]).to_s rescue nil), + website: + ( + begin + URI.parse(row[1]).to_s + rescue StandardError + nil + end + ), views: row[2], } end @@ -258,7 +266,7 @@ class BulkImport::VBulletin5 < BulkImport::Base def import_categories puts "Importing categories..." - categories = mysql_query(<<-SQL + categories = mysql_query(<<-SQL).to_a SELECT nodeid AS forumid, title, description, displayorder, parentid, urlident FROM #{DB_PREFIX}node WHERE parentid = #{ROOT_NODE} @@ -269,11 +277,10 @@ class BulkImport::VBulletin5 < BulkImport::Base WHERE contenttypeid = #{@channel_typeid} AND nodeid > #{@last_imported_category_id} SQL - ).to_a return if categories.empty? 
- parent_categories = categories.select { |c| c[4] == ROOT_NODE } + parent_categories = categories.select { |c| c[4] == ROOT_NODE } children_categories = categories.select { |c| c[4] != ROOT_NODE } parent_category_ids = Set.new parent_categories.map { |c| c[0] } @@ -285,7 +292,7 @@ class BulkImport::VBulletin5 < BulkImport::Base name: normalize_text(row[1]), description: normalize_text(row[2]), position: row[3], - slug: row[5] + slug: row[5], } end @@ -297,7 +304,7 @@ class BulkImport::VBulletin5 < BulkImport::Base description: normalize_text(row[2]), position: row[3], parent_category_id: category_id_from_imported_id(row[4]), - slug: row[5] + slug: row[5], } end end @@ -428,7 +435,7 @@ class BulkImport::VBulletin5 < BulkImport::Base post_id: post_id, user_id: user_id, post_action_type_id: 2, - created_at: Time.zone.at(row[2]) + created_at: Time.zone.at(row[2]), } end end @@ -455,7 +462,6 @@ class BulkImport::VBulletin5 < BulkImport::Base user_id: user_id_from_imported_id(row[2]), created_at: Time.zone.at(row[3]), } - end end @@ -475,17 +481,18 @@ class BulkImport::VBulletin5 < BulkImport::Base users_added = Set.new create_topic_allowed_users(mysql_stream(allowed_users_sql)) do |row| - next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) || topic_id_from_imported_id(row[2] + PRIVATE_OFFSET) + unless topic_id = + topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) || + topic_id_from_imported_id(row[2] + PRIVATE_OFFSET) + next + end next unless user_id = user_id_from_imported_id(row[1]) next if users_added.add?([topic_id, user_id]).nil? added += 1 - { - topic_id: topic_id, - user_id: user_id, - } + { topic_id: topic_id, user_id: user_id } end - puts '', "Added #{added} topic allowed users records." + puts "", "Added #{added} topic allowed users records." end def import_private_first_posts @@ -543,7 +550,7 @@ class BulkImport::VBulletin5 < BulkImport::Base end def create_permalinks - puts '', 'creating permalinks...', '' + puts "", "creating permalinks...", "" # add permalink normalizations to site settings # EVERYTHING: /.*\/([\w-]+)$/\1 -- selects the last segment of the URL @@ -580,21 +587,23 @@ class BulkImport::VBulletin5 < BulkImport::Base return nil end - tmpfile = 'attach_' + row[6].to_s - filename = File.join('/tmp/', tmpfile) - File.open(filename, 'wb') { |f| f.write(row[5]) } + tmpfile = "attach_" + row[6].to_s + filename = File.join("/tmp/", tmpfile) + File.open(filename, "wb") { |f| f.write(row[5]) } filename end def find_upload(post, opts = {}) if opts[:node_id].present? - sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid + sql = + "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid FROM #{DB_PREFIX}attach a LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid WHERE a.nodeid = #{opts[:node_id]}" elsif opts[:attachment_id].present? 
- sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid + sql = + "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid FROM #{DB_PREFIX}attachment a LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid @@ -612,9 +621,9 @@ class BulkImport::VBulletin5 < BulkImport::Base user_id = row[3] db_filename = row[2] - filename = File.join(ATTACH_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach") + filename = File.join(ATTACH_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach") real_filename = db_filename - real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + real_filename.prepend SecureRandom.hex if real_filename[0] == "." unless File.exist?(filename) filename = check_database_for_attachment(row) if filename.blank? @@ -637,7 +646,7 @@ class BulkImport::VBulletin5 < BulkImport::Base end def import_attachments - puts '', 'importing attachments...' + puts "", "importing attachments..." # add extensions to authorized setting #ext = mysql_query("SELECT GROUP_CONCAT(DISTINCT(extension)) exts FROM #{DB_PREFIX}filedata").first[0].split(',') @@ -655,8 +664,8 @@ class BulkImport::VBulletin5 < BulkImport::Base # new style matches the nodeid in the attach table # old style matches the filedataid in attach/filedata tables # if the site is very old, there may be multiple different attachment syntaxes used in posts - attachment_regex = /\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[\/attach\]/i - attachment_regex_oldstyle = /\[attach[^\]]*\](\d+)\[\/attach\]/i + attachment_regex = %r{\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[/attach\]}i + attachment_regex_oldstyle = %r{\[attach[^\]]*\](\d+)\[/attach\]}i Post.find_each do |post| current_count += 1 @@ -715,9 +724,18 @@ class BulkImport::VBulletin5 < BulkImport::Base def parse_birthday(birthday) return if birthday.blank? - date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil + date_of_birth = + begin + Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") + rescue StandardError + nil + end return if date_of_birth.nil? - date_of_birth.year < 1904 ? 
Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
+    if date_of_birth.year < 1904
+      Date.new(1904, date_of_birth.month, date_of_birth.day)
+    else
+      date_of_birth
+    end
   end

   def preprocess_raw(raw)
@@ -726,33 +744,37 @@ class BulkImport::VBulletin5 < BulkImport::Base
     raw = raw.dup

     # [PLAINTEXT]...[/PLAINTEXT]
-    raw.gsub!(/\[\/?PLAINTEXT\]/i, "\n\n```\n\n")
+    raw.gsub!(%r{\[/?PLAINTEXT\]}i, "\n\n```\n\n")

     # [FONT=font]...[/FONT]
     raw.gsub!(/\[FONT=\w*\]/im, "")
-    raw.gsub!(/\[\/FONT\]/im, "")
+    raw.gsub!(%r{\[/FONT\]}im, "")

     # @[URL=<url>]<username>[/URL]
     # [USER=id]username[/USER]
     # [MENTION=id]username[/MENTION]
-    raw.gsub!(/@\[URL=\"\S+\"\]([\w\s]+)\[\/URL\]/i) { "@#{$1.gsub(" ", "_")}" }
-    raw.gsub!(/\[USER=\"\d+\"\]([\S]+)\[\/USER\]/i) { "@#{$1.gsub(" ", "_")}" }
-    raw.gsub!(/\[MENTION=\d+\]([\S]+)\[\/MENTION\]/i) { "@#{$1.gsub(" ", "_")}" }
+    raw.gsub!(%r{@\[URL=\"\S+\"\]([\w\s]+)\[/URL\]}i) { "@#{$1.gsub(" ", "_")}" }
+    raw.gsub!(%r{\[USER=\"\d+\"\]([\S]+)\[/USER\]}i) { "@#{$1.gsub(" ", "_")}" }
+    raw.gsub!(%r{\[MENTION=\d+\]([\S]+)\[/MENTION\]}i) { "@#{$1.gsub(" ", "_")}" }

     # [IMG2=JSON]{..."src":"<url>"}[/IMG2]
-    raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) { "\n#{CGI::unescape($1)}\n" }
+    raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) do
+      "\n#{CGI.unescape($1)}\n"
+    end

     # [TABLE]...[/TABLE]
     raw.gsub!(/\[TABLE=\\"[\w:\-\s,]+\\"\]/i, "")
-    raw.gsub!(/\[\/TABLE\]/i, "")
+    raw.gsub!(%r{\[/TABLE\]}i, "")

     # [HR]...[/HR]
-    raw.gsub(/\[HR\]\s*\[\/HR\]/im, "---")
+    raw.gsub!(%r{\[HR\]\s*\[/HR\]}im, "---")

     # [VIDEO=youtube_share;<id>]...[/VIDEO]
     # [VIDEO=vimeo;<id>]...[/VIDEO]
-    raw.gsub!(/\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
-    raw.gsub!(/\[VIDEO=VIMEO;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://vimeo.com/#{$1}\n" }
+    raw.gsub!(%r{\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[/VIDEO\]}i) do
+      "\nhttps://www.youtube.com/watch?v=#{$1}\n"
+    end
+    raw.gsub!(%r{\[VIDEO=VIMEO;([^\]]+)\].*?\[/VIDEO\]}i) { "\nhttps://vimeo.com/#{$1}\n" }

     raw
   end
@@ -760,9 +782,9 @@ class BulkImport::VBulletin5 < BulkImport::Base
   def print_status(current, max, start_time = nil)
     if start_time.present?
elapsed_seconds = Time.now - start_time - elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60] + elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60] else - elements_per_minute = '' + elements_per_minute = "" end print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute] @@ -775,7 +797,6 @@ class BulkImport::VBulletin5 < BulkImport::Base def mysql_query(sql) @client.query(sql) end - end BulkImport::VBulletin5.new.run diff --git a/script/check_forking.rb b/script/check_forking.rb index ae8196af943..8679e6ec25e 100644 --- a/script/check_forking.rb +++ b/script/check_forking.rb @@ -13,20 +13,22 @@ end Discourse.after_fork pretty -child = fork do - Discourse.after_fork - pretty - grand_child = fork do +child = + fork do Discourse.after_fork pretty - puts "try to exit" + grand_child = + fork do + Discourse.after_fork + pretty + puts "try to exit" + Process.kill "KILL", Process.pid + end + puts "before wait 2" + Process.wait grand_child + puts "after wait 2" Process.kill "KILL", Process.pid end - puts "before wait 2" - Process.wait grand_child - puts "after wait 2" - Process.kill "KILL", Process.pid -end puts "before wait 1" Process.wait child diff --git a/script/db_timestamps_mover.rb b/script/db_timestamps_mover.rb index 30701dd1f6f..248f1893871 100644 --- a/script/db_timestamps_mover.rb +++ b/script/db_timestamps_mover.rb @@ -12,20 +12,18 @@ class TimestampsUpdater def initialize(schema, ignore_tables) @schema = schema @ignore_tables = ignore_tables - @raw_connection = PG.connect( - host: ENV['DISCOURSE_DB_HOST'] || 'localhost', - port: ENV['DISCOURSE_DB_PORT'] || 5432, - dbname: ENV['DISCOURSE_DB_NAME'] || 'discourse_development', - user: ENV['DISCOURSE_DB_USERNAME'] || 'postgres', - password: ENV['DISCOURSE_DB_PASSWORD'] || '') + @raw_connection = + PG.connect( + host: ENV["DISCOURSE_DB_HOST"] || "localhost", + port: ENV["DISCOURSE_DB_PORT"] || 5432, + dbname: ENV["DISCOURSE_DB_NAME"] || "discourse_development", + user: ENV["DISCOURSE_DB_USERNAME"] || "postgres", + password: ENV["DISCOURSE_DB_PASSWORD"] || "", + ) end def move_by(days) - postgresql_date_types = [ - "timestamp without time zone", - "timestamp with time zone", - "date" - ] + postgresql_date_types = ["timestamp without time zone", "timestamp with time zone", "date"] postgresql_date_types.each do |data_type| columns = all_columns_of_type(data_type) @@ -118,11 +116,19 @@ class TimestampsUpdater end def is_i?(string) - true if Integer(string) rescue false + begin + true if Integer(string) + rescue StandardError + false + end end def is_date?(string) - true if Date.parse(string) rescue false + begin + true if Date.parse(string) + rescue StandardError + false + end end def create_updater diff --git a/script/diff_heaps.rb b/script/diff_heaps.rb index 5a0e91efd67..8433773d7f5 100644 --- a/script/diff_heaps.rb +++ b/script/diff_heaps.rb @@ -6,8 +6,8 @@ # rbtrace -p 15193 -e 'Thread.new{require "objspace"; ObjectSpace.trace_object_allocations_start; GC.start(full_mark: true); ObjectSpace.dump_all(output: File.open("heap.json","w"))}.join' # # -require 'set' -require 'json' +require "set" +require "json" if ARGV.length != 2 puts "Usage: diff_heaps [ORIG.json] [AFTER.json]" @@ -16,26 +16,26 @@ end origs = Set.new -File.open(ARGV[0], "r").each_line do |line| - parsed = JSON.parse(line) - origs << parsed["address"] if parsed && parsed["address"] -end +File + .open(ARGV[0], "r") + .each_line do |line| + parsed = JSON.parse(line) + origs << 
parsed["address"] if parsed && parsed["address"] + end diff = [] -File.open(ARGV[1], "r").each_line do |line| - parsed = JSON.parse(line) - if parsed && parsed["address"] - diff << parsed unless origs.include? parsed["address"] +File + .open(ARGV[1], "r") + .each_line do |line| + parsed = JSON.parse(line) + if parsed && parsed["address"] + diff << parsed unless origs.include? parsed["address"] + end end -end -diff.group_by do |x| - [x["type"], x["file"], x["line"]] -end.map { |x, y| - [x, y.count] -}.sort { |a, b| - b[1] <=> a[1] -}.each { |x, y| - puts "Leaked #{y} #{x[0]} objects at: #{x[1]}:#{x[2]}" -} +diff + .group_by { |x| [x["type"], x["file"], x["line"]] } + .map { |x, y| [x, y.count] } + .sort { |a, b| b[1] <=> a[1] } + .each { |x, y| puts "Leaked #{y} #{x[0]} objects at: #{x[1]}:#{x[2]}" } diff --git a/script/docker_test.rb b/script/docker_test.rb index db73d531be4..d51b63b5c57 100644 --- a/script/docker_test.rb +++ b/script/docker_test.rb @@ -19,11 +19,11 @@ def run_or_fail(command) exit 1 unless $?.exitstatus == 0 end -unless ENV['NO_UPDATE'] +unless ENV["NO_UPDATE"] run_or_fail("git reset --hard") run_or_fail("git fetch") - checkout = ENV['COMMIT_HASH'] || "FETCH_HEAD" + checkout = ENV["COMMIT_HASH"] || "FETCH_HEAD" run_or_fail("LEFTHOOK=0 git checkout #{checkout}") run_or_fail("bundle") @@ -31,7 +31,7 @@ end log("Running tests") -if ENV['RUN_SMOKE_TESTS'] +if ENV["RUN_SMOKE_TESTS"] run_or_fail("bundle exec rake smoke:test") else run_or_fail("bundle exec rake docker:test") diff --git a/script/i18n_lint.rb b/script/i18n_lint.rb index b00de751916..f3f96abf87d 100755 --- a/script/i18n_lint.rb +++ b/script/i18n_lint.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -require 'colored2' -require 'psych' +require "colored2" +require "psych" class I18nLinter def initialize(filenames_or_patterns) @@ -27,16 +27,22 @@ end class LocaleFileValidator ERROR_MESSAGES = { - invalid_relative_links: "The following keys have relative links, but do not start with %{base_url} or %{base_path}:", - invalid_relative_image_sources: "The following keys have relative image sources, but do not start with %{base_url} or %{base_path}:", - invalid_interpolation_key_format: "The following keys use {{key}} instead of %{key} for interpolation keys:", - wrong_pluralization_keys: "Pluralized strings must have only the sub-keys 'one' and 'other'.\nThe following keys have missing or additional keys:", - invalid_one_keys: "The following keys contain the number 1 instead of the interpolation key %{count}:", - invalid_message_format_one_key: "The following keys use 'one {1 foo}' instead of the generic 'one {# foo}':", + invalid_relative_links: + "The following keys have relative links, but do not start with %{base_url} or %{base_path}:", + invalid_relative_image_sources: + "The following keys have relative image sources, but do not start with %{base_url} or %{base_path}:", + invalid_interpolation_key_format: + "The following keys use {{key}} instead of %{key} for interpolation keys:", + wrong_pluralization_keys: + "Pluralized strings must have only the sub-keys 'one' and 'other'.\nThe following keys have missing or additional keys:", + invalid_one_keys: + "The following keys contain the number 1 instead of the interpolation key %{count}:", + invalid_message_format_one_key: + "The following keys use 'one {1 foo}' instead of the generic 'one {# foo}':", } - PLURALIZATION_KEYS = ['zero', 'one', 'two', 'few', 'many', 'other'] - ENGLISH_KEYS = ['one', 'other'] + PLURALIZATION_KEYS = %w[zero one two few many other] + 
ENGLISH_KEYS = %w[one other]

   def initialize(filename)
@@ -66,7 +72,7 @@ class LocaleFileValidator

   private

-  def each_translation(hash, parent_key = '', &block)
+  def each_translation(hash, parent_key = "", &block)
     hash.each do |key, value|
       current_key = parent_key.empty? ? key : "#{parent_key}.#{key}"

@@ -85,13 +91,9 @@ class LocaleFileValidator
     @errors[:invalid_message_format_one_key] = []

     each_translation(yaml) do |key, value|
-      if value.match?(/href\s*=\s*["']\/[^\/]|\]\(\/[^\/]/i)
-        @errors[:invalid_relative_links] << key
-      end
+      @errors[:invalid_relative_links] << key if value.match?(%r{href\s*=\s*["']/[^/]|\]\(/[^/]}i)

-      if value.match?(/src\s*=\s*["']\/[^\/]/i)
-        @errors[:invalid_relative_image_sources] << key
-      end
+      @errors[:invalid_relative_image_sources] << key if value.match?(%r{src\s*=\s*["']/[^/]}i)

       if value.match?(/{{.+?}}/) && !key.end_with?("_MF")
         @errors[:invalid_interpolation_key_format] << key
@@ -103,7 +105,7 @@ class LocaleFileValidator
     end
   end

-  def each_pluralization(hash, parent_key = '', &block)
+  def each_pluralization(hash, parent_key = "", &block)
     hash.each do |key, value|
       if Hash === value
         current_key = parent_key.empty? ? key : "#{parent_key}.#{key}"
@@ -124,8 +126,8 @@ class LocaleFileValidator

     @errors[:wrong_pluralization_keys] << key if hash.keys.sort != ENGLISH_KEYS

-    one_value = hash['one']
-    if one_value && one_value.include?('1') && !one_value.match?(/%{count}|{{count}}/)
+    one_value = hash["one"]
+    if one_value && one_value.include?("1") && !one_value.match?(/%{count}|{{count}}/)
       @errors[:invalid_one_keys] << key
     end
   end
diff --git a/script/import_scripts/answerbase.rb b/script/import_scripts/answerbase.rb
index 1f436ac0ba3..63de1d4a937 100644
--- a/script/import_scripts/answerbase.rb
+++ b/script/import_scripts/answerbase.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

-require 'csv'
-require 'reverse_markdown'
-require_relative 'base'
-require_relative 'base/generic_database'
+require "csv"
+require "reverse_markdown"
+require_relative "base"
+require_relative "base/generic_database"

 # Call it like this:
 #   RAILS_ENV=production bundle exec ruby script/import_scripts/answerbase.rb DIRNAME
@@ -15,8 +15,10 @@ class ImportScripts::Answerbase < ImportScripts::Base
   ANSWER_IMAGE_DIRECTORY = "Answer Images"
   QUESTION_ATTACHMENT_DIRECTORY = "Question Attachments"
   QUESTION_IMAGE_DIRECTORY = "Question Images"
-  EMBEDDED_IMAGE_REGEX = /<a[^>]*href="[^"]*relativeUrl=(?<path>[^"\&]*)[^"]*"[^>]*>\s*<img[^>]*>\s*<\/a>/i
-  QUESTION_LINK_REGEX = /<a[^>]*?href="#{Regexp.escape(OLD_DOMAIN)}\/[^"]*?(?:q|questionid=)(?<id>\d+)[^"]*?"[^>]*>(?<text>.*?)<\/a>/i
+  EMBEDDED_IMAGE_REGEX =
+    %r{<a[^>]*href="[^"]*relativeUrl=(?<path>[^"\&]*)[^"]*"[^>]*>\s*<img[^>]*>\s*</a>}i
+  QUESTION_LINK_REGEX =
+    %r{<a[^>]*?href="#{Regexp.escape(OLD_DOMAIN)}/[^"]*?(?:q|questionid=)(?<id>\d+)[^"]*?"[^>]*>(?<text>.*?)</a>}i
   TOPIC_LINK_NORMALIZATION = '/.*?-(q\d+).*/\1'
   BATCH_SIZE = 1000

@@ -24,12 +26,13 @@ class ImportScripts::Answerbase < ImportScripts::Base
     super()

     @path = path
-    @db = ImportScripts::GenericDatabase.new(
-      @path,
-      batch_size: BATCH_SIZE,
-      recreate: true,
-      numeric_keys: true
-    )
+    @db =
+      ImportScripts::GenericDatabase.new(
+        @path,
+        batch_size: BATCH_SIZE,
+        recreate: true,
+        numeric_keys: true,
+      )
   end

   def execute
@@ -47,11 +50,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
     category_position = 0

     csv_parse("categories") do |row|
-      @db.insert_category(
-        id: row[:id],
-        name: row[:name],
-        position: category_position += 1
-      )
+      @db.insert_category(id: row[:id], name: row[:name], position: category_position += 1)
end csv_parse("users") do |row| @@ -62,7 +61,7 @@ class ImportScripts::Answerbase < ImportScripts::Base bio: row[:description], avatar_path: row[:profile_image], created_at: parse_date(row[:createtime]), - active: true + active: true, ) end @@ -74,8 +73,9 @@ class ImportScripts::Answerbase < ImportScripts::Base begin if row[:type] == "Question" - attachments = parse_filenames(row[:attachments], QUESTION_ATTACHMENT_DIRECTORY) + - parse_filenames(row[:images], QUESTION_IMAGE_DIRECTORY) + attachments = + parse_filenames(row[:attachments], QUESTION_ATTACHMENT_DIRECTORY) + + parse_filenames(row[:images], QUESTION_IMAGE_DIRECTORY) @db.insert_topic( id: row[:id], @@ -84,12 +84,13 @@ class ImportScripts::Answerbase < ImportScripts::Base category_id: row[:categorylist], user_id: user_id, created_at: created_at, - attachments: attachments + attachments: attachments, ) last_topic_id = row[:id] else - attachments = parse_filenames(row[:attachments], ANSWER_ATTACHMENT_DIRECTORY) + - parse_filenames(row[:images], ANSWER_IMAGE_DIRECTORY) + attachments = + parse_filenames(row[:attachments], ANSWER_ATTACHMENT_DIRECTORY) + + parse_filenames(row[:images], ANSWER_IMAGE_DIRECTORY) @db.insert_post( id: row[:id], @@ -97,10 +98,10 @@ class ImportScripts::Answerbase < ImportScripts::Base topic_id: last_topic_id, user_id: user_id, created_at: created_at, - attachments: attachments + attachments: attachments, ) end - rescue + rescue StandardError p row raise end @@ -110,9 +111,7 @@ class ImportScripts::Answerbase < ImportScripts::Base def parse_filenames(text, directory) return [] if text.blank? - text - .split(';') - .map { |filename| File.join(@path, directory, filename.strip) } + text.split(";").map { |filename| File.join(@path, directory, filename.strip) } end def parse_date(text) @@ -132,10 +131,10 @@ class ImportScripts::Answerbase < ImportScripts::Base create_categories(rows) do |row| { - id: row['id'], - name: row['name'], - description: row['description'], - position: row['position'] + id: row["id"], + name: row["name"], + description: row["description"], + position: row["position"], } end end @@ -153,19 +152,17 @@ class ImportScripts::Answerbase < ImportScripts::Base rows, last_id = @db.fetch_users(last_id) break if rows.empty? - next if all_records_exist?(:users, rows.map { |row| row['id'] }) + next if all_records_exist?(:users, rows.map { |row| row["id"] }) create_users(rows, total: total_count, offset: offset) do |row| { - id: row['id'], - email: row['email'], - username: row['username'], - bio_raw: row['bio'], - created_at: row['created_at'], - active: row['active'] == 1, - post_create_action: proc do |user| - create_avatar(user, row['avatar_path']) - end + id: row["id"], + email: row["email"], + username: row["username"], + bio_raw: row["bio"], + created_at: row["created_at"], + active: row["active"] == 1, + post_create_action: proc { |user| create_avatar(user, row["avatar_path"]) }, } end end @@ -191,24 +188,25 @@ class ImportScripts::Answerbase < ImportScripts::Base rows, last_id = @db.fetch_topics(last_id) break if rows.empty? 
- next if all_records_exist?(:posts, rows.map { |row| row['id'] }) + next if all_records_exist?(:posts, rows.map { |row| row["id"] }) create_posts(rows, total: total_count, offset: offset) do |row| - attachments = @db.fetch_topic_attachments(row['id']) if row['upload_count'] > 0 - user_id = user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id + attachments = @db.fetch_topic_attachments(row["id"]) if row["upload_count"] > 0 + user_id = user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id { - id: row['id'], - title: row['title'], - raw: raw_with_attachments(row['raw'].presence || row['title'], attachments, user_id), - category: category_id_from_imported_category_id(row['category_id']), + id: row["id"], + title: row["title"], + raw: raw_with_attachments(row["raw"].presence || row["title"], attachments, user_id), + category: category_id_from_imported_category_id(row["category_id"]), user_id: user_id, - created_at: row['created_at'], - closed: row['closed'] == 1, - post_create_action: proc do |post| - url = "q#{row['id']}" - Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url) - end + created_at: row["created_at"], + closed: row["closed"] == 1, + post_create_action: + proc do |post| + url = "q#{row["id"]}" + Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url) + end, } end end @@ -223,19 +221,19 @@ class ImportScripts::Answerbase < ImportScripts::Base rows, last_row_id = @db.fetch_posts(last_row_id) break if rows.empty? - next if all_records_exist?(:posts, rows.map { |row| row['id'] }) + next if all_records_exist?(:posts, rows.map { |row| row["id"] }) create_posts(rows, total: total_count, offset: offset) do |row| - topic = topic_lookup_from_imported_post_id(row['topic_id']) - attachments = @db.fetch_post_attachments(row['id']) if row['upload_count'] > 0 - user_id = user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id + topic = topic_lookup_from_imported_post_id(row["topic_id"]) + attachments = @db.fetch_post_attachments(row["id"]) if row["upload_count"] > 0 + user_id = user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id { - id: row['id'], - raw: raw_with_attachments(row['raw'], attachments, user_id), + id: row["id"], + raw: raw_with_attachments(row["raw"], attachments, user_id), user_id: user_id, topic_id: topic[:topic_id], - created_at: row['created_at'] + created_at: row["created_at"], } end end @@ -247,7 +245,7 @@ class ImportScripts::Answerbase < ImportScripts::Base raw = ReverseMarkdown.convert(raw) || "" attachments&.each do |attachment| - path = attachment['path'] + path = attachment["path"] next if embedded_paths.include?(path) if File.exist?(path) @@ -269,23 +267,24 @@ class ImportScripts::Answerbase < ImportScripts::Base paths = [] upload_ids = [] - raw = raw.gsub(EMBEDDED_IMAGE_REGEX) do - path = File.join(@path, Regexp.last_match['path']) - filename = File.basename(path) - path = find_image_path(filename) + raw = + raw.gsub(EMBEDDED_IMAGE_REGEX) do + path = File.join(@path, Regexp.last_match["path"]) + filename = File.basename(path) + path = find_image_path(filename) - if path - upload = @uploader.create_upload(user_id, path, filename) + if path + upload = @uploader.create_upload(user_id, path, filename) - if upload.present? && upload.persisted? - paths << path - upload_ids << upload.id - @uploader.html_for_upload(upload, filename) + if upload.present? && upload.persisted? 
+ paths << path + upload_ids << upload.id + @uploader.html_for_upload(upload, filename) + end + else + STDERR.puts "Could not find file: #{path}" end - else - STDERR.puts "Could not find file: #{path}" end - end [raw, paths, upload_ids] end @@ -311,11 +310,11 @@ class ImportScripts::Answerbase < ImportScripts::Base def add_permalink_normalizations normalizations = SiteSetting.permalink_normalizations - normalizations = normalizations.blank? ? [] : normalizations.split('|') + normalizations = normalizations.blank? ? [] : normalizations.split("|") add_normalization(normalizations, TOPIC_LINK_NORMALIZATION) - SiteSetting.permalink_normalizations = normalizations.join('|') + SiteSetting.permalink_normalizations = normalizations.join("|") end def add_normalization(normalizations, normalization) @@ -327,11 +326,13 @@ class ImportScripts::Answerbase < ImportScripts::Base end def csv_parse(table_name) - CSV.foreach(File.join(@path, "#{table_name}.csv"), - headers: true, - header_converters: :symbol, - skip_blanks: true, - encoding: 'bom|utf-8') { |row| yield row } + CSV.foreach( + File.join(@path, "#{table_name}.csv"), + headers: true, + header_converters: :symbol, + skip_blanks: true, + encoding: "bom|utf-8", + ) { |row| yield row } end end diff --git a/script/import_scripts/answerhub.rb b/script/import_scripts/answerhub.rb index f2e05811bac..b4954dcccba 100644 --- a/script/import_scripts/answerhub.rb +++ b/script/import_scripts/answerhub.rb @@ -5,34 +5,29 @@ # Based on having access to a mysql dump. # Pass in the ENV variables listed below before running the script. -require_relative 'base' -require 'mysql2' -require 'open-uri' +require_relative "base" +require "mysql2" +require "open-uri" class ImportScripts::AnswerHub < ImportScripts::Base - - DB_NAME ||= ENV['DB_NAME'] || "answerhub" - DB_PASS ||= ENV['DB_PASS'] || "answerhub" - DB_USER ||= ENV['DB_USER'] || "answerhub" - TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "network1" - BATCH_SIZE ||= ENV['BATCH_SIZE'].to_i || 1000 - ATTACHMENT_DIR = ENV['ATTACHMENT_DIR'] || '' - PROCESS_UPLOADS = ENV['PROCESS_UPLOADS'].to_i || 0 - ANSWERHUB_DOMAIN = ENV['ANSWERHUB_DOMAIN'] - AVATAR_DIR = ENV['AVATAR_DIR'] || "" - SITE_ID = ENV['SITE_ID'].to_i || 0 - CATEGORY_MAP_FROM = ENV['CATEGORY_MAP_FROM'].to_i || 0 - CATEGORY_MAP_TO = ENV['CATEGORY_MAP_TO'].to_i || 0 - SCRAPE_AVATARS = ENV['SCRAPE_AVATARS'].to_i || 0 + DB_NAME ||= ENV["DB_NAME"] || "answerhub" + DB_PASS ||= ENV["DB_PASS"] || "answerhub" + DB_USER ||= ENV["DB_USER"] || "answerhub" + TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "network1" + BATCH_SIZE ||= ENV["BATCH_SIZE"].to_i || 1000 + ATTACHMENT_DIR = ENV["ATTACHMENT_DIR"] || "" + PROCESS_UPLOADS = ENV["PROCESS_UPLOADS"].to_i || 0 + ANSWERHUB_DOMAIN = ENV["ANSWERHUB_DOMAIN"] + AVATAR_DIR = ENV["AVATAR_DIR"] || "" + SITE_ID = ENV["SITE_ID"].to_i || 0 + CATEGORY_MAP_FROM = ENV["CATEGORY_MAP_FROM"].to_i || 0 + CATEGORY_MAP_TO = ENV["CATEGORY_MAP_TO"].to_i || 0 + SCRAPE_AVATARS = ENV["SCRAPE_AVATARS"].to_i || 0 def initialize super - @client = Mysql2::Client.new( - host: "localhost", - username: DB_USER, - password: DB_PASS, - database: DB_NAME - ) + @client = + Mysql2::Client.new(host: "localhost", username: DB_USER, password: DB_PASS, database: DB_NAME) @skip_updates = true SiteSetting.tagging_enabled = true SiteSetting.max_tags_per_topic = 10 @@ -56,7 +51,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base end def import_users - puts '', "creating users" + puts "", "creating users" query = "SELECT count(*) count @@ -64,12 +59,13 @@ class 
ImportScripts::AnswerHub < ImportScripts::Base WHERE c_type = 'user' AND c_active = 1 AND c_system <> 1;" - total_count = @client.query(query).first['count'] + total_count = @client.query(query).first["count"] puts "Total count: #{total_count}" @last_user_id = -1 batches(BATCH_SIZE) do |offset| - query = "SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description + query = + "SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description FROM #{TABLE_PREFIX}_authoritables WHERE c_type = 'user' AND c_active = 1 @@ -79,17 +75,18 @@ class ImportScripts::AnswerHub < ImportScripts::Base results = @client.query(query) break if results.size < 1 - @last_user_id = results.to_a.last['c_id'] + @last_user_id = results.to_a.last["c_id"] create_users(results, total: total_count, offset: offset) do |user| # puts user['c_id'].to_s + ' ' + user['c_name'] - next if @lookup.user_id_from_imported_user_id(user['c_id']) - { id: user['c_id'], + next if @lookup.user_id_from_imported_user_id(user["c_id"]) + { + id: user["c_id"], email: "#{SecureRandom.hex}@invalid.invalid", - username: user['c_name'], - created_at: user['c_creation_date'], - bio_raw: user['c_description'], - last_seen_at: user['c_last_seen'], + username: user["c_name"], + created_at: user["c_creation_date"], + bio_raw: user["c_description"], + last_seen_at: user["c_last_seen"], } end end @@ -99,7 +96,8 @@ class ImportScripts::AnswerHub < ImportScripts::Base puts "", "importing categories..." # Import parent categories first - query = "SELECT c_id, c_name, c_plug, c_parent + query = + "SELECT c_id, c_name, c_plug, c_parent FROM containers WHERE c_type = 'space' AND c_active = 1 @@ -107,15 +105,12 @@ class ImportScripts::AnswerHub < ImportScripts::Base results = @client.query(query) create_categories(results) do |c| - { - id: c['c_id'], - name: c['c_name'], - parent_category_id: check_parent_id(c['c_parent']), - } + { id: c["c_id"], name: c["c_name"], parent_category_id: check_parent_id(c["c_parent"]) } end # Import sub-categories - query = "SELECT c_id, c_name, c_plug, c_parent + query = + "SELECT c_id, c_name, c_plug, c_parent FROM containers WHERE c_type = 'space' AND c_active = 1 @@ -125,9 +120,9 @@ class ImportScripts::AnswerHub < ImportScripts::Base create_categories(results) do |c| # puts c.inspect { - id: c['c_id'], - name: c['c_name'], - parent_category_id: category_id_from_imported_category_id(check_parent_id(c['c_parent'])), + id: c["c_id"], + name: c["c_name"], + parent_category_id: category_id_from_imported_category_id(check_parent_id(c["c_parent"])), } end end @@ -141,7 +136,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base WHERE c_visibility <> 'deleted' AND (c_type = 'question' OR c_type = 'kbentry');" - total_count = @client.query(count_query).first['count'] + total_count = @client.query(count_query).first["count"] @last_topic_id = -1 @@ -159,26 +154,25 @@ class ImportScripts::AnswerHub < ImportScripts::Base topics = @client.query(query) break if topics.size < 1 - @last_topic_id = topics.to_a.last['c_id'] + @last_topic_id = topics.to_a.last["c_id"] create_posts(topics, total: total_count, offset: offset) do |t| - user_id = user_id_from_imported_user_id(t['c_author']) || Discourse::SYSTEM_USER_ID - body = process_mentions(t['c_body']) - if PROCESS_UPLOADS == 1 - body = process_uploads(body, user_id) - end + user_id = user_id_from_imported_user_id(t["c_author"]) || Discourse::SYSTEM_USER_ID + body = process_mentions(t["c_body"]) + body = process_uploads(body, user_id) if PROCESS_UPLOADS 
== 1 markdown_body = HtmlToMarkdown.new(body).to_markdown { - id: t['c_id'], + id: t["c_id"], user_id: user_id, - title: t['c_title'], - category: category_id_from_imported_category_id(t['c_primaryContainer']), + title: t["c_title"], + category: category_id_from_imported_category_id(t["c_primaryContainer"]), raw: markdown_body, - created_at: t['c_creation_date'], - post_create_action: proc do |post| - tag_names = t['c_topic_names'].split(',') - DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names) - end + created_at: t["c_creation_date"], + post_create_action: + proc do |post| + tag_names = t["c_topic_names"].split(",") + DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names) + end, } end end @@ -194,7 +188,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base AND (c_type = 'answer' OR c_type = 'comment' OR c_type = 'kbentry');" - total_count = @client.query(count_query).first['count'] + total_count = @client.query(count_query).first["count"] @last_post_id = -1 @@ -210,49 +204,49 @@ class ImportScripts::AnswerHub < ImportScripts::Base ORDER BY c_id ASC LIMIT #{BATCH_SIZE};" posts = @client.query(query) - next if all_records_exist? :posts, posts.map { |p| p['c_id'] } + next if all_records_exist? :posts, posts.map { |p| p["c_id"] } break if posts.size < 1 - @last_post_id = posts.to_a.last['c_id'] + @last_post_id = posts.to_a.last["c_id"] create_posts(posts, total: total_count, offset: offset) do |p| - t = topic_lookup_from_imported_post_id(p['c_originalParent']) + t = topic_lookup_from_imported_post_id(p["c_originalParent"]) next unless t - reply_to_post_id = post_id_from_imported_post_id(p['c_parent']) + reply_to_post_id = post_id_from_imported_post_id(p["c_parent"]) reply_to_post = reply_to_post_id.present? ? Post.find(reply_to_post_id) : nil reply_to_post_number = reply_to_post.present? ? 
reply_to_post.post_number : nil - user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID + user_id = user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID - body = process_mentions(p['c_body']) - if PROCESS_UPLOADS == 1 - body = process_uploads(body, user_id) - end + body = process_mentions(p["c_body"]) + body = process_uploads(body, user_id) if PROCESS_UPLOADS == 1 markdown_body = HtmlToMarkdown.new(body).to_markdown { - id: p['c_id'], + id: p["c_id"], user_id: user_id, topic_id: t[:topic_id], reply_to_post_number: reply_to_post_number, raw: markdown_body, - created_at: p['c_creation_date'], - post_create_action: proc do |post_info| - begin - if p['c_type'] == 'answer' && p['c_marked'] == 1 - post = Post.find(post_info[:id]) - if post - user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID - current_user = User.find(user_id) - solved = DiscourseSolved.accept_answer!(post, current_user) - # puts "SOLVED: #{solved}" + created_at: p["c_creation_date"], + post_create_action: + proc do |post_info| + begin + if p["c_type"] == "answer" && p["c_marked"] == 1 + post = Post.find(post_info[:id]) + if post + user_id = + user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID + current_user = User.find(user_id) + solved = DiscourseSolved.accept_answer!(post, current_user) + # puts "SOLVED: #{solved}" + end end + rescue ActiveRecord::RecordInvalid + puts "SOLVED: Skipped post_id: #{post.id} because invalid" end - rescue ActiveRecord::RecordInvalid - puts "SOLVED: Skipped post_id: #{post.id} because invalid" - end - end + end, } end end @@ -269,11 +263,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base groups = @client.query(query) create_groups(groups) do |group| - { - id: group["c_id"], - name: group["c_name"], - visibility_level: 1 - } + { id: group["c_id"], name: group["c_name"], visibility_level: 1 } end end @@ -298,11 +288,16 @@ class ImportScripts::AnswerHub < ImportScripts::Base group_members.map groups.each do |group| - dgroup = find_group_by_import_id(group['c_id']) + dgroup = find_group_by_import_id(group["c_id"]) - next if dgroup.custom_fields['import_users_added'] + next if dgroup.custom_fields["import_users_added"] - group_member_ids = group_members.map { |m| user_id_from_imported_user_id(m["c_members"]) if m["c_groups"] == group['c_id'] }.compact + group_member_ids = + group_members + .map do |m| + user_id_from_imported_user_id(m["c_members"]) if m["c_groups"] == group["c_id"] + end + .compact # add members dgroup.bulk_add(group_member_ids) @@ -310,7 +305,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base # reload group dgroup.reload - dgroup.custom_fields['import_users_added'] = true + dgroup.custom_fields["import_users_added"] = true dgroup.save progress_count += 1 @@ -362,7 +357,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base avatars.each do |a| begin - user_id = user_id_from_imported_user_id(a['c_user']) + user_id = user_id_from_imported_user_id(a["c_user"]) user = User.find(user_id) if user filename = "avatar-#{user_id}.png" @@ -371,9 +366,11 @@ class ImportScripts::AnswerHub < ImportScripts::Base # Scrape Avatars - Avatars are saved in the db, but it might be easier to just scrape them if SCRAPE_AVATARS == 1 - File.open(path, 'wb') { |f| - f << open("https://#{ANSWERHUB_DOMAIN}/forums/users/#{a['c_user']}/photo/view.html?s=240").read - } + File.open(path, "wb") do |f| + f << open( + "https://#{ANSWERHUB_DOMAIN}/forums/users/#{a["c_user"]}/photo/view.html?s=240", + 
).read + end end upload = @uploader.create_upload(user.id, path, filename) @@ -389,7 +386,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base end end rescue ActiveRecord::RecordNotFound - puts "Could not find User for user_id: #{a['c_user']}" + puts "Could not find User for user_id: #{a["c_user"]}" end end end @@ -438,9 +435,10 @@ class ImportScripts::AnswerHub < ImportScripts::Base raw = body.dup # https://example.forum.com/forums/users/1469/XYZ_Rob.html - raw.gsub!(/(https:\/\/example.forum.com\/forums\/users\/\d+\/[\w_%-.]*.html)/) do + raw.gsub!(%r{(https://example.forum.com/forums/users/\d+/[\w_%-.]*.html)}) do legacy_url = $1 - import_user_id = legacy_url.match(/https:\/\/example.forum.com\/forums\/users\/(\d+)\/[\w_%-.]*.html/).captures + import_user_id = + legacy_url.match(%r{https://example.forum.com/forums/users/(\d+)/[\w_%-.]*.html}).captures user = @lookup.find_user_by_import_id(import_user_id[0]) if user.present? @@ -453,9 +451,9 @@ class ImportScripts::AnswerHub < ImportScripts::Base end # /forums/users/395/petrocket.html - raw.gsub!(/(\/forums\/users\/\d+\/[\w_%-.]*.html)/) do + raw.gsub!(%r{(/forums/users/\d+/[\w_%-.]*.html)}) do legacy_url = $1 - import_user_id = legacy_url.match(/\/forums\/users\/(\d+)\/[\w_%-.]*.html/).captures + import_user_id = legacy_url.match(%r{/forums/users/(\d+)/[\w_%-.]*.html}).captures # puts raw user = @lookup.find_user_by_import_id(import_user_id[0]) @@ -472,7 +470,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base end def create_permalinks - puts '', 'Creating redirects...', '' + puts "", "Creating redirects...", "" # https://example.forum.com/forums/questions/2005/missing-file.html Topic.find_each do |topic| @@ -480,8 +478,12 @@ class ImportScripts::AnswerHub < ImportScripts::Base if pcf && pcf["import_id"] id = pcf["import_id"] slug = Slug.for(topic.title) - Permalink.create(url: "questions/#{id}/#{slug}.html", topic_id: topic.id) rescue nil - print '.' + begin + Permalink.create(url: "questions/#{id}/#{slug}.html", topic_id: topic.id) + rescue StandardError + nil + end + print "." end end end @@ -496,7 +498,6 @@ class ImportScripts::AnswerHub < ImportScripts::Base return CATEGORY_MAP_TO if CATEGORY_MAP_FROM > 0 && id == CATEGORY_MAP_FROM id end - end ImportScripts::AnswerHub.new.perform diff --git a/script/import_scripts/askbot.rb b/script/import_scripts/askbot.rb index 7bc7866a0ef..537c5ba71fe 100644 --- a/script/import_scripts/askbot.rb +++ b/script/import_scripts/askbot.rb @@ -1,23 +1,23 @@ # frozen_string_literal: true require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'pg' +require "pg" class ImportScripts::MyAskBot < ImportScripts::Base # CHANGE THESE BEFORE RUNNING THE IMPORTER BATCH_SIZE = 1000 - OLD_SITE = "ask.cvxr.com" - DB_NAME = "cvxforum" - DB_USER = "cvxforum" - DB_PORT = 5432 - DB_HOST = "ask.cvxr.com" - DB_PASS = 'yeah, right' + OLD_SITE = "ask.cvxr.com" + DB_NAME = "cvxforum" + DB_USER = "cvxforum" + DB_PORT = 5432 + DB_HOST = "ask.cvxr.com" + DB_PASS = "yeah, right" # A list of categories to create. Any post with one of these tags will be # assigned to that category. Ties are broken by list order. 
- CATEGORIES = [ 'Nonconvex', 'TFOCS', 'MIDCP', 'FAQ' ] + CATEGORIES = %w[Nonconvex TFOCS MIDCP FAQ] def initialize super @@ -25,13 +25,8 @@ class ImportScripts::MyAskBot < ImportScripts::Base @thread_parents = {} @tagmap = [] @td = PG::TextDecoder::TimestampWithTimeZone.new - @client = PG.connect( - dbname: DB_NAME, - host: DB_HOST, - port: DB_PORT, - user: DB_USER, - password: DB_PASS - ) + @client = + PG.connect(dbname: DB_NAME, host: DB_HOST, port: DB_PORT, user: DB_USER, password: DB_PASS) end def execute @@ -55,18 +50,17 @@ class ImportScripts::MyAskBot < ImportScripts::Base def read_tags puts "", "reading thread tags..." - tag_count = @client.exec(<<-SQL + tag_count = @client.exec(<<-SQL)[0]["count"] SELECT COUNT(A.id) FROM askbot_thread_tags A JOIN tag B ON A.tag_id = B.id WHERE A.tag_id > 0 SQL - )[0]["count"] tags_done = 0 batches(BATCH_SIZE) do |offset| - tags = @client.exec(<<-SQL + tags = @client.exec(<<-SQL) SELECT A.thread_id, B.name FROM askbot_thread_tags A JOIN tag B @@ -75,7 +69,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) break if tags.ntuples() < 1 tags.each do |tag| tid = tag["thread_id"].to_i @@ -83,7 +76,7 @@ class ImportScripts::MyAskBot < ImportScripts::Base if @tagmap[tid] @tagmap[tid].push(tnm) else - @tagmap[tid] = [ tnm ] + @tagmap[tid] = [tnm] end tags_done += 1 print_status tags_done, tag_count @@ -94,21 +87,19 @@ class ImportScripts::MyAskBot < ImportScripts::Base def import_users puts "", "importing users" - total_count = @client.exec(<<-SQL + total_count = @client.exec(<<-SQL)[0]["count"] SELECT COUNT(id) FROM auth_user SQL - )[0]["count"] batches(BATCH_SIZE) do |offset| - users = @client.query(<<-SQL + users = @client.query(<<-SQL) SELECT id, username, email, is_staff, date_joined, last_seen, real_name, website, location, about FROM auth_user ORDER BY date_joined LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) break if users.ntuples() < 1 @@ -133,17 +124,16 @@ class ImportScripts::MyAskBot < ImportScripts::Base def import_posts puts "", "importing questions..." - post_count = @client.exec(<<-SQL + post_count = @client.exec(<<-SQL)[0]["count"] SELECT COUNT(A.id) FROM askbot_post A JOIN askbot_thread B ON A.thread_id = B.id WHERE NOT B.closed AND A.post_type='question' SQL - )[0]["count"] batches(BATCH_SIZE) do |offset| - posts = @client.exec(<<-SQL + posts = @client.exec(<<-SQL) SELECT A.id, A.author_id, A.added_at, A.text, A.thread_id, B.title FROM askbot_post A JOIN askbot_thread B @@ -153,7 +143,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) break if posts.ntuples() < 1 @@ -176,7 +165,11 @@ class ImportScripts::MyAskBot < ImportScripts::Base id: pid, title: post["title"], category: cat, - custom_fields: { import_id: pid, import_thread_id: tid, import_tags: tags }, + custom_fields: { + import_id: pid, + import_thread_id: tid, + import_tags: tags, + }, user_id: user_id_from_imported_user_id(post["author_id"]) || Discourse::SYSTEM_USER_ID, created_at: Time.zone.at(@td.decode(post["added_at"])), raw: post["text"], @@ -188,17 +181,16 @@ class ImportScripts::MyAskBot < ImportScripts::Base def import_replies puts "", "importing answers and comments..." 
- post_count = @client.exec(<<-SQL + post_count = @client.exec(<<-SQL)[0]["count"] SELECT COUNT(A.id) FROM askbot_post A JOIN askbot_thread B ON A.thread_id = B.id WHERE NOT B.closed AND A.post_type<>'question' SQL - )[0]["count"] batches(BATCH_SIZE) do |offset| - posts = @client.exec(<<-SQL + posts = @client.exec(<<-SQL) SELECT A.id, A.author_id, A.added_at, A.text, A.thread_id, B.title FROM askbot_post A JOIN askbot_thread B @@ -208,7 +200,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) break if posts.ntuples() < 1 @@ -222,10 +213,12 @@ class ImportScripts::MyAskBot < ImportScripts::Base { id: pid, topic_id: parent[:topic_id], - custom_fields: { import_id: pid }, + custom_fields: { + import_id: pid, + }, user_id: user_id_from_imported_user_id(post["author_id"]) || Discourse::SYSTEM_USER_ID, created_at: Time.zone.at(@td.decode(post["added_at"])), - raw: post["text"] + raw: post["text"], } end end @@ -240,32 +233,37 @@ class ImportScripts::MyAskBot < ImportScripts::Base # I am sure this is incomplete, but we didn't make heavy use of internal # links on our site. tmp = Regexp.quote("http://#{OLD_SITE}") - r1 = /"(#{tmp})?\/question\/(\d+)\/[a-zA-Z-]*\/?"/ - r2 = /\((#{tmp})?\/question\/(\d+)\/[a-zA-Z-]*\/?\)/ - r3 = /?/ + r1 = %r{"(#{tmp})?/question/(\d+)/[a-zA-Z-]*/?"} + r2 = %r{\((#{tmp})?/question/(\d+)/[a-zA-Z-]*/?\)} + r3 = %r{?} Post.find_each do |post| - raw = post.raw.gsub(r1) do - if topic = topic_lookup_from_imported_post_id($2) - "\"#{topic[:url]}\"" - else - $& + raw = + post + .raw + .gsub(r1) do + if topic = topic_lookup_from_imported_post_id($2) + "\"#{topic[:url]}\"" + else + $& + end + end + raw = + raw.gsub(r2) do + if topic = topic_lookup_from_imported_post_id($2) + "(#{topic[:url]})" + else + $& + end end - end - raw = raw.gsub(r2) do - if topic = topic_lookup_from_imported_post_id($2) - "(#{topic[:url]})" - else - $& + raw = + raw.gsub(r3) do + if topic = topic_lookup_from_imported_post_id($1) + trec = Topic.find_by(id: topic[:topic_id]) + "[#{trec.title}](#{topic[:url]})" + else + $& + end end - end - raw = raw.gsub(r3) do - if topic = topic_lookup_from_imported_post_id($1) - trec = Topic.find_by(id: topic[:topic_id]) - "[#{trec.title}](#{topic[:url]})" - else - $& - end - end if raw != post.raw post.raw = raw diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb index 1345f206d78..7dbca7d3532 100644 --- a/script/import_scripts/base.rb +++ b/script/import_scripts/base.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -if ARGV.include?('bbcode-to-md') +if ARGV.include?("bbcode-to-md") # Replace (most) bbcode with markdown before creating posts. # This will dramatically clean up the final posts in Discourse. 
# @@ -10,17 +10,17 @@ if ARGV.include?('bbcode-to-md') # cd ruby-bbcode-to-md # gem build ruby-bbcode-to-md.gemspec # gem install ruby-bbcode-to-md-*.gem - require 'ruby-bbcode-to-md' + require "ruby-bbcode-to-md" end -require_relative '../../config/environment' -require_relative 'base/lookup_container' -require_relative 'base/uploader' +require_relative "../../config/environment" +require_relative "base/lookup_container" +require_relative "base/uploader" -module ImportScripts; end +module ImportScripts +end class ImportScripts::Base - def initialize preload_i18n @@ -62,15 +62,14 @@ class ImportScripts::Base end elapsed = Time.now - @start_times[:import] - puts '', '', 'Done (%02dh %02dmin %02dsec)' % [elapsed / 3600, elapsed / 60 % 60, elapsed % 60] - + puts "", "", "Done (%02dh %02dmin %02dsec)" % [elapsed / 3600, elapsed / 60 % 60, elapsed % 60] ensure reset_site_settings end def get_site_settings_for_import { - blocked_email_domains: '', + blocked_email_domains: "", min_topic_title_length: 1, min_post_length: 1, min_first_post_length: 1, @@ -78,21 +77,23 @@ class ImportScripts::Base min_personal_message_title_length: 1, allow_duplicate_topic_titles: true, allow_duplicate_topic_titles_category: false, - disable_emails: 'yes', - max_attachment_size_kb: 102400, - max_image_size_kb: 102400, - authorized_extensions: '*', + disable_emails: "yes", + max_attachment_size_kb: 102_400, + max_image_size_kb: 102_400, + authorized_extensions: "*", clean_up_inactive_users_after_days: 0, clean_up_unused_staged_users_after_days: 0, clean_up_uploads: false, - clean_orphan_uploads_grace_period_hours: 1800 + clean_orphan_uploads_grace_period_hours: 1800, } end def change_site_settings if SiteSetting.bootstrap_mode_enabled - SiteSetting.default_trust_level = TrustLevel[0] if SiteSetting.default_trust_level == TrustLevel[1] - SiteSetting.default_email_digest_frequency = 10080 if SiteSetting.default_email_digest_frequency == 1440 + SiteSetting.default_trust_level = TrustLevel[0] if SiteSetting.default_trust_level == + TrustLevel[1] + SiteSetting.default_email_digest_frequency = + 10_080 if SiteSetting.default_email_digest_frequency == 1440 SiteSetting.bootstrap_mode_enabled = false end @@ -131,7 +132,7 @@ class ImportScripts::Base raise NotImplementedError end - %i{ + %i[ add_category add_group add_post @@ -146,9 +147,7 @@ class ImportScripts::Base topic_lookup_from_imported_post_id user_already_imported? 
user_id_from_imported_user_id - }.each do |method_name| - delegate method_name, to: :@lookup - end + ].each { |method_name| delegate method_name, to: :@lookup } def create_admin(opts = {}) admin = User.new @@ -196,7 +195,11 @@ class ImportScripts::Base end end - print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("groups")) + print_status( + created + skipped + failed + (opts[:offset] || 0), + total, + get_start_time("groups"), + ) end [created, skipped] @@ -224,23 +227,22 @@ class ImportScripts::Base ActiveRecord::Base.transaction do begin connection = ActiveRecord::Base.connection.raw_connection - connection.exec('CREATE TEMP TABLE import_ids(val text PRIMARY KEY)') + connection.exec("CREATE TEMP TABLE import_ids(val text PRIMARY KEY)") - import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",") + import_id_clause = + import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",") connection.exec("INSERT INTO import_ids VALUES #{import_id_clause}") existing = "#{type.to_s.classify}CustomField".constantize - existing = existing.where(name: 'import_id') - .joins('JOIN import_ids ON val = value') - .count + existing = existing.where(name: "import_id").joins("JOIN import_ids ON val = value").count if existing == import_ids.length puts "Skipping #{import_ids.length} already imported #{type}" true end ensure - connection.exec('DROP TABLE import_ids') unless connection.nil? + connection.exec("DROP TABLE import_ids") unless connection.nil? end end end @@ -292,7 +294,11 @@ class ImportScripts::Base end end - print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("users")) + print_status( + created + skipped + failed + (opts[:offset] || 0), + total, + get_start_time("users"), + ) end [created, skipped] @@ -305,7 +311,9 @@ class ImportScripts::Base post_create_action = opts.delete(:post_create_action) existing = find_existing_user(opts[:email], opts[:username]) - return existing if existing && (merge || existing.custom_fields["import_id"].to_s == import_id.to_s) + if existing && (merge || existing.custom_fields["import_id"].to_s == import_id.to_s) + return existing + end bio_raw = opts.delete(:bio_raw) website = opts.delete(:website) @@ -316,8 +324,11 @@ class ImportScripts::Base original_name = opts[:name] original_email = opts[:email] = opts[:email].downcase - if !UsernameValidator.new(opts[:username]).valid_format? || !User.username_available?(opts[:username]) - opts[:username] = UserNameSuggester.suggest(opts[:username].presence || opts[:name].presence || opts[:email]) + if !UsernameValidator.new(opts[:username]).valid_format? || + !User.username_available?(opts[:username]) + opts[:username] = UserNameSuggester.suggest( + opts[:username].presence || opts[:name].presence || opts[:email], + ) end if !EmailAddressValidator.valid_value?(opts[:email]) @@ -339,7 +350,8 @@ class ImportScripts::Base u = User.new(opts) (opts[:custom_fields] || {}).each { |k, v| u.custom_fields[k] = v } u.custom_fields["import_id"] = import_id - u.custom_fields["import_username"] = original_username if original_username.present? && original_username != opts[:username] + u.custom_fields["import_username"] = original_username if original_username.present? && + original_username != opts[:username] u.custom_fields["import_avatar_url"] = avatar_url if avatar_url.present? u.custom_fields["import_pass"] = opts[:password] if opts[:password].present? 
u.custom_fields["import_email"] = original_email if original_email != opts[:email] @@ -359,9 +371,7 @@ class ImportScripts::Base end end - if opts[:active] && opts[:password].present? - u.activate - end + u.activate if opts[:active] && opts[:password].present? rescue => e # try based on email if e.try(:record).try(:errors).try(:messages).try(:[], :primary_email).present? @@ -377,7 +387,7 @@ class ImportScripts::Base end end - if u.custom_fields['import_email'] + if u.custom_fields["import_email"] u.suspended_at = Time.zone.at(Time.now) u.suspended_till = 200.years.from_now u.save! @@ -388,11 +398,15 @@ class ImportScripts::Base user_option.email_messages_level = UserOption.email_level_types[:never] user_option.save! if u.save - StaffActionLogger.new(Discourse.system_user).log_user_suspend(u, 'Invalid email address on import') + StaffActionLogger.new(Discourse.system_user).log_user_suspend( + u, + "Invalid email address on import", + ) else - Rails.logger.error("Failed to suspend user #{u.username}. #{u.errors.try(:full_messages).try(:inspect)}") + Rails.logger.error( + "Failed to suspend user #{u.username}. #{u.errors.try(:full_messages).try(:inspect)}", + ) end - end post_create_action.try(:call, u) if u.persisted? @@ -402,7 +416,8 @@ class ImportScripts::Base def find_existing_user(email, username) # Force the use of the index on the 'user_emails' table - UserEmail.where("lower(email) = ?", email.downcase).first&.user || User.where(username: username).first + UserEmail.where("lower(email) = ?", email.downcase).first&.user || + User.where(username: username).first end def created_category(category) @@ -435,7 +450,8 @@ class ImportScripts::Base # make sure categories don't go more than 2 levels deep if params[:parent_category_id] top = Category.find_by_id(params[:parent_category_id]) - top = top.parent_category while (top&.height_of_ancestors || -1) + 1 >= SiteSetting.max_category_nesting + top = top.parent_category while (top&.height_of_ancestors || -1) + 1 >= + SiteSetting.max_category_nesting params[:parent_category_id] = top.id if top end @@ -471,15 +487,16 @@ class ImportScripts::Base post_create_action = opts.delete(:post_create_action) - new_category = Category.new( - name: opts[:name], - user_id: opts[:user_id] || opts[:user].try(:id) || Discourse::SYSTEM_USER_ID, - position: opts[:position], - parent_category_id: opts[:parent_category_id], - color: opts[:color] || category_color(opts[:parent_category_id]), - text_color: opts[:text_color] || "FFF", - read_restricted: opts[:read_restricted] || false, - ) + new_category = + Category.new( + name: opts[:name], + user_id: opts[:user_id] || opts[:user].try(:id) || Discourse::SYSTEM_USER_ID, + position: opts[:position], + parent_category_id: opts[:parent_category_id], + color: opts[:color] || category_color(opts[:parent_category_id]), + text_color: opts[:text_color] || "FFF", + read_restricted: opts[:read_restricted] || false, + ) new_category.custom_fields["import_id"] = import_id if import_id new_category.save! @@ -498,10 +515,16 @@ class ImportScripts::Base end def category_color(parent_category_id) - @category_colors ||= SiteSetting.category_colors.split('|') + @category_colors ||= SiteSetting.category_colors.split("|") index = @next_category_color_index[parent_category_id].presence || 0 - @next_category_color_index[parent_category_id] = index + 1 >= @category_colors.count ? 
0 : index + 1 + @next_category_color_index[parent_category_id] = ( + if index + 1 >= @category_colors.count + 0 + else + index + 1 + end + ) @category_colors[index] end @@ -571,7 +594,7 @@ class ImportScripts::Base opts = opts.merge(skip_validations: true) opts[:import_mode] = true opts[:custom_fields] ||= {} - opts[:custom_fields]['import_id'] = import_id + opts[:custom_fields]["import_id"] = import_id unless opts[:topic_id] opts[:meta_data] = meta_data = {} @@ -582,7 +605,11 @@ class ImportScripts::Base opts[:guardian] = STAFF_GUARDIAN if @bbcode_to_md - opts[:raw] = opts[:raw].bbcode_to_md(false, {}, :disable, :quote) rescue opts[:raw] + opts[:raw] = begin + opts[:raw].bbcode_to_md(false, {}, :disable, :quote) + rescue StandardError + opts[:raw] + end end post_creator = PostCreator.new(user, opts) @@ -628,7 +655,7 @@ class ImportScripts::Base created += 1 if manager.errors.none? skipped += 1 if manager.errors.any? - rescue + rescue StandardError skipped += 1 end end @@ -671,14 +698,14 @@ class ImportScripts::Base def close_inactive_topics(opts = {}) num_days = opts[:days] || 30 - puts '', "Closing topics that have been inactive for more than #{num_days} days." + puts "", "Closing topics that have been inactive for more than #{num_days} days." - query = Topic.where('last_posted_at < ?', num_days.days.ago).where(closed: false) + query = Topic.where("last_posted_at < ?", num_days.days.ago).where(closed: false) total_count = query.count closed_count = 0 query.find_each do |topic| - topic.update_status('closed', true, Discourse.system_user) + topic.update_status("closed", true, Discourse.system_user) closed_count += 1 print_status(closed_count, total_count, get_start_time("close_inactive_topics")) end @@ -790,7 +817,9 @@ class ImportScripts::Base puts "", "Updating user digest_attempted_at..." - DB.exec("UPDATE user_stats SET digest_attempted_at = now() - random() * interval '1 week' WHERE digest_attempted_at IS NULL") + DB.exec( + "UPDATE user_stats SET digest_attempted_at = now() - random() * interval '1 week' WHERE digest_attempted_at IS NULL", + ) end # scripts that are able to import last_seen_at from the source data should override this method @@ -854,13 +883,15 @@ class ImportScripts::Base count = 0 total = User.count - User.includes(:user_stat).find_each do |user| - begin - user.update_columns(trust_level: 0) if user.trust_level > 0 && user.post_count == 0 - rescue Discourse::InvalidAccess + User + .includes(:user_stat) + .find_each do |user| + begin + user.update_columns(trust_level: 0) if user.trust_level > 0 && user.post_count == 0 + rescue Discourse::InvalidAccess + end + print_status(count += 1, total, get_start_time("update_tl0")) end - print_status(count += 1, total, get_start_time("update_tl0")) - end end def update_user_signup_date_based_on_first_post @@ -870,7 +901,7 @@ class ImportScripts::Base total = User.count User.find_each do |user| - if first = user.posts.order('created_at ASC').first + if first = user.posts.order("created_at ASC").first user.created_at = first.created_at user.save! end @@ -893,16 +924,16 @@ class ImportScripts::Base def print_status(current, max, start_time = nil) if start_time.present? 
elapsed_seconds = Time.now - start_time - elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60] + elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60] else - elements_per_minute = '' + elements_per_minute = "" end print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute] end def print_spinner - @spinner_chars ||= %w{ | / - \\ } + @spinner_chars ||= %w[| / - \\] @spinner_chars.push @spinner_chars.shift print "\b#{@spinner_chars[0]}" end diff --git a/script/import_scripts/base/csv_helper.rb b/script/import_scripts/base/csv_helper.rb index 7f7becbd3d6..3f211ea366a 100644 --- a/script/import_scripts/base/csv_helper.rb +++ b/script/import_scripts/base/csv_helper.rb @@ -13,65 +13,69 @@ module ImportScripts def initialize(cols) cols.each_with_index do |col, idx| - self.class.public_send(:define_method, col.downcase.gsub(/[\W]/, '_').squeeze('_')) do - @row[idx] - end + self + .class + .public_send(:define_method, col.downcase.gsub(/[\W]/, "_").squeeze("_")) { @row[idx] } end end end - def csv_parse(filename, col_sep = ',') + def csv_parse(filename, col_sep = ",") first = true row = nil current_row = +"" double_quote_count = 0 - File.open(filename).each_line do |line| + File + .open(filename) + .each_line do |line| + line.strip! - line.strip! + current_row << "\n" unless current_row.empty? + current_row << line - current_row << "\n" unless current_row.empty? - current_row << line + double_quote_count += line.scan('"').count - double_quote_count += line.scan('"').count + next if double_quote_count % 2 == 1 # this row continues on a new line. don't parse until we have the whole row. - next if double_quote_count % 2 == 1 # this row continues on a new line. don't parse until we have the whole row. 
+ raw = + begin + CSV.parse(current_row, col_sep: col_sep) + rescue CSV::MalformedCSVError => e + puts e.message + puts "*" * 100 + puts "Bad row skipped, line is: #{line}" + puts + puts current_row + puts + puts "double quote count is : #{double_quote_count}" + puts "*" * 100 - raw = begin - CSV.parse(current_row, col_sep: col_sep) - rescue CSV::MalformedCSVError => e - puts e.message - puts "*" * 100 - puts "Bad row skipped, line is: #{line}" - puts - puts current_row - puts - puts "double quote count is : #{double_quote_count}" - puts "*" * 100 + current_row = "" + double_quote_count = 0 - current_row = "" - double_quote_count = 0 + next + end[ + 0 + ] - next - end[0] + if first + row = RowResolver.create(raw) - if first - row = RowResolver.create(raw) + current_row = "" + double_quote_count = 0 + first = false + next + end + + row.load(raw) + + yield row current_row = "" double_quote_count = 0 - first = false - next end - - row.load(raw) - - yield row - - current_row = "" - double_quote_count = 0 - end end end end diff --git a/script/import_scripts/base/generic_database.rb b/script/import_scripts/base/generic_database.rb index 28bfd8ee3bf..dafea94199e 100644 --- a/script/import_scripts/base/generic_database.rb +++ b/script/import_scripts/base/generic_database.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'sqlite3' +require "sqlite3" module ImportScripts class GenericDatabase @@ -80,24 +80,20 @@ module ImportScripts VALUES (:id, :raw, :topic_id, :user_id, :created_at, :reply_to_post_id, :url, :upload_count) SQL - attachments&.each do |attachment| - @db.execute(<<-SQL, post_id: post[:id], path: attachment) + attachments&.each { |attachment| @db.execute(<<-SQL, post_id: post[:id], path: attachment) } INSERT OR REPLACE INTO post_upload (post_id, path) VALUES (:post_id, :path) SQL - end - like_user_ids&.each do |user_id| - @db.execute(<<-SQL, post_id: post[:id], user_id: user_id) + like_user_ids&.each { |user_id| @db.execute(<<-SQL, post_id: post[:id], user_id: user_id) } INSERT OR REPLACE INTO like (post_id, user_id) VALUES (:post_id, :user_id) SQL - end end end def sort_posts_by_created_at - @db.execute 'DELETE FROM post_order' + @db.execute "DELETE FROM post_order" @db.execute <<-SQL INSERT INTO post_order (post_id) @@ -146,7 +142,7 @@ module ImportScripts LIMIT #{@batch_size} SQL - add_last_column_value(rows, 'id') + add_last_column_value(rows, "id") end def get_user_id(username) @@ -173,7 +169,7 @@ module ImportScripts LIMIT #{@batch_size} SQL - add_last_column_value(rows, 'id') + add_last_column_value(rows, "id") end def fetch_topic_attachments(topic_id) @@ -200,7 +196,7 @@ module ImportScripts LIMIT #{@batch_size} SQL - add_last_column_value(rows, 'rowid') + add_last_column_value(rows, "rowid") end def fetch_sorted_posts(last_row_id) @@ -213,7 +209,7 @@ module ImportScripts LIMIT #{@batch_size} SQL - add_last_column_value(rows, 'rowid') + add_last_column_value(rows, "rowid") end def fetch_post_attachments(post_id) @@ -240,7 +236,7 @@ module ImportScripts LIMIT #{@batch_size} SQL - add_last_column_value(rows, 'rowid') + add_last_column_value(rows, "rowid") end def execute_sql(sql) @@ -254,12 +250,12 @@ module ImportScripts private def configure_database - @db.execute 'PRAGMA journal_mode = OFF' - @db.execute 'PRAGMA locking_mode = EXCLUSIVE' + @db.execute "PRAGMA journal_mode = OFF" + @db.execute "PRAGMA locking_mode = EXCLUSIVE" end def key_data_type - @numeric_keys ? 'INTEGER' : 'TEXT' + @numeric_keys ? 
"INTEGER" : "TEXT" end def create_category_table @@ -299,7 +295,7 @@ module ImportScripts ) SQL - @db.execute 'CREATE INDEX IF NOT EXISTS user_by_username ON user (username)' + @db.execute "CREATE INDEX IF NOT EXISTS user_by_username ON user (username)" end def create_topic_table @@ -317,7 +313,7 @@ module ImportScripts ) SQL - @db.execute 'CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)' + @db.execute "CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)" @db.execute <<-SQL CREATE TABLE IF NOT EXISTS topic_upload ( @@ -326,7 +322,7 @@ module ImportScripts ) SQL - @db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS topic_upload_unique ON topic_upload(topic_id, path)' + @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS topic_upload_unique ON topic_upload(topic_id, path)" end def create_post_table @@ -343,7 +339,7 @@ module ImportScripts ) SQL - @db.execute 'CREATE INDEX IF NOT EXISTS post_by_user_id ON post (user_id)' + @db.execute "CREATE INDEX IF NOT EXISTS post_by_user_id ON post (user_id)" @db.execute <<-SQL CREATE TABLE IF NOT EXISTS post_order ( @@ -358,7 +354,7 @@ module ImportScripts ) SQL - @db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS post_upload_unique ON post_upload(post_id, path)' + @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS post_upload_unique ON post_upload(post_id, path)" end def prepare(hash) diff --git a/script/import_scripts/base/lookup_container.rb b/script/import_scripts/base/lookup_container.rb index 30ac96203d4..4147daf43f4 100644 --- a/script/import_scripts/base/lookup_container.rb +++ b/script/import_scripts/base/lookup_container.rb @@ -3,27 +3,26 @@ module ImportScripts class LookupContainer def initialize - puts 'Loading existing groups...' - @groups = GroupCustomField.where(name: 'import_id').pluck(:value, :group_id).to_h + puts "Loading existing groups..." + @groups = GroupCustomField.where(name: "import_id").pluck(:value, :group_id).to_h - puts 'Loading existing users...' - @users = UserCustomField.where(name: 'import_id').pluck(:value, :user_id).to_h + puts "Loading existing users..." + @users = UserCustomField.where(name: "import_id").pluck(:value, :user_id).to_h - puts 'Loading existing categories...' - @categories = CategoryCustomField.where(name: 'import_id').pluck(:value, :category_id).to_h + puts "Loading existing categories..." + @categories = CategoryCustomField.where(name: "import_id").pluck(:value, :category_id).to_h - puts 'Loading existing posts...' - @posts = PostCustomField.where(name: 'import_id').pluck(:value, :post_id).to_h + puts "Loading existing posts..." + @posts = PostCustomField.where(name: "import_id").pluck(:value, :post_id).to_h - puts 'Loading existing topics...' + puts "Loading existing topics..." 
@topics = {} - Post.joins(:topic).pluck('posts.id, posts.topic_id, posts.post_number, topics.slug').each do |p| - @topics[p[0]] = { - topic_id: p[1], - post_number: p[2], - url: Post.url(p[3], p[1], p[2]) - } - end + Post + .joins(:topic) + .pluck("posts.id, posts.topic_id, posts.post_number, topics.slug") + .each do |p| + @topics[p[0]] = { topic_id: p[1], post_number: p[2], url: Post.url(p[3], p[1], p[2]) } + end end # Get the Discourse Post id based on the id of the source record @@ -44,7 +43,7 @@ module ImportScripts # Get the Discourse Group based on the id of the source group def find_group_by_import_id(import_id) - GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group) + GroupCustomField.where(name: "import_id", value: import_id.to_s).first.try(:group) end # Get the Discourse User id based on the id of the source user @@ -54,7 +53,7 @@ module ImportScripts # Get the Discourse User based on the id of the source user def find_user_by_import_id(import_id) - UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user) + UserCustomField.where(name: "import_id", value: import_id.to_s).first.try(:user) end def find_username_by_import_id(import_id) @@ -84,11 +83,7 @@ module ImportScripts end def add_topic(post) - @topics[post.id] = { - post_number: post.post_number, - topic_id: post.topic_id, - url: post.url, - } + @topics[post.id] = { post_number: post.post_number, topic_id: post.topic_id, url: post.url } end def user_already_imported?(import_id) @@ -98,6 +93,5 @@ module ImportScripts def post_already_imported?(import_id) @posts.has_key?(import_id) || @posts.has_key?(import_id.to_s) end - end end diff --git a/script/import_scripts/base/uploader.rb b/script/import_scripts/base/uploader.rb index 45404bba21a..4342d328878 100644 --- a/script/import_scripts/base/uploader.rb +++ b/script/import_scripts/base/uploader.rb @@ -13,8 +13,16 @@ module ImportScripts STDERR.puts "Failed to create upload: #{e}" nil ensure - tmp.close rescue nil - tmp.unlink rescue nil + begin + tmp.close + rescue StandardError + nil + end + begin + tmp.unlink + rescue StandardError + nil + end end def create_avatar(user, avatar_path) @@ -30,7 +38,7 @@ module ImportScripts STDERR.puts "Failed to upload avatar for user #{user.username}: #{avatar_path}" STDERR.puts upload.errors.inspect if upload end - rescue + rescue StandardError STDERR.puts "Failed to create avatar for user #{user.username}: #{avatar_path}" ensure tempfile.close! 
if tempfile @@ -52,11 +60,9 @@ module ImportScripts def copy_to_tempfile(source_path) extension = File.extname(source_path) - tmp = Tempfile.new(['discourse-upload', extension]) + tmp = Tempfile.new(["discourse-upload", extension]) - File.open(source_path) do |source_stream| - IO.copy_stream(source_stream, tmp) - end + File.open(source_path) { |source_stream| IO.copy_stream(source_stream, tmp) } tmp.rewind tmp diff --git a/script/import_scripts/bbpress.rb b/script/import_scripts/bbpress.rb index 2cd0698d123..4864ba6b94f 100644 --- a/script/import_scripts/bbpress.rb +++ b/script/import_scripts/bbpress.rb @@ -1,29 +1,29 @@ # frozen_string_literal: true -require 'mysql2' +require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Bbpress < ImportScripts::Base - - BB_PRESS_HOST ||= ENV['BBPRESS_HOST'] || "localhost" - BB_PRESS_DB ||= ENV['BBPRESS_DB'] || "bbpress" - BATCH_SIZE ||= 1000 - BB_PRESS_PW ||= ENV['BBPRESS_PW'] || "" - BB_PRESS_USER ||= ENV['BBPRESS_USER'] || "root" - BB_PRESS_PREFIX ||= ENV['BBPRESS_PREFIX'] || "wp_" - BB_PRESS_ATTACHMENTS_DIR ||= ENV['BBPRESS_ATTACHMENTS_DIR'] || "/path/to/attachments" + BB_PRESS_HOST ||= ENV["BBPRESS_HOST"] || "localhost" + BB_PRESS_DB ||= ENV["BBPRESS_DB"] || "bbpress" + BATCH_SIZE ||= 1000 + BB_PRESS_PW ||= ENV["BBPRESS_PW"] || "" + BB_PRESS_USER ||= ENV["BBPRESS_USER"] || "root" + BB_PRESS_PREFIX ||= ENV["BBPRESS_PREFIX"] || "wp_" + BB_PRESS_ATTACHMENTS_DIR ||= ENV["BBPRESS_ATTACHMENTS_DIR"] || "/path/to/attachments" def initialize super @he = HTMLEntities.new - @client = Mysql2::Client.new( - host: BB_PRESS_HOST, - username: BB_PRESS_USER, - database: BB_PRESS_DB, - password: BB_PRESS_PW, - ) + @client = + Mysql2::Client.new( + host: BB_PRESS_HOST, + username: BB_PRESS_USER, + database: BB_PRESS_DB, + password: BB_PRESS_PW, + ) end def execute @@ -40,17 +40,16 @@ class ImportScripts::Bbpress < ImportScripts::Base puts "", "importing users..." last_user_id = -1 - total_users = bbpress_query(<<-SQL + total_users = bbpress_query(<<-SQL).first["cnt"] SELECT COUNT(DISTINCT(u.id)) AS cnt FROM #{BB_PRESS_PREFIX}users u LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id WHERE p.post_type IN ('forum', 'reply', 'topic') AND user_email LIKE '%@%' SQL - ).first["cnt"] batches(BATCH_SIZE) do |offset| - users = bbpress_query(<<-SQL + users = bbpress_query(<<-SQL).to_a SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass FROM #{BB_PRESS_PREFIX}users u LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id @@ -61,7 +60,6 @@ class ImportScripts::Bbpress < ImportScripts::Base ORDER BY u.id LIMIT #{BATCH_SIZE} SQL - ).to_a break if users.empty? 
@@ -73,22 +71,20 @@ class ImportScripts::Bbpress < ImportScripts::Base user_ids_sql = user_ids.join(",") users_description = {} - bbpress_query(<<-SQL + bbpress_query(<<-SQL).each { |um| users_description[um["user_id"]] = um["description"] } SELECT user_id, meta_value description FROM #{BB_PRESS_PREFIX}usermeta WHERE user_id IN (#{user_ids_sql}) AND meta_key = 'description' SQL - ).each { |um| users_description[um["user_id"]] = um["description"] } users_last_activity = {} - bbpress_query(<<-SQL + bbpress_query(<<-SQL).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] } SELECT user_id, meta_value last_activity FROM #{BB_PRESS_PREFIX}usermeta WHERE user_id IN (#{user_ids_sql}) AND meta_key = 'last_activity' SQL - ).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] } create_users(users, total: total_users, offset: offset) do |u| { @@ -96,7 +92,7 @@ class ImportScripts::Bbpress < ImportScripts::Base username: u["user_nicename"], password: u["user_pass"], email: u["user_email"].downcase, - name: u["display_name"].presence || u['user_nicename'], + name: u["display_name"].presence || u["user_nicename"], created_at: u["user_registered"], website: u["user_url"], bio_raw: users_description[u["id"]], @@ -114,67 +110,60 @@ class ImportScripts::Bbpress < ImportScripts::Base emails = Array.new # gather anonymous users via postmeta table - bbpress_query(<<-SQL + bbpress_query(<<-SQL).each do |pm| SELECT post_id, meta_key, meta_value FROM #{BB_PRESS_PREFIX}postmeta WHERE meta_key LIKE '_bbp_anonymous%' SQL - ).each do |pm| - anon_posts[pm['post_id']] = Hash.new if not anon_posts[pm['post_id']] + anon_posts[pm["post_id"]] = Hash.new if not anon_posts[pm["post_id"]] - if pm['meta_key'] == '_bbp_anonymous_email' - anon_posts[pm['post_id']]['email'] = pm['meta_value'] + if pm["meta_key"] == "_bbp_anonymous_email" + anon_posts[pm["post_id"]]["email"] = pm["meta_value"] end - if pm['meta_key'] == '_bbp_anonymous_name' - anon_posts[pm['post_id']]['name'] = pm['meta_value'] + if pm["meta_key"] == "_bbp_anonymous_name" + anon_posts[pm["post_id"]]["name"] = pm["meta_value"] end - if pm['meta_key'] == '_bbp_anonymous_website' - anon_posts[pm['post_id']]['website'] = pm['meta_value'] + if pm["meta_key"] == "_bbp_anonymous_website" + anon_posts[pm["post_id"]]["website"] = pm["meta_value"] end end # gather every existent username anon_posts.each do |id, post| - anon_names[post['name']] = Hash.new if not anon_names[post['name']] + anon_names[post["name"]] = Hash.new if not anon_names[post["name"]] # overwriting email address, one user can only use one email address - anon_names[post['name']]['email'] = post['email'] - anon_names[post['name']]['website'] = post['website'] if post['website'] != '' + anon_names[post["name"]]["email"] = post["email"] + anon_names[post["name"]]["website"] = post["website"] if post["website"] != "" end # make sure every user name has a unique email address anon_names.each do |k, name| - if not emails.include? name['email'] - emails.push ( name['email']) + if not emails.include? name["email"] + emails.push (name["email"]) else - name['email'] = "anonymous_#{SecureRandom.hex}@no-email.invalid" + name["email"] = "anonymous_#{SecureRandom.hex}@no-email.invalid" end end create_users(anon_names) do |k, n| - { - id: k, - email: n["email"].downcase, - name: k, - website: n["website"] - } + { id: k, email: n["email"].downcase, name: k, website: n["website"] } end end def import_categories puts "", "importing categories..." 
- categories = bbpress_query(<<-SQL + categories = bbpress_query(<<-SQL) SELECT id, post_name, post_parent FROM #{BB_PRESS_PREFIX}posts WHERE post_type = 'forum' AND LENGTH(COALESCE(post_name, '')) > 0 ORDER BY post_parent, id SQL - ) create_categories(categories) do |c| - category = { id: c['id'], name: c['post_name'] } - if (parent_id = c['post_parent'].to_i) > 0 + category = { id: c["id"], name: c["post_name"] } + if (parent_id = c["post_parent"].to_i) > 0 category[:parent_category_id] = category_id_from_imported_category_id(parent_id) end category @@ -185,16 +174,15 @@ class ImportScripts::Bbpress < ImportScripts::Base puts "", "importing topics and posts..." last_post_id = -1 - total_posts = bbpress_query(<<-SQL + total_posts = bbpress_query(<<-SQL).first["count"] SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply') SQL - ).first["count"] batches(BATCH_SIZE) do |offset| - posts = bbpress_query(<<-SQL + posts = bbpress_query(<<-SQL).to_a SELECT id, post_author, post_date, @@ -209,7 +197,6 @@ class ImportScripts::Bbpress < ImportScripts::Base ORDER BY id LIMIT #{BATCH_SIZE} SQL - ).to_a break if posts.empty? @@ -221,31 +208,29 @@ class ImportScripts::Bbpress < ImportScripts::Base post_ids_sql = post_ids.join(",") posts_likes = {} - bbpress_query(<<-SQL + bbpress_query(<<-SQL).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i } SELECT post_id, meta_value likes FROM #{BB_PRESS_PREFIX}postmeta WHERE post_id IN (#{post_ids_sql}) AND meta_key = 'Likes' SQL - ).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i } anon_names = {} - bbpress_query(<<-SQL + bbpress_query(<<-SQL).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] } SELECT post_id, meta_value FROM #{BB_PRESS_PREFIX}postmeta WHERE post_id IN (#{post_ids_sql}) AND meta_key = '_bbp_anonymous_name' SQL - ).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] } create_posts(posts, total: total_posts, offset: offset) do |p| skip = false - user_id = user_id_from_imported_user_id(p["post_author"]) || - find_user_by_import_id(p["post_author"]).try(:id) || - user_id_from_imported_user_id(anon_names[p['id']]) || - find_user_by_import_id(anon_names[p['id']]).try(:id) || - -1 + user_id = + user_id_from_imported_user_id(p["post_author"]) || + find_user_by_import_id(p["post_author"]).try(:id) || + user_id_from_imported_user_id(anon_names[p["id"]]) || + find_user_by_import_id(anon_names[p["id"]]).try(:id) || -1 post = { id: p["id"], @@ -256,7 +241,9 @@ class ImportScripts::Bbpress < ImportScripts::Base } if post[:raw].present? 
- post[:raw].gsub!(/\<pre\>\<code(.*?)\>(.*?)\<\/code\>\<\/pre\>/im) { "```\n#{@he.decode($2)}\n```" }
+ post[:raw].gsub!(%r{\<pre\>\<code(.*?)\>(.*?)\</code\>\</pre\>}im) do
+ "```\n#{@he.decode($2)}\n```"
+ end
 end
 if p["post_type"] == "topic"
@@ -288,17 +275,16 @@ class ImportScripts::Bbpress < ImportScripts::Base
 count = 0
 last_attachment_id = -1
- total_attachments = bbpress_query(<<-SQL
+ total_attachments = bbpress_query(<<-SQL).first["count"]
 SELECT COUNT(*) count
 FROM #{BB_PRESS_PREFIX}postmeta pm
 JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
 WHERE pm.meta_key = '_wp_attached_file'
 AND p.post_parent > 0
 SQL
- ).first["count"]
 batches(BATCH_SIZE) do |offset|
- attachments = bbpress_query(<<-SQL
+ attachments = bbpress_query(<<-SQL).to_a
 SELECT pm.meta_id id, pm.meta_value, p.post_parent post_id
 FROM #{BB_PRESS_PREFIX}postmeta pm
 JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
@@ -308,7 +294,6 @@
 ORDER BY pm.meta_id
 LIMIT #{BATCH_SIZE}
 SQL
- ).to_a
 break if attachments.empty?
 last_attachment_id = attachments[-1]["id"].to_i
@@ -325,7 +310,9 @@
 if !post.raw[html]
 post.raw << "\n\n" << html
 post.save!
- PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
+ unless PostUpload.where(post: post, upload: upload).exists?
+ PostUpload.create!(post: post, upload: upload)
+ end
 end
 end
 end
@@ -340,15 +327,14 @@
 count = 0
 last_attachment_id = -1
- total_attachments = bbpress_query(<<-SQL
+ total_attachments = bbpress_query(<<-SQL).first["count"]
 SELECT COUNT(*) count
 FROM #{BB_PRESS_PREFIX}bb_attachments
 WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
 SQL
- ).first["count"]
 batches(BATCH_SIZE) do |offset|
- attachments = bbpress_query(<<-SQL
+ attachments = bbpress_query(<<-SQL).to_a
 SELECT id, filename, post_id
 FROM #{BB_PRESS_PREFIX}bb_attachments
 WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
@@ -356,13 +342,16 @@
 ORDER BY id
 LIMIT #{BATCH_SIZE}
 SQL
- ).to_a
 break if attachments.empty?
 last_attachment_id = attachments[-1]["id"].to_i
 attachments.each do |a|
- print_status(count += 1, total_attachments, get_start_time("attachments_from_bb_attachments"))
+ print_status(
+ count += 1,
+ total_attachments,
+ get_start_time("attachments_from_bb_attachments"),
+ )
 if path = find_attachment(a["filename"], a["id"])
 if post = Post.find_by(id: post_id_from_imported_post_id(a["post_id"]))
 upload = create_upload(post.user.id, path, a["filename"])
@@ -371,7 +360,9 @@
 if !post.raw[html]
 post.raw << "\n\n" << html
 post.save!
- PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
+ unless PostUpload.where(post: post, upload: upload).exists?
+ PostUpload.create!(post: post, upload: upload)
+ end
 end
 end
 end
@@ -391,7 +382,7 @@
 last_topic_id = -1
 batches(BATCH_SIZE) do |offset|
- topics = bbpress_query(<<-SQL
+ topics = bbpress_query(<<-SQL).to_a
 SELECT
 id,
 guid
 FROM #{BB_PRESS_PREFIX}posts
@@ -401,14 +392,17 @@
 ORDER BY id
 LIMIT #{BATCH_SIZE}
 SQL
- ).to_a
 break if topics.empty?
last_topic_id = topics[-1]["id"].to_i topics.each do |t| - topic = topic_lookup_from_imported_post_id(t['id']) - Permalink.create(url: URI.parse(t['guid']).path.chomp('/'), topic_id: topic[:topic_id]) rescue nil + topic = topic_lookup_from_imported_post_id(t["id"]) + begin + Permalink.create(url: URI.parse(t["guid"]).path.chomp("/"), topic_id: topic[:topic_id]) + rescue StandardError + nil + end end end end @@ -417,42 +411,44 @@ class ImportScripts::Bbpress < ImportScripts::Base puts "", "importing private messages..." last_post_id = -1 - total_posts = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first["count"] + total_posts = + bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first[ + "count" + ] threads = {} - total_count = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first["count"] + total_count = + bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first[ + "count" + ] current_count = 0 batches(BATCH_SIZE) do |offset| - rows = bbpress_query(<<-SQL + rows = bbpress_query(<<-SQL).to_a SELECT thread_id, user_id FROM #{BB_PRESS_PREFIX}bp_messages_recipients ORDER BY id LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ).to_a break if rows.empty? rows.each do |row| current_count += 1 - print_status(current_count, total_count, get_start_time('private_messages')) + print_status(current_count, total_count, get_start_time("private_messages")) - threads[row['thread_id']] ||= { - target_user_ids: [], - imported_topic_id: nil - } - user_id = user_id_from_imported_user_id(row['user_id']) - if user_id && !threads[row['thread_id']][:target_user_ids].include?(user_id) - threads[row['thread_id']][:target_user_ids] << user_id + threads[row["thread_id"]] ||= { target_user_ids: [], imported_topic_id: nil } + user_id = user_id_from_imported_user_id(row["user_id"]) + if user_id && !threads[row["thread_id"]][:target_user_ids].include?(user_id) + threads[row["thread_id"]][:target_user_ids] << user_id end end end batches(BATCH_SIZE) do |offset| - posts = bbpress_query(<<-SQL + posts = bbpress_query(<<-SQL).to_a SELECT id, thread_id, date_sent, @@ -464,39 +460,48 @@ class ImportScripts::Bbpress < ImportScripts::Base ORDER BY thread_id, date_sent LIMIT #{BATCH_SIZE} SQL - ).to_a break if posts.empty? 
last_post_id = posts[-1]["id"].to_i create_posts(posts, total: total_posts, offset: offset) do |post| - if tcf = TopicCustomField.where(name: 'bb_thread_id', value: post['thread_id']).first + if tcf = TopicCustomField.where(name: "bb_thread_id", value: post["thread_id"]).first { - id: "pm#{post['id']}", - topic_id: threads[post['thread_id']][:imported_topic_id], - user_id: user_id_from_imported_user_id(post['sender_id']) || find_user_by_import_id(post['sender_id'])&.id || -1, - raw: post['message'], - created_at: post['date_sent'], + id: "pm#{post["id"]}", + topic_id: threads[post["thread_id"]][:imported_topic_id], + user_id: + user_id_from_imported_user_id(post["sender_id"]) || + find_user_by_import_id(post["sender_id"])&.id || -1, + raw: post["message"], + created_at: post["date_sent"], } else # First post of the thread { - id: "pm#{post['id']}", + id: "pm#{post["id"]}", archetype: Archetype.private_message, - user_id: user_id_from_imported_user_id(post['sender_id']) || find_user_by_import_id(post['sender_id'])&.id || -1, - title: post['subject'], - raw: post['message'], - created_at: post['date_sent'], - target_usernames: User.where(id: threads[post['thread_id']][:target_user_ids]).pluck(:username), - post_create_action: proc do |new_post| - if topic = new_post.topic - threads[post['thread_id']][:imported_topic_id] = topic.id - TopicCustomField.create(topic_id: topic.id, name: 'bb_thread_id', value: post['thread_id']) - else - puts "Error in post_create_action! Can't find topic!" - end - end + user_id: + user_id_from_imported_user_id(post["sender_id"]) || + find_user_by_import_id(post["sender_id"])&.id || -1, + title: post["subject"], + raw: post["message"], + created_at: post["date_sent"], + target_usernames: + User.where(id: threads[post["thread_id"]][:target_user_ids]).pluck(:username), + post_create_action: + proc do |new_post| + if topic = new_post.topic + threads[post["thread_id"]][:imported_topic_id] = topic.id + TopicCustomField.create( + topic_id: topic.id, + name: "bb_thread_id", + value: post["thread_id"], + ) + else + puts "Error in post_create_action! Can't find topic!" + end + end, } end end @@ -506,7 +511,6 @@ class ImportScripts::Bbpress < ImportScripts::Base def bbpress_query(sql) @client.query(sql, cache_rows: false) end - end ImportScripts::Bbpress.new.perform diff --git a/script/import_scripts/bespoke_1.rb b/script/import_scripts/bespoke_1.rb index 9ca420f319a..833f7723500 100644 --- a/script/import_scripts/bespoke_1.rb +++ b/script/import_scripts/bespoke_1.rb @@ -2,13 +2,12 @@ # bespoke importer for a customer, feel free to borrow ideas -require 'csv' +require "csv" require File.expand_path(File.dirname(__FILE__) + "/base.rb") # Call it like this: # RAILS_ENV=production bundle exec ruby script/import_scripts/bespoke_1.rb class ImportScripts::Bespoke < ImportScripts::Base - BATCH_SIZE = 1000 def initialize(path) @@ -18,9 +17,9 @@ class ImportScripts::Bespoke < ImportScripts::Base puts "loading post mappings..." 
@post_number_map = {} - Post.pluck(:id, :post_number).each do |post_id, post_number| - @post_number_map[post_id] = post_number - end + Post + .pluck(:id, :post_number) + .each { |post_id, post_number| @post_number_map[post_id] = post_number } end def created_post(post) @@ -32,7 +31,6 @@ class ImportScripts::Bespoke < ImportScripts::Base import_users import_categories import_posts - end class RowResolver @@ -45,19 +43,13 @@ class ImportScripts::Bespoke < ImportScripts::Base end def initialize(cols) - cols.each_with_index do |col, idx| - self.class.public_send(:define_method, col) do - @row[idx] - end - end + cols.each_with_index { |col, idx| self.class.public_send(:define_method, col) { @row[idx] } } end end def load_user_batch!(users, offset, total) if users.length > 0 - create_users(users, offset: offset, total: total) do |user| - user - end + create_users(users, offset: offset, total: total) { |user| user } users.clear end end @@ -70,54 +62,56 @@ class ImportScripts::Bespoke < ImportScripts::Base current_row = +"" double_quote_count = 0 - File.open(filename).each_line do |line| + File + .open(filename) + .each_line do |line| + # escaping is mental here + line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] } + line.strip! - # escaping is mental here - line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] } - line.strip! + current_row << "\n" unless current_row.empty? + current_row << line - current_row << "\n" unless current_row.empty? - current_row << line + double_quote_count += line.scan('"').count - double_quote_count += line.scan('"').count + next if double_quote_count % 2 == 1 - if double_quote_count % 2 == 1 - next - end + raw = + begin + CSV.parse(current_row) + rescue CSV::MalformedCSVError => e + puts e.message + puts "*" * 100 + puts "Bad row skipped, line is: #{line}" + puts + puts current_row + puts + puts "double quote count is : #{double_quote_count}" + puts "*" * 100 - raw = begin - CSV.parse(current_row) - rescue CSV::MalformedCSVError => e - puts e.message - puts "*" * 100 - puts "Bad row skipped, line is: #{line}" - puts - puts current_row - puts - puts "double quote count is : #{double_quote_count}" - puts "*" * 100 + current_row = "" + double_quote_count = 0 + next + end[ + 0 + ] - current_row = "" - double_quote_count = 0 - next - end[0] + if first + row = RowResolver.create(raw) - if first - row = RowResolver.create(raw) + current_row = "" + double_quote_count = 0 + first = false + next + end + + row.load(raw) + + yield row current_row = "" double_quote_count = 0 - first = false - next end - - row.load(raw) - - yield row - - current_row = "" - double_quote_count = 0 - end end def total_rows(table) @@ -133,14 +127,11 @@ class ImportScripts::Bespoke < ImportScripts::Base total = total_rows("users") csv_parse("users") do |row| - id = row.id email = row.email # fake it - if row.email.blank? || row.email !~ /@/ - email = fake_email - end + email = fake_email if row.email.blank? || row.email !~ /@/ name = row.display_name username = row.key_custom @@ -150,19 +141,10 @@ class ImportScripts::Bespoke < ImportScripts::Base username = email.split("@")[0] if username.blank? name = email.split("@")[0] if name.blank? - users << { - id: id, - email: email, - name: name, - username: username, - created_at: created_at - } + users << { id: id, email: email, name: name, username: username, created_at: created_at } count += 1 - if count % BATCH_SIZE == 0 - load_user_batch! users, count - users.length, total - end - + load_user_batch! 
users, count - users.length, total if count % BATCH_SIZE == 0 end load_user_batch! users, count, total @@ -174,22 +156,19 @@ class ImportScripts::Bespoke < ImportScripts::Base rows << { id: row.id, name: row.name, description: row.description } end - create_categories(rows) do |row| - row - end + create_categories(rows) { |row| row } end def normalize_raw!(raw) # purple and #1223f3 raw.gsub!(/\[color=[#a-z0-9]+\]/i, "") - raw.gsub!(/\[\/color\]/i, "") - raw.gsub!(/\[signature\].+\[\/signature\]/im, "") + raw.gsub!(%r{\[/color\]}i, "") + raw.gsub!(%r{\[signature\].+\[/signature\]}im, "") raw end def import_post_batch!(posts, topics, offset, total) create_posts(posts, total: total, offset: offset) do |post| - mapped = {} mapped[:id] = post[:id] @@ -223,7 +202,7 @@ class ImportScripts::Bespoke < ImportScripts::Base mapped end - posts.clear + posts.clear end def import_posts @@ -237,7 +216,7 @@ class ImportScripts::Bespoke < ImportScripts::Base category_id: topic.forum_category_id, deleted: topic.is_deleted.to_i == 1, locked: topic.is_locked.to_i == 1, - pinned: topic.is_pinned.to_i == 1 + pinned: topic.is_pinned.to_i == 1, } end @@ -246,7 +225,6 @@ class ImportScripts::Bespoke < ImportScripts::Base posts = [] count = 0 csv_parse("posts") do |row| - unless row.dcreate puts "NO CREATION DATE FOR POST" p row @@ -261,7 +239,7 @@ class ImportScripts::Bespoke < ImportScripts::Base title: row.title, body: normalize_raw!(row.body), deleted: row.is_deleted.to_i == 1, - created_at: DateTime.parse(row.dcreate) + created_at: DateTime.parse(row.dcreate), } posts << row count += 1 @@ -275,7 +253,6 @@ class ImportScripts::Bespoke < ImportScripts::Base exit end - end unless ARGV[0] && Dir.exist?(ARGV[0]) diff --git a/script/import_scripts/csv_importer.rb b/script/import_scripts/csv_importer.rb index a414373dbba..626645f5303 100644 --- a/script/import_scripts/csv_importer.rb +++ b/script/import_scripts/csv_importer.rb @@ -7,18 +7,18 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb") # Make sure to follow the right format in your CSV files. 
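# A minimal sketch of the CSV shape this importer expects: a header row, with
# columns addressed by name, read BOM-tolerantly. The file name and column set
# below are illustrative, not mandated by the script.
require "csv"

rows = CSV.parse(File.read("users.csv", encoding: "bom|utf-8"), headers: true)
rows.each { |row| puts [row["id"], row["username"], row["email"]].join(",") }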
class ImportScripts::CsvImporter < ImportScripts::Base - - CSV_FILE_PATH = ENV['CSV_USER_FILE'] || '/var/www/discourse/tmp/users.csv' - CSV_CUSTOM_FIELDS = ENV['CSV_CUSTOM_FIELDS'] || '/var/www/discourse/tmp/custom_fields.csv' - CSV_EMAILS = ENV['CSV_EMAILS'] || '/var/www/discourse/tmp/emails.csv' - CSV_CATEGORIES = ENV['CSV_CATEGORIES'] || '/var/www/discourse/tmp/categories.csv' - CSV_TOPICS = ENV['CSV_TOPICS'] || '/var/www/discourse/tmp/topics_new_users.csv' - CSV_TOPICS_EXISTING_USERS = ENV['CSV_TOPICS'] || '/var/www/discourse/tmp/topics_existing_users.csv' - IMPORT_PREFIX = ENV['IMPORT_PREFIX'] || '2022-08-11' - IMPORT_USER_ID_PREFIX = 'csv-user-import-' + IMPORT_PREFIX + '-' - IMPORT_CATEGORY_ID_PREFIX = 'csv-category-import-' + IMPORT_PREFIX + '-' - IMPORT_TOPIC_ID_PREFIX = 'csv-topic-import-' + IMPORT_PREFIX + '-' - IMPORT_TOPIC_ID_EXISITNG_PREFIX = 'csv-topic_existing-import-' + IMPORT_PREFIX + '-' + CSV_FILE_PATH = ENV["CSV_USER_FILE"] || "/var/www/discourse/tmp/users.csv" + CSV_CUSTOM_FIELDS = ENV["CSV_CUSTOM_FIELDS"] || "/var/www/discourse/tmp/custom_fields.csv" + CSV_EMAILS = ENV["CSV_EMAILS"] || "/var/www/discourse/tmp/emails.csv" + CSV_CATEGORIES = ENV["CSV_CATEGORIES"] || "/var/www/discourse/tmp/categories.csv" + CSV_TOPICS = ENV["CSV_TOPICS"] || "/var/www/discourse/tmp/topics_new_users.csv" + CSV_TOPICS_EXISTING_USERS = + ENV["CSV_TOPICS"] || "/var/www/discourse/tmp/topics_existing_users.csv" + IMPORT_PREFIX = ENV["IMPORT_PREFIX"] || "2022-08-11" + IMPORT_USER_ID_PREFIX = "csv-user-import-" + IMPORT_PREFIX + "-" + IMPORT_CATEGORY_ID_PREFIX = "csv-category-import-" + IMPORT_PREFIX + "-" + IMPORT_TOPIC_ID_PREFIX = "csv-topic-import-" + IMPORT_PREFIX + "-" + IMPORT_TOPIC_ID_EXISITNG_PREFIX = "csv-topic_existing-import-" + IMPORT_PREFIX + "-" def initialize super @@ -49,25 +49,19 @@ class ImportScripts::CsvImporter < ImportScripts::Base return nil end - CSV.parse(File.read(path, encoding: 'bom|utf-8'), headers: true) + CSV.parse(File.read(path, encoding: "bom|utf-8"), headers: true) end def username_for(name) - result = name.downcase.gsub(/[^a-z0-9\-\_]/, '') - if result.blank? - result = Digest::SHA1.hexdigest(name)[0...10] - end + result = name.downcase.gsub(/[^a-z0-9\-\_]/, "") + result = Digest::SHA1.hexdigest(name)[0...10] if result.blank? result end def get_email(id) email = nil - @imported_emails.each do |e| - if e["user_id"] == id - email = e["email"] - end - end + @imported_emails.each { |e| email = e["email"] if e["user_id"] == id } email end @@ -76,9 +70,7 @@ class ImportScripts::CsvImporter < ImportScripts::Base custom_fields = {} @imported_custom_fields.each do |cf| if cf["user_id"] == id - @imported_custom_fields_names.each do |name| - custom_fields[name] = cf[name] - end + @imported_custom_fields_names.each { |name| custom_fields[name] = cf[name] } end end @@ -86,98 +78,95 @@ class ImportScripts::CsvImporter < ImportScripts::Base end def import_users - puts '', "Importing users" + puts "", "Importing users" users = [] @imported_users.each do |u| - email = get_email(u['id']) - custom_fields = get_custom_fields(u['id']) - u['email'] = email - u['custom_fields'] = custom_fields - u['id'] = IMPORT_USER_ID_PREFIX + u['id'] + email = get_email(u["id"]) + custom_fields = get_custom_fields(u["id"]) + u["email"] = email + u["custom_fields"] = custom_fields + u["id"] = IMPORT_USER_ID_PREFIX + u["id"] users << u end users.uniq! 
create_users(users) do |u| { - id: u['id'], - username: u['username'], - email: u['email'], - created_at: u['created_at'], - custom_fields: u['custom_fields'], + id: u["id"], + username: u["username"], + email: u["email"], + created_at: u["created_at"], + custom_fields: u["custom_fields"], } end end def import_categories - puts '', "Importing categories" + puts "", "Importing categories" categories = [] @imported_categories.each do |c| - c['user_id'] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + c['user_id']) || Discourse::SYSTEM_USER_ID - c['id'] = IMPORT_CATEGORY_ID_PREFIX + c['id'] + c["user_id"] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + c["user_id"]) || + Discourse::SYSTEM_USER_ID + c["id"] = IMPORT_CATEGORY_ID_PREFIX + c["id"] categories << c end categories.uniq! create_categories(categories) do |c| - { - id: c['id'], - user_id: c['user_id'], - name: c['name'], - description: c['description'] - } + { id: c["id"], user_id: c["user_id"], name: c["name"], description: c["description"] } end end def import_topics - puts '', "Importing topics" + puts "", "Importing topics" topics = [] @imported_topics.each do |t| - t['user_id'] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + t['user_id']) || Discourse::SYSTEM_USER_ID - t['category_id'] = category_id_from_imported_category_id(IMPORT_CATEGORY_ID_PREFIX + t['category_id']) - t['id'] = IMPORT_TOPIC_ID_PREFIX + t['id'] + t["user_id"] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + t["user_id"]) || + Discourse::SYSTEM_USER_ID + t["category_id"] = category_id_from_imported_category_id( + IMPORT_CATEGORY_ID_PREFIX + t["category_id"], + ) + t["id"] = IMPORT_TOPIC_ID_PREFIX + t["id"] topics << t end create_posts(topics) do |t| { - id: t['id'], - user_id: t['user_id'], - title: t['title'], - category: t['category_id'], - raw: t['raw'] + id: t["id"], + user_id: t["user_id"], + title: t["title"], + category: t["category_id"], + raw: t["raw"], } end end def import_topics_existing_users # Import topics for users that already existed in the DB, not imported during this migration - puts '', "Importing topics for existing users" + puts "", "Importing topics for existing users" topics = [] @imported_topics_existing_users.each do |t| - t['id'] = IMPORT_TOPIC_ID_EXISITNG_PREFIX + t['id'] + t["id"] = IMPORT_TOPIC_ID_EXISITNG_PREFIX + t["id"] topics << t end create_posts(topics) do |t| { - id: t['id'], - user_id: t['user_id'], # This is a Discourse user ID - title: t['title'], - category: t['category_id'], # This is a Discourse category ID - raw: t['raw'] + id: t["id"], + user_id: t["user_id"], # This is a Discourse user ID + title: t["title"], + category: t["category_id"], # This is a Discourse category ID + raw: t["raw"], } end end end -if __FILE__ == $0 - ImportScripts::CsvImporter.new.perform -end +ImportScripts::CsvImporter.new.perform if __FILE__ == $0 # == CSV files format # diff --git a/script/import_scripts/csv_restore_staged_users.rb b/script/import_scripts/csv_restore_staged_users.rb index 2145bcc3518..314004b8807 100755 --- a/script/import_scripts/csv_restore_staged_users.rb +++ b/script/import_scripts/csv_restore_staged_users.rb @@ -6,10 +6,9 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb") # Edit the constants and initialize method for your import data. 
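# Emails and custom fields arrive in separate CSVs keyed by "user_id" and are
# joined back onto each user row before create_users runs. A rough sketch of
# that join, using a Hash index instead of the script's linear scans; the
# sample data is illustrative.
emails = [{ "user_id" => "1", "email" => "a@example.com" }]
email_by_user = emails.to_h { |e| [e["user_id"], e["email"]] }
email_by_user["1"] # => "a@example.com"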
class ImportScripts::CsvRestoreStagedUsers < ImportScripts::Base - - CSV_FILE_PATH = ENV['CSV_USER_FILE'] - CSV_CUSTOM_FIELDS = ENV['CSV_CUSTOM_FIELDS'] - CSV_EMAILS = ENV['CSV_EMAILS'] + CSV_FILE_PATH = ENV["CSV_USER_FILE"] + CSV_CUSTOM_FIELDS = ENV["CSV_CUSTOM_FIELDS"] + CSV_EMAILS = ENV["CSV_EMAILS"] BATCH_SIZE ||= 1000 @@ -35,62 +34,51 @@ class ImportScripts::CsvRestoreStagedUsers < ImportScripts::Base end def username_for(name) - result = name.downcase.gsub(/[^a-z0-9\-\_]/, '') + result = name.downcase.gsub(/[^a-z0-9\-\_]/, "") - if result.blank? - result = Digest::SHA1.hexdigest(name)[0...10] - end + result = Digest::SHA1.hexdigest(name)[0...10] if result.blank? result end def get_email(id) email = nil - @imported_emails.each do |e| - if e["user_id"] == id - email = e["email"] - end - end + @imported_emails.each { |e| email = e["email"] if e["user_id"] == id } email end def get_custom_fields(id) custom_fields = {} @imported_custom_fields.each do |cf| - if cf["user_id"] == id - custom_fields[cf["name"]] = cf["value"] - end + custom_fields[cf["name"]] = cf["value"] if cf["user_id"] == id end custom_fields end def import_users - puts '', "Importing users" + puts "", "Importing users" users = [] @imported_users.each do |u| - email = get_email(u['id']) - custom_fields = get_custom_fields(u['id']) - u['email'] = email - u['custom_fields'] = custom_fields + email = get_email(u["id"]) + custom_fields = get_custom_fields(u["id"]) + u["email"] = email + u["custom_fields"] = custom_fields users << u end users.uniq! create_users(users) do |u| { - id: u['id'], - username: u['username'], - email: u['email'], - created_at: u['created_at'], - staged: u['staged'], - custom_fields: u['custom_fields'], + id: u["id"], + username: u["username"], + email: u["email"], + created_at: u["created_at"], + staged: u["staged"], + custom_fields: u["custom_fields"], } end end - end -if __FILE__ == $0 - ImportScripts::CsvRestoreStagedUsers.new.perform -end +ImportScripts::CsvRestoreStagedUsers.new.perform if __FILE__ == $0 diff --git a/script/import_scripts/discuz_x.rb b/script/import_scripts/discuz_x.rb index 1b3cb5b8cbc..df6a28c2af1 100644 --- a/script/import_scripts/discuz_x.rb +++ b/script/import_scripts/discuz_x.rb @@ -9,48 +9,47 @@ # This script is tested only on Simplified Chinese Discuz! X instances # If you want to import data other than Simplified Chinese, email me. -require 'php_serialize' -require 'miro' -require 'mysql2' +require "php_serialize" +require "miro" +require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::DiscuzX < ImportScripts::Base - DISCUZX_DB = "ultrax" - DB_TABLE_PREFIX = 'pre_' + DB_TABLE_PREFIX = "pre_" BATCH_SIZE = 1000 ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s):// - NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https:// + NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https:// # Set DISCUZX_BASE_DIR to the base directory of your discuz installation. 
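# UCenter shards avatar files by zero-padding the uid to nine digits and
# splitting it into nested directories; discuzx_avatar_fullpath below resolves
# that layout. A sketch of the scheme (the "_avatar_big.jpg" suffix is the
# common UCenter variant and is an assumption here):
padded = 12_345.to_s.rjust(9, "0")                # => "000012345"
dirs = [padded[0..2], padded[3..4], padded[5..6]] # => ["000", "01", "23"]
File.join("/uc_server/data/avatar", *dirs, "#{padded[-2..-1]}_avatar_big.jpg")
# => "/uc_server/data/avatar/000/01/23/45_avatar_big.jpg"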
-  DISCUZX_BASE_DIR = '/var/www/discuz/upload'
-  AVATAR_DIR = '/uc_server/data/avatar'
-  ATTACHMENT_DIR = '/data/attachment/forum'
-  AUTHORIZED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'zip', 'rar', 'pdf']
+  DISCUZX_BASE_DIR = "/var/www/discuz/upload"
+  AVATAR_DIR = "/uc_server/data/avatar"
+  ATTACHMENT_DIR = "/data/attachment/forum"
+  AUTHORIZED_EXTENSIONS = %w[jpg jpeg png gif zip rar pdf]
 
   def initialize
     super
 
-    @client = Mysql2::Client.new(
-      host: "localhost",
-      username: "root",
-      #password: "password",
-      database: DISCUZX_DB
-    )
+    @client =
+      Mysql2::Client.new(
+        host: "localhost",
+        username: "root",
+        #password: "password",
+        database: DISCUZX_DB,
+      )
 
     @first_post_id_by_topic_id = {}
 
     @internal_url_regexps = [
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=viewthread(?:&amp;|&)tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/viewthread\.php\?tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=redirect(?:&amp;|&)goto=findpost(?:&amp;|&)pid=(?<pid>\d+)(?:&amp;|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/redirect\.php\?goto=findpost(?:&amp;|&)pid=(?<pid>\d+)(?:&amp;|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forumdisplay\.php\?fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=forumdisplay(?:&amp;|&)fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/(?<action>index)\.php(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/(?<action>stats)\.php(?:[^\[\]\s]*)/,
-      /http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/misc.php\?mod=(?<action>stat|ranklist)(?:[^\[\]\s]*)/
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\.php\?mod=viewthread(?:&amp;|&)tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/viewthread\.php\?tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\.php\?mod=redirect(?:&amp;|&)goto=findpost(?:&amp;|&)pid=(?<pid>\d+)(?:&amp;|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/redirect\.php\?goto=findpost(?:&amp;|&)pid=(?<pid>\d+)(?:&amp;|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forumdisplay\.php\?fid=(?<fid>\d+)(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\.php\?mod=forumdisplay(?:&amp;|&)fid=(?<fid>\d+)(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/(?<action>index)\.php(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/(?<action>stats)\.php(?:[^\[\]\s]*)},
+      %r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/misc.php\?mod=(?<action>stat|ranklist)(?:[^\[\]\s]*)},
     ]
-
   end
 
   def execute
@@ -69,75 +68,84 @@ class ImportScripts::DiscuzX < ImportScripts::Base
   # find which group members can be granted as admin
   def get_knowledge_about_group
-    group_table = table_name 'common_usergroup'
-    result = mysql_query(
-      "SELECT groupid group_id, radminid role_id
-      FROM #{group_table};")
+    group_table = table_name "common_usergroup"
+    result =
+      mysql_query(
+        "SELECT groupid group_id, radminid role_id
      FROM #{group_table};",
+      )
     @moderator_group_id = []
     @admin_group_id = []
     #@banned_group_id = [4,5] # 禁止的用户及其帖子均不导入,如果你想导入这些用户和帖子,请把这个数组清空。
 
     result.each do |group|
-      case group['role_id']
+      case group["role_id"]
       when 1 # 管理员
-
@admin_group_id << group['group_id'] - when 2, 3 # 超级版主、版主。如果你不希望原普通版主成为Discourse版主,把3去掉。 - @moderator_group_id << group['group_id'] + @admin_group_id << group["group_id"] + when 2, + 3 # 超级版主、版主。如果你不希望原普通版主成为Discourse版主,把3去掉。 + @moderator_group_id << group["group_id"] end end end def get_knowledge_about_category_slug @category_slug = {} - results = mysql_query("SELECT svalue value - FROM #{table_name 'common_setting'} - WHERE skey = 'forumkeys'") + results = + mysql_query( + "SELECT svalue value + FROM #{table_name "common_setting"} + WHERE skey = 'forumkeys'", + ) return if results.size < 1 - value = results.first['value'] + value = results.first["value"] return if value.blank? - PHP.unserialize(value).each do |category_import_id, slug| - next if slug.blank? - @category_slug[category_import_id] = slug - end + PHP + .unserialize(value) + .each do |category_import_id, slug| + next if slug.blank? + @category_slug[category_import_id] = slug + end end def get_knowledge_about_duplicated_email @duplicated_email = {} - results = mysql_query( - "select a.uid uid, b.uid import_id from pre_common_member a + results = + mysql_query( + "select a.uid uid, b.uid import_id from pre_common_member a join (select uid, email from pre_common_member group by email having count(email) > 1 order by uid asc) b USING(email) - where a.uid != b.uid") + where a.uid != b.uid", + ) users = @lookup.instance_variable_get :@users results.each do |row| - @duplicated_email[row['uid']] = row['import_id'] - user_id = users[row['import_id']] - if user_id - users[row['uid']] = user_id - end + @duplicated_email[row["uid"]] = row["import_id"] + user_id = users[row["import_id"]] + users[row["uid"]] = user_id if user_id end end def import_users - puts '', "creating users" + puts "", "creating users" get_knowledge_about_group - sensitive_user_table = table_name 'ucenter_members' - user_table = table_name 'common_member' - profile_table = table_name 'common_member_profile' - status_table = table_name 'common_member_status' - forum_table = table_name 'common_member_field_forum' - home_table = table_name 'common_member_field_home' - total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first['count'] + sensitive_user_table = table_name "ucenter_members" + user_table = table_name "common_member" + profile_table = table_name "common_member_profile" + status_table = table_name "common_member_status" + forum_table = table_name "common_member_field_forum" + home_table = table_name "common_member_field_home" + total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT u.uid id, u.username username, u.email email, u.groupid group_id, + results = + mysql_query( + "SELECT u.uid id, u.username username, u.email email, u.groupid group_id, su.regdate regdate, su.password password_hash, su.salt salt, s.regip regip, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, s.lastsendmail last_emailed_at, u.emailstatus email_confirmed, u.avatarstatus avatar_exists, @@ -154,7 +162,8 @@ class ImportScripts::DiscuzX < ImportScripts::Base LEFT JOIN #{home_table} h USING(uid) ORDER BY u.uid ASC LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 @@ -162,147 +171,233 @@ class ImportScripts::DiscuzX < ImportScripts::Base # next if all_records_exist? 
:users, users.map {|u| u["id"].to_i} create_users(results, total: total_count, offset: offset) do |user| - { id: user['id'], - email: user['email'], - username: user['username'], - name: first_exists(user['realname'], user['customstatus'], user['username']), - import_pass: user['password_hash'], + { + id: user["id"], + email: user["email"], + username: user["username"], + name: first_exists(user["realname"], user["customstatus"], user["username"]), + import_pass: user["password_hash"], active: true, - salt: user['salt'], + salt: user["salt"], # TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626 - created_at: user['regdate'] ? Time.zone.at(user['regdate']) : nil, - registration_ip_address: user['regip'], - ip_address: user['last_visit_ip'], - last_seen_at: user['last_visit_time'], - last_emailed_at: user['last_emailed_at'], - last_posted_at: user['last_posted_at'], - moderator: @moderator_group_id.include?(user['group_id']), - admin: @admin_group_id.include?(user['group_id']), - website: (user['website'] && user['website'].include?('.')) ? user['website'].strip : (user['qq'] && user['qq'].strip == (user['qq'].strip.to_i) && user['qq'].strip.to_i > (10000)) ? 'http://user.qzone.qq.com/' + user['qq'].strip : nil, - bio_raw: first_exists((user['bio'] && CGI.unescapeHTML(user['bio'])), user['sightml'], user['spacenote']).strip[0, 3000], - location: first_exists(user['address'], (!user['resideprovince'].blank? ? [user['resideprovince'], user['residecity'], user['residedist'], user['residecommunity']] : [user['birthprovince'], user['birthcity'], user['birthdist'], user['birthcommunity']]).reject { |location|location.blank? }.join(' ')), - post_create_action: lambda do |newmember| - if user['avatar_exists'] == (1) && newmember.uploaded_avatar_id.blank? - path, filename = discuzx_avatar_fullpath(user['id']) - if path - begin - upload = create_upload(newmember.id, path, filename) - if !upload.nil? && upload.persisted? - newmember.import_mode = false - newmember.create_user_avatar - newmember.import_mode = true - newmember.user_avatar.update(custom_upload_id: upload.id) - newmember.update(uploaded_avatar_id: upload.id) - else - puts "Error: Upload did not persist!" + created_at: user["regdate"] ? Time.zone.at(user["regdate"]) : nil, + registration_ip_address: user["regip"], + ip_address: user["last_visit_ip"], + last_seen_at: user["last_visit_time"], + last_emailed_at: user["last_emailed_at"], + last_posted_at: user["last_posted_at"], + moderator: @moderator_group_id.include?(user["group_id"]), + admin: @admin_group_id.include?(user["group_id"]), + website: + (user["website"] && user["website"].include?(".")) ? + user["website"].strip : + if ( + user["qq"] && user["qq"].strip == (user["qq"].strip.to_i) && + user["qq"].strip.to_i > (10_000) + ) + "http://user.qzone.qq.com/" + user["qq"].strip + else + nil + end, + bio_raw: + first_exists( + (user["bio"] && CGI.unescapeHTML(user["bio"])), + user["sightml"], + user["spacenote"], + ).strip[ + 0, + 3000 + ], + location: + first_exists( + user["address"], + ( + if !user["resideprovince"].blank? + [ + user["resideprovince"], + user["residecity"], + user["residedist"], + user["residecommunity"], + ] + else + [ + user["birthprovince"], + user["birthcity"], + user["birthdist"], + user["birthcommunity"], + ] + end + ).reject { |location| location.blank? 
}.join(" "), + ), + post_create_action: + lambda do |newmember| + if user["avatar_exists"] == (1) && newmember.uploaded_avatar_id.blank? + path, filename = discuzx_avatar_fullpath(user["id"]) + if path + begin + upload = create_upload(newmember.id, path, filename) + if !upload.nil? && upload.persisted? + newmember.import_mode = false + newmember.create_user_avatar + newmember.import_mode = true + newmember.user_avatar.update(custom_upload_id: upload.id) + newmember.update(uploaded_avatar_id: upload.id) + else + puts "Error: Upload did not persist!" + end + rescue SystemCallError => err + puts "Could not import avatar: #{err.message}" end - rescue SystemCallError => err - puts "Could not import avatar: #{err.message}" end end - end - if !user['spacecss'].blank? && newmember.user_profile.profile_background_upload.blank? - # profile background - if matched = user['spacecss'].match(/body\s*{[^}]*url\('?(.+?)'?\)/i) - body_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last - end - if matched = user['spacecss'].match(/#hd\s*{[^}]*url\('?(.+?)'?\)/i) - header_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last - end - if matched = user['spacecss'].match(/.blocktitle\s*{[^}]*url\('?(.+?)'?\)/i) - blocktitle_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last - end - if matched = user['spacecss'].match(/#ct\s*{[^}]*url\('?(.+?)'?\)/i) - content_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last + if !user["spacecss"].blank? && newmember.user_profile.profile_background_upload.blank? + # profile background + if matched = user["spacecss"].match(/body\s*{[^}]*url\('?(.+?)'?\)/i) + body_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last + end + if matched = user["spacecss"].match(/#hd\s*{[^}]*url\('?(.+?)'?\)/i) + header_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last + end + if matched = user["spacecss"].match(/.blocktitle\s*{[^}]*url\('?(.+?)'?\)/i) + blocktitle_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last + end + if matched = user["spacecss"].match(/#ct\s*{[^}]*url\('?(.+?)'?\)/i) + content_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last + end + + if body_background || header_background || blocktitle_background || + content_background + profile_background = + first_exists( + header_background, + body_background, + content_background, + blocktitle_background, + ) + card_background = + first_exists( + content_background, + body_background, + header_background, + blocktitle_background, + ) + upload = + create_upload( + newmember.id, + File.join(DISCUZX_BASE_DIR, profile_background), + File.basename(profile_background), + ) + if upload + newmember.user_profile.upload_profile_background upload + else + puts "WARNING: #{user["username"]} (UID: #{user["id"]}) profile_background file did not persist!" + end + upload = + create_upload( + newmember.id, + File.join(DISCUZX_BASE_DIR, card_background), + File.basename(card_background), + ) + if upload + newmember.user_profile.upload_card_background upload + else + puts "WARNING: #{user["username"]} (UID: #{user["id"]}) card_background file did not persist!" 
+ end + end end - if body_background || header_background || blocktitle_background || content_background - profile_background = first_exists(header_background, body_background, content_background, blocktitle_background) - card_background = first_exists(content_background, body_background, header_background, blocktitle_background) - upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, profile_background), File.basename(profile_background)) - if upload - newmember.user_profile.upload_profile_background upload - else - puts "WARNING: #{user['username']} (UID: #{user['id']}) profile_background file did not persist!" - end - upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, card_background), File.basename(card_background)) - if upload - newmember.user_profile.upload_card_background upload - else - puts "WARNING: #{user['username']} (UID: #{user['id']}) card_background file did not persist!" - end + # we don't send email to the unconfirmed user + if newmember.email_digests + newmember.update(email_digests: user["email_confirmed"] == 1) end - end - - # we don't send email to the unconfirmed user - newmember.update(email_digests: user['email_confirmed'] == 1) if newmember.email_digests - newmember.update(name: '') if !newmember.name.blank? && newmember.name == (newmember.username) - end + if !newmember.name.blank? && newmember.name == (newmember.username) + newmember.update(name: "") + end + end, } end end end def import_categories - puts '', "creating categories" + puts "", "creating categories" get_knowledge_about_category_slug - forums_table = table_name 'forum_forum' - forums_data_table = table_name 'forum_forumfield' + forums_table = table_name "forum_forum" + forums_data_table = table_name "forum_forumfield" - results = mysql_query(" + results = + mysql_query( + " SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position, d.description description, d.rules rules, d.icon, d.extra extra FROM #{forums_table} f LEFT JOIN #{forums_data_table} d USING(fid) ORDER BY parent_id ASC, id ASC - ") + ", + ) max_position = Category.all.max_by(&:position).position create_categories(results) do |row| - next if row['type'] == ('group') || row['status'] == (2) # or row['status'].to_i == 3 # 如果不想导入群组,取消注释 - extra = PHP.unserialize(row['extra']) if !row['extra'].blank? - if extra && !extra["namecolor"].blank? - color = extra["namecolor"][1, 6] - end + next if row["type"] == ("group") || row["status"] == (2) # or row['status'].to_i == 3 # 如果不想导入群组,取消注释 + extra = PHP.unserialize(row["extra"]) if !row["extra"].blank? + color = extra["namecolor"][1, 6] if extra && !extra["namecolor"].blank? Category.all.max_by(&:position).position h = { - id: row['id'], - name: row['name'], - description: row['description'], - position: row['position'].to_i + max_position, + id: row["id"], + name: row["name"], + description: row["description"], + position: row["position"].to_i + max_position, color: color, - post_create_action: lambda do |category| - if slug = @category_slug[row['id']] - category.update(slug: slug) - end - - raw = process_discuzx_post(row['rules'], nil) - if @bbcode_to_md - raw = raw.bbcode_to_md(false) rescue raw - end - category.topic.posts.first.update_attribute(:raw, raw) - if !row['icon'].empty? 
- upload = create_upload(Discourse::SYSTEM_USER_ID, File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, '../common', row['icon']), File.basename(row['icon'])) - if upload - category.uploaded_logo_id = upload.id - # FIXME: I don't know how to get '/shared' by script. May change to Rails.root - category.color = Miro::DominantColors.new(File.join('/shared', upload.url)).to_hex.first[1, 6] if !color - category.save! + post_create_action: + lambda do |category| + if slug = @category_slug[row["id"]] + category.update(slug: slug) end - end - if row['status'] == (0) || row['status'] == (3) - SiteSetting.default_categories_muted = [SiteSetting.default_categories_muted, category.id].reject(&:blank?).join("|") - end - category - end + raw = process_discuzx_post(row["rules"], nil) + if @bbcode_to_md + raw = + begin + raw.bbcode_to_md(false) + rescue StandardError + raw + end + end + category.topic.posts.first.update_attribute(:raw, raw) + if !row["icon"].empty? + upload = + create_upload( + Discourse::SYSTEM_USER_ID, + File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, "../common", row["icon"]), + File.basename(row["icon"]), + ) + if upload + category.uploaded_logo_id = upload.id + # FIXME: I don't know how to get '/shared' by script. May change to Rails.root + category.color = + Miro::DominantColors.new(File.join("/shared", upload.url)).to_hex.first[ + 1, + 6 + ] if !color + category.save! + end + end + + if row["status"] == (0) || row["status"] == (3) + SiteSetting.default_categories_muted = [ + SiteSetting.default_categories_muted, + category.id, + ].reject(&:blank?).join("|") + end + category + end, } - if row['parent_id'].to_i > 0 - h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id']) + if row["parent_id"].to_i > 0 + h[:parent_category_id] = category_id_from_imported_category_id(row["parent_id"]) end h end @@ -311,14 +406,16 @@ class ImportScripts::DiscuzX < ImportScripts::Base def import_posts puts "", "creating topics and posts" - users_table = table_name 'common_member' - posts_table = table_name 'forum_post' - topics_table = table_name 'forum_thread' + users_table = table_name "common_member" + posts_table = table_name "forum_post" + topics_table = table_name "forum_thread" - total_count = mysql_query("SELECT count(*) count FROM #{posts_table}").first['count'] + total_count = mysql_query("SELECT count(*) count FROM #{posts_table}").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT p.pid id, p.tid topic_id, t.fid category_id, @@ -336,7 +433,8 @@ class ImportScripts::DiscuzX < ImportScripts::Base ORDER BY id ASC, topic_id ASC LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ") + ", + ) # u.status != -1 AND u.groupid != 4 AND u.groupid != 5 用户未被锁定、禁访或禁言。在现实中的 Discuz 论坛,禁止的用户通常是广告机或驱逐的用户,这些不需要导入。 break if results.size < 1 @@ -346,63 +444,70 @@ class ImportScripts::DiscuzX < ImportScripts::Base skip = false mapped = {} - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_discuzx_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['post_time']) - mapped[:tags] = m['tags'] + mapped[:id] = m["id"] + mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1 + mapped[:raw] = process_discuzx_post(m["raw"], m["id"]) + mapped[:created_at] = Time.zone.at(m["post_time"]) + mapped[:tags] = m["tags"] - if m['id'] == m['first_id'] - mapped[:category] = category_id_from_imported_category_id(m['category_id']) - mapped[:title] = CGI.unescapeHTML(m['title']) + if 
m["id"] == m["first_id"] + mapped[:category] = category_id_from_imported_category_id(m["category_id"]) + mapped[:title] = CGI.unescapeHTML(m["title"]) - if m['special'] == 1 - results = mysql_query(" + if m["special"] == 1 + results = + mysql_query( + " SELECT multiple, maxchoices - FROM #{table_name 'forum_poll'} - WHERE tid = #{m['topic_id']}") + FROM #{table_name "forum_poll"} + WHERE tid = #{m["topic_id"]}", + ) poll = results.first || {} - results = mysql_query(" + results = + mysql_query( + " SELECT polloption - FROM #{table_name 'forum_polloption'} - WHERE tid = #{m['topic_id']} - ORDER BY displayorder") + FROM #{table_name "forum_polloption"} + WHERE tid = #{m["topic_id"]} + ORDER BY displayorder", + ) if results.empty? - puts "WARNING: can't find poll options for topic #{m['topic_id']}, skip poll" + puts "WARNING: can't find poll options for topic #{m["topic_id"]}, skip poll" else - mapped[:raw].prepend "[poll#{poll['multiple'] ? ' type=multiple' : ''}#{poll['maxchoices'] > 0 ? " max=#{poll['maxchoices']}" : ''}]\n#{results.map { |option|'- ' + option['polloption'] }.join("\n")}\n[/poll]\n" + mapped[ + :raw + ].prepend "[poll#{poll["multiple"] ? " type=multiple" : ""}#{poll["maxchoices"] > 0 ? " max=#{poll["maxchoices"]}" : ""}]\n#{results.map { |option| "- " + option["polloption"] }.join("\n")}\n[/poll]\n" end end else - parent = topic_lookup_from_imported_post_id(m['first_id']) + parent = topic_lookup_from_imported_post_id(m["first_id"]) if parent mapped[:topic_id] = parent[:topic_id] - reply_post_import_id = find_post_id_by_quote_number(m['raw']) + reply_post_import_id = find_post_id_by_quote_number(m["raw"]) if reply_post_import_id post_id = post_id_from_imported_post_id(reply_post_import_id.to_i) if (post = Post.find_by(id: post_id)) if post.topic_id == mapped[:topic_id] mapped[:reply_to_post_number] = post.post_number else - puts "post #{m['id']} reply to another topic, skip reply" + puts "post #{m["id"]} reply to another topic, skip reply" end else - puts "post #{m['id']} reply to not exists post #{reply_post_import_id}, skip reply" + puts "post #{m["id"]} reply to not exists post #{reply_post_import_id}, skip reply" end end else - puts "Parent topic #{m['topic_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}" + puts "Parent topic #{m["topic_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" skip = true end - end - if m['status'] & 1 == 1 || mapped[:raw].blank? + if m["status"] & 1 == 1 || mapped[:raw].blank? 
mapped[:post_create_action] = lambda do |action_post| PostDestroyer.new(Discourse.system_user, action_post).perform_delete end - elsif (m['status'] & 2) >> 1 == 1 # waiting for approve + elsif (m["status"] & 2) >> 1 == 1 # waiting for approve mapped[:post_create_action] = lambda do |action_post| PostActionCreator.notify_user(Discourse.system_user, action_post) end @@ -413,42 +518,47 @@ class ImportScripts::DiscuzX < ImportScripts::Base end def import_bookmarks - puts '', 'creating bookmarks' - favorites_table = table_name 'home_favorite' - posts_table = table_name 'forum_post' + puts "", "creating bookmarks" + favorites_table = table_name "home_favorite" + posts_table = table_name "forum_post" - total_count = mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first['count'] + total_count = + mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first[ + "count" + ] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT p.pid post_id, f.uid user_id FROM #{favorites_table} f JOIN #{posts_table} p ON f.id = p.tid WHERE f.idtype = 'tid' AND p.first LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 # next if all_records_exist? create_bookmarks(results, total: total_count, offset: offset) do |row| - { - user_id: row['user_id'], - post_id: row['post_id'] - } + { user_id: row["user_id"], post_id: row["post_id"] } end end end def import_private_messages - puts '', 'creating private messages' + puts "", "creating private messages" - pm_indexes = table_name 'ucenter_pm_indexes' - pm_messages = table_name 'ucenter_pm_messages' - total_count = mysql_query("SELECT count(*) count FROM #{pm_indexes}").first['count'] + pm_indexes = table_name "ucenter_pm_indexes" + pm_messages = table_name "ucenter_pm_messages" + total_count = mysql_query("SELECT count(*) count FROM #{pm_indexes}").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at FROM #{pm_messages}_1 UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at @@ -469,7 +579,8 @@ class ImportScripts::DiscuzX < ImportScripts::Base FROM #{pm_messages}_9 ORDER BY thread_id ASC, id ASC LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 @@ -479,35 +590,47 @@ class ImportScripts::DiscuzX < ImportScripts::Base skip = false mapped = {} - mapped[:id] = "pm:#{m['id']}" - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_discuzx_post(m['message'], m['id']) - mapped[:created_at] = Time.zone.at(m['created_at']) - thread_id = "pm_#{m['thread_id']}" + mapped[:id] = "pm:#{m["id"]}" + mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1 + mapped[:raw] = process_discuzx_post(m["message"], m["id"]) + mapped[:created_at] = Time.zone.at(m["created_at"]) + thread_id = "pm_#{m["thread_id"]}" - if is_first_pm(m['id'], m['thread_id']) + if is_first_pm(m["id"], m["thread_id"]) # find the title from list table - pm_thread = mysql_query(" + pm_thread = + mysql_query( + " SELECT plid thread_id, subject - FROM #{table_name 'ucenter_pm_lists'} - WHERE plid = #{m['thread_id']};").first - mapped[:title] = pm_thread['subject'] + FROM #{table_name "ucenter_pm_lists"} + WHERE plid = #{m["thread_id"]};", + ).first + mapped[:title] = pm_thread["subject"] mapped[:archetype] = 
Archetype.private_message # Find the users who are part of this private message. - import_user_ids = mysql_query(" + import_user_ids = + mysql_query( + " SELECT plid thread_id, uid user_id - FROM #{table_name 'ucenter_pm_members'} - WHERE plid = #{m['thread_id']}; - ").map { |r| r['user_id'] }.uniq + FROM #{table_name "ucenter_pm_members"} + WHERE plid = #{m["thread_id"]}; + ", + ).map { |r| r["user_id"] }.uniq - mapped[:target_usernames] = import_user_ids.map! do |import_user_id| - import_user_id.to_s == m['user_id'].to_s ? nil : User.find_by(id: user_id_from_imported_user_id(import_user_id)).try(:username) - end.compact + mapped[:target_usernames] = import_user_ids + .map! do |import_user_id| + if import_user_id.to_s == m["user_id"].to_s + nil + else + User.find_by(id: user_id_from_imported_user_id(import_user_id)).try(:username) + end + end + .compact if mapped[:target_usernames].empty? # pm with yourself? skip = true - puts "Skipping pm:#{m['id']} due to no target" + puts "Skipping pm:#{m["id"]} due to no target" else @first_post_id_by_topic_id[thread_id] = mapped[:id] end @@ -523,22 +646,24 @@ class ImportScripts::DiscuzX < ImportScripts::Base skip ? nil : mapped end - end end # search for first pm id for the series of pm def is_first_pm(pm_id, thread_id) - result = mysql_query(" + result = + mysql_query( + " SELECT pmid id - FROM #{table_name 'ucenter_pm_indexes'} + FROM #{table_name "ucenter_pm_indexes"} WHERE plid = #{thread_id} - ORDER BY id") - result.first['id'].to_s == pm_id.to_s + ORDER BY id", + ) + result.first["id"].to_s == pm_id.to_s end def process_and_upload_inline_images(raw) - inline_image_regex = /\[img\]([\s\S]*?)\[\/img\]/ + inline_image_regex = %r{\[img\]([\s\S]*?)\[/img\]} s = raw.dup @@ -549,7 +674,6 @@ class ImportScripts::DiscuzX < ImportScripts::Base upload, filename = upload_inline_image data upload ? html_for_upload(upload, filename) : nil end - end def process_discuzx_post(raw, import_id) @@ -559,10 +683,18 @@ class ImportScripts::DiscuzX < ImportScripts::Base # Strip the quote # [quote] quotation includes the topic which is the same as reply to in Discourse # We get the pid to find the post number the post reply to. So it can be stripped - s = s.gsub(/\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[\/url\]\[\/b\]/i, '').strip - s = s.gsub(/\[b\]回复 \[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\].*?\[\/b\]/i, '').strip + s = + s.gsub( + %r{\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[/url\]\[/b\]}i, + "", + ).strip + s = + s.gsub( + %r{\[b\]回复 \[url=https?://#{ORIGINAL_SITE_PREFIX}/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[/url\].*?\[/b\]}i, + "", + ).strip - s.gsub!(/\[quote\](.*)?\[\/quote\]/im) do |matched| + s.gsub!(%r{\[quote\](.*)?\[/quote\]}im) do |matched| content = $1 post_import_id = find_post_id_by_quote_number(content) if post_import_id @@ -578,73 +710,93 @@ class ImportScripts::DiscuzX < ImportScripts::Base end end - s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '') - s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '') + s.gsub!( + %r{\[size=2\]\[color=#999999\].*? 
发表于 [\d\-\: ]*\[/color\] \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[/url\]\[/size\]}i,
+      "",
+    )
+    s.gsub!(
+      %r{\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[/color\] \[url=https?://#{ORIGINAL_SITE_PREFIX}/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[/url\]\[/size\]}i,
+      "",
+    )
 
     # convert quote
-    s.gsub!(/\[quote\](.*?)\[\/quote\]/m) { "\n" + ($1.strip).gsub(/^/, '> ') + "\n" }
+    s.gsub!(%r{\[quote\](.*?)\[/quote\]}m) { "\n" + ($1.strip).gsub(/^/, "> ") + "\n" }
 
     # truncate line space, preventing line starting with many blanks to be parsed as code blocks
-    s.gsub!(/^ {4,}/, ' ')
+    s.gsub!(/^ {4,}/, " ")
 
     # TODO: Much better to use bbcode-to-md gem
     # Convert image bbcode with width and height
-    s.gsub!(/\[img[^\]]*\]https?:\/\/#{ORIGINAL_SITE_PREFIX}\/(.*)\[\/img\]/i, '[x-attach]\1[/x-attach]') # dont convert attachment
-    s.gsub!(/<img[^>]*src="https?:\/\/#{ORIGINAL_SITE_PREFIX}\/(.*)".*?>/i, '[x-attach]\1[/x-attach]') # dont convert attachment
-    s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/blog\/(.*)\[\/img\]/i, '[x-attach]../blog/\1[/x-attach]') # 私货
-    s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/ucenter\/avatar.php\?uid=(\d+)[^\]]*\[\/img\]/i) { "[x-attach]#{discuzx_avatar_fullpath($1, false)[0]}[/x-attach]" } # 私货
-    s.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img src="\3" width="\1" height="\2">')
-    s.gsub!(/\[img\]([^\]]*)\[\/img\]/i, '<img src="\1">')
-    s.gsub!(/\[qq\]([^\]]*)\[\/qq\]/i, 'QQ 交谈')
-    s.gsub!(/\[email\]([^\]]*)\[\/email\]/i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
-    s.gsub!(/\[s\]([^\]]*)\[\/s\]/i, '<s>\1</s>')
-    s.gsub!(/\[sup\]([^\]]*)\[\/sup\]/i, '<sup>\1</sup>')
-    s.gsub!(/\[sub\]([^\]]*)\[\/sub\]/i, '<sub>\1</sub>')
+    s.gsub!(
+      %r{\[img[^\]]*\]https?://#{ORIGINAL_SITE_PREFIX}/(.*)\[/img\]}i,
+      '[x-attach]\1[/x-attach]',
+    ) # dont convert attachment
+    s.gsub!(
+      %r{<img[^>]*src="https?://#{ORIGINAL_SITE_PREFIX}/(.*)".*?>}i,
+      '[x-attach]\1[/x-attach]',
+    ) # dont convert attachment
+    s.gsub!(
+      %r{\[img[^\]]*\]https?://www\.touhou\.cc/blog/(.*)\[/img\]}i,
+      '[x-attach]../blog/\1[/x-attach]',
+    ) # 私货
+    s.gsub!(
+      %r{\[img[^\]]*\]https?://www\.touhou\.cc/ucenter/avatar.php\?uid=(\d+)[^\]]*\[/img\]}i,
+    ) { "[x-attach]#{discuzx_avatar_fullpath($1, false)[0]}[/x-attach]" } # 私货
+    s.gsub!(%r{\[img=(\d+),(\d+)\]([^\]]*)\[/img\]}i, '<img src="\3" width="\1" height="\2">')
+    s.gsub!(%r{\[img\]([^\]]*)\[/img\]}i, '<img src="\1">')
+    s.gsub!(
+      %r{\[qq\]([^\]]*)\[/qq\]}i,
+      'QQ 交谈',
+    )
+    s.gsub!(%r{\[email\]([^\]]*)\[/email\]}i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
+    s.gsub!(%r{\[s\]([^\]]*)\[/s\]}i, '<s>\1</s>')
+    s.gsub!(%r{\[sup\]([^\]]*)\[/sup\]}i, '<sup>\1</sup>')
+    s.gsub!(%r{\[sub\]([^\]]*)\[/sub\]}i, '<sub>\1</sub>')
     s.gsub!(/\[hr\]/i, "\n---\n")
 
     # remove the media tag
-    s.gsub!(/\[\/?media[^\]]*\]/i, "\n")
-    s.gsub!(/\[\/?flash[^\]]*\]/i, "\n")
-    s.gsub!(/\[\/?audio[^\]]*\]/i, "\n")
-    s.gsub!(/\[\/?video[^\]]*\]/i, "\n")
+    s.gsub!(%r{\[/?media[^\]]*\]}i, "\n")
+    s.gsub!(%r{\[/?flash[^\]]*\]}i, "\n")
+    s.gsub!(%r{\[/?audio[^\]]*\]}i, "\n")
+    s.gsub!(%r{\[/?video[^\]]*\]}i, "\n")
 
     # Remove the font, p and backcolor tag
     # Discourse doesn't support the font tag
-    s.gsub!(/\[font=[^\]]*?\]/i, '')
-    s.gsub!(/\[\/font\]/i, '')
-    s.gsub!(/\[p=[^\]]*?\]/i, '')
-    s.gsub!(/\[\/p\]/i, '')
-    s.gsub!(/\[backcolor=[^\]]*?\]/i, '')
-    s.gsub!(/\[\/backcolor\]/i, '')
+    s.gsub!(/\[font=[^\]]*?\]/i, "")
+    s.gsub!(%r{\[/font\]}i, "")
+    s.gsub!(/\[p=[^\]]*?\]/i, "")
+    s.gsub!(%r{\[/p\]}i, "")
+    s.gsub!(/\[backcolor=[^\]]*?\]/i, "")
+    s.gsub!(%r{\[/backcolor\]}i, "")
 
     # Remove the size tag
     # I really have no idea what is this
-    s.gsub!(/\[size=[^\]]*?\]/i, '')
-
s.gsub!(/\[\/size\]/i, '') + s.gsub!(/\[size=[^\]]*?\]/i, "") + s.gsub!(%r{\[/size\]}i, "") # Remove the color tag - s.gsub!(/\[color=[^\]]*?\]/i, '') - s.gsub!(/\[\/color\]/i, '') + s.gsub!(/\[color=[^\]]*?\]/i, "") + s.gsub!(%r{\[/color\]}i, "") # Remove the hide tag - s.gsub!(/\[\/?hide\]/i, '') - s.gsub!(/\[\/?free[^\]]*\]/i, "\n") + s.gsub!(%r{\[/?hide\]}i, "") + s.gsub!(%r{\[/?free[^\]]*\]}i, "\n") # Remove the align tag # still don't know what it is s.gsub!(/\[align=[^\]]*?\]/i, "\n") - s.gsub!(/\[\/align\]/i, "\n") + s.gsub!(%r{\[/align\]}i, "\n") s.gsub!(/\[float=[^\]]*?\]/i, "\n") - s.gsub!(/\[\/float\]/i, "\n") + s.gsub!(%r{\[/float\]}i, "\n") # Convert code - s.gsub!(/\[\/?code\]/i, "\n```\n") + s.gsub!(%r{\[/?code\]}i, "\n```\n") # The edit notice should be removed # example: 本帖最后由 Helloworld 于 2015-1-28 22:05 编辑 - s.gsub!(/\[i=s\] 本帖最后由[\s\S]*?编辑 \[\/i\]/, '') + s.gsub!(%r{\[i=s\] 本帖最后由[\s\S]*?编辑 \[/i\]}, "") # Convert the custom smileys to emojis # `{:cry:}` to `:cry` @@ -653,35 +805,71 @@ class ImportScripts::DiscuzX < ImportScripts::Base # Replace internal forum links that aren't in the format # convert list tags to ul and list=1 tags to ol # (basically, we're only missing list=a here...) - s.gsub!(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]') - s.gsub!(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]') + s.gsub!(%r{\[list\](.*?)\[/list:u\]}m, '[ul]\1[/ul]') + s.gsub!(%r{\[list=1\](.*?)\[/list:o\]}m, '[ol]\1[/ol]') # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') + s.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]') # Discuz can create PM out of a post, which will generates like # [url=http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11][b]关于您在“主题名称”的帖子[/b][/url] - s.gsub!(pm_url_regexp) do |discuzx_link| - replace_internal_link(discuzx_link, $1) - end + s.gsub!(pm_url_regexp) { |discuzx_link| replace_internal_link(discuzx_link, $1) } # [url][b]text[/b][/url] to **[url]text[/url]** - s.gsub!(/(\[url=[^\[\]]*?\])\[b\](\S*)\[\/b\](\[\/url\])/, '**\1\2\3**') + s.gsub!(%r{(\[url=[^\[\]]*?\])\[b\](\S*)\[/b\](\[/url\])}, '**\1\2\3**') @internal_url_regexps.each do |internal_url_regexp| s.gsub!(internal_url_regexp) do |discuzx_link| - replace_internal_link(discuzx_link, ($~[:tid].to_i rescue nil), ($~[:pid].to_i rescue nil), ($~[:fid].to_i rescue nil), ($~[:action] rescue nil)) + replace_internal_link( + discuzx_link, + ( + begin + $~[:tid].to_i + rescue StandardError + nil + end + ), + ( + begin + $~[:pid].to_i + rescue StandardError + nil + end + ), + ( + begin + $~[:fid].to_i + rescue StandardError + nil + end + ), + ( + begin + $~[:action] + rescue StandardError + nil + end + ), + ) end end # @someone without the url - s.gsub!(/@\[url=[^\[\]]*?\](\S*)\[\/url\]/i, '@\1') + s.gsub!(%r{@\[url=[^\[\]]*?\](\S*)\[/url\]}i, '@\1') - s.scan(/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/[^\[\]\s]*/) { |link|puts "WARNING: post #{import_id} can't replace internal url #{link}" } + s.scan(%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/[^\[\]\s]*}) do |link| + puts "WARNING: post #{import_id} can't replace internal url #{link}" + end s.strip end - def replace_internal_link(discuzx_link, import_topic_id, import_post_id, import_category_id, action) + def replace_internal_link( + discuzx_link, + import_topic_id, + import_post_id, + import_category_id, + action + ) if import_post_id post_id = post_id_from_imported_post_id import_post_id if post_id @@ -691,15 +879,17 @@ class 
ImportScripts::DiscuzX < ImportScripts::Base end if import_topic_id - - results = mysql_query("SELECT pid - FROM #{table_name 'forum_post'} + results = + mysql_query( + "SELECT pid + FROM #{table_name "forum_post"} WHERE tid = #{import_topic_id} AND first - LIMIT 1") + LIMIT 1", + ) return discuzx_link unless results.size > 0 - linked_post_id = results.first['pid'] + linked_post_id = results.first["pid"] lookup = topic_lookup_from_imported_post_id(linked_post_id) if lookup @@ -707,7 +897,6 @@ class ImportScripts::DiscuzX < ImportScripts::Base else return discuzx_link end - end if import_category_id @@ -719,9 +908,9 @@ class ImportScripts::DiscuzX < ImportScripts::Base end case action - when 'index' + when "index" return "#{NEW_SITE_PREFIX}/" - when 'stat', 'stats', 'ranklist' + when "stat", "stats", "ranklist" return "#{NEW_SITE_PREFIX}/users" end @@ -729,28 +918,32 @@ class ImportScripts::DiscuzX < ImportScripts::Base end def pm_url_regexp - @pm_url_regexp ||= Regexp.new("http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}/forum\\.php\\?mod=redirect&goto=findpost&pid=\\d+&ptid=(\\d+)") + @pm_url_regexp ||= + Regexp.new( + "http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\\.php\\?mod=redirect&goto=findpost&pid=\\d+&ptid=(\\d+)", + ) end # This step is done separately because it can take multiple attempts to get right (because of # missing files, wrong paths, authorized extensions, etc.). def import_attachments - setting = AUTHORIZED_EXTENSIONS.join('|') + setting = AUTHORIZED_EXTENSIONS.join("|") SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions - attachment_regex = /\[attach\](\d+)\[\/attach\]/ - attachment_link_regex = /\[x-attach\](.+)\[\/x-attach\]/ + attachment_regex = %r{\[attach\](\d+)\[/attach\]} + attachment_link_regex = %r{\[x-attach\](.+)\[/x-attach\]} current_count = 0 - total_count = mysql_query("SELECT count(*) count FROM #{table_name 'forum_post'};").first['count'] + total_count = + mysql_query("SELECT count(*) count FROM #{table_name "forum_post"};").first["count"] success_count = 0 fail_count = 0 - puts '', "Importing attachments...", '' + puts "", "Importing attachments...", "" Post.find_each do |post| - next unless post.custom_fields['import_id'] == post.custom_fields['import_id'].to_i.to_s + next unless post.custom_fields["import_id"] == post.custom_fields["import_id"].to_i.to_s user = post.user @@ -786,17 +979,16 @@ class ImportScripts::DiscuzX < ImportScripts::Base html_for_upload(upload, filename) end - sql = "SELECT aid - FROM #{table_name 'forum_attachment'} - WHERE pid = #{post.custom_fields['import_id']}" - if !inline_attachments.empty? - sql = "#{sql} AND aid NOT IN (#{inline_attachments.join(',')})" - end + sql = + "SELECT aid + FROM #{table_name "forum_attachment"} + WHERE pid = #{post.custom_fields["import_id"]}" + sql = "#{sql} AND aid NOT IN (#{inline_attachments.join(",")})" if !inline_attachments.empty? 
results = mysql_query(sql) results.each do |attachment| - attachment_id = attachment['aid'] + attachment_id = attachment["aid"] upload, filename = find_upload(user, post, attachment_id) unless upload fail_count += 1 @@ -810,21 +1002,26 @@ class ImportScripts::DiscuzX < ImportScripts::Base end if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: '从 Discuz 中导入附件') + PostRevisor.new(post).revise!( + post.user, + { raw: new_raw }, + bypass_bump: true, + edit_reason: "从 Discuz 中导入附件", + ) end success_count += 1 end - puts '', '' + puts "", "" puts "succeeded: #{success_count}" puts " failed: #{fail_count}" if fail_count > 0 - puts '' + puts "" end # Create the full path to the discuz avatar specified from user id def discuzx_avatar_fullpath(user_id, absolute = true) - padded_id = user_id.to_s.rjust(9, '0') + padded_id = user_id.to_s.rjust(9, "0") part_1 = padded_id[0..2] part_2 = padded_id[3..4] @@ -844,9 +1041,9 @@ class ImportScripts::DiscuzX < ImportScripts::Base case raw when /\[url=forum.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/ #standard $1 - when /\[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=(\d+)&ptid=\d+\]/ # old discuz 7 format + when %r{\[url=https?://#{ORIGINAL_SITE_PREFIX}/redirect.php\?goto=findpost&pid=(\d+)&ptid=\d+\]} # old discuz 7 format $1 - when /\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/ # quote + when %r{\[quote\][\S\s]*pid=(\d+)[\S\s]*\[/quote\]} # quote $1 end end @@ -856,18 +1053,18 @@ class ImportScripts::DiscuzX < ImportScripts::Base def upload_inline_image(data) return unless data - puts 'Creating inline image' + puts "Creating inline image" - encoded_photo = data['data:image/png;base64,'.length .. -1] + encoded_photo = data["data:image/png;base64,".length..-1] if encoded_photo raw_file = Base64.decode64(encoded_photo) else - puts 'Error parsed inline photo', data[0..20] + puts "Error parsed inline photo", data[0..20] return end real_filename = "#{SecureRandom.hex}.png" - filename = Tempfile.new(['inline', '.png']) + filename = Tempfile.new(%w[inline .png]) begin filename.binmode filename.write(raw_file) @@ -875,8 +1072,16 @@ class ImportScripts::DiscuzX < ImportScripts::Base upload = create_upload(Discourse::SYSTEM_USER_ID, filename, real_filename) ensure - filename.close rescue nil - filename.unlink rescue nil + begin + filename.close + rescue StandardError + nil + end + begin + filename.unlink + rescue StandardError + nil + end end if upload.nil? || !upload.valid? 
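# Discuz splits attachment rows across numbered shard tables; the master
# pre_forum_attachment table records which shard ("tableid") holds a given
# attachment, so find_upload below performs a two-step lookup. A rough sketch
# using the script's column aliases; mysql_query and the sample aid value are
# assumed here.
meta = mysql_query("SELECT tableid FROM pre_forum_attachment WHERE aid = 42").first
row =
  mysql_query(
    "SELECT attachment attachment_path, filename real_filename
       FROM pre_forum_attachment_#{meta["tableid"]} WHERE aid = 42",
  ).first
full_path = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row["attachment_path"])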
@@ -890,23 +1095,25 @@ class ImportScripts::DiscuzX < ImportScripts::Base # find the uploaded file and real name from the db def find_upload(user, post, upload_id) - attachment_table = table_name 'forum_attachment' + attachment_table = table_name "forum_attachment" # search for table id - sql = "SELECT a.pid post_id, + sql = + "SELECT a.pid post_id, a.aid upload_id, a.tableid table_id FROM #{attachment_table} a - WHERE a.pid = #{post.custom_fields['import_id']} + WHERE a.pid = #{post.custom_fields["import_id"]} AND a.aid = #{upload_id};" results = mysql_query(sql) unless (meta_data = results.first) - puts "Couldn't find forum_attachment record meta data for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}" + puts "Couldn't find forum_attachment record meta data for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}" return nil end # search for uploaded file meta data - sql = "SELECT a.pid post_id, + sql = + "SELECT a.pid post_id, a.aid upload_id, a.tid topic_id, a.uid user_id, @@ -917,22 +1124,22 @@ class ImportScripts::DiscuzX < ImportScripts::Base a.description description, a.isimage is_image, a.thumb is_thumb - FROM #{attachment_table}_#{meta_data['table_id']} a + FROM #{attachment_table}_#{meta_data["table_id"]} a WHERE a.aid = #{upload_id};" results = mysql_query(sql) unless (row = results.first) - puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}" + puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}" return nil end - filename = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row['attachment_path']) + filename = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row["attachment_path"]) unless File.exist?(filename) puts "Attachment file doesn't exist: #{filename}" return nil end - real_filename = row['real_filename'] - real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + real_filename = row["real_filename"] + real_filename.prepend SecureRandom.hex if real_filename[0] == "." upload = create_upload(user.id, filename, real_filename) if upload.nil? || !upload.valid? @@ -950,7 +1157,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base end def first_exists(*items) - items.find { |item|!item.blank? } || '' + items.find { |item| !item.blank? } || "" end def mysql_query(sql) diff --git a/script/import_scripts/disqus.rb b/script/import_scripts/disqus.rb index 5dbeb08775a..1b6f4185a5f 100644 --- a/script/import_scripts/disqus.rb +++ b/script/import_scripts/disqus.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -require 'nokogiri' -require 'optparse' +require "nokogiri" +require "optparse" require File.expand_path(File.dirname(__FILE__) + "/base") class ImportScripts::Disqus < ImportScripts::Base @@ -35,7 +35,7 @@ class ImportScripts::Disqus < ImportScripts::Base by_email = {} @parser.posts.each do |id, p| - next if p[:is_spam] == 'true' || p[:is_deleted] == 'true' + next if p[:is_spam] == "true" || p[:is_deleted] == "true" by_email[p[:author_email]] = { name: p[:author_name], username: p[:author_username] } end @@ -45,13 +45,7 @@ class ImportScripts::Disqus < ImportScripts::Base create_users(by_email.keys) do |email| user = by_email[email] - { - id: email, - email: email, - username: user[:username], - name: user[:name], - merge: true - } + { id: email, email: email, username: user[:username], name: user[:name], merge: true } end end @@ -59,7 +53,6 @@ class ImportScripts::Disqus < ImportScripts::Base puts "", "importing topics..." 
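 # The loop below straightens the typographic quotes that Disqus XML exports
 # put in thread titles before each topic is created, e.g. a hypothetical
 # title “Launch day” becomes "Launch day".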
@parser.threads.each do |id, t| - title = t[:title] title.gsub!(/“/, '"') title.gsub!(/”/, '"') @@ -79,7 +72,7 @@ class ImportScripts::Disqus < ImportScripts::Base if post.present? && post.topic.posts_count <= 1 (t[:posts] || []).each do |p| - post_user = find_existing_user(p[:author_email] || '', p[:author_username]) + post_user = find_existing_user(p[:author_email] || "", p[:author_username]) next unless post_user.present? attrs = { @@ -87,7 +80,7 @@ class ImportScripts::Disqus < ImportScripts::Base topic_id: post.topic_id, raw: p[:cooked], cooked: p[:cooked], - created_at: Date.parse(p[:created_at]) + created_at: Date.parse(p[:created_at]), } if p[:parent_id] @@ -125,23 +118,22 @@ class DisqusSAX < Nokogiri::XML::SAX::Document end def start_element(name, attrs = []) - hashed = Hash[attrs] case name - when 'post' + when "post" @post = {} - @post[:id] = hashed['dsq:id'] if @post - when 'thread' - id = hashed['dsq:id'] + @post[:id] = hashed["dsq:id"] if @post + when "thread" + id = hashed["dsq:id"] if @post thread = @threads[id] thread[:posts] << @post else @thread = { id: id, posts: [] } end - when 'parent' + when "parent" if @post - id = hashed['dsq:id'] + id = hashed["dsq:id"] @post[:parent_id] = id end end @@ -151,10 +143,10 @@ class DisqusSAX < Nokogiri::XML::SAX::Document def end_element(name) case name - when 'post' + when "post" @posts[@post[:id]] = @post @post = nil - when 'thread' + when "thread" if @post.nil? @threads[@thread[:id]] = @thread @thread = nil @@ -165,25 +157,25 @@ class DisqusSAX < Nokogiri::XML::SAX::Document end def characters(str) - record(@post, :author_email, str, 'author', 'email') - record(@post, :author_name, str, 'author', 'name') - record(@post, :author_username, str, 'author', 'username') - record(@post, :author_anonymous, str, 'author', 'isAnonymous') - record(@post, :created_at, str, 'createdAt') - record(@post, :is_deleted, str, 'isDeleted') - record(@post, :is_spam, str, 'isSpam') + record(@post, :author_email, str, "author", "email") + record(@post, :author_name, str, "author", "name") + record(@post, :author_username, str, "author", "username") + record(@post, :author_anonymous, str, "author", "isAnonymous") + record(@post, :created_at, str, "createdAt") + record(@post, :is_deleted, str, "isDeleted") + record(@post, :is_spam, str, "isSpam") - record(@thread, :link, str, 'link') - record(@thread, :title, str, 'title') - record(@thread, :created_at, str, 'createdAt') - record(@thread, :author_email, str, 'author', 'email') - record(@thread, :author_name, str, 'author', 'name') - record(@thread, :author_username, str, 'author', 'username') - record(@thread, :author_anonymous, str, 'author', 'isAnonymous') + record(@thread, :link, str, "link") + record(@thread, :title, str, "title") + record(@thread, :created_at, str, "createdAt") + record(@thread, :author_email, str, "author", "email") + record(@thread, :author_name, str, "author", "name") + record(@thread, :author_username, str, "author", "username") + record(@thread, :author_anonymous, str, "author", "isAnonymous") end def cdata_block(str) - record(@post, :cooked, str, 'message') + record(@post, :cooked, str, "message") end def record(target, sym, str, *params) @@ -205,7 +197,7 @@ class DisqusSAX < Nokogiri::XML::SAX::Document # Remove any threads that have no posts @threads.delete(id) else - t[:posts].delete_if { |p| p[:is_spam] == 'true' || p[:is_deleted] == 'true' } + t[:posts].delete_if { |p| p[:is_spam] == "true" || p[:is_deleted] == "true" } end end diff --git 
a/script/import_scripts/drupal-6.rb b/script/import_scripts/drupal-6.rb index 182596c63c3..3e27c0becd3 100644 --- a/script/import_scripts/drupal-6.rb +++ b/script/import_scripts/drupal-6.rb @@ -4,19 +4,19 @@ require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Drupal < ImportScripts::Base - - DRUPAL_DB = ENV['DRUPAL_DB'] || "newsite3" - VID = ENV['DRUPAL_VID'] || 1 + DRUPAL_DB = ENV["DRUPAL_DB"] || "newsite3" + VID = ENV["DRUPAL_VID"] || 1 def initialize super - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - #password: "password", - database: DRUPAL_DB - ) + @client = + Mysql2::Client.new( + host: "localhost", + username: "root", + #password: "password", + database: DRUPAL_DB, + ) end def categories_query @@ -25,7 +25,12 @@ class ImportScripts::Drupal < ImportScripts::Base def execute create_users(@client.query("SELECT uid id, name, mail email, created FROM users;")) do |row| - { id: row['id'], username: row['name'], email: row['email'], created_at: Time.zone.at(row['created']) } + { + id: row["id"], + username: row["name"], + email: row["email"], + created_at: Time.zone.at(row["created"]), + } end # You'll need to edit the following query for your Drupal install: @@ -34,38 +39,36 @@ class ImportScripts::Drupal < ImportScripts::Base # * Table name may be term_data. # * May need to select a vid other than 1. create_categories(categories_query) do |c| - { id: c['tid'], name: c['name'], description: c['description'] } + { id: c["tid"], name: c["name"], description: c["description"] } end # "Nodes" in Drupal are divided into types. Here we import two types, # and will later import all the comments/replies for each node. # You will need to figure out what the type names are on your install and edit the queries to match. - if ENV['DRUPAL_IMPORT_BLOG'] - create_blog_topics - end + create_blog_topics if ENV["DRUPAL_IMPORT_BLOG"] create_forum_topics create_replies begin - create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil')) + create_admin(email: "neil.lalonde@discourse.org", username: UserNameSuggester.suggest("neil")) rescue => e - puts '', "Failed to create admin user" + puts "", "Failed to create admin user" puts e.message end end def create_blog_topics - puts '', "creating blog topics" + puts "", "creating blog topics" - create_category({ - name: 'Blog', - user_id: -1, - description: "Articles from the blog" - }, nil) unless Category.find_by_name('Blog') + unless Category.find_by_name("Blog") + create_category({ name: "Blog", user_id: -1, description: "Articles from the blog" }, nil) + end - results = @client.query(" + results = + @client.query( + " SELECT n.nid nid, n.title title, n.uid uid, @@ -76,37 +79,48 @@ class ImportScripts::Drupal < ImportScripts::Base LEFT JOIN node_revisions nr ON nr.vid=n.vid WHERE n.type = 'blog' AND n.status = 1 - ", cache_rows: false) + ", + cache_rows: false, + ) create_posts(results) do |row| { - id: "nid:#{row['nid']}", - user_id: user_id_from_imported_user_id(row['uid']) || -1, - category: 'Blog', - raw: row['body'], - created_at: Time.zone.at(row['created']), - pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil, - title: row['title'].try(:strip), - custom_fields: { import_id: "nid:#{row['nid']}" } + id: "nid:#{row["nid"]}", + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + category: "Blog", + raw: row["body"], + created_at: Time.zone.at(row["created"]), + pinned_at: row["sticky"].to_i == 1 ? 
Time.zone.at(row["created"]) : nil, + title: row["title"].try(:strip), + custom_fields: { + import_id: "nid:#{row["nid"]}", + }, } end end def create_forum_topics - puts '', "creating forum topics" + puts "", "creating forum topics" - total_count = @client.query(" + total_count = + @client.query( + " SELECT COUNT(*) count FROM node n LEFT JOIN forum f ON f.vid=n.vid WHERE n.type = 'forum' AND n.status = 1 - ").first['count'] + ", + ).first[ + "count" + ] batch_size = 1000 batches(batch_size) do |offset| - results = @client.query(" + results = + @client.query( + " SELECT n.nid nid, n.title title, f.tid tid, @@ -121,48 +135,57 @@ class ImportScripts::Drupal < ImportScripts::Base AND n.status = 1 LIMIT #{batch_size} OFFSET #{offset}; - ", cache_rows: false) + ", + cache_rows: false, + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" } + next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" } create_posts(results, total: total_count, offset: offset) do |row| { - id: "nid:#{row['nid']}", - user_id: user_id_from_imported_user_id(row['uid']) || -1, - category: category_id_from_imported_category_id(row['tid']), - raw: row['body'], - created_at: Time.zone.at(row['created']), - pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil, - title: row['title'].try(:strip) + id: "nid:#{row["nid"]}", + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + category: category_id_from_imported_category_id(row["tid"]), + raw: row["body"], + created_at: Time.zone.at(row["created"]), + pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil, + title: row["title"].try(:strip), } end end end def create_replies - puts '', "creating replies in topics" + puts "", "creating replies in topics" - if ENV['DRUPAL_IMPORT_BLOG'] + if ENV["DRUPAL_IMPORT_BLOG"] node_types = "('forum','blog')" else node_types = "('forum')" end - total_count = @client.query(" + total_count = + @client.query( + " SELECT COUNT(*) count FROM comments c LEFT JOIN node n ON n.nid=c.nid WHERE n.type IN #{node_types} AND n.status = 1 AND c.status=0; - ").first['count'] + ", + ).first[ + "count" + ] batch_size = 1000 batches(batch_size) do |offset| - results = @client.query(" + results = + @client.query( + " SELECT c.cid, c.pid, c.nid, @@ -176,37 +199,36 @@ class ImportScripts::Drupal < ImportScripts::Base AND c.status=0 LIMIT #{batch_size} OFFSET #{offset}; - ", cache_rows: false) + ", + cache_rows: false, + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" } + next if all_records_exist? 
:posts, results.map { |p| "cid:#{p["cid"]}" } create_posts(results, total: total_count, offset: offset) do |row| - topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") + topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}") if topic_mapping && topic_id = topic_mapping[:topic_id] h = { - id: "cid:#{row['cid']}", + id: "cid:#{row["cid"]}", topic_id: topic_id, - user_id: user_id_from_imported_user_id(row['uid']) || -1, - raw: row['body'], - created_at: Time.zone.at(row['timestamp']), + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + raw: row["body"], + created_at: Time.zone.at(row["timestamp"]), } - if row['pid'] - parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}") + if row["pid"] + parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}") h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1) end h else - puts "No topic found for comment #{row['cid']}" + puts "No topic found for comment #{row["cid"]}" nil end end end end - end -if __FILE__ == $0 - ImportScripts::Drupal.new.perform -end +ImportScripts::Drupal.new.perform if __FILE__ == $0 diff --git a/script/import_scripts/drupal.rb b/script/import_scripts/drupal.rb index 2350a4efbf8..ac01a2daa40 100644 --- a/script/import_scripts/drupal.rb +++ b/script/import_scripts/drupal.rb @@ -5,9 +5,8 @@ require "htmlentities" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Drupal < ImportScripts::Base - - DRUPAL_DB = ENV['DRUPAL_DB'] || "drupal" - VID = ENV['DRUPAL_VID'] || 1 + DRUPAL_DB = ENV["DRUPAL_DB"] || "drupal" + VID = ENV["DRUPAL_VID"] || 1 BATCH_SIZE = 1000 ATTACHMENT_DIR = "/root/files/upload" @@ -16,25 +15,23 @@ class ImportScripts::Drupal < ImportScripts::Base @htmlentities = HTMLEntities.new - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - #password: "password", - database: DRUPAL_DB - ) + @client = + Mysql2::Client.new( + host: "localhost", + username: "root", + #password: "password", + database: DRUPAL_DB, + ) end def execute - import_users import_categories # "Nodes" in Drupal are divided into types. Here we import two types, # and will later import all the comments/replies for each node. # You will need to figure out what the type names are on your install and edit the queries to match. - if ENV['DRUPAL_IMPORT_BLOG'] - import_blog_topics - end + import_blog_topics if ENV["DRUPAL_IMPORT_BLOG"] import_forum_topics @@ -56,7 +53,7 @@ class ImportScripts::Drupal < ImportScripts::Base last_user_id = -1 batches(BATCH_SIZE) do |offset| - users = mysql_query(<<-SQL + users = mysql_query(<<-SQL).to_a SELECT uid, name username, mail email, @@ -66,7 +63,6 @@ class ImportScripts::Drupal < ImportScripts::Base ORDER BY uid LIMIT #{BATCH_SIZE} SQL - ).to_a break if users.empty? 
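 # A minimal sketch of the batching contract these import scripts rely on,
 # assuming the batches helper in base.rb simply yields increasing offsets
 # and leaves termination to the caller:
 #
 #   batches(BATCH_SIZE) do |offset|
 #     rows = mysql_query("SELECT ... LIMIT #{BATCH_SIZE} OFFSET #{offset}").to_a
 #     break if rows.empty?   # the only exit: an empty page ends the loop
 #     # hand rows to create_users / create_posts here
 #   end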
@@ -80,12 +76,7 @@ class ImportScripts::Drupal < ImportScripts::Base username = @htmlentities.decode(user["username"]).strip - { - id: user["uid"], - name: username, - email: email, - created_at: Time.zone.at(user["created"]) - } + { id: user["uid"], name: username, email: email, created_at: Time.zone.at(user["created"]) } end end end @@ -99,35 +90,31 @@ class ImportScripts::Drupal < ImportScripts::Base puts "", "importing categories" - categories = mysql_query(<<-SQL + categories = mysql_query(<<-SQL).to_a SELECT tid, name, description FROM taxonomy_term_data WHERE vid = #{VID} SQL - ).to_a create_categories(categories) do |category| { - id: category['tid'], - name: @htmlentities.decode(category['name']).strip, - description: @htmlentities.decode(category['description']).strip + id: category["tid"], + name: @htmlentities.decode(category["name"]).strip, + description: @htmlentities.decode(category["description"]).strip, } end end def import_blog_topics - puts '', "importing blog topics" + puts "", "importing blog topics" - create_category( - { - name: 'Blog', - description: "Articles from the blog" - }, - nil) unless Category.find_by_name('Blog') + unless Category.find_by_name("Blog") + create_category({ name: "Blog", description: "Articles from the blog" }, nil) + end - blogs = mysql_query(<<-SQL + blogs = mysql_query(<<-SQL).to_a SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky, f.body_value body FROM node n, @@ -136,38 +123,38 @@ class ImportScripts::Drupal < ImportScripts::Base AND n.nid = f.entity_id AND n.status = 1 SQL - ).to_a - category_id = Category.find_by_name('Blog').id + category_id = Category.find_by_name("Blog").id create_posts(blogs) do |topic| { - id: "nid:#{topic['nid']}", - user_id: user_id_from_imported_user_id(topic['uid']) || -1, + id: "nid:#{topic["nid"]}", + user_id: user_id_from_imported_user_id(topic["uid"]) || -1, category: category_id, - raw: topic['body'], - created_at: Time.zone.at(topic['created']), - pinned_at: topic['sticky'].to_i == 1 ? Time.zone.at(topic['created']) : nil, - title: topic['title'].try(:strip), - custom_fields: { import_id: "nid:#{topic['nid']}" } + raw: topic["body"], + created_at: Time.zone.at(topic["created"]), + pinned_at: topic["sticky"].to_i == 1 ? Time.zone.at(topic["created"]) : nil, + title: topic["title"].try(:strip), + custom_fields: { + import_id: "nid:#{topic["nid"]}", + }, } end end def import_forum_topics - puts '', "importing forum topics" + puts "", "importing forum topics" - total_count = mysql_query(<<-SQL + total_count = mysql_query(<<-SQL).first["count"] SELECT COUNT(*) count FROM forum_index fi, node n WHERE n.type = 'forum' AND fi.nid = n.nid AND n.status = 1 SQL - ).first['count'] batches(BATCH_SIZE) do |offset| - results = mysql_query(<<-SQL + results = mysql_query(<<-SQL).to_a SELECT fi.nid nid, fi.title title, fi.tid tid, @@ -188,34 +175,33 @@ class ImportScripts::Drupal < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset}; SQL - ).to_a break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" } + next if all_records_exist? 
:posts, results.map { |p| "nid:#{p["nid"]}" } create_posts(results, total: total_count, offset: offset) do |row| - raw = preprocess_raw(row['body']) + raw = preprocess_raw(row["body"]) topic = { - id: "nid:#{row['nid']}", - user_id: user_id_from_imported_user_id(row['uid']) || -1, - category: category_id_from_imported_category_id(row['tid']), + id: "nid:#{row["nid"]}", + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + category: category_id_from_imported_category_id(row["tid"]), raw: raw, - created_at: Time.zone.at(row['created']), - pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil, - title: row['title'].try(:strip), - views: row['views'] + created_at: Time.zone.at(row["created"]), + pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil, + title: row["title"].try(:strip), + views: row["views"], } - topic[:custom_fields] = { import_solved: true } if row['solved'].present? + topic[:custom_fields] = { import_solved: true } if row["solved"].present? topic end end end def import_replies - puts '', "creating replies in topics" + puts "", "creating replies in topics" - total_count = mysql_query(<<-SQL + total_count = mysql_query(<<-SQL).first["count"] SELECT COUNT(*) count FROM comment c, node n @@ -224,10 +210,9 @@ class ImportScripts::Drupal < ImportScripts::Base AND n.type IN ('article', 'forum') AND n.status = 1 SQL - ).first['count'] batches(BATCH_SIZE) do |offset| - results = mysql_query(<<-SQL + results = mysql_query(<<-SQL).to_a SELECT c.cid, c.pid, c.nid, c.uid, c.created, f.comment_body_value body FROM comment c, @@ -241,30 +226,29 @@ class ImportScripts::Drupal < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ).to_a break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" } + next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" } create_posts(results, total: total_count, offset: offset) do |row| - topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") + topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}") if topic_mapping && topic_id = topic_mapping[:topic_id] - raw = preprocess_raw(row['body']) + raw = preprocess_raw(row["body"]) h = { - id: "cid:#{row['cid']}", + id: "cid:#{row["cid"]}", topic_id: topic_id, - user_id: user_id_from_imported_user_id(row['uid']) || -1, + user_id: user_id_from_imported_user_id(row["uid"]) || -1, raw: raw, - created_at: Time.zone.at(row['created']), + created_at: Time.zone.at(row["created"]), } - if row['pid'] - parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}") + if row["pid"] + parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}") h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1) end h else - puts "No topic found for comment #{row['cid']}" + puts "No topic found for comment #{row["cid"]}" nil end end @@ -275,7 +259,7 @@ class ImportScripts::Drupal < ImportScripts::Base puts "", "importing post likes" batches(BATCH_SIZE) do |offset| - likes = mysql_query(<<-SQL + likes = mysql_query(<<-SQL).to_a SELECT flagging_id, fid, entity_id, @@ -286,17 +270,20 @@ class ImportScripts::Drupal < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ).to_a break if likes.empty? likes.each do |l| - identifier = l['fid'] == 5 ? 'nid' : 'cid' - next unless user_id = user_id_from_imported_user_id(l['uid']) - next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l['entity_id']}") + identifier = l["fid"] == 5 ? 
"nid" : "cid" + next unless user_id = user_id_from_imported_user_id(l["uid"]) + next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l["entity_id"]}") next unless user = User.find_by(id: user_id) next unless post = Post.find_by(id: post_id) - PostActionCreator.like(user, post) rescue nil + begin + PostActionCreator.like(user, post) + rescue StandardError + nil + end end end end @@ -304,7 +291,8 @@ class ImportScripts::Drupal < ImportScripts::Base def mark_topics_as_solved puts "", "marking topics as solved" - solved_topics = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id) + solved_topics = + TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id) solved_topics.each do |topic_id| next unless topic = Topic.find(topic_id) @@ -336,8 +324,13 @@ class ImportScripts::Drupal < ImportScripts::Base begin current_count += 1 print_status(current_count, total_count, start_time) - SingleSignOnRecord.create!(user_id: user.id, external_id: external_id, external_email: user.email, last_payload: '') - rescue + SingleSignOnRecord.create!( + user_id: user.id, + external_id: external_id, + external_email: user.email, + last_payload: "", + ) + rescue StandardError next end end @@ -350,14 +343,13 @@ class ImportScripts::Drupal < ImportScripts::Base success_count = 0 fail_count = 0 - total_count = mysql_query(<<-SQL + total_count = mysql_query(<<-SQL).first["count"] SELECT count(field_post_attachment_fid) count FROM field_data_field_post_attachment SQL - ).first["count"] batches(BATCH_SIZE) do |offset| - attachments = mysql_query(<<-SQL + attachments = mysql_query(<<-SQL).to_a SELECT * FROM field_data_field_post_attachment fp LEFT JOIN file_managed fm @@ -365,7 +357,6 @@ class ImportScripts::Drupal < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ).to_a break if attachments.size < 1 @@ -373,9 +364,11 @@ class ImportScripts::Drupal < ImportScripts::Base current_count += 1 print_status current_count, total_count - identifier = attachment['entity_type'] == "comment" ? "cid" : "nid" - next unless user_id = user_id_from_imported_user_id(attachment['uid']) - next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment['entity_id']}") + identifier = attachment["entity_type"] == "comment" ? "cid" : "nid" + next unless user_id = user_id_from_imported_user_id(attachment["uid"]) + unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment["entity_id"]}") + next + end next unless user = User.find(user_id) next unless post = Post.find(post_id) @@ -392,9 +385,14 @@ class ImportScripts::Drupal < ImportScripts::Base new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html) if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: "Import attachment from Drupal") + PostRevisor.new(post).revise!( + post.user, + { raw: new_raw }, + bypass_bump: true, + edit_reason: "Import attachment from Drupal", + ) else - puts '', 'Skipped upload: already imported' + puts "", "Skipped upload: already imported" end success_count += 1 @@ -406,13 +404,13 @@ class ImportScripts::Drupal < ImportScripts::Base end def create_permalinks - puts '', 'creating permalinks...' + puts "", "creating permalinks..." 
Topic.listable_topics.find_each do |topic| begin tcf = topic.custom_fields - if tcf && tcf['import_id'] - node_id = tcf['import_id'][/nid:(\d+)/, 1] + if tcf && tcf["import_id"] + node_id = tcf["import_id"][/nid:(\d+)/, 1] slug = "/node/#{node_id}" Permalink.create(url: slug, topic_id: topic.id) end @@ -424,18 +422,16 @@ class ImportScripts::Drupal < ImportScripts::Base end def find_upload(post, attachment) - uri = attachment['uri'][/public:\/\/upload\/(.+)/, 1] + uri = attachment["uri"][%r{public://upload/(.+)}, 1] real_filename = CGI.unescapeHTML(uri) file = File.join(ATTACHMENT_DIR, real_filename) unless File.exist?(file) - puts "Attachment file #{attachment['filename']} doesn't exist" + puts "Attachment file #{attachment["filename"]} doesn't exist" tmpfile = "attachments_failed.txt" - filename = File.join('/tmp/', tmpfile) - File.open(filename, 'a') { |f| - f.puts attachment['filename'] - } + filename = File.join("/tmp/", tmpfile) + File.open(filename, "a") { |f| f.puts attachment["filename"] } end upload = create_upload(post.user.id || -1, file, real_filename) @@ -452,13 +448,13 @@ class ImportScripts::Drupal < ImportScripts::Base def preprocess_raw(raw) return if raw.blank? # quotes on new lines - raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote| - quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" } + raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote| + quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" } quote.gsub!(/\n(.+?)/) { "\n> #{$1}" } - } + end # [QUOTE=]...[/QUOTE] - raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do + raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do username, quote = $1, $2 "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n" end @@ -468,7 +464,7 @@ class ImportScripts::Drupal < ImportScripts::Base end def postprocess_posts - puts '', 'postprocessing posts' + puts "", "postprocessing posts" current = 0 max = Post.count @@ -479,7 +475,7 @@ class ImportScripts::Drupal < ImportScripts::Base new_raw = raw.dup # replace old topic to new topic links - new_raw.gsub!(/https:\/\/site.com\/forum\/topic\/(\d+)/im) do + new_raw.gsub!(%r{https://site.com/forum/topic/(\d+)}im) do post_id = post_id_from_imported_post_id("nid:#{$1}") next unless post_id topic = Post.find(post_id).topic @@ -487,7 +483,7 @@ class ImportScripts::Drupal < ImportScripts::Base end # replace old comment to reply links - new_raw.gsub!(/https:\/\/site.com\/comment\/(\d+)#comment-\d+/im) do + new_raw.gsub!(%r{https://site.com/comment/(\d+)#comment-\d+}im) do post_id = post_id_from_imported_post_id("cid:#{$1}") next unless post_id post_ref = Post.find(post_id) @@ -498,8 +494,8 @@ class ImportScripts::Drupal < ImportScripts::Base post.raw = new_raw post.save end - rescue - puts '', "Failed rewrite on post: #{post.id}" + rescue StandardError + puts "", "Failed rewrite on post: #{post.id}" ensure print_status(current += 1, max) end @@ -507,15 +503,15 @@ class ImportScripts::Drupal < ImportScripts::Base end def import_gravatars - puts '', 'importing gravatars' + puts "", "importing gravatars" current = 0 max = User.count User.find_each do |user| begin user.create_user_avatar(user_id: user.id) unless user.user_avatar user.user_avatar.update_gravatar! 
- rescue
- puts '', 'Failed avatar update on user #{user.id}'
+ rescue StandardError
+ puts "", "Failed avatar update on user #{user.id}"
 ensure
 print_status(current += 1, max)
 end
@@ -523,15 +519,12 @@ class ImportScripts::Drupal < ImportScripts::Base
 end

 def parse_datetime(time)
- DateTime.strptime(time, '%s')
+ DateTime.strptime(time, "%s")
 end

 def mysql_query(sql)
 @client.query(sql, cache_rows: true)
 end
- end

-if __FILE__ == $0
- ImportScripts::Drupal.new.perform
-end
+ImportScripts::Drupal.new.perform if __FILE__ == $0
diff --git a/script/import_scripts/drupal_json.rb b/script/import_scripts/drupal_json.rb
index d69f21e01bd..f97ae683e11 100644
--- a/script/import_scripts/drupal_json.rb
+++ b/script/import_scripts/drupal_json.rb
@@ -5,7 +5,6 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")

 # Edit the constants and initialize method for your import data.
 class ImportScripts::DrupalJson < ImportScripts::Base
-
 JSON_FILES_DIR = "/Users/techapj/Documents"

 def initialize
@@ -28,20 +27,18 @@ class ImportScripts::DrupalJson < ImportScripts::Base
 end

 def import_users
- puts '', "Importing users"
+ puts "", "Importing users"
 create_users(@users_json) do |u|
 {
 id: u["uid"],
 name: u["name"],
 email: u["mail"],
- created_at: Time.zone.at(u["created"].to_i)
+ created_at: Time.zone.at(u["created"].to_i),
 }
 end
 EmailToken.delete_all
 end
 end

-if __FILE__ == $0
- ImportScripts::DrupalJson.new.perform
-end
+ImportScripts::DrupalJson.new.perform if __FILE__ == $0
diff --git a/script/import_scripts/drupal_qa.rb b/script/import_scripts/drupal_qa.rb
index a8febbd41c0..948b04590dd 100644
--- a/script/import_scripts/drupal_qa.rb
+++ b/script/import_scripts/drupal_qa.rb
@@ -5,41 +5,51 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
 require File.expand_path(File.dirname(__FILE__) + "/drupal.rb")

 class ImportScripts::DrupalQA < ImportScripts::Drupal
-
 def categories_query
- result = @client.query("SELECT n.nid, GROUP_CONCAT(ti.tid) AS tids
+ result =
+ @client.query(
+ "SELECT n.nid, GROUP_CONCAT(ti.tid) AS tids
 FROM node AS n
 INNER JOIN taxonomy_index AS ti ON ti.nid = n.nid
 WHERE n.type = 'question'
 AND n.status = 1
- GROUP BY n.nid")
+ GROUP BY n.nid",
+ )

 categories = {}
 result.each do |r|
- tids = r['tids']
+ tids = r["tids"]
 if tids.present?
- tids = tids.split(',')
+ tids = tids.split(",")
 categories[tids[0].to_i] = true
 end
 end

- @client.query("SELECT tid, name, description FROM taxonomy_term_data WHERE tid IN (#{categories.keys.join(',')})")
+ @client.query(
+ "SELECT tid, name, description FROM taxonomy_term_data WHERE tid IN (#{categories.keys.join(",")})",
+ )
 end

 def create_forum_topics
+ puts "", "creating forum topics"

- puts '', "creating forum topics"
-
- total_count = @client.query("
+ total_count =
+ @client.query(
+ "
 SELECT COUNT(*) count
 FROM node n
 WHERE n.type = 'question'
- AND n.status = 1;").first['count']
+ AND n.status = 1;",
+ ).first[
+ "count"
+ ]

 batch_size = 1000

 batches(batch_size) do |offset|
- results = @client.query("
+ results =
+ @client.query(
+ "
 SELECT n.nid,
 n.title,
 GROUP_CONCAT(t.tid) AS tid,
@@ -54,40 +64,48 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
 GROUP BY n.nid, n.title, n.uid, n.created, f.body_value
 LIMIT #{batch_size}
 OFFSET #{offset}
- ", cache_rows: false)
+ ",
+ cache_rows: false,
+ )

 break if results.size < 1
- next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
+ next if all_records_exist?
:posts, results.map { |p| "nid:#{p["nid"]}" } create_posts(results, total: total_count, offset: offset) do |row| { - id: "nid:#{row['nid']}", - user_id: user_id_from_imported_user_id(row['uid']) || -1, - category: category_id_from_imported_category_id((row['tid'] || '').split(',')[0]), - raw: row['body'], - created_at: Time.zone.at(row['created']), + id: "nid:#{row["nid"]}", + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + category: category_id_from_imported_category_id((row["tid"] || "").split(",")[0]), + raw: row["body"], + created_at: Time.zone.at(row["created"]), pinned_at: nil, - title: row['title'].try(:strip) + title: row["title"].try(:strip), } end end end def create_direct_replies - puts '', "creating replies in topics" + puts "", "creating replies in topics" - total_count = @client.query(" + total_count = + @client.query( + " SELECT COUNT(*) count FROM node n WHERE n.type = 'answer' - AND n.status = 1;").first['count'] + AND n.status = 1;", + ).first[ + "count" + ] batch_size = 1000 batches(batch_size) do |offset| - - results = @client.query(" + results = + @client.query( + " SELECT n.nid AS cid, q.field_answer_question_nid AS nid, n.uid, @@ -100,25 +118,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal AND n.type = 'answer' LIMIT #{batch_size} OFFSET #{offset} - ", cache_rows: false) + ", + cache_rows: false, + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" } + next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" } create_posts(results, total: total_count, offset: offset) do |row| - topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") + topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}") if topic_mapping && topic_id = topic_mapping[:topic_id] h = { - id: "cid:#{row['cid']}", + id: "cid:#{row["cid"]}", topic_id: topic_id, - user_id: user_id_from_imported_user_id(row['uid']) || -1, - raw: row['body'], - created_at: Time.zone.at(row['created']), + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + raw: row["body"], + created_at: Time.zone.at(row["created"]), } h else - puts "No topic found for answer #{row['cid']}" + puts "No topic found for answer #{row["cid"]}" nil end end @@ -126,21 +146,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal end def create_nested_replies - puts '', "creating nested replies to posts in topics" + puts "", "creating nested replies to posts in topics" - total_count = @client.query(" + total_count = + @client.query( + " SELECT COUNT(c.cid) count FROM node n INNER JOIN comment AS c ON n.nid = c.nid WHERE n.type = 'question' - AND n.status = 1;").first['count'] + AND n.status = 1;", + ).first[ + "count" + ] batch_size = 1000 batches(batch_size) do |offset| - # WARNING: If there are more than 1000000 this might have to be revisited - results = @client.query(" + results = + @client.query( + " SELECT (c.cid + 1000000) as cid, c.nid, c.uid, @@ -153,45 +179,53 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal AND n.type = 'question' LIMIT #{batch_size} OFFSET #{offset} - ", cache_rows: false) + ", + cache_rows: false, + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" } + next if all_records_exist? 
:posts, results.map { |p| "cid:#{p["cid"]}" } create_posts(results, total: total_count, offset: offset) do |row| - topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") + topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}") if topic_mapping && topic_id = topic_mapping[:topic_id] h = { - id: "cid:#{row['cid']}", + id: "cid:#{row["cid"]}", topic_id: topic_id, - user_id: user_id_from_imported_user_id(row['uid']) || -1, - raw: row['body'], - created_at: Time.zone.at(row['created']), + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + raw: row["body"], + created_at: Time.zone.at(row["created"]), } h else - puts "No topic found for comment #{row['cid']}" + puts "No topic found for comment #{row["cid"]}" nil end end end - puts '', "creating nested replies to answers in topics" + puts "", "creating nested replies to answers in topics" - total_count = @client.query(" + total_count = + @client.query( + " SELECT COUNT(c.cid) count FROM node n INNER JOIN comment AS c ON n.nid = c.nid WHERE n.type = 'answer' - AND n.status = 1;").first['count'] + AND n.status = 1;", + ).first[ + "count" + ] batch_size = 1000 batches(batch_size) do |offset| - # WARNING: If there are more than 1000000 this might have to be revisited - results = @client.query(" + results = + @client.query( + " SELECT (c.cid + 1000000) as cid, q.field_answer_question_nid AS nid, c.uid, @@ -205,25 +239,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal AND n.type = 'answer' LIMIT #{batch_size} OFFSET #{offset} - ", cache_rows: false) + ", + cache_rows: false, + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" } + next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" } create_posts(results, total: total_count, offset: offset) do |row| - topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") + topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}") if topic_mapping && topic_id = topic_mapping[:topic_id] h = { - id: "cid:#{row['cid']}", + id: "cid:#{row["cid"]}", topic_id: topic_id, - user_id: user_id_from_imported_user_id(row['uid']) || -1, - raw: row['body'], - created_at: Time.zone.at(row['created']), + user_id: user_id_from_imported_user_id(row["uid"]) || -1, + raw: row["body"], + created_at: Time.zone.at(row["created"]), } h else - puts "No topic found for comment #{row['cid']}" + puts "No topic found for comment #{row["cid"]}" nil end end @@ -234,9 +270,6 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal create_direct_replies create_nested_replies end - end -if __FILE__ == $0 - ImportScripts::DrupalQA.new.perform -end +ImportScripts::DrupalQA.new.perform if __FILE__ == $0 diff --git a/script/import_scripts/elgg.rb b/script/import_scripts/elgg.rb index 6eb62eb0314..1293e171793 100644 --- a/script/import_scripts/elgg.rb +++ b/script/import_scripts/elgg.rb @@ -1,22 +1,16 @@ # frozen_string_literal: true -require 'mysql2' +require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Elgg < ImportScripts::Base - BATCH_SIZE ||= 1000 def initialize super - @client = Mysql2::Client.new( - host: "127.0.0.1", - port: "3306", - username: "", - database: "", - password: "" - ) + @client = + Mysql2::Client.new(host: "127.0.0.1", port: "3306", username: "", database: "", password: "") SiteSetting.max_username_length = 50 end @@ -31,7 +25,7 @@ class ImportScripts::Elgg < ImportScripts::Base def create_avatar(user, guid) puts "#{@path}" # Put your 
avatar at the root of discourse in this folder: - path_prefix = 'import/data/www/' + path_prefix = "import/data/www/" # https://github.com/Elgg/Elgg/blob/2fc9c1910a9169bbe4010026c61d8e41a5b56239/engine/classes/ElggDiskFilestore.php#L24 # const BUCKET_SIZE = 5000; bucket_size = 5000 @@ -40,13 +34,11 @@ class ImportScripts::Elgg < ImportScripts::Base bucket_id = [guid / bucket_size * bucket_size, 1].max avatar_path = File.join(path_prefix, bucket_id.to_s, "/#{guid}/profile/#{guid}master.jpg") - if File.exist?(avatar_path) - @uploader.create_avatar(user, avatar_path) - end + @uploader.create_avatar(user, avatar_path) if File.exist?(avatar_path) end def grant_admin(user, is_admin) - if is_admin == 'yes' + if is_admin == "yes" puts "", "#{user.username} is granted admin!" user.grant_admin! end @@ -56,10 +48,11 @@ class ImportScripts::Elgg < ImportScripts::Base puts "", "importing users..." last_user_id = -1 - total_users = mysql_query("select count(*) from elgg_users_entity where banned='no'").first["count"] + total_users = + mysql_query("select count(*) from elgg_users_entity where banned='no'").first["count"] batches(BATCH_SIZE) do |offset| - users = mysql_query(<<-SQL + users = mysql_query(<<-SQL).to_a select eue.guid, eue.username, eue.name, eue.email, eue.admin, max(case when ems1.string='cae_structure' then ems2.string end)cae_structure, max(case when ems1.string='location' then ems2.string end)location, @@ -76,7 +69,6 @@ class ImportScripts::Elgg < ImportScripts::Base group by eue.guid LIMIT #{BATCH_SIZE} SQL - ).to_a break if users.empty? @@ -97,11 +89,12 @@ class ImportScripts::Elgg < ImportScripts::Base name: u["name"], website: u["website"], bio_raw: u["briefdescription"].to_s + " " + u["cae_structure"].to_s, - post_create_action: proc do |user| - create_avatar(user, u["guid"]) - #add_user_to_group(user, u["cae_structure"]) - grant_admin(user, u["admin"]) - end + post_create_action: + proc do |user| + create_avatar(user, u["guid"]) + #add_user_to_group(user, u["cae_structure"]) + grant_admin(user, u["admin"]) + end, } end end @@ -115,9 +108,9 @@ class ImportScripts::Elgg < ImportScripts::Base create_categories(categories) do |c| { - id: c['guid'], - name: CGI.unescapeHTML(c['name']), - description: CGI.unescapeHTML(c['description']) + id: c["guid"], + name: CGI.unescapeHTML(c["name"]), + description: CGI.unescapeHTML(c["description"]), } end end @@ -125,10 +118,13 @@ class ImportScripts::Elgg < ImportScripts::Base def import_topics puts "", "creating topics" - total_count = mysql_query("select count(*) count from elgg_entities where subtype = 32;").first["count"] + total_count = + mysql_query("select count(*) count from elgg_entities where subtype = 32;").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT ee.guid id, owner_guid user_id, @@ -143,30 +139,35 @@ class ImportScripts::Elgg < ImportScripts::Base ORDER BY ee.guid LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ") + ", + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |m| m['id'].to_i } + next if all_records_exist? 
:posts, results.map { |m| m["id"].to_i } create_posts(results, total: total_count, offset: offset) do |m| { - id: m['id'], - user_id: user_id_from_imported_user_id(m['user_id']) || -1, - raw: CGI.unescapeHTML(m['raw']), - created_at: Time.zone.at(m['created_at']), - category: category_id_from_imported_category_id(m['category_id']), - title: CGI.unescapeHTML(m['title']), - post_create_action: proc do |post| - tag_names = mysql_query(" + id: m["id"], + user_id: user_id_from_imported_user_id(m["user_id"]) || -1, + raw: CGI.unescapeHTML(m["raw"]), + created_at: Time.zone.at(m["created_at"]), + category: category_id_from_imported_category_id(m["category_id"]), + title: CGI.unescapeHTML(m["title"]), + post_create_action: + proc do |post| + tag_names = + mysql_query( + " select ms.string from elgg_metadata md join elgg_metastrings ms on md.value_id = ms.id where name_id = 43 - and entity_guid = #{m['id']}; - ").map { |tag| tag['string'] } - DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names) - end + and entity_guid = #{m["id"]}; + ", + ).map { |tag| tag["string"] } + DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names) + end, } end end @@ -179,10 +180,13 @@ class ImportScripts::Elgg < ImportScripts::Base def import_posts puts "", "creating posts" - total_count = mysql_query("SELECT count(*) count FROM elgg_entities WHERE subtype = 42").first["count"] + total_count = + mysql_query("SELECT count(*) count FROM elgg_entities WHERE subtype = 42").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT ee.guid id, container_guid topic_id, @@ -195,19 +199,20 @@ class ImportScripts::Elgg < ImportScripts::Base ORDER BY ee.guid LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ") + ", + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |m| m['id'].to_i } + next if all_records_exist? 
:posts, results.map { |m| m["id"].to_i } create_posts(results, total: total_count, offset: offset) do |m| { - id: m['id'], - user_id: user_id_from_imported_user_id(m['user_id']) || -1, - topic_id: topic_lookup_from_imported_post_id(m['topic_id'])[:topic_id], - raw: CGI.unescapeHTML(m['raw']), - created_at: Time.zone.at(m['created_at']), + id: m["id"], + user_id: user_id_from_imported_user_id(m["user_id"]) || -1, + topic_id: topic_lookup_from_imported_post_id(m["topic_id"])[:topic_id], + raw: CGI.unescapeHTML(m["raw"]), + created_at: Time.zone.at(m["created_at"]), } end end @@ -216,7 +221,6 @@ class ImportScripts::Elgg < ImportScripts::Base def mysql_query(sql) @client.query(sql, cache_rows: false) end - end ImportScripts::Elgg.new.perform diff --git a/script/import_scripts/flarum_import.rb b/script/import_scripts/flarum_import.rb index ea4eb3dddfe..737ee3a86e5 100644 --- a/script/import_scripts/flarum_import.rb +++ b/script/import_scripts/flarum_import.rb @@ -1,60 +1,62 @@ # frozen_string_literal: true require "mysql2" -require 'time' -require 'date' +require "time" +require "date" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::FLARUM < ImportScripts::Base #SET THE APPROPRIATE VALUES FOR YOUR MYSQL CONNECTION - FLARUM_HOST ||= ENV['FLARUM_HOST'] || "db_host" - FLARUM_DB ||= ENV['FLARUM_DB'] || "db_name" + FLARUM_HOST ||= ENV["FLARUM_HOST"] || "db_host" + FLARUM_DB ||= ENV["FLARUM_DB"] || "db_name" BATCH_SIZE ||= 1000 - FLARUM_USER ||= ENV['FLARUM_USER'] || "db_user" - FLARUM_PW ||= ENV['FLARUM_PW'] || "db_user_pass" + FLARUM_USER ||= ENV["FLARUM_USER"] || "db_user" + FLARUM_PW ||= ENV["FLARUM_PW"] || "db_user_pass" def initialize super - @client = Mysql2::Client.new( - host: FLARUM_HOST, - username: FLARUM_USER, - password: FLARUM_PW, - database: FLARUM_DB - ) + @client = + Mysql2::Client.new( + host: FLARUM_HOST, + username: FLARUM_USER, + password: FLARUM_PW, + database: FLARUM_DB, + ) end def execute - import_users import_categories import_posts - end def import_users - puts '', "creating users" - total_count = mysql_query("SELECT count(*) count FROM users;").first['count'] + puts "", "creating users" + total_count = mysql_query("SELECT count(*) count FROM users;").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT id, username, email, joined_at, last_seen_at + results = + mysql_query( + "SELECT id, username, email, joined_at, last_seen_at FROM users LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 next if all_records_exist? :users, results.map { |u| u["id"].to_i } create_users(results, total: total_count, offset: offset) do |user| - { id: user['id'], - email: user['email'], - username: user['username'], - name: user['username'], - created_at: user['joined_at'], - last_seen_at: user['last_seen_at'] + { + id: user["id"], + email: user["email"], + username: user["username"], + name: user["username"], + created_at: user["joined_at"], + last_seen_at: user["last_seen_at"], } end end @@ -63,30 +65,31 @@ class ImportScripts::FLARUM < ImportScripts::Base def import_categories puts "", "importing top level categories..." 
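 # Flarum stores a single flat tags table rather than a category tree, so the
 # same rows are imported twice below: first as top-level categories keyed by
 # id, then as children keyed "child#<id>", the import ids that import_posts
 # later resolves via category_id_from_imported_category_id.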
- categories = mysql_query(" + categories = + mysql_query( + " SELECT id, name, description, position FROM tags ORDER BY position ASC - ").to_a + ", + ).to_a - create_categories(categories) do |category| - { - id: category["id"], - name: category["name"] - } - end + create_categories(categories) { |category| { id: category["id"], name: category["name"] } } puts "", "importing children categories..." - children_categories = mysql_query(" + children_categories = + mysql_query( + " SELECT id, name, description, position FROM tags ORDER BY position - ").to_a + ", + ).to_a create_categories(children_categories) do |category| { - id: "child##{category['id']}", + id: "child##{category["id"]}", name: category["name"], description: category["description"], } @@ -99,7 +102,9 @@ class ImportScripts::FLARUM < ImportScripts::Base total_count = mysql_query("SELECT count(*) count from posts").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT p.id id, d.id topic_id, d.title title, @@ -116,29 +121,30 @@ class ImportScripts::FLARUM < ImportScripts::Base ORDER BY p.created_at LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ").to_a + ", + ).to_a break if results.size < 1 - next if all_records_exist? :posts, results.map { |m| m['id'].to_i } + next if all_records_exist? :posts, results.map { |m| m["id"].to_i } create_posts(results, total: total_count, offset: offset) do |m| skip = false mapped = {} - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_FLARUM_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['created_at']) + mapped[:id] = m["id"] + mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1 + mapped[:raw] = process_FLARUM_post(m["raw"], m["id"]) + mapped[:created_at] = Time.zone.at(m["created_at"]) - if m['id'] == m['first_post_id'] - mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}") - mapped[:title] = CGI.unescapeHTML(m['title']) + if m["id"] == m["first_post_id"] + mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}") + mapped[:title] = CGI.unescapeHTML(m["title"]) else - parent = topic_lookup_from_imported_post_id(m['first_post_id']) + parent = topic_lookup_from_imported_post_id(m["first_post_id"]) if parent mapped[:topic_id] = parent[:topic_id] else - puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" + puts "Parent post #{m["first_post_id"]} doesn't exist. 
Skipping #{m["id"]}: #{m["title"][0..40]}" skip = true end end diff --git a/script/import_scripts/fluxbb.rb b/script/import_scripts/fluxbb.rb index 38b84ed3c17..9af64457c80 100644 --- a/script/import_scripts/fluxbb.rb +++ b/script/import_scripts/fluxbb.rb @@ -17,23 +17,23 @@ export FLUXBB_PREFIX="" # Call it like this: # RAILS_ENV=production bundle exec ruby script/import_scripts/fluxbb.rb class ImportScripts::FluxBB < ImportScripts::Base - - FLUXBB_HOST ||= ENV['FLUXBB_HOST'] || "localhost" - FLUXBB_DB ||= ENV['FLUXBB_DB'] || "fluxbb" + FLUXBB_HOST ||= ENV["FLUXBB_HOST"] || "localhost" + FLUXBB_DB ||= ENV["FLUXBB_DB"] || "fluxbb" BATCH_SIZE ||= 1000 - FLUXBB_USER ||= ENV['FLUXBB_USER'] || "root" - FLUXBB_PW ||= ENV['FLUXBB_PW'] || "" - FLUXBB_PREFIX ||= ENV['FLUXBB_PREFIX'] || "" + FLUXBB_USER ||= ENV["FLUXBB_USER"] || "root" + FLUXBB_PW ||= ENV["FLUXBB_PW"] || "" + FLUXBB_PREFIX ||= ENV["FLUXBB_PREFIX"] || "" def initialize super - @client = Mysql2::Client.new( - host: FLUXBB_HOST, - username: FLUXBB_USER, - password: FLUXBB_PW, - database: FLUXBB_DB - ) + @client = + Mysql2::Client.new( + host: FLUXBB_HOST, + username: FLUXBB_USER, + password: FLUXBB_PW, + database: FLUXBB_DB, + ) end def execute @@ -45,64 +45,67 @@ class ImportScripts::FluxBB < ImportScripts::Base end def import_groups - puts '', "creating groups" + puts "", "creating groups" - results = mysql_query( - "SELECT g_id id, g_title name, g_user_title title - FROM #{FLUXBB_PREFIX}groups") + results = + mysql_query( + "SELECT g_id id, g_title name, g_user_title title + FROM #{FLUXBB_PREFIX}groups", + ) - customgroups = results.select { |group| group['id'] > 2 } + customgroups = results.select { |group| group["id"] > 2 } create_groups(customgroups) do |group| - { id: group['id'], - name: group['name'], - title: group['title'] } + { id: group["id"], name: group["name"], title: group["title"] } end end def import_users - puts '', "creating users" + puts "", "creating users" - total_count = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}users;").first['count'] + total_count = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}users;").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT id, username, realname name, url website, email email, registered created_at, + results = + mysql_query( + "SELECT id, username, realname name, url website, email email, registered created_at, registration_ip registration_ip_address, last_visit last_visit_time, last_email_sent last_emailed_at, location, group_id FROM #{FLUXBB_PREFIX}users LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 next if all_records_exist? :users, results.map { |u| u["id"].to_i } create_users(results, total: total_count, offset: offset) do |user| - { id: user['id'], - email: user['email'], - username: user['username'], - name: user['name'], - created_at: Time.zone.at(user['created_at']), - website: user['website'], - registration_ip_address: user['registration_ip_address'], - last_seen_at: Time.zone.at(user['last_visit_time']), - last_emailed_at: user['last_emailed_at'] == nil ? 
0 : Time.zone.at(user['last_emailed_at']), - location: user['location'], - moderator: user['group_id'] == 2, - admin: user['group_id'] == 1 } + { + id: user["id"], + email: user["email"], + username: user["username"], + name: user["name"], + created_at: Time.zone.at(user["created_at"]), + website: user["website"], + registration_ip_address: user["registration_ip_address"], + last_seen_at: Time.zone.at(user["last_visit_time"]), + last_emailed_at: + user["last_emailed_at"] == nil ? 0 : Time.zone.at(user["last_emailed_at"]), + location: user["location"], + moderator: user["group_id"] == 2, + admin: user["group_id"] == 1, + } end - groupusers = results.select { |user| user['group_id'] > 2 } + groupusers = results.select { |user| user["group_id"] > 2 } groupusers.each do |user| - if user['group_id'] - user_id = user_id_from_imported_user_id(user['id']) - group_id = group_id_from_imported_group_id(user['group_id']) + if user["group_id"] + user_id = user_id_from_imported_user_id(user["id"]) + group_id = group_id_from_imported_group_id(user["group_id"]) - if user_id && group_id - GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) - end + GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) if user_id && group_id end end end @@ -111,33 +114,34 @@ class ImportScripts::FluxBB < ImportScripts::Base def import_categories puts "", "importing top level categories..." - categories = mysql_query(" + categories = + mysql_query( + " SELECT id, cat_name name, disp_position position FROM #{FLUXBB_PREFIX}categories ORDER BY id ASC - ").to_a + ", + ).to_a - create_categories(categories) do |category| - { - id: category["id"], - name: category["name"] - } - end + create_categories(categories) { |category| { id: category["id"], name: category["name"] } } puts "", "importing children categories..." - children_categories = mysql_query(" + children_categories = + mysql_query( + " SELECT id, forum_name name, forum_desc description, disp_position position, cat_id parent_category_id FROM #{FLUXBB_PREFIX}forums ORDER BY id - ").to_a + ", + ).to_a create_categories(children_categories) do |category| { - id: "child##{category['id']}", + id: "child##{category["id"]}", name: category["name"], description: category["description"], - parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]) + parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]), } end end @@ -148,7 +152,9 @@ class ImportScripts::FluxBB < ImportScripts::Base total_count = mysql_query("SELECT count(*) count from #{FLUXBB_PREFIX}posts").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT p.id id, t.id topic_id, t.forum_id category_id, @@ -163,29 +169,30 @@ class ImportScripts::FluxBB < ImportScripts::Base ORDER BY p.posted LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ").to_a + ", + ).to_a break if results.size < 1 - next if all_records_exist? :posts, results.map { |m| m['id'].to_i } + next if all_records_exist? 
:posts, results.map { |m| m["id"].to_i }

 create_posts(results, total: total_count, offset: offset) do |m|
 skip = false
 mapped = {}

- mapped[:id] = m['id']
- mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
- mapped[:raw] = process_fluxbb_post(m['raw'], m['id'])
- mapped[:created_at] = Time.zone.at(m['created_at'])
+ mapped[:id] = m["id"]
+ mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
+ mapped[:raw] = process_fluxbb_post(m["raw"], m["id"])
+ mapped[:created_at] = Time.zone.at(m["created_at"])

- if m['id'] == m['first_post_id']
- mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}")
- mapped[:title] = CGI.unescapeHTML(m['title'])
+ if m["id"] == m["first_post_id"]
+ mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}")
+ mapped[:title] = CGI.unescapeHTML(m["title"])
 else
- parent = topic_lookup_from_imported_post_id(m['first_post_id'])
+ parent = topic_lookup_from_imported_post_id(m["first_post_id"])
 if parent
 mapped[:topic_id] = parent[:topic_id]
 else
- puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
+ puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
 skip = true
 end
 end
@@ -196,16 +203,16 @@
 end

 def suspend_users
- puts '', "updating banned users"
+ puts "", "updating banned users"
 banned = 0
 failed = 0
- total = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}bans").first['count']
+ total = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}bans").first["count"]

 system_user = Discourse.system_user

 mysql_query("SELECT username, email FROM #{FLUXBB_PREFIX}bans").each do |b|
- user = User.find_by_email(b['email'])
+ user = User.find_by_email(b["email"])
 if user
 user.suspended_at = Time.now
 user.suspended_till = 200.years.from_now
@@ -218,7 +225,7 @@
 failed += 1
 end
 else
- puts "Not found: #{b['email']}"
+ puts "Not found: #{b["email"]}"
 failed += 1
 end

@@ -233,15 +240,15 @@
 s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')

 # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
- s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
+ s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')

 # Many bbcode tags have a hash attached to them. Examples:
 # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky]
 # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
- s.gsub!(/:(?:\w{8})\]/, ']')
+ s.gsub!(/:(?:\w{8})\]/, "]")

 # Remove video tags.
- s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
+ s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")

 s = CGI.unescapeHTML(s)

@@ -249,7 +256,7 @@
 # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
 #
 # Work around it for now:
- s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
+ s.gsub!(%r{\[http(s)?://(www\.)?}, "[")

 s
 end
diff --git a/script/import_scripts/friendsmegplus.rb b/script/import_scripts/friendsmegplus.rb
index d66eed12cc1..3bcab17c90e 100644
--- a/script/import_scripts/friendsmegplus.rb
+++ b/script/import_scripts/friendsmegplus.rb
@@ -2,7 +2,7 @@

 require File.expand_path(File.dirname(__FILE__) + "/base.rb")

-require 'csv'
+require "csv"

 # Importer for Friends+Me Google+ Exporter (F+MG+E) output.
# @@ -32,18 +32,18 @@ require 'csv' # Edit values at the top of the script to fit your preferences class ImportScripts::FMGP < ImportScripts::Base - def initialize super # Set this to the base URL for the site; required for importing videos # typically just 'https:' in production - @site_base_url = 'http://localhost:3000' + @site_base_url = "http://localhost:3000" @system_user = Discourse.system_user - SiteSetting.max_image_size_kb = 40960 - SiteSetting.max_attachment_size_kb = 40960 + SiteSetting.max_image_size_kb = 40_960 + SiteSetting.max_attachment_size_kb = 40_960 # handle the same video extension as the rest of Discourse - SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + ['mp4', 'mov', 'webm', 'ogv']).uniq.join("|") + SiteSetting.authorized_extensions = + (SiteSetting.authorized_extensions.split("|") + %w[mp4 mov webm ogv]).uniq.join("|") @invalid_bounce_score = 5.0 @min_title_words = 3 @max_title_words = 14 @@ -76,7 +76,7 @@ class ImportScripts::FMGP < ImportScripts::Base @allowlist = nil # Tags to apply to every topic; empty Array to not have any tags applied everywhere - @globaltags = [ "gplus" ] + @globaltags = ["gplus"] @imagefiles = nil @@ -101,34 +101,30 @@ class ImportScripts::FMGP < ImportScripts::Base @first_date = nil # every argument is a filename, do the right thing based on the file name ARGV.each do |arg| - if arg.end_with?('.csv') + if arg.end_with?(".csv") # CSV files produced by F+MG+E have "URL";"IsDownloaded";"FileName";"FilePath";"FileSize" - CSV.foreach(arg, headers: true, col_sep: ';') do |row| - @images[row[0]] = { - filename: row[2], - filepath: row[3], - filesize: row[4] - } + CSV.foreach(arg, headers: true, col_sep: ";") do |row| + @images[row[0]] = { filename: row[2], filepath: row[3], filesize: row[4] } end elsif arg.end_with?("upload-paths.txt") @imagefiles = File.open(arg, "w") - elsif arg.end_with?('categories.json') + elsif arg.end_with?("categories.json") @categories_filename = arg @categories = load_fmgp_json(arg) elsif arg.end_with?("usermap.json") @usermap = load_fmgp_json(arg) - elsif arg.end_with?('blocklist.json') + elsif arg.end_with?("blocklist.json") @blocklist = load_fmgp_json(arg).map { |i| i.to_s }.to_set - elsif arg.end_with?('allowlist.json') + elsif arg.end_with?("allowlist.json") @allowlist = load_fmgp_json(arg).map { |i| i.to_s }.to_set - elsif arg.end_with?('.json') + elsif arg.end_with?(".json") @feeds << load_fmgp_json(arg) - elsif arg == '--dry-run' + elsif arg == "--dry-run" @dryrun = true elsif arg.start_with?("--last-date=") - @last_date = Time.zone.parse(arg.gsub(/.*=/, '')) + @last_date = Time.zone.parse(arg.gsub(/.*=/, "")) elsif arg.start_with?("--first-date=") - @first_date = Time.zone.parse(arg.gsub(/.*=/, '')) + @first_date = Time.zone.parse(arg.gsub(/.*=/, "")) else raise RuntimeError.new("unknown argument #{arg}") end @@ -153,7 +149,6 @@ class ImportScripts::FMGP < ImportScripts::Base @blocked_posts = 0 # count uploaded file size @totalsize = 0 - end def execute @@ -222,7 +217,9 @@ class ImportScripts::FMGP < ImportScripts::Base categories_new = "#{@categories_filename}.new" File.open(categories_new, "w") do |f| f.write(@categories.to_json) - raise RuntimeError.new("Category file missing categories for #{incomplete_categories}, edit #{categories_new} and rename it to #{@category_filename} before running the same import") + raise RuntimeError.new( + "Category file missing categories for #{incomplete_categories}, edit #{categories_new} and rename it to #{@category_filename} before 
running the same import", + ) end end end @@ -233,28 +230,32 @@ class ImportScripts::FMGP < ImportScripts::Base @categories.each do |id, cat| if cat["parent"].present? && !cat["parent"].empty? # Two separate sub-categories can have the same name, so need to identify by parent - Category.where(name: cat["category"]).each do |category| - parent = Category.where(id: category.parent_category_id).first - @cats[id] = category if parent.name == cat["parent"] - end + Category + .where(name: cat["category"]) + .each do |category| + parent = Category.where(id: category.parent_category_id).first + @cats[id] = category if parent.name == cat["parent"] + end else if category = Category.where(name: cat["category"]).first @cats[id] = category elsif @create_categories params = {} - params[:name] = cat['category'] + params[:name] = cat["category"] params[:id] = id - puts "Creating #{cat['category']}" + puts "Creating #{cat["category"]}" category = create_category(params, id) @cats[id] = category end end - raise RuntimeError.new("Could not find category #{cat["category"]} for #{cat}") if @cats[id].nil? + if @cats[id].nil? + raise RuntimeError.new("Could not find category #{cat["category"]} for #{cat}") + end end end def import_users - puts '', "Importing Google+ post and comment author users..." + puts "", "Importing Google+ post and comment author users..." # collect authors of both posts and comments @feeds.each do |feed| @@ -263,14 +264,10 @@ class ImportScripts::FMGP < ImportScripts::Base community["categories"].each do |category| category["posts"].each do |post| import_author_user(post["author"]) - if post["message"].present? - import_message_users(post["message"]) - end + import_message_users(post["message"]) if post["message"].present? post["comments"].each do |comment| import_author_user(comment["author"]) - if comment["message"].present? - import_message_users(comment["message"]) - end + import_message_users(comment["message"]) if comment["message"].present? end end end @@ -282,12 +279,7 @@ class ImportScripts::FMGP < ImportScripts::Base # now create them all create_users(@newusers) do |id, u| - { - id: id, - email: u[:email], - name: u[:name], - post_create_action: u[:post_create_action] - } + { id: id, email: u[:email], name: u[:name], post_create_action: u[:post_create_action] } end end @@ -308,7 +300,8 @@ class ImportScripts::FMGP < ImportScripts::Base def import_google_user(id, name) if !@emails[id].present? - google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: id.to_i) + google_user_info = + UserAssociatedAccount.find_by(provider_name: "google_oauth2", provider_uid: id.to_i) if google_user_info.nil? 
# create new google user on system; expect this user to merge # when they later log in with google authentication @@ -320,36 +313,39 @@ class ImportScripts::FMGP < ImportScripts::Base @newusers[id] = { email: email, name: name, - post_create_action: proc do |newuser| - newuser.approved = true - newuser.approved_by_id = @system_user.id - newuser.approved_at = newuser.created_at - if @blocklist.include?(id.to_s) - now = DateTime.now - forever = 1000.years.from_now - # you can suspend as well if you want your blocklist to - # be hard to recover from - #newuser.suspended_at = now - #newuser.suspended_till = forever - newuser.silenced_till = forever - end - newuser.save - @users[id] = newuser - UserAssociatedAccount.create(provider_name: 'google_oauth2', user_id: newuser.id, provider_uid: id) - # Do not send email to the invalid email addresses - # this can be removed after merging with #7162 - s = UserStat.where(user_id: newuser.id).first - s.bounce_score = @invalid_bounce_score - s.reset_bounce_score_after = 1000.years.from_now - s.save - end + post_create_action: + proc do |newuser| + newuser.approved = true + newuser.approved_by_id = @system_user.id + newuser.approved_at = newuser.created_at + if @blocklist.include?(id.to_s) + now = DateTime.now + forever = 1000.years.from_now + # you can suspend as well if you want your blocklist to + # be hard to recover from + #newuser.suspended_at = now + #newuser.suspended_till = forever + newuser.silenced_till = forever + end + newuser.save + @users[id] = newuser + UserAssociatedAccount.create( + provider_name: "google_oauth2", + user_id: newuser.id, + provider_uid: id, + ) + # Do not send email to the invalid email addresses + # this can be removed after merging with #7162 + s = UserStat.where(user_id: newuser.id).first + s.bounce_score = @invalid_bounce_score + s.reset_bounce_score_after = 1000.years.from_now + s.save + end, } else # user already on system u = User.find(google_user_info.user_id) - if u.silenced? || u.suspended? - @blocklist.add(id) - end + @blocklist.add(id) if u.silenced? || u.suspended? @users[id] = u email = u.email end @@ -362,7 +358,7 @@ class ImportScripts::FMGP < ImportScripts::Base # - A google+ post is a discourse topic # - A google+ comment is a discourse post - puts '', "Importing Google+ posts and comments..." + puts "", "Importing Google+ posts and comments..." @feeds.each do |feed| feed["accounts"].each do |account| @@ -371,14 +367,16 @@ class ImportScripts::FMGP < ImportScripts::Base category["posts"].each do |post| # G+ post / Discourse topic import_topic(post, category) - print("\r#{@topics_imported}/#{@posts_imported} topics/posts (skipped: #{@topics_skipped}/#{@posts_skipped} blocklisted: #{@blocked_topics}/#{@blocked_posts}) ") + print( + "\r#{@topics_imported}/#{@posts_imported} topics/posts (skipped: #{@topics_skipped}/#{@posts_skipped} blocklisted: #{@blocked_topics}/#{@blocked_posts}) ", + ) end end end end end - puts '' + puts "" end def import_topic(post, category) @@ -431,9 +429,7 @@ class ImportScripts::FMGP < ImportScripts::Base return nil if !@frst_date.nil? && created_at < @first_date user_id = user_id_from_imported_user_id(post_author_id) - if user_id.nil? - user_id = @users[post["author"]["id"]].id - end + user_id = @users[post["author"]["id"]].id if user_id.nil? mapped = { id: post["id"], @@ -472,7 +468,8 @@ class ImportScripts::FMGP < ImportScripts::Base def title_text(post, created_at) words = message_text(post["message"]) - if words.empty? 
|| words.join("").length < @min_title_characters || words.length < @min_title_words + if words.empty? || words.join("").length < @min_title_characters || + words.length < @min_title_words # database has minimum length # short posts appear not to work well as titles most of the time (in practice) return untitled(post["author"]["name"], created_at) @@ -483,17 +480,13 @@ class ImportScripts::FMGP < ImportScripts::Base (@min_title_words..(words.length - 1)).each do |i| # prefer full stop - if words[i].end_with?(".") - lastword = i - end + lastword = i if words[i].end_with?(".") end if lastword.nil? # fall back on other punctuation (@min_title_words..(words.length - 1)).each do |i| - if words[i].end_with?(',', ';', ':', '?') - lastword = i - end + lastword = i if words[i].end_with?(",", ";", ":", "?") end end @@ -516,9 +509,7 @@ class ImportScripts::FMGP < ImportScripts::Base text_types = [0, 3] message.each do |fragment| if text_types.include?(fragment[0]) - fragment[1].split().each do |word| - words << word - end + fragment[1].split().each { |word| words << word } elsif fragment[0] == 2 # use the display text of a link words << fragment[1] @@ -543,14 +534,10 @@ class ImportScripts::FMGP < ImportScripts::Base lines << "\n#{formatted_link(post["image"]["proxy"])}\n" end if post["images"].present? - post["images"].each do |image| - lines << "\n#{formatted_link(image["proxy"])}\n" - end + post["images"].each { |image| lines << "\n#{formatted_link(image["proxy"])}\n" } end if post["videos"].present? - post["videos"].each do |video| - lines << "\n#{formatted_link(video["proxy"])}\n" - end + post["videos"].each { |video| lines << "\n#{formatted_link(video["proxy"])}\n" } end if post["link"].present? && post["link"]["url"].present? url = post["link"]["url"] @@ -575,12 +562,8 @@ class ImportScripts::FMGP < ImportScripts::Base if fragment[2].nil? text else - if fragment[2]["italic"].present? - text = "#{text}" - end - if fragment[2]["bold"].present? - text = "#{text}" - end + text = "#{text}" if fragment[2]["italic"].present? + text = "#{text}" if fragment[2]["bold"].present? if fragment[2]["strikethrough"].present? # s more likely than del to represent user intent? text = "#{text}" @@ -594,9 +577,7 @@ class ImportScripts::FMGP < ImportScripts::Base formatted_link_text(fragment[2], fragment[1]) elsif fragment[0] == 3 # reference to a user - if @usermap.include?(fragment[2].to_s) - return "@#{@usermap[fragment[2].to_s]}" - end + return "@#{@usermap[fragment[2].to_s]}" if @usermap.include?(fragment[2].to_s) if fragment[2].nil? 
# deleted G+ users show up with a null ID return "+#{fragment[1]}" @@ -606,12 +587,18 @@ class ImportScripts::FMGP < ImportScripts::Base # user was in this import's authors "@#{user.username} " else - if google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: fragment[2]) + if google_user_info = + UserAssociatedAccount.find_by( + provider_name: "google_oauth2", + provider_uid: fragment[2], + ) # user was not in this import, but has logged in or been imported otherwise user = User.find(google_user_info.user_id) "@#{user.username} " else - raise RuntimeError.new("Google user #{fragment[1]} (id #{fragment[2]}) not imported") if !@dryrun + if !@dryrun + raise RuntimeError.new("Google user #{fragment[1]} (id #{fragment[2]}) not imported") + end # if you want to fall back to their G+ name, just erase the raise above, # but this should not happen "+#{fragment[1]}" @@ -681,6 +668,4 @@ class ImportScripts::FMGP < ImportScripts::Base end end -if __FILE__ == $0 - ImportScripts::FMGP.new.perform -end +ImportScripts::FMGP.new.perform if __FILE__ == $0 diff --git a/script/import_scripts/getsatisfaction.rb b/script/import_scripts/getsatisfaction.rb index 50f8613e683..0458c84f946 100644 --- a/script/import_scripts/getsatisfaction.rb +++ b/script/import_scripts/getsatisfaction.rb @@ -22,15 +22,14 @@ # that correctly and will import the replies in the wrong order. # You should run `rake posts:reorder_posts` after the import. -require 'csv' -require 'set' +require "csv" +require "set" require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'reverse_markdown' # gem 'reverse_markdown' +require "reverse_markdown" # gem 'reverse_markdown' # Call it like this: # RAILS_ENV=production bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME class ImportScripts::GetSatisfaction < ImportScripts::Base - IMPORT_ARCHIVED_TOPICS = false # The script classifies each topic as private when at least one associated category @@ -85,22 +84,24 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base previous_line = nil File.open(target_filename, "w") do |file| - File.open(source_filename).each_line do |line| - line.gsub!(/(?\s*)?(.*?)<\/code>(\s*<\/pre>)?/mi) do + raw.gsub!(%r{(
<pre>\s*)?<code>(.*?)</code>(\s*</pre>
)?}mi) do code = $2 hoist = SecureRandom.hex # tidy code, wow, this is impressively crazy @@ -350,9 +347,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base # in this case double space works best ... so odd raw.gsub!(" ", "\n\n") - hoisted.each do |hoist, code| - raw.gsub!(hoist, "\n```\n#{code}\n```\n") - end + hoisted.each { |hoist, code| raw.gsub!(hoist, "\n```\n#{code}\n```\n") } raw = CGI.unescapeHTML(raw) raw = ReverseMarkdown.convert(raw) @@ -360,7 +355,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base end def create_permalinks - puts '', 'Creating Permalinks...', '' + puts "", "Creating Permalinks...", "" Topic.listable_topics.find_each do |topic| tcf = topic.first_post.custom_fields @@ -372,7 +367,6 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base end end end - end unless ARGV[0] && Dir.exist?(ARGV[0]) diff --git a/script/import_scripts/google_groups.rb b/script/import_scripts/google_groups.rb index 494346292c1..1b6fa4b420a 100755 --- a/script/import_scripts/google_groups.rb +++ b/script/import_scripts/google_groups.rb @@ -20,19 +20,18 @@ DEFAULT_COOKIES_TXT = "/shared/import/cookies.txt" ABORT_AFTER_SKIPPED_TOPIC_COUNT = 10 def driver - @driver ||= begin - chrome_args = ["disable-gpu"] - chrome_args << "headless" unless ENV["NOT_HEADLESS"] == '1' - chrome_args << "no-sandbox" if inside_container? - options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args) - Selenium::WebDriver.for(:chrome, options: options) - end + @driver ||= + begin + chrome_args = ["disable-gpu"] + chrome_args << "headless" unless ENV["NOT_HEADLESS"] == "1" + chrome_args << "no-sandbox" if inside_container? + options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args) + Selenium::WebDriver.for(:chrome, options: options) + end end def inside_container? - File.foreach("/proc/1/cgroup") do |line| - return true if line.include?("docker") - end + File.foreach("/proc/1/cgroup") { |line| return true if line.include?("docker") } false end @@ -79,35 +78,38 @@ def base_url end def crawl_topics - 1.step(nil, 100).each do |start| - url = "#{base_url}/#{@groupname}[#{start}-#{start + 99}]" - get(url) + 1 + .step(nil, 100) + .each do |start| + url = "#{base_url}/#{@groupname}[#{start}-#{start + 99}]" + get(url) - begin - if start == 1 && find("h2").text == "Error 403" - exit_with_error(<<~TEXT.red.bold) + begin + exit_with_error(<<~TEXT.red.bold) if start == 1 && find("h2").text == "Error 403" Unable to find topics. Try running the script with the "--domain example.com" option if you are a G Suite user and your group's URL contains a path with your domain that looks like "/a/example.com". TEXT + rescue Selenium::WebDriver::Error::NoSuchElementError + # Ignore this error. It simply means there wasn't an error. end - rescue Selenium::WebDriver::Error::NoSuchElementError - # Ignore this error. It simply means there wasn't an error. 
- end - topic_urls = extract(".subject a[href*='#{@groupname}']") { |a| a["href"].sub("/d/topic/", "/forum/?_escaped_fragment_=topic/") } - break if topic_urls.size == 0 + topic_urls = + extract(".subject a[href*='#{@groupname}']") do |a| + a["href"].sub("/d/topic/", "/forum/?_escaped_fragment_=topic/") + end + break if topic_urls.size == 0 - topic_urls.each do |topic_url| - crawl_topic(topic_url) + topic_urls.each do |topic_url| + crawl_topic(topic_url) - # abort if this in an incremental crawl and there were too many consecutive, skipped topics - if @finished && @skipped_topic_count > ABORT_AFTER_SKIPPED_TOPIC_COUNT - puts "Skipping all other topics, because this is an incremental crawl.".green - return + # abort if this in an incremental crawl and there were too many consecutive, skipped topics + if @finished && @skipped_topic_count > ABORT_AFTER_SKIPPED_TOPIC_COUNT + puts "Skipping all other topics, because this is an incremental crawl.".green + return + end end end - end end def crawl_topic(url) @@ -126,17 +128,14 @@ def crawl_topic(url) messages_crawled = false extract(".subject a[href*='#{@groupname}']") do |a| - [ - a["href"].sub("/d/msg/", "/forum/message/raw?msg="), - a["title"].empty? - ] + [a["href"].sub("/d/msg/", "/forum/message/raw?msg="), a["title"].empty?] end.each do |msg_url, might_be_deleted| messages_crawled |= crawl_message(msg_url, might_be_deleted) end @skipped_topic_count = skippable && messages_crawled ? 0 : @skipped_topic_count + 1 @scraped_topic_urls << url -rescue +rescue StandardError puts "Failed to scrape topic at #{url}".red raise if @abort_on_error end @@ -144,18 +143,16 @@ end def crawl_message(url, might_be_deleted) get(url) - filename = File.join(@path, "#{url[/#{@groupname}\/(.+)/, 1].sub("/", "-")}.eml") + filename = File.join(@path, "#{url[%r{#{@groupname}/(.+)}, 1].sub("/", "-")}.eml") content = find("pre")["innerText"] if !@first_message_checked @first_message_checked = true - if content.match?(/From:.*\.\.\.@.*/i) && !@force_import - exit_with_error(<<~TEXT.red.bold) + exit_with_error(<<~TEXT.red.bold) if content.match?(/From:.*\.\.\.@.*/i) && !@force_import It looks like you do not have permissions to see email addresses. Aborting. Use the --force option to import anyway. TEXT - end end old_md5 = Digest::MD5.file(filename) if File.exist?(filename) @@ -169,7 +166,7 @@ rescue Selenium::WebDriver::Error::NoSuchElementError puts "Failed to scrape message at #{url}".red raise if @abort_on_error end -rescue +rescue StandardError puts "Failed to scrape message at #{url}".red raise if @abort_on_error end @@ -178,10 +175,7 @@ def login puts "Logging in..." get("https://google.com/404") - add_cookies( - "myaccount.google.com", - "google.com" - ) + add_cookies("myaccount.google.com", "google.com") get("https://myaccount.google.com/?utm_source=sign_in_no_continue") @@ -193,20 +187,24 @@ def login end def add_cookies(*domains) - File.readlines(@cookies).each do |line| - parts = line.chomp.split("\t") - next if parts.size != 7 || !domains.any? { |domain| parts[0] =~ /^\.?#{Regexp.escape(domain)}$/ } + File + .readlines(@cookies) + .each do |line| + parts = line.chomp.split("\t") + if parts.size != 7 || !domains.any? { |domain| parts[0] =~ /^\.?#{Regexp.escape(domain)}$/ } + next + end - driver.manage.add_cookie( - domain: parts[0], - httpOnly: "true".casecmp?(parts[1]), - path: parts[2], - secure: "true".casecmp?(parts[3]), - expires: parts[4] == "0" ? 
nil : DateTime.strptime(parts[4], "%s"), - name: parts[5], - value: parts[6] - ) - end + driver.manage.add_cookie( + domain: parts[0], + httpOnly: "true".casecmp?(parts[1]), + path: parts[2], + secure: "true".casecmp?(parts[3]), + expires: parts[4] == "0" ? nil : DateTime.strptime(parts[4], "%s"), + name: parts[5], + value: parts[6], + ) + end end def wait_for_url @@ -240,10 +238,7 @@ def crawl crawl_topics @finished = true ensure - File.write(status_filename, { - finished: @finished, - urls: @scraped_topic_urls - }.to_yaml) + File.write(status_filename, { finished: @finished, urls: @scraped_topic_urls }.to_yaml) end elapsed = Time.now - start_time @@ -258,20 +253,25 @@ def parse_arguments @abort_on_error = false @cookies = DEFAULT_COOKIES_TXT if File.exist?(DEFAULT_COOKIES_TXT) - parser = OptionParser.new do |opts| - opts.banner = "Usage: google_groups.rb [options]" + parser = + OptionParser.new do |opts| + opts.banner = "Usage: google_groups.rb [options]" - opts.on("-g", "--groupname GROUPNAME") { |v| @groupname = v } - opts.on("-d", "--domain DOMAIN") { |v| @domain = v } - opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v } - opts.on("--path PATH", "output path for emails") { |v| @path = v } - opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") { @force_import = true } - opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") { @abort_on_error = true } - opts.on("-h", "--help") do - puts opts - exit + opts.on("-g", "--groupname GROUPNAME") { |v| @groupname = v } + opts.on("-d", "--domain DOMAIN") { |v| @domain = v } + opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v } + opts.on("--path PATH", "output path for emails") { |v| @path = v } + opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") do + @force_import = true + end + opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") do + @abort_on_error = true + end + opts.on("-h", "--help") do + puts opts + exit + end end - end begin parser.parse! @@ -279,10 +279,12 @@ def parse_arguments exit_with_error(e.message, "", parser) end - mandatory = [:groupname, :cookies] + mandatory = %i[groupname cookies] missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? } - exit_with_error("Missing arguments: #{missing.join(', ')}".red.bold, "", parser, "") if missing.any? + if missing.any? + exit_with_error("Missing arguments: #{missing.join(", ")}".red.bold, "", parser, "") + end exit_with_error("cookies.txt not found at #{@cookies}".red.bold, "") if !File.exist?(@cookies) @path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil? 
diff --git a/script/import_scripts/higher_logic.rb b/script/import_scripts/higher_logic.rb index fe6e19641dd..06820311264 100644 --- a/script/import_scripts/higher_logic.rb +++ b/script/import_scripts/higher_logic.rb @@ -4,7 +4,6 @@ require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::HigherLogic < ImportScripts::Base - HIGHERLOGIC_DB = "higherlogic" BATCH_SIZE = 1000 ATTACHMENT_DIR = "/shared/import/data/attachments" @@ -12,11 +11,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base def initialize super - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - database: HIGHERLOGIC_DB - ) + @client = Mysql2::Client.new(host: "localhost", username: "root", database: HIGHERLOGIC_DB) end def execute @@ -29,7 +24,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base end def import_groups - puts '', 'importing groups' + puts "", "importing groups" groups = mysql_query <<-SQL SELECT CommunityKey, CommunityName @@ -37,16 +32,11 @@ class ImportScripts::HigherLogic < ImportScripts::Base ORDER BY CommunityName SQL - create_groups(groups) do |group| - { - id: group['CommunityKey'], - name: group['CommunityName'] - } - end + create_groups(groups) { |group| { id: group["CommunityKey"], name: group["CommunityName"] } } end def import_users - puts '', 'importing users' + puts "", "importing users" total_count = mysql_query("SELECT count(*) FROM Contact").first["count"] batches(BATCH_SIZE) do |offset| @@ -59,43 +49,42 @@ class ImportScripts::HigherLogic < ImportScripts::Base break if results.size < 1 - next if all_records_exist? :users, results.map { |u| u['ContactKey'] } + next if all_records_exist? :users, results.map { |u| u["ContactKey"] } create_users(results, total: total_count, offset: offset) do |user| - next if user['EmailAddress'].blank? + next if user["EmailAddress"].blank? { - id: user['ContactKey'], - email: user['EmailAddress'], - name: "#{user['FirstName']} #{user['LastName']}", - created_at: user['CreatedOn'] == nil ? 0 : Time.zone.at(user['CreatedOn']), - bio_raw: user['Bio'], - active: user['UserStatus'] == "Active", - admin: user['HLAdminFlag'] == 1 + id: user["ContactKey"], + email: user["EmailAddress"], + name: "#{user["FirstName"]} #{user["LastName"]}", + created_at: user["CreatedOn"] == nil ? 0 : Time.zone.at(user["CreatedOn"]), + bio_raw: user["Bio"], + active: user["UserStatus"] == "Active", + admin: user["HLAdminFlag"] == 1, } end end end def import_group_users - puts '', 'importing group users' + puts "", "importing group users" - group_users = mysql_query(<<-SQL + group_users = mysql_query(<<-SQL).to_a SELECT CommunityKey, ContactKey FROM CommunityMember SQL - ).to_a group_users.each do |row| - next unless user_id = user_id_from_imported_user_id(row['ContactKey']) - next unless group_id = group_id_from_imported_group_id(row['CommunityKey']) - puts '', '.' + next unless user_id = user_id_from_imported_user_id(row["ContactKey"]) + next unless group_id = group_id_from_imported_group_id(row["CommunityKey"]) + puts "", "." 
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) end end def import_categories - puts '', 'importing categories' + puts "", "importing categories" categories = mysql_query <<-SQL SELECT DiscussionKey, DiscussionName @@ -103,15 +92,12 @@ class ImportScripts::HigherLogic < ImportScripts::Base SQL create_categories(categories) do |category| - { - id: category['DiscussionKey'], - name: category['DiscussionName'] - } + { id: category["DiscussionKey"], name: category["DiscussionName"] } end end def import_posts - puts '', 'importing topics and posts' + puts "", "importing topics and posts" total_count = mysql_query("SELECT count(*) FROM DiscussionPost").first["count"] batches(BATCH_SIZE) do |offset| @@ -131,28 +117,28 @@ class ImportScripts::HigherLogic < ImportScripts::Base SQL break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| p['MessageKey'] } + next if all_records_exist? :posts, results.map { |p| p["MessageKey"] } create_posts(results, total: total_count, offset: offset) do |post| - raw = preprocess_raw(post['Body']) + raw = preprocess_raw(post["Body"]) mapped = { - id: post['MessageKey'], - user_id: user_id_from_imported_user_id(post['ContactKey']), + id: post["MessageKey"], + user_id: user_id_from_imported_user_id(post["ContactKey"]), raw: raw, - created_at: Time.zone.at(post['CreatedOn']), + created_at: Time.zone.at(post["CreatedOn"]), } - if post['ParentMessageKey'].nil? - mapped[:category] = category_id_from_imported_category_id(post['DiscussionKey']).to_i - mapped[:title] = CGI.unescapeHTML(post['Subject']) - mapped[:pinned] = post['PinnedFlag'] == 1 + if post["ParentMessageKey"].nil? + mapped[:category] = category_id_from_imported_category_id(post["DiscussionKey"]).to_i + mapped[:title] = CGI.unescapeHTML(post["Subject"]) + mapped[:pinned] = post["PinnedFlag"] == 1 else - topic = topic_lookup_from_imported_post_id(post['ParentMessageKey']) + topic = topic_lookup_from_imported_post_id(post["ParentMessageKey"]) if topic.present? mapped[:topic_id] = topic[:topic_id] else - puts "Parent post #{post['ParentMessageKey']} doesn't exist. Skipping." + puts "Parent post #{post["ParentMessageKey"]} doesn't exist. Skipping." next end end @@ -163,20 +149,19 @@ class ImportScripts::HigherLogic < ImportScripts::Base end def import_attachments - puts '', 'importing attachments' + puts "", "importing attachments" count = 0 - total_attachments = mysql_query(<<-SQL + total_attachments = mysql_query(<<-SQL).first["count"] SELECT COUNT(*) count FROM LibraryEntryFile l JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey WHERE p.CreatedOn > '2020-01-01 00:00:00' SQL - ).first['count'] batches(BATCH_SIZE) do |offset| - attachments = mysql_query(<<-SQL + attachments = mysql_query(<<-SQL).to_a SELECT l.VersionName, l.FileExtension, p.MessageKey @@ -186,17 +171,16 @@ class ImportScripts::HigherLogic < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ).to_a break if attachments.empty? 
attachments.each do |a| print_status(count += 1, total_attachments, get_start_time("attachments")) - original_filename = "#{a['VersionName']}.#{a['FileExtension']}" + original_filename = "#{a["VersionName"]}.#{a["FileExtension"]}" path = File.join(ATTACHMENT_DIR, original_filename) if File.exist?(path) - if post = Post.find(post_id_from_imported_post_id(a['MessageKey'])) + if post = Post.find(post_id_from_imported_post_id(a["MessageKey"])) filename = File.basename(original_filename) upload = create_upload(post.user.id, path, filename) @@ -205,7 +189,9 @@ class ImportScripts::HigherLogic < ImportScripts::Base post.raw << "\n\n" << html post.save! - PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists? + unless PostUpload.where(post: post, upload: upload).exists? + PostUpload.create!(post: post, upload: upload) + end end end end @@ -217,7 +203,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base raw = body.dup # trim off any post text beyond ---- to remove email threading - raw = raw.slice(0..(raw.index('------'))) || raw + raw = raw.slice(0..(raw.index("------"))) || raw raw = HtmlToMarkdown.new(raw).to_markdown raw diff --git a/script/import_scripts/ipboard.rb b/script/import_scripts/ipboard.rb index dcd8cb03f52..4f5c5ed0bd4 100644 --- a/script/import_scripts/ipboard.rb +++ b/script/import_scripts/ipboard.rb @@ -3,13 +3,13 @@ require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'htmlentities' +require "htmlentities" begin - require 'reverse_markdown' # https://github.com/jqr/php-serialize + require "reverse_markdown" # https://github.com/jqr/php-serialize rescue LoadError puts - puts 'reverse_markdown not found.' - puts 'Add to Gemfile, like this: ' + puts "reverse_markdown not found." 
+ puts "Add to Gemfile, like this: " puts puts "echo gem \\'reverse_markdown\\' >> Gemfile" puts "bundle install" @@ -32,28 +32,27 @@ export USERDIR="user" =end class ImportScripts::IpboardSQL < ImportScripts::Base - - DB_HOST ||= ENV['DB_HOST'] || "localhost" - DB_NAME ||= ENV['DB_NAME'] || "ipboard" - DB_PW ||= ENV['DB_PW'] || "ipboard" - DB_USER ||= ENV['DB_USER'] || "ipboard" - TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "ipb_" - IMPORT_AFTER ||= ENV['IMPORT_AFTER'] || "1970-01-01" - UPLOADS ||= ENV['UPLOADS'] || "http://UPLOADS+LOCATION+IS+NOT+SET/uploads" - USERDIR ||= ENV['USERDIR'] || "user" - URL ||= ENV['URL'] || "https://forum.example.com" - AVATARS_DIR ||= ENV['AVATARS_DIR'] || '/home/pfaffman/data/example.com/avatars/' + DB_HOST ||= ENV["DB_HOST"] || "localhost" + DB_NAME ||= ENV["DB_NAME"] || "ipboard" + DB_PW ||= ENV["DB_PW"] || "ipboard" + DB_USER ||= ENV["DB_USER"] || "ipboard" + TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "ipb_" + IMPORT_AFTER ||= ENV["IMPORT_AFTER"] || "1970-01-01" + UPLOADS ||= ENV["UPLOADS"] || "http://UPLOADS+LOCATION+IS+NOT+SET/uploads" + USERDIR ||= ENV["USERDIR"] || "user" + URL ||= ENV["URL"] || "https://forum.example.com" + AVATARS_DIR ||= ENV["AVATARS_DIR"] || "/home/pfaffman/data/example.com/avatars/" BATCH_SIZE = 1000 ID_FIRST = true QUIET = true DEBUG = false - GALLERY_CAT_ID = 1234567 - GALLERY_CAT_NAME = 'galeria' - EMO_DIR ||= ENV['EMO_DIR'] || "default" + GALLERY_CAT_ID = 1_234_567 + GALLERY_CAT_NAME = "galeria" + EMO_DIR ||= ENV["EMO_DIR"] || "default" OLD_FORMAT = false if OLD_FORMAT MEMBERS_TABLE = "#{TABLE_PREFIX}core_members" - FORUMS_TABLE = "#{TABLE_PREFIX}forums_forums" + FORUMS_TABLE = "#{TABLE_PREFIX}forums_forums" POSTS_TABLE = "#{TABLE_PREFIX}forums_posts" TOPICS_TABLE = "#{TABLE_PREFIX}forums_topics" else @@ -89,8 +88,8 @@ class ImportScripts::IpboardSQL < ImportScripts::Base # TODO figure this out puts "WARNING: permalink_normalizations not set!!!" sleep 1 - #raw = "[ORIGINAL POST](#{URL}/topic/#{id}-#{slug})\n\n" + raw - #SiteSetting.permalink_normalizations='/topic/(.*t)\?.*/\1' + #raw = "[ORIGINAL POST](#{URL}/topic/#{id}-#{slug})\n\n" + raw + #SiteSetting.permalink_normalizations='/topic/(.*t)\?.*/\1' else # remove stuff after a "?" and work for urls that end in .html SiteSetting.permalink_normalizations = '/(.*t)[?.].*/\1' @@ -98,21 +97,15 @@ class ImportScripts::IpboardSQL < ImportScripts::Base end def initialize - if IMPORT_AFTER > "1970-01-01" - print_warning("Importing data after #{IMPORT_AFTER}") - end + print_warning("Importing data after #{IMPORT_AFTER}") if IMPORT_AFTER > "1970-01-01" super @htmlentities = HTMLEntities.new begin - @client = Mysql2::Client.new( - host: DB_HOST, - username: DB_USER, - password: DB_PW, - database: DB_NAME - ) + @client = + Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME) rescue Exception => e - puts '=' * 50 + puts "=" * 50 puts e.message puts <<~TEXT Cannot log in to database. 
@@ -151,18 +144,24 @@ class ImportScripts::IpboardSQL < ImportScripts::Base # NOT SUPPORTED import_gallery_topics update_tl0 create_permalinks - end def import_users - puts '', "creating users" + puts "", "creating users" - total_count = mysql_query("SELECT count(*) count FROM #{MEMBERS_TABLE} - WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'));").first['count'] + total_count = + mysql_query( + "SELECT count(*) count FROM #{MEMBERS_TABLE} + WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'));", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| #notes: no location, url, - results = mysql_query(" + results = + mysql_query( + " SELECT member_id id, name username, member_group_id usergroup, @@ -184,58 +183,64 @@ class ImportScripts::IpboardSQL < ImportScripts::Base AND member_group_id = g_id order by member_id ASC LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 - next if all_records_exist? :users, results.map { |u| u['id'].to_i } + next if all_records_exist? :users, results.map { |u| u["id"].to_i } create_users(results, total: total_count, offset: offset) do |user| - next if user['email'].blank? - next if user['username'].blank? - next if @lookup.user_id_from_imported_user_id(user['id']) + next if user["email"].blank? + next if user["username"].blank? + next if @lookup.user_id_from_imported_user_id(user["id"]) - birthday = Date.parse("#{user['bday_year']}-#{user['bday_month']}-#{user['bday_day']}") rescue nil - # TODO: what about timezones? - next if user['id'] == 0 - { id: user['id'], - email: user['email'], - username: user['username'], - avatar_url: user['avatar_url'], - title: user['member_type'], - created_at: user['created_at'] == nil ? 0 : Time.zone.at(user['created_at']), - # bio_raw: user['bio_raw'], - registration_ip_address: user['registration_ip_address'], - # birthday: birthday, - last_seen_at: user['last_seen_at'] == nil ? 0 : Time.zone.at(user['last_seen_at']), - admin: /^Admin/.match(user['member_type']) ? true : false, - moderator: /^MOD/.match(user['member_type']) ? true : false, - post_create_action: proc do |newuser| - if user['avatar_url'] && user['avatar_url'].length > 0 - photo_path = AVATARS_DIR + user['avatar_url'] - if File.exist?(photo_path) - begin - upload = create_upload(newuser.id, photo_path, File.basename(photo_path)) - if upload && upload.persisted? - newuser.import_mode = false - newuser.create_user_avatar - newuser.import_mode = true - newuser.user_avatar.update(custom_upload_id: upload.id) - newuser.update(uploaded_avatar_id: upload.id) - else - puts "Error: Upload did not persist for #{photo_path}!" - end - rescue SystemCallError => err - puts "Could not import avatar #{photo_path}: #{err.message}" - end - else - puts "avatar file not found at #{photo_path}" - end - end - if user['banned'] != 0 - suspend_user(newuser) - end + birthday = + begin + Date.parse("#{user["bday_year"]}-#{user["bday_month"]}-#{user["bday_day"]}") + rescue StandardError + nil end + # TODO: what about timezones? + next if user["id"] == 0 + { + id: user["id"], + email: user["email"], + username: user["username"], + avatar_url: user["avatar_url"], + title: user["member_type"], + created_at: user["created_at"] == nil ? 0 : Time.zone.at(user["created_at"]), + # bio_raw: user['bio_raw'], + registration_ip_address: user["registration_ip_address"], + # birthday: birthday, + last_seen_at: user["last_seen_at"] == nil ? 
0 : Time.zone.at(user["last_seen_at"]), + admin: /^Admin/.match(user["member_type"]) ? true : false, + moderator: /^MOD/.match(user["member_type"]) ? true : false, + post_create_action: + proc do |newuser| + if user["avatar_url"] && user["avatar_url"].length > 0 + photo_path = AVATARS_DIR + user["avatar_url"] + if File.exist?(photo_path) + begin + upload = create_upload(newuser.id, photo_path, File.basename(photo_path)) + if upload && upload.persisted? + newuser.import_mode = false + newuser.create_user_avatar + newuser.import_mode = true + newuser.user_avatar.update(custom_upload_id: upload.id) + newuser.update(uploaded_avatar_id: upload.id) + else + puts "Error: Upload did not persist for #{photo_path}!" + end + rescue SystemCallError => err + puts "Could not import avatar #{photo_path}: #{err.message}" + end + else + puts "avatar file not found at #{photo_path}" + end + end + suspend_user(newuser) if user["banned"] != 0 + end, } end end @@ -244,7 +249,7 @@ class ImportScripts::IpboardSQL < ImportScripts::Base def suspend_user(user) user.suspended_at = Time.now user.suspended_till = 200.years.from_now - ban_reason = 'Account deactivated by administrator' + ban_reason = "Account deactivated by administrator" user_option = user.user_option user_option.email_digests = false @@ -266,45 +271,50 @@ class ImportScripts::IpboardSQL < ImportScripts::Base def import_image_categories puts "", "importing image categories..." - categories = mysql_query(" + categories = + mysql_query( + " SELECT category_id id, category_name_seo name, category_parent_id as parent_id FROM #{TABLE_PREFIX}gallery_categories ORDER BY id ASC - ").to_a + ", + ).to_a - category_names = mysql_query(" + category_names = + mysql_query( + " SELECT DISTINCT word_key, word_default title FROM #{TABLE_PREFIX}core_sys_lang_words where word_app='gallery' AND word_key REGEXP 'gallery_category_[0-9]+$' ORDER BY word_key ASC - ").to_a + ", + ).to_a cat_map = {} puts "Creating gallery_cat_map" category_names.each do |name| - title = name['title'] - word_key = name['word_key'] + title = name["title"] + word_key = name["word_key"] puts "Processing #{word_key}: #{title}" - id = word_key.gsub('gallery_category_', '') + id = word_key.gsub("gallery_category_", "") next if cat_map[id] cat_map[id] = cat_map.has_value?(title) ? title + " " + id : title puts "#{id} => #{cat_map[id]}" end - params = { id: GALLERY_CAT_ID, - name: GALLERY_CAT_NAME } + params = { id: GALLERY_CAT_ID, name: GALLERY_CAT_NAME } create_category(params, params[:id]) create_categories(categories) do |category| - id = (category['id']).to_s + id = (category["id"]).to_s name = CGI.unescapeHTML(cat_map[id]) { - id: id + 'gal', + id: id + "gal", name: name, parent_category_id: @lookup.category_id_from_imported_category_id(GALLERY_CAT_ID), - color: random_category_color + color: random_category_color, } end end @@ -312,34 +322,34 @@ class ImportScripts::IpboardSQL < ImportScripts::Base def import_categories puts "", "importing categories..." 
- categories = mysql_query(" + categories = + mysql_query( + " SELECT id, name name, parent_id as parent_id FROM #{FORUMS_TABLE} ORDER BY parent_id ASC - ").to_a + ", + ).to_a top_level_categories = categories.select { |c| c["parent.id"] == -1 } create_categories(top_level_categories) do |category| - id = category['id'].to_s - name = category['name'] - { - id: id, - name: name, - } + id = category["id"].to_s + name = category["name"] + { id: id, name: name } end children_categories = categories.select { |c| c["parent.id"] != -1 } create_categories(children_categories) do |category| - id = category['id'].to_s - name = category['name'] + id = category["id"].to_s + name = category["name"] { id: id, name: name, - parent_category_id: @lookup.category_id_from_imported_category_id(category['parent_id']), - color: random_category_color + parent_category_id: @lookup.category_id_from_imported_category_id(category["parent_id"]), + color: random_category_color, } end end @@ -347,13 +357,17 @@ class ImportScripts::IpboardSQL < ImportScripts::Base def import_topics puts "", "importing topics..." - total_count = mysql_query("SELECT count(*) count FROM #{POSTS_TABLE} + total_count = + mysql_query( + "SELECT count(*) count FROM #{POSTS_TABLE} WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) - AND new_topic=1;") - .first['count'] + AND new_topic=1;", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| - discussions = mysql_query(<<-SQL + discussions = mysql_query(<<-SQL) SELECT #{TOPICS_TABLE}.tid tid, #{TOPICS_TABLE}.forum_id category, #{POSTS_TABLE}.pid pid, @@ -371,29 +385,29 @@ class ImportScripts::IpboardSQL < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) break if discussions.size < 1 - next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['tid'].to_s } + next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t["tid"].to_s } create_posts(discussions, total: total_count, offset: offset) do |discussion| - slug = discussion['slug'] - id = discussion['tid'] - raw = clean_up(discussion['raw']) + slug = discussion["slug"] + id = discussion["tid"] + raw = clean_up(discussion["raw"]) { - id: "discussion#" + discussion['tid'].to_s, - user_id: user_id_from_imported_user_id(discussion['user_id']) || Discourse::SYSTEM_USER_ID, - title: CGI.unescapeHTML(discussion['title']), - category: category_id_from_imported_category_id(discussion['category'].to_s), + id: "discussion#" + discussion["tid"].to_s, + user_id: + user_id_from_imported_user_id(discussion["user_id"]) || Discourse::SYSTEM_USER_ID, + title: CGI.unescapeHTML(discussion["title"]), + category: category_id_from_imported_category_id(discussion["category"].to_s), raw: raw, - pinned_at: discussion['pinned'].to_i == 1 ? Time.zone.at(discussion['created_at']) : nil, - created_at: Time.zone.at(discussion['created_at']), + pinned_at: discussion["pinned"].to_i == 1 ? 
Time.zone.at(discussion["created_at"]) : nil, + created_at: Time.zone.at(discussion["created_at"]), } end end end - def array_from_members_string(invited_members = 'a:3:{i:0;i:22629;i:1;i:21837;i:2;i:22234;}') + def array_from_members_string(invited_members = "a:3:{i:0;i:22629;i:1;i:21837;i:2;i:22234;}") out = [] count_regex = /a:(\d)+:/ count = count_regex.match(invited_members)[1] @@ -403,7 +417,7 @@ class ImportScripts::IpboardSQL < ImportScripts::Base i = m[1] rest.sub!(i_regex, "") puts "i: #{i}, #{rest}" - out += [ i.to_i ] + out += [i.to_i] end out end @@ -411,12 +425,13 @@ class ImportScripts::IpboardSQL < ImportScripts::Base def import_private_messages puts "", "importing private messages..." - topic_count = mysql_query("SELECT COUNT(msg_id) count FROM #{TABLE_PREFIX}message_posts").first["count"] + topic_count = + mysql_query("SELECT COUNT(msg_id) count FROM #{TABLE_PREFIX}message_posts").first["count"] last_private_message_topic_id = -1 batches(BATCH_SIZE) do |offset| - private_messages = mysql_query(<<-SQL + private_messages = mysql_query(<<-SQL) SELECT msg_id pmtextid, msg_topic_id topic_id, msg_author_id fromuserid, @@ -433,12 +448,12 @@ class ImportScripts::IpboardSQL < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) puts "Processing #{private_messages.count} messages" break if private_messages.count < 1 puts "Processing . . . " - private_messages = private_messages.reject { |pm| @lookup.post_already_imported?("pm-#{pm['pmtextid']}") } + private_messages = + private_messages.reject { |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") } title_username_of_pm_first_post = {} @@ -446,11 +461,16 @@ class ImportScripts::IpboardSQL < ImportScripts::Base skip = false mapped = {} - mapped[:id] = "pm-#{m['pmtextid']}" - mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID - mapped[:raw] = clean_up(m['message']) rescue nil - mapped[:created_at] = Time.zone.at(m['dateline']) - title = @htmlentities.decode(m['title']).strip[0...255] + mapped[:id] = "pm-#{m["pmtextid"]}" + mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) || + Discourse::SYSTEM_USER_ID + mapped[:raw] = begin + clean_up(m["message"]) + rescue StandardError + nil + end + mapped[:created_at] = Time.zone.at(m["dateline"]) + title = @htmlentities.decode(m["title"]).strip[0...255] topic_id = nil next if mapped[:raw].blank? @@ -459,9 +479,9 @@ class ImportScripts::IpboardSQL < ImportScripts::Base target_usernames = [] target_userids = [] begin - to_user_array = [ m['to_user_id'] ] + array_from_members_string(m['touserarray']) - rescue - puts "#{m['pmtextid']} -- #{m['touserarray']}" + to_user_array = [m["to_user_id"]] + array_from_members_string(m["touserarray"]) + rescue StandardError + puts "#{m["pmtextid"]} -- #{m["touserarray"]}" skip = true end @@ -477,8 +497,8 @@ class ImportScripts::IpboardSQL < ImportScripts::Base puts "Can't find user: #{to_user}" end end - rescue - puts "skipping pm-#{m['pmtextid']} `to_user_array` is broken -- #{to_user_array.inspect}" + rescue StandardError + puts "skipping pm-#{m["pmtextid"]} `to_user_array` is broken -- #{to_user_array.inspect}" skip = true end @@ -486,30 +506,32 @@ class ImportScripts::IpboardSQL < ImportScripts::Base participants << mapped[:user_id] begin participants.sort! 
- rescue + rescue StandardError puts "one of the participant's id is nil -- #{participants.inspect}" end - if last_private_message_topic_id != m['topic_id'] - last_private_message_topic_id = m['topic_id'] - puts "New message: #{m['topic_id']}: #{title} from #{m['fromuserid']} (#{mapped[:user_id]})" unless QUIET + if last_private_message_topic_id != m["topic_id"] + last_private_message_topic_id = m["topic_id"] + unless QUIET + puts "New message: #{m["topic_id"]}: #{title} from #{m["fromuserid"]} (#{mapped[:user_id]})" + end # topic post message - topic_id = m['topic_id'] + topic_id = m["topic_id"] mapped[:title] = title mapped[:archetype] = Archetype.private_message - mapped[:target_usernames] = target_usernames.join(',') + mapped[:target_usernames] = target_usernames.join(",") if mapped[:target_usernames].size < 1 # pm with yourself? # skip = true mapped[:target_usernames] = "system" - puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})" + puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})" end else # reply topic_id = topic_lookup_from_imported_post_id("pm-#{topic_id}") - if !topic_id - skip = true - end + skip = true if !topic_id mapped[:topic_id] = topic_id - puts "Reply message #{topic_id}: #{m['topic_id']}: from #{m['fromuserid']} (#{mapped[:user_id]})" unless QUIET + unless QUIET + puts "Reply message #{topic_id}: #{m["topic_id"]}: from #{m["fromuserid"]} (#{mapped[:user_id]})" + end end # puts "#{target_usernames} -- #{mapped[:target_usernames]}" # puts "Adding #{mapped}" @@ -524,9 +546,13 @@ class ImportScripts::IpboardSQL < ImportScripts::Base puts "", "importing gallery albums..." gallery_count = 0 - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}gallery_images - ;") - .first['count'] + total_count = + mysql_query( + "SELECT count(*) count FROM #{TABLE_PREFIX}gallery_images + ;", + ).first[ + "count" + ] # NOTE: for imports with huge numbers of galleries, this needs to use limits @@ -546,7 +572,7 @@ class ImportScripts::IpboardSQL < ImportScripts::Base # SQL # ) - images = mysql_query(<<-SQL + images = mysql_query(<<-SQL) SELECT #{TABLE_PREFIX}gallery_albums.album_id tid, #{TABLE_PREFIX}gallery_albums.album_category_id category, @@ -570,43 +596,46 @@ class ImportScripts::IpboardSQL < ImportScripts::Base SQL - ) - break if images.size < 1 - next if all_records_exist? :posts, images.map { |t| "gallery#" + t['tid'].to_s + t['image_id'].to_s } + if all_records_exist? 
:posts, + images.map { |t| "gallery#" + t["tid"].to_s + t["image_id"].to_s } + next + end - last_id = images.first['tid'] - raw = "Gallery ID: #{last_id}\n" + clean_up(images.first['raw']) - raw += "#{clean_up(images.first['description'])}\n" + last_id = images.first["tid"] + raw = "Gallery ID: #{last_id}\n" + clean_up(images.first["raw"]) + raw += "#{clean_up(images.first["description"])}\n" last_gallery = images.first.dup create_posts(images, total: total_count, offset: offset) do |gallery| - id = gallery['tid'].to_i + id = gallery["tid"].to_i #puts "ID: #{id}, last_id: #{last_id}, image: #{gallery['image_id']}" if id == last_id - raw += "### #{gallery['caption']}\n" - raw += "#{UPLOADS}/#{gallery['orig']}\n" + raw += "### #{gallery["caption"]}\n" + raw += "#{UPLOADS}/#{gallery["orig"]}\n" last_gallery = gallery.dup next else insert_raw = raw.dup - last_id = gallery['tid'] + last_id = gallery["tid"] if DEBUG - raw = "Gallery ID: #{last_id}\n" + clean_up(gallery['raw']) - raw += "Cat: #{last_gallery['category'].to_s} - #{category_id_from_imported_category_id(last_gallery['category'].to_s + 'gal')}" + raw = "Gallery ID: #{last_id}\n" + clean_up(gallery["raw"]) + raw += + "Cat: #{last_gallery["category"].to_s} - #{category_id_from_imported_category_id(last_gallery["category"].to_s + "gal")}" end - raw += "#{clean_up(images.first['description'])}\n" - raw += "### #{gallery['caption']}\n" - if DEBUG - raw += "User #{gallery['user_id']}, image_id: #{gallery['image_id']}\n" - end - raw += "#{UPLOADS}/#{gallery['orig']}\n" + raw += "#{clean_up(images.first["description"])}\n" + raw += "### #{gallery["caption"]}\n" + raw += "User #{gallery["user_id"]}, image_id: #{gallery["image_id"]}\n" if DEBUG + raw += "#{UPLOADS}/#{gallery["orig"]}\n" gallery_count += 1 - puts "#{gallery_count}--Cat: #{last_gallery['category'].to_s} ==> #{category_id_from_imported_category_id(last_gallery['category'].to_s + 'gal')}" unless QUIET + unless QUIET + puts "#{gallery_count}--Cat: #{last_gallery["category"].to_s} ==> #{category_id_from_imported_category_id(last_gallery["category"].to_s + "gal")}" + end { - id: "gallery#" + last_gallery['tid'].to_s + last_gallery['image_id'].to_s, - user_id: user_id_from_imported_user_id(last_gallery['user_id']) || Discourse::SYSTEM_USER_ID, - title: CGI.unescapeHTML(last_gallery['title']), - category: category_id_from_imported_category_id(last_gallery['category'].to_s + 'gal'), + id: "gallery#" + last_gallery["tid"].to_s + last_gallery["image_id"].to_s, + user_id: + user_id_from_imported_user_id(last_gallery["user_id"]) || Discourse::SYSTEM_USER_ID, + title: CGI.unescapeHTML(last_gallery["title"]), + category: category_id_from_imported_category_id(last_gallery["category"].to_s + "gal"), raw: insert_raw, } end @@ -630,11 +659,11 @@ class ImportScripts::IpboardSQL < ImportScripts::Base def import_comments puts "", "importing gallery comments..." - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}gallery_comments;") - .first['count'] + total_count = + mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}gallery_comments;").first["count"] batches(BATCH_SIZE) do |offset| - comments = mysql_query(<<-SQL + comments = mysql_query(<<-SQL) SELECT #{TABLE_PREFIX}gallery_comments.tid tid, #{TABLE_PREFIX}gallery_topics.forum_id category, @@ -652,20 +681,19 @@ class ImportScripts::IpboardSQL < ImportScripts::Base OFFSET #{offset} SQL - ) break if comments.size < 1 - next if all_records_exist? 
:posts, comments.map { |comment| "comment#" + comment['pid'].to_s } + next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment["pid"].to_s } create_posts(comments, total: total_count, offset: offset) do |comment| - next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['tid'].to_s) - next if comment['raw'].blank? + next unless t = topic_lookup_from_imported_post_id("discussion#" + comment["tid"].to_s) + next if comment["raw"].blank? { - id: "comment#" + comment['pid'].to_s, - user_id: user_id_from_imported_user_id(comment['user_id']) || Discourse::SYSTEM_USER_ID, + id: "comment#" + comment["pid"].to_s, + user_id: user_id_from_imported_user_id(comment["user_id"]) || Discourse::SYSTEM_USER_ID, topic_id: t[:topic_id], - raw: clean_up(comment['raw']), - created_at: Time.zone.at(comment['created_at']) + raw: clean_up(comment["raw"]), + created_at: Time.zone.at(comment["created_at"]), } end end @@ -674,13 +702,17 @@ class ImportScripts::IpboardSQL < ImportScripts::Base def import_posts puts "", "importing posts..." - total_count = mysql_query("SELECT count(*) count FROM #{POSTS_TABLE} + total_count = + mysql_query( + "SELECT count(*) count FROM #{POSTS_TABLE} WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) - AND new_topic=0;") - .first['count'] + AND new_topic=0;", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| - comments = mysql_query(<<-SQL + comments = mysql_query(<<-SQL) SELECT #{TOPICS_TABLE}.tid tid, #{TOPICS_TABLE}.forum_id category, #{POSTS_TABLE}.pid pid, @@ -696,20 +728,19 @@ class ImportScripts::IpboardSQL < ImportScripts::Base LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) break if comments.size < 1 - next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment['pid'].to_s } + next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment["pid"].to_s } create_posts(comments, total: total_count, offset: offset) do |comment| - next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['tid'].to_s) - next if comment['raw'].blank? + next unless t = topic_lookup_from_imported_post_id("discussion#" + comment["tid"].to_s) + next if comment["raw"].blank? { - id: "comment#" + comment['pid'].to_s, - user_id: user_id_from_imported_user_id(comment['user_id']) || Discourse::SYSTEM_USER_ID, + id: "comment#" + comment["pid"].to_s, + user_id: user_id_from_imported_user_id(comment["user_id"]) || Discourse::SYSTEM_USER_ID, topic_id: t[:topic_id], - raw: clean_up(comment['raw']), - created_at: Time.zone.at(comment['created_at']) + raw: clean_up(comment["raw"]), + created_at: Time.zone.at(comment["created_at"]), } end end @@ -719,59 +750,61 @@ class ImportScripts::IpboardSQL < ImportScripts::Base # this makes proper quotes with user/topic/post references. # I'm not clear if it is for just some bizarre imported data, or it might ever be useful # It should be integrated into the Nokogiri section of clean_up, though. - @doc = Nokogiri::XML("" + raw + "") + @doc = Nokogiri.XML("" + raw + "") # handle
s with links to original post - @doc.css('blockquote[class=ipsQuote]').each do |b| - # puts "\n#{'#'*50}\n#{b}\n\nCONTENT: #{b['data-ipsquote-contentid']}" - # b.options = Nokogiri::XML::ParseOptions::STRICT - imported_post_id = b['data-ipsquote-contentcommentid'].to_s - content_type = b['data-ipsquote-contenttype'].to_s - content_class = b['data-ipsquote-contentclass'].to_s - content_id = b['data-ipsquote-contentid'].to_s || b['data-cid'].to_s - topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) - post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id) - post = topic_lookup ? topic_lookup[:post_number] : nil - topic = topic_lookup ? topic_lookup[:topic_id] : nil - post ||= post_lookup ? post_lookup[:post_number] : nil - topic ||= post_lookup ? post_lookup[:topic_id] : nil + @doc + .css("blockquote[class=ipsQuote]") + .each do |b| + # puts "\n#{'#'*50}\n#{b}\n\nCONTENT: #{b['data-ipsquote-contentid']}" + # b.options = Nokogiri::XML::ParseOptions::STRICT + imported_post_id = b["data-ipsquote-contentcommentid"].to_s + content_type = b["data-ipsquote-contenttype"].to_s + content_class = b["data-ipsquote-contentclass"].to_s + content_id = b["data-ipsquote-contentid"].to_s || b["data-cid"].to_s + topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) + post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id) + post = topic_lookup ? topic_lookup[:post_number] : nil + topic = topic_lookup ? topic_lookup[:topic_id] : nil + post ||= post_lookup ? post_lookup[:post_number] : nil + topic ||= post_lookup ? post_lookup[:topic_id] : nil - # TODO: consider:
- # consider:
-      # TODO make sure it's the imported username
-      # TODO: do _s still get \-escaped?
-      ips_username = b['data-ipsquote-username'] || b['data-author']
-      username = ips_username
-      new_text = ""
-      if DEBUG
-        # new_text += "post: #{imported_post_id} --> #{post_lookup} --> |#{post}|
\n" - # new_text += "topic: #{content_id} --> #{topic_lookup} --> |#{topic}|
\n" - # new_text += "user: #{ips_username} --> |#{username}|
\n" - # new_text += "class: #{content_class}
\n" - # new_text += "type: #{content_type}
\n" - if content_class.length > 0 && content_class != "forums_Topic" - new_text += "UNEXPECTED CONTENT CLASS! #{content_class}
\n" + # TODO: consider:
+ # consider:
+        # TODO make sure it's the imported username
+        # TODO: do _s still get \-escaped?
+        ips_username = b["data-ipsquote-username"] || b["data-author"]
+        username = ips_username
+        new_text = ""
+        if DEBUG
+          # new_text += "post: #{imported_post_id} --> #{post_lookup} --> |#{post}|
\n" + # new_text += "topic: #{content_id} --> #{topic_lookup} --> |#{topic}|
\n" + # new_text += "user: #{ips_username} --> |#{username}|
\n" + # new_text += "class: #{content_class}
\n" + # new_text += "type: #{content_type}
\n" + if content_class.length > 0 && content_class != "forums_Topic" + new_text += "UNEXPECTED CONTENT CLASS! #{content_class}
\n" + end + if content_type.length > 0 && content_type != "forums" + new_text += "UNEXPECTED CONTENT TYPE! #{content_type}
\n" + end + # puts "#{'-'*20} and NOWWWWW!!!! \n #{new_text}" end - if content_type.length > 0 && content_type != "forums" - new_text += "UNEXPECTED CONTENT TYPE! #{content_type}
\n" - end - # puts "#{'-'*20} and NOWWWWW!!!! \n #{new_text}" - end - if post && topic && username - quote = "\n[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n" - else - if username && username.length > 1 - quote = "\n[quote=\"#{username}\"]\n\n" + if post && topic && username + quote = "\n[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n" else - quote = "\n[quote]\n" + if username && username.length > 1 + quote = "\n[quote=\"#{username}\"]\n\n" + else + quote = "\n[quote]\n" + end + # new_doc = Nokogiri::XML("
#{new_text}
") end - # new_doc = Nokogiri::XML("
#{new_text}
") + puts "QUOTE: #{quote}" + sleep 1 + b.content = quote + b.content + "\n[/quote]\n" + b.name = "div" end - puts "QUOTE: #{quote}" - sleep 1 - b.content = quote + b.content + "\n[/quote]\n" - b.name = 'div' - end raw = @doc.to_html end @@ -783,24 +816,30 @@ class ImportScripts::IpboardSQL < ImportScripts::Base # TODO what about uploads? # raw.gsub!(//,UPLOADS) raw.gsub!(/
/, "\n\n") - raw.gsub!(/
/, "\n\n") - raw.gsub!(/

 <\/p>/, "\n\n") + raw.gsub!(%r{
}, "\n\n") + raw.gsub!(%r{

 

}, "\n\n") raw.gsub!(/\[hr\]/, "\n***\n") raw.gsub!(/'/, "'") - raw.gsub!(/\[url="(.+?)"\]http.+?\[\/url\]/, "\\1\n") - raw.gsub!(/\[media\](.+?)\[\/media\]/, "\n\\1\n\n") - raw.gsub!(/\[php\](.+?)\[\/php\]/m) { |m| "\n\n```php\n\n" + @htmlentities.decode($1.gsub(/\n\n/, "\n")) + "\n\n```\n\n" } - raw.gsub!(/\[code\](.+?)\[\/code\]/m) { |m| "\n\n```\n\n" + @htmlentities.decode($1.gsub(/\n\n/, "\n")) + "\n\n```\n\n" } - raw.gsub!(/\[list\](.+?)\[\/list\]/m) { |m| "\n" + $1.gsub(/\[\*\]/, "\n- ") + "\n\n" } + raw.gsub!(%r{\[url="(.+?)"\]http.+?\[/url\]}, "\\1\n") + raw.gsub!(%r{\[media\](.+?)\[/media\]}, "\n\\1\n\n") + raw.gsub!(%r{\[php\](.+?)\[/php\]}m) do |m| + "\n\n```php\n\n" + @htmlentities.decode($1.gsub(/\n\n/, "\n")) + "\n\n```\n\n" + end + raw.gsub!(%r{\[code\](.+?)\[/code\]}m) do |m| + "\n\n```\n\n" + @htmlentities.decode($1.gsub(/\n\n/, "\n")) + "\n\n```\n\n" + end + raw.gsub!(%r{\[list\](.+?)\[/list\]}m) { |m| "\n" + $1.gsub(/\[\*\]/, "\n- ") + "\n\n" } raw.gsub!(/\[quote\]/, "\n[quote]\n") - raw.gsub!(/\[\/quote\]/, "\n[/quote]\n") - raw.gsub!(/date=\'(.+?)\'/, '') - raw.gsub!(/timestamp=\'(.+?)\' /, '') + raw.gsub!(%r{\[/quote\]}, "\n[/quote]\n") + raw.gsub!(/date=\'(.+?)\'/, "") + raw.gsub!(/timestamp=\'(.+?)\' /, "") quote_regex = /\[quote name=\'(.+?)\'\s+post=\'(\d+?)\'\s*\]/ while quote = quote_regex.match(raw) # get IPB post number and find Discourse post and topic number - puts "----------------------------------------\nName: #{quote[1]}, post: #{quote[2]}" unless QUIET + unless QUIET + puts "----------------------------------------\nName: #{quote[1]}, post: #{quote[2]}" + end imported_post_id = quote[2].to_s topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) post_lookup = topic_lookup_from_imported_post_id("discussion#" + imported_post_id) @@ -826,21 +865,24 @@ class ImportScripts::IpboardSQL < ImportScripts::Base while attach = attach_regex.match(raw) attach_id = attach[1] attachments = - mysql_query("SELECT attach_location as loc, + mysql_query( + "SELECT attach_location as loc, attach_file as filename FROM #{ATTACHMENT_TABLE} - WHERE attach_id=#{attach_id}") + WHERE attach_id=#{attach_id}", + ) if attachments.count < 1 puts "Attachment #{attach_id} not found." attach_string = "Attachment #{attach_id} not found." else - attach_url = "#{UPLOADS}/#{attachments.first['loc'].gsub(' ', '%20')}" - if attachments.first['filename'].match(/(png|jpg|jpeg|gif)$/) + attach_url = "#{UPLOADS}/#{attachments.first["loc"].gsub(" ", "%20")}" + if attachments.first["filename"].match(/(png|jpg|jpeg|gif)$/) # images are rendered as a link that contains the image - attach_string = "#{attach_id}\n\n[![#{attachments.first['filename']}](#{attach_url})](#{attach_url})\n" + attach_string = + "#{attach_id}\n\n[![#{attachments.first["filename"]}](#{attach_url})](#{attach_url})\n" else # other attachments are simple download links - attach_string = "#{attach_id}\n\n[#{attachments.first['filename']}](#{attach_url})\n" + attach_string = "#{attach_id}\n\n[#{attachments.first["filename"]}](#{attach_url})\n" end end raw.sub!(attach_regex, attach_string) @@ -850,7 +892,7 @@ class ImportScripts::IpboardSQL < ImportScripts::Base end def random_category_color - colors = SiteSetting.category_colors.split('|') + colors = SiteSetting.category_colors.split("|") colors[rand(colors.count)] end @@ -865,78 +907,78 @@ class ImportScripts::IpboardSQL < ImportScripts::Base raw.gsub!(//, UPLOADS) raw.gsub!(/
/, "\n") - @doc = Nokogiri::XML("" + raw + "") + @doc = Nokogiri.XML("" + raw + "") # handle
s with links to original post - @doc.css('blockquote[class=ipsQuote]').each do |b| - imported_post_id = b['data-ipsquote-contentcommentid'].to_s - content_type = b['data-ipsquote-contenttype'].to_s - content_class = b['data-ipsquote-contentclass'].to_s - content_id = b['data-ipsquote-contentid'].to_s || b['data-cid'].to_s - topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) - post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id) - post = topic_lookup ? topic_lookup[:post_number] : nil - topic = topic_lookup ? topic_lookup[:topic_id] : nil - post ||= post_lookup ? post_lookup[:post_number] : nil - topic ||= post_lookup ? post_lookup[:topic_id] : nil + @doc + .css("blockquote[class=ipsQuote]") + .each do |b| + imported_post_id = b["data-ipsquote-contentcommentid"].to_s + content_type = b["data-ipsquote-contenttype"].to_s + content_class = b["data-ipsquote-contentclass"].to_s + content_id = b["data-ipsquote-contentid"].to_s || b["data-cid"].to_s + topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) + post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id) + post = topic_lookup ? topic_lookup[:post_number] : nil + topic = topic_lookup ? topic_lookup[:topic_id] : nil + post ||= post_lookup ? post_lookup[:post_number] : nil + topic ||= post_lookup ? post_lookup[:topic_id] : nil - # TODO: consider:
- # consider:
-      ips_username = b['data-ipsquote-username'] || b['data-author']
-      username = ips_username
-      new_text = ""
-      if DEBUG
-        if content_class.length > 0 && content_class != "forums_Topic"
-          new_text += "UNEXPECTED CONTENT CLASS! #{content_class}
\n" + # TODO: consider:
+ # consider:
+        ips_username = b["data-ipsquote-username"] || b["data-author"]
+        username = ips_username
+        new_text = ""
+        if DEBUG
+          if content_class.length > 0 && content_class != "forums_Topic"
+            new_text += "UNEXPECTED CONTENT CLASS! #{content_class}
\n" + end + if content_type.length > 0 && content_type != "forums" + new_text += "UNEXPECTED CONTENT TYPE! #{content_type}
\n" + end end - if content_type.length > 0 && content_type != "forums" - new_text += "UNEXPECTED CONTENT TYPE! #{content_type}
\n" - end - end - if post && topic && username - quote = "[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n" - else - if username && username.length > 1 - quote = "[quote=\"#{username}\"]\n\n" + if post && topic && username + quote = "[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n" else - quote = "[quote]\n" + if username && username.length > 1 + quote = "[quote=\"#{username}\"]\n\n" + else + quote = "[quote]\n" + end end + b.content = quote + b.content + "\n[/quote]\n" + b.name = "div" end - b.content = quote + b.content + "\n[/quote]\n" - b.name = 'div' - end - @doc.css('object param embed').each do |embed| - embed.replace("\n#{embed['src']}\n") - end + @doc.css("object param embed").each { |embed| embed.replace("\n#{embed["src"]}\n") } # handle }mix youtube_cooked.gsub!(re) { "\n#{$1}\n" } - re = //mix + re = %r{}mix youtube_cooked.gsub!(re) { "\n#{$1}\n" } - youtube_cooked.gsub!(/^\/\//, "https://") # make sure it has a protocol + youtube_cooked.gsub!(%r{^//}, "https://") # make sure it has a protocol unless /http/.match(youtube_cooked) # handle case of only youtube object number if youtube_cooked.length < 8 || /[<>=]/.match(youtube_cooked) # probably not a youtube id youtube_cooked = "" else - youtube_cooked = 'https://www.youtube.com/watch?v=' + youtube_cooked + youtube_cooked = "https://www.youtube.com/watch?v=" + youtube_cooked end end - print_warning("#{'-' * 40}\nBefore: #{youtube_raw}\nAfter: #{youtube_cooked}") unless QUIET + print_warning("#{"-" * 40}\nBefore: #{youtube_raw}\nAfter: #{youtube_cooked}") unless QUIET youtube_cooked end @@ -313,73 +334,79 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base raw = raw.gsub("\\'", "'") raw = raw.gsub(/\[b\]/i, "") - raw = raw.gsub(/\[\/b\]/i, "") + raw = raw.gsub(%r{\[/b\]}i, "") raw = raw.gsub(/\[i\]/i, "") - raw = raw.gsub(/\[\/i\]/i, "") + raw = raw.gsub(%r{\[/i\]}i, "") raw = raw.gsub(/\[u\]/i, "") - raw = raw.gsub(/\[\/u\]/i, "") + raw = raw.gsub(%r{\[/u\]}i, "") - raw = raw.gsub(/\[url\](\S+)\[\/url\]/im) { "#{$1}" } - raw = raw.gsub(/\[link\](\S+)\[\/link\]/im) { "#{$1}" } + raw = raw.gsub(%r{\[url\](\S+)\[/url\]}im) { "#{$1}" } + raw = raw.gsub(%r{\[link\](\S+)\[/link\]}im) { "#{$1}" } # URL & LINK with text - raw = raw.gsub(/\[url=(\S+?)\](.*?)\[\/url\]/im) { "#{$2}" } - raw = raw.gsub(/\[link=(\S+?)\](.*?)\[\/link\]/im) { "#{$2}" } + raw = raw.gsub(%r{\[url=(\S+?)\](.*?)\[/url\]}im) { "#{$2}" } + raw = raw.gsub(%r{\[link=(\S+?)\](.*?)\[/link\]}im) { "#{$2}" } # remote images - raw = raw.gsub(/\[img\](https?:.+?)\[\/img\]/im) { "" } - raw = raw.gsub(/\[img=(https?.+?)\](.+?)\[\/img\]/im) { "\"#{$2}\"" } + raw = raw.gsub(%r{\[img\](https?:.+?)\[/img\]}im) { "" } + raw = raw.gsub(%r{\[img=(https?.+?)\](.+?)\[/img\]}im) { "\"#{$2}\"" } # local images - raw = raw.gsub(/\[img\](.+?)\[\/img\]/i) { "" } - raw = raw.gsub(/\[img=(.+?)\](https?.+?)\[\/img\]/im) { "\"#{$2}\"" } + raw = raw.gsub(%r{\[img\](.+?)\[/img\]}i) { "" } + raw = + raw.gsub(%r{\[img=(.+?)\](https?.+?)\[/img\]}im) do + "\"#{$2}\"" + end # Convert image bbcode - raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/im, '') + raw.gsub!(%r{\[img=(\d+),(\d+)\]([^\]]*)\[/img\]}im, '') # [div]s are really [quote]s raw.gsub!(/\[div\]/mix, "[quote]") - raw.gsub!(/\[\/div\]/mix, "[/quote]") + raw.gsub!(%r{\[/div\]}mix, "[/quote]") # [postedby] -> link to @user - raw.gsub(/\[postedby\](.+?)\[b\](.+?)\[\/b\]\[\/postedby\]/i) { "#{$1}@#{$2}" } + raw.gsub(%r{\[postedby\](.+?)\[b\](.+?)\[/b\]\[/postedby\]}i) { "#{$1}@#{$2}" } # CODE (not tested) - raw 
= raw.gsub(/\[code\](\S+)\[\/code\]/im) { "```\n#{$1}\n```" } - raw = raw.gsub(/\[pre\](\S+)\[\/pre\]/im) { "```\n#{$1}\n```" } + raw = raw.gsub(%r{\[code\](\S+)\[/code\]}im) { "```\n#{$1}\n```" } + raw = raw.gsub(%r{\[pre\](\S+)\[/pre\]}im) { "```\n#{$1}\n```" } - raw = raw.gsub(/(https:\/\/youtu\S+)/i) { "\n#{$1}\n" } #youtube links on line by themselves + raw = raw.gsub(%r{(https://youtu\S+)}i) { "\n#{$1}\n" } #youtube links on line by themselves # no center - raw = raw.gsub(/\[\/?center\]/i, "") + raw = raw.gsub(%r{\[/?center\]}i, "") # no size - raw = raw.gsub(/\[\/?size.*?\]/i, "") + raw = raw.gsub(%r{\[/?size.*?\]}i, "") ### FROM VANILLA: # fix whitespaces - raw = raw.gsub(/(\\r)?\\n/, "\n") - .gsub("\\t", "\t") + raw = raw.gsub(/(\\r)?\\n/, "\n").gsub("\\t", "\t") unless CONVERT_HTML # replace all chevrons with HTML entities # NOTE: must be done # - AFTER all the "code" processing # - BEFORE the "quote" processing - raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" } - .gsub("<", "<") - .gsub("\u2603", "<") + raw = + raw + .gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" } + .gsub("<", "<") + .gsub("\u2603", "<") - raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" } - .gsub(">", ">") - .gsub("\u2603", ">") + raw = + raw + .gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" } + .gsub(">", ">") + .gsub("\u2603", ">") end # Remove the color tag raw.gsub!(/\[color=[#a-z0-9]+\]/i, "") - raw.gsub!(/\[\/color\]/i, "") + raw.gsub!(%r{\[/color\]}i, "") ### END VANILLA: raw @@ -395,54 +422,72 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base end def create_permalinks - puts '', 'Creating redirects...', '' + puts "", "Creating redirects...", "" - puts '', 'Users...', '' + puts "", "Users...", "" User.find_each do |u| ucf = u.custom_fields if ucf && ucf["import_id"] && ucf["import_username"] - Permalink.create(url: "#{BASE}/user-id-#{ucf['import_id']}.html", external_url: "/u/#{u.username}") rescue nil - print '.' + begin + Permalink.create( + url: "#{BASE}/user-id-#{ucf["import_id"]}.html", + external_url: "/u/#{u.username}", + ) + rescue StandardError + nil + end + print "." end end - puts '', 'Posts...', '' + puts "", "Posts...", "" Post.find_each do |post| pcf = post.custom_fields if pcf && pcf["import_id"] topic = post.topic - id = pcf["import_id"].split('#').last + id = pcf["import_id"].split("#").last if post.post_number == 1 - Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", topic_id: topic.id) rescue nil + begin + Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", topic_id: topic.id) + rescue StandardError + nil + end unless QUIET print_warning("forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}") end else - Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", post_id: post.id) rescue nil + begin + Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", post_id: post.id) + rescue StandardError + nil + end unless QUIET - print_warning("forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}/#{post.id}") + print_warning( + "forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}/#{post.id}", + ) end end - print '.' + print "." 
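# Aside on the begin/rescue expansions in this hunk (minimal sketch, plain
# Ruby): the inline "expr rescue nil" modifier and the block form it is
# rewritten into both swallow StandardError and yield nil, so the change is
# behavior-preserving.
inline = (Integer("oops") rescue nil)
block =
  begin
    Integer("oops")
  rescue StandardError
    nil
  end
raise "rescue styles differ" unless inline.nil? && block.nil?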
end end - puts '', 'Categories...', '' + puts "", "Categories...", "" Category.find_each do |cat| ccf = cat.custom_fields next unless id = ccf["import_id"] - unless QUIET - print_warning("forum-category-#{id}.html --> /t/#{cat.id}") + print_warning("forum-category-#{id}.html --> /t/#{cat.id}") unless QUIET + begin + Permalink.create(url: "#{BASE}/forum-category-#{id}.html", category_id: cat.id) + rescue StandardError + nil end - Permalink.create(url: "#{BASE}/forum-category-#{id}.html", category_id: cat.id) rescue nil - print '.' + print "." end end def print_warning(message) $stderr.puts "#{message}" end - end ImportScripts::MylittleforumSQL.new.perform diff --git a/script/import_scripts/nabble.rb b/script/import_scripts/nabble.rb index e877e9058d4..c43efe8e5c5 100644 --- a/script/import_scripts/nabble.rb +++ b/script/import_scripts/nabble.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'pg' -require_relative 'base/uploader' +require "pg" +require_relative "base/uploader" =begin if you want to create mock users for posts made by anonymous participants, @@ -40,7 +40,7 @@ class ImportScripts::Nabble < ImportScripts::Base BATCH_SIZE = 1000 - DB_NAME = "nabble" + DB_NAME = "nabble" CATEGORY_ID = 6 def initialize @@ -64,14 +64,13 @@ class ImportScripts::Nabble < ImportScripts::Base total_count = @client.exec("SELECT COUNT(user_id) FROM user_")[0]["count"] batches(BATCH_SIZE) do |offset| - users = @client.query(<<-SQL + users = @client.query(<<-SQL) SELECT user_id, name, email, joined FROM user_ ORDER BY joined LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL - ) break if users.ntuples() < 1 @@ -83,24 +82,23 @@ class ImportScripts::Nabble < ImportScripts::Base email: row["email"] || fake_email, created_at: Time.zone.at(@td.decode(row["joined"])), name: row["name"], - post_create_action: proc do |user| - import_avatar(user, row["user_id"]) - end + post_create_action: proc { |user| import_avatar(user, row["user_id"]) }, } end end end def import_avatar(user, org_id) - filename = 'avatar' + org_id.to_s - path = File.join('/tmp/nab', filename) - res = @client.exec("SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1") + filename = "avatar" + org_id.to_s + path = File.join("/tmp/nab", filename) + res = + @client.exec( + "SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1", + ) return if res.ntuples() < 1 - binary = res[0]['content'] - File.open(path, 'wb') { |f| - f.write(PG::Connection.unescape_bytea(binary)) - } + binary = res[0]["content"] + File.open(path, "wb") { |f| f.write(PG::Connection.unescape_bytea(binary)) } upload = @uploader.create_upload(user.id, path, filename) @@ -113,7 +111,6 @@ class ImportScripts::Nabble < ImportScripts::Base else Rails.logger.error("Could not persist avatar for user #{user.username}") end - end def parse_email(msg) @@ -128,11 +125,13 @@ class ImportScripts::Nabble < ImportScripts::Base def create_forum_topics puts "", "creating forum topics" - app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id'] - topic_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0]["count"] + app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"] + topic_count = + @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0][ + "count" + ] batches(BATCH_SIZE) do |offset| - topics = @client.exec <<-SQL 
SELECT n.node_id, n.subject, n.owner_id, n.when_created, nm.message, n.msg_fmt FROM node AS n @@ -145,43 +144,43 @@ class ImportScripts::Nabble < ImportScripts::Base break if topics.ntuples() < 1 - next if all_records_exist? :posts, topics.map { |t| t['node_id'].to_i } + next if all_records_exist? :posts, topics.map { |t| t["node_id"].to_i } create_posts(topics, total: topic_count, offset: offset) do |t| raw = body_from(t) next unless raw raw = process_content(raw) - raw = process_attachments(raw, t['node_id']) + raw = process_attachments(raw, t["node_id"]) { - id: t['node_id'], - title: t['subject'], + id: t["node_id"], + title: t["subject"], user_id: user_id_from_imported_user_id(t["owner_id"]) || Discourse::SYSTEM_USER_ID, created_at: Time.zone.at(@td.decode(t["when_created"])), category: CATEGORY_ID, raw: raw, - cook_method: Post.cook_methods[:regular] + cook_method: Post.cook_methods[:regular], } end end end def body_from(p) - %w(m s).include?(p['msg_fmt']) ? parse_email(p['message']) : p['message'] + %w[m s].include?(p["msg_fmt"]) ? parse_email(p["message"]) : p["message"] rescue Email::Receiver::EmptyEmailError - puts "Skipped #{p['node_id']}" + puts "Skipped #{p["node_id"]}" end def process_content(txt) txt.gsub! /\/, '[quote="\1"]' - txt.gsub! /\<\/quote\>/, '[/quote]' - txt.gsub!(/\(.*?)\<\/raw\>/m) do |match| + txt.gsub! %r{\}, "[/quote]" + txt.gsub!(%r{\(.*?)\}m) do |match| c = Regexp.last_match[1].indent(4) - "\n#{c}\n" + "\n#{c}\n" end # lines starting with # are comments, not headings, insert a space to prevent markdown - txt.gsub! /\n#/m, ' #' + txt.gsub! /\n#/m, " #" # in the languagetool forum, quite a lot of XML was not marked as raw # so we treat ... and ... as raw @@ -202,12 +201,10 @@ class ImportScripts::Nabble < ImportScripts::Base def process_attachments(txt, postid) txt.gsub!(//m) do |match| basename = Regexp.last_match[1] - get_attachment_upload(basename, postid) do |upload| - @uploader.embedded_image_html(upload) - end + get_attachment_upload(basename, postid) { |upload| @uploader.embedded_image_html(upload) } end - txt.gsub!(/(.*?)<\/nabble_a>/m) do |match| + txt.gsub!(%r{(.*?)}m) do |match| basename = Regexp.last_match[1] get_attachment_upload(basename, postid) do |upload| @uploader.attachment_html(upload, basename) @@ -217,13 +214,12 @@ class ImportScripts::Nabble < ImportScripts::Base end def get_attachment_upload(basename, postid) - contents = @client.exec("SELECT content FROM file_node WHERE name='#{basename}' AND node_id = #{postid}") + contents = + @client.exec("SELECT content FROM file_node WHERE name='#{basename}' AND node_id = #{postid}") if contents.any? 
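# Aside on the bytea handling used by this importer (sketch only; assumes the
# "pg" gem the script already requires, no live database needed): the
# class-level helper decodes PostgreSQL bytea output, including the modern
# "\x" hex form, back into raw bytes before they are written to /tmp.
require "pg"
decoded = PG::Connection.unescape_bytea("\\x89504e47")
raise "unexpected bytes" unless decoded == "\x89PNG".b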
- binary = contents[0]['content'] - fn = File.join('/tmp/nab', basename) - File.open(fn, 'wb') { |f| - f.write(PG::Connection.unescape_bytea(binary)) - } + binary = contents[0]["content"] + fn = File.join("/tmp/nab", basename) + File.open(fn, "wb") { |f| f.write(PG::Connection.unescape_bytea(binary)) } yield @uploader.create_upload(0, fn, basename) end end @@ -231,8 +227,11 @@ class ImportScripts::Nabble < ImportScripts::Base def import_replies puts "", "creating topic replies" - app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id'] - post_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0]["count"] + app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"] + post_count = + @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0][ + "count" + ] topic_ids = {} @@ -249,11 +248,11 @@ class ImportScripts::Nabble < ImportScripts::Base break if posts.ntuples() < 1 - next if all_records_exist? :posts, posts.map { |p| p['node_id'].to_i } + next if all_records_exist? :posts, posts.map { |p| p["node_id"].to_i } create_posts(posts, total: post_count, offset: offset) do |p| - parent_id = p['parent_id'] - id = p['node_id'] + parent_id = p["parent_id"] + id = p["node_id"] topic_id = topic_ids[parent_id] unless topic_id @@ -268,19 +267,21 @@ class ImportScripts::Nabble < ImportScripts::Base next unless raw raw = process_content(raw) raw = process_attachments(raw, id) - { id: id, + { + id: id, topic_id: topic_id, - user_id: user_id_from_imported_user_id(p['owner_id']) || Discourse::SYSTEM_USER_ID, + user_id: user_id_from_imported_user_id(p["owner_id"]) || Discourse::SYSTEM_USER_ID, created_at: Time.zone.at(@td.decode(p["when_created"])), raw: raw, - cook_method: Post.cook_methods[:regular] } + cook_method: Post.cook_methods[:regular], + } end end end end class String - def indent(count, char = ' ') + def indent(count, char = " ") gsub(/([^\n]*)(\n|$)/) do |match| last_iteration = ($1 == "" && $2 == "") line = +"" diff --git a/script/import_scripts/ning.rb b/script/import_scripts/ning.rb index 30612ec9691..3af9b080d1b 100644 --- a/script/import_scripts/ning.rb +++ b/script/import_scripts/ning.rb @@ -5,28 +5,28 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb") # Edit the constants and initialize method for your import data. 
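# Two equivalences the reformat of this file leans on, as a minimal sketch
# (plain Ruby; the sample values are illustrative): %w[...] builds the same
# array of strings as the bracketed form, and numeric literals may carry
# underscores purely for readability.
raise "word array differs" unless %w[bmp ico txt] == ["bmp", "ico", "txt"]
raise "underscored literal differs" unless 10_240 == 10240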
class ImportScripts::Ning < ImportScripts::Base - JSON_FILES_DIR = "/Users/techapj/Downloads/ben/ADEM" - ATTACHMENT_PREFIXES = ["discussions", "pages", "blogs", "members", "photos"] - EXTRA_AUTHORIZED_EXTENSIONS = ["bmp", "ico", "txt", "pdf", "gif", "jpg", "jpeg", "html"] + ATTACHMENT_PREFIXES = %w[discussions pages blogs members photos] + EXTRA_AUTHORIZED_EXTENSIONS = %w[bmp ico txt pdf gif jpg jpeg html] def initialize super @system_user = Discourse.system_user - @users_json = load_ning_json("ning-members-local.json") + @users_json = load_ning_json("ning-members-local.json") @discussions_json = load_ning_json("ning-discussions-local.json") # An example of a custom category from Ning: @blogs_json = load_ning_json("ning-blogs-local.json") - @photos_json = load_ning_json("ning-photos-local.json") - @pages_json = load_ning_json("ning-pages-local.json") + @photos_json = load_ning_json("ning-photos-local.json") + @pages_json = load_ning_json("ning-pages-local.json") - SiteSetting.max_image_size_kb = 10240 - SiteSetting.max_attachment_size_kb = 10240 - SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + EXTRA_AUTHORIZED_EXTENSIONS).uniq.join("|") + SiteSetting.max_image_size_kb = 10_240 + SiteSetting.max_attachment_size_kb = 10_240 + SiteSetting.authorized_extensions = + (SiteSetting.authorized_extensions.split("|") + EXTRA_AUTHORIZED_EXTENSIONS).uniq.join("|") # Example of importing a custom profile field: # @interests_field = UserField.find_by_name("My interests") @@ -60,23 +60,23 @@ class ImportScripts::Ning < ImportScripts::Base end def repair_json(arg) - arg.gsub!(/^\(/, "") # content of file is surround by ( ) + arg.gsub!(/^\(/, "") # content of file is surround by ( ) arg.gsub!(/\)$/, "") - arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end + arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end arg.gsub!(/\}\{/, "},{") # missing commas sometimes! - arg.gsub!("}]{", "},{") # surprise square brackets - arg.gsub!("}[{", "},{") # :troll: + arg.gsub!("}]{", "},{") # surprise square brackets + arg.gsub!("}[{", "},{") # :troll: arg end def import_users - puts '', "Importing users" + puts "", "Importing users" - staff_levels = ["admin", "moderator", "owner"] + staff_levels = %w[admin moderator owner] create_users(@users_json) do |u| { @@ -88,57 +88,58 @@ class ImportScripts::Ning < ImportScripts::Base location: "#{u["location"]} #{u["country"]}", avatar_url: u["profilePhoto"], bio_raw: u["profileQuestions"].is_a?(Hash) ? u["profileQuestions"]["About Me"] : nil, - post_create_action: proc do |newuser| - # if u["profileQuestions"].is_a?(Hash) - # newuser.custom_fields = {"user_field_#{@interests_field.id}" => u["profileQuestions"]["My interests"]} - # end + post_create_action: + proc do |newuser| + # if u["profileQuestions"].is_a?(Hash) + # newuser.custom_fields = {"user_field_#{@interests_field.id}" => u["profileQuestions"]["My interests"]} + # end - if staff_levels.include?(u["level"].downcase) - if u["level"].downcase == "admin" || u["level"].downcase == "owner" - newuser.admin = true - else - newuser.moderator = true - end - end - - # states: ["active", "suspended", "left", "pending"] - if u["state"] == "active" && newuser.approved_at.nil? - newuser.approved = true - newuser.approved_by_id = @system_user.id - newuser.approved_at = newuser.created_at - end - - newuser.save - - if u["profilePhoto"] && newuser.user_avatar.try(:custom_upload_id).nil? 
- photo_path = file_full_path(u["profilePhoto"]) - if File.exist?(photo_path) - begin - upload = create_upload(newuser.id, photo_path, File.basename(photo_path)) - if upload.persisted? - newuser.import_mode = false - newuser.create_user_avatar - newuser.import_mode = true - newuser.user_avatar.update(custom_upload_id: upload.id) - newuser.update(uploaded_avatar_id: upload.id) - else - puts "Error: Upload did not persist for #{photo_path}!" - end - rescue SystemCallError => err - puts "Could not import avatar #{photo_path}: #{err.message}" + if staff_levels.include?(u["level"].downcase) + if u["level"].downcase == "admin" || u["level"].downcase == "owner" + newuser.admin = true + else + newuser.moderator = true end - else - puts "avatar file not found at #{photo_path}" end - end - end + + # states: ["active", "suspended", "left", "pending"] + if u["state"] == "active" && newuser.approved_at.nil? + newuser.approved = true + newuser.approved_by_id = @system_user.id + newuser.approved_at = newuser.created_at + end + + newuser.save + + if u["profilePhoto"] && newuser.user_avatar.try(:custom_upload_id).nil? + photo_path = file_full_path(u["profilePhoto"]) + if File.exist?(photo_path) + begin + upload = create_upload(newuser.id, photo_path, File.basename(photo_path)) + if upload.persisted? + newuser.import_mode = false + newuser.create_user_avatar + newuser.import_mode = true + newuser.user_avatar.update(custom_upload_id: upload.id) + newuser.update(uploaded_avatar_id: upload.id) + else + puts "Error: Upload did not persist for #{photo_path}!" + end + rescue SystemCallError => err + puts "Could not import avatar #{photo_path}: #{err.message}" + end + else + puts "avatar file not found at #{photo_path}" + end + end + end, } end EmailToken.delete_all end def suspend_users - puts '', "Updating suspended users" + puts "", "Updating suspended users" count = 0 suspended = 0 @@ -151,7 +152,10 @@ class ImportScripts::Ning < ImportScripts::Base user.suspended_till = 200.years.from_now if user.save - StaffActionLogger.new(@system_user).log_user_suspend(user, "Import data indicates account is suspended.") + StaffActionLogger.new(@system_user).log_user_suspend( + user, + "Import data indicates account is suspended.", + ) suspended += 1 else puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}" @@ -168,13 +172,15 @@ class ImportScripts::Ning < ImportScripts::Base def import_categories puts "", "Importing categories" - create_categories((["Blog", "Pages", "Photos"] + @discussions_json.map { |d| d["category"] }).uniq.compact) do |name| + create_categories( + (%w[Blog Pages Photos] + @discussions_json.map { |d| d["category"] }).uniq.compact, + ) do |name| if name.downcase == "uncategorized" nil else { id: name, # ning has no id for categories, so use the name - name: name + name: name, } end end @@ -220,9 +226,7 @@ class ImportScripts::Ning < ImportScripts::Base unless topic["category"].nil? || topic["category"].downcase == "uncategorized" mapped[:category] = category_id_from_imported_category_id(topic["category"]) end - if topic["category"].nil? && default_category - mapped[:category] = default_category - end + mapped[:category] = default_category if topic["category"].nil? 
&& default_category mapped[:title] = CGI.unescapeHTML(topic["title"]) mapped[:raw] = process_ning_post_body(topic["description"]) @@ -230,13 +234,9 @@ class ImportScripts::Ning < ImportScripts::Base mapped[:raw] = add_file_attachments(mapped[:raw], topic["fileAttachments"]) end - if topic["photoUrl"] - mapped[:raw] = add_photo(mapped[:raw], topic["photoUrl"]) - end + mapped[:raw] = add_photo(mapped[:raw], topic["photoUrl"]) if topic["photoUrl"] - if topic["embedCode"] - mapped[:raw] = add_video(mapped[:raw], topic["embedCode"]) - end + mapped[:raw] = add_video(mapped[:raw], topic["embedCode"]) if topic["embedCode"] parent_post = create_post(mapped, mapped[:id]) unless parent_post.is_a?(Post) @@ -247,23 +247,24 @@ class ImportScripts::Ning < ImportScripts::Base if topic["comments"].present? topic["comments"].reverse.each do |post| - if post_id_from_imported_post_id(post["id"]) next # already imported this post end raw = process_ning_post_body(post["description"]) - if post["fileAttachments"] - raw = add_file_attachments(raw, post["fileAttachments"]) - end + raw = add_file_attachments(raw, post["fileAttachments"]) if post["fileAttachments"] - new_post = create_post({ - id: post["id"], - topic_id: parent_post.topic_id, - user_id: user_id_from_imported_user_id(post["contributorName"]) || -1, - raw: raw, - created_at: Time.zone.parse(post["createdDate"]) - }, post["id"]) + new_post = + create_post( + { + id: post["id"], + topic_id: parent_post.topic_id, + user_id: user_id_from_imported_user_id(post["contributorName"]) || -1, + raw: raw, + created_at: Time.zone.parse(post["createdDate"]), + }, + post["id"], + ) if new_post.is_a?(Post) posts += 1 @@ -288,11 +289,17 @@ class ImportScripts::Ning < ImportScripts::Base end def attachment_regex - @_attachment_regex ||= Regexp.new(%Q[]*)href="(?:#{ATTACHMENT_PREFIXES.join('|')})\/(?:[^"]+)"(?:[^>]*)>]*)src="([^"]+)"(?:[^>]*)><\/a>]) + @_attachment_regex ||= + Regexp.new( + %Q[]*)href="(?:#{ATTACHMENT_PREFIXES.join("|")})\/(?:[^"]+)"(?:[^>]*)>]*)src="([^"]+)"(?:[^>]*)><\/a>], + ) end def youtube_iframe_regex - @_youtube_iframe_regex ||= Regexp.new(%Q[

]*)src="\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>(?:[^<]*)<\/p>]) + @_youtube_iframe_regex ||= + Regexp.new( + %Q[

]*)src="\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>(?:[^<]*)<\/p>], + ) end def process_ning_post_body(arg) @@ -382,15 +389,16 @@ class ImportScripts::Ning < ImportScripts::Base def add_video(arg, embed_code) raw = arg - youtube_regex = Regexp.new(%Q[]*)src="http:\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>]) + youtube_regex = + Regexp.new( + %Q[]*)src="http:\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>], + ) raw.gsub!(youtube_regex) do |s| matches = youtube_regex.match(s) video_id = matches[1].split("?").first - if video_id - raw += "\n\nhttps://www.youtube.com/watch?v=#{video_id}\n" - end + raw += "\n\nhttps://www.youtube.com/watch?v=#{video_id}\n" if video_id end raw += "\n" + embed_code + "\n" @@ -398,6 +406,4 @@ class ImportScripts::Ning < ImportScripts::Base end end -if __FILE__ == $0 - ImportScripts::Ning.new.perform -end +ImportScripts::Ning.new.perform if __FILE__ == $0 diff --git a/script/import_scripts/nodebb/mongo.rb b/script/import_scripts/nodebb/mongo.rb index 134704b2b2a..696aec43936 100644 --- a/script/import_scripts/nodebb/mongo.rb +++ b/script/import_scripts/nodebb/mongo.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'mongo' +require "mongo" module NodeBB class Mongo @@ -43,8 +43,8 @@ module NodeBB user["joindate"] = timestamp_to_date(user["joindate"]) user["lastonline"] = timestamp_to_date(user["lastonline"]) - user['banned'] = user['banned'].to_s - user['uid'] = user['uid'].to_s + user["banned"] = user["banned"].to_s + user["uid"] = user["uid"].to_s user end @@ -56,17 +56,17 @@ module NodeBB category_keys.each do |category_key| category = mongo.find(_key: "category:#{category_key}").first - category['parentCid'] = category['parentCid'].to_s - category['disabled'] = category['disabled'].to_s - category['cid'] = category['cid'].to_s + category["parentCid"] = category["parentCid"].to_s + category["disabled"] = category["disabled"].to_s + category["cid"] = category["cid"].to_s - categories[category['cid']] = category + categories[category["cid"]] = category end end end def topics(offset = 0, page_size = 2000) - topic_keys = mongo.find(_key: 'topics:tid').skip(offset).limit(page_size).pluck(:value) + topic_keys = mongo.find(_key: "topics:tid").skip(offset).limit(page_size).pluck(:value) topic_keys.map { |topic_key| topic(topic_key) } end @@ -86,11 +86,11 @@ module NodeBB end def topic_count - mongo.find(_key: 'topics:tid').count + mongo.find(_key: "topics:tid").count end def posts(offset = 0, page_size = 2000) - post_keys = mongo.find(_key: 'posts:pid').skip(offset).limit(page_size).pluck(:value) + post_keys = mongo.find(_key: "posts:pid").skip(offset).limit(page_size).pluck(:value) post_keys.map { |post_key| post(post_key) } end @@ -111,7 +111,7 @@ module NodeBB end def post_count - mongo.find(_key: 'posts:pid').count + mongo.find(_key: "posts:pid").count end private diff --git a/script/import_scripts/nodebb/nodebb.rb b/script/import_scripts/nodebb/nodebb.rb index b29f5ee1c6c..df575f78d37 100644 --- a/script/import_scripts/nodebb/nodebb.rb +++ b/script/import_scripts/nodebb/nodebb.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true -require_relative '../base' -require_relative './redis' -require_relative './mongo' +require_relative "../base" +require_relative "./redis" +require_relative "./mongo" class ImportScripts::NodeBB < ImportScripts::Base # CHANGE THESE BEFORE RUNNING THE IMPORTER # ATTACHMENT_DIR needs to be absolute, not relative path - ATTACHMENT_DIR = '/Users/orlando/www/orlando/NodeBB/public/uploads' + ATTACHMENT_DIR 
= "/Users/orlando/www/orlando/NodeBB/public/uploads" BATCH_SIZE = 2000 def initialize @@ -17,17 +17,13 @@ class ImportScripts::NodeBB < ImportScripts::Base # @client = adapter.new('mongodb://127.0.0.1:27017/nodebb') adapter = NodeBB::Redis - @client = adapter.new( - host: "localhost", - port: "6379", - db: 14 - ) + @client = adapter.new(host: "localhost", port: "6379", db: 14) load_merged_posts end def load_merged_posts - puts 'loading merged posts with topics...' + puts "loading merged posts with topics..." # we keep here the posts that were merged # as topics @@ -35,13 +31,16 @@ class ImportScripts::NodeBB < ImportScripts::Base # { post_id: discourse_post_id } @merged_posts_map = {} - PostCustomField.where(name: 'import_merged_post_id').pluck(:post_id, :value).each do |post_id, import_id| - post = Post.find(post_id) - topic_id = post.topic_id - nodebb_post_id = post.custom_fields['import_merged_post_id'] + PostCustomField + .where(name: "import_merged_post_id") + .pluck(:post_id, :value) + .each do |post_id, import_id| + post = Post.find(post_id) + topic_id = post.topic_id + nodebb_post_id = post.custom_fields["import_merged_post_id"] - @merged_posts_map[nodebb_post_id] = topic_id - end + @merged_posts_map[nodebb_post_id] = topic_id + end end def execute @@ -56,19 +55,14 @@ class ImportScripts::NodeBB < ImportScripts::Base end def import_groups - puts '', 'importing groups' + puts "", "importing groups" groups = @client.groups total_count = groups.count progress_count = 0 start_time = Time.now - create_groups(groups) do |group| - { - id: group["name"], - name: group["slug"] - } - end + create_groups(groups) { |group| { id: group["name"], name: group["slug"] } } end def import_categories @@ -107,15 +101,18 @@ class ImportScripts::NodeBB < ImportScripts::Base name: category["name"], position: category["order"], description: category["description"], - parent_category_id: category_id_from_imported_category_id(category["parentCid"]) + parent_category_id: category_id_from_imported_category_id(category["parentCid"]), } end categories.each do |source_category| - cid = category_id_from_imported_category_id(source_category['cid']) - Permalink.create(url: "/category/#{source_category['slug']}", category_id: cid) rescue nil + cid = category_id_from_imported_category_id(source_category["cid"]) + begin + Permalink.create(url: "/category/#{source_category["slug"]}", category_id: cid) + rescue StandardError + nil + end end - end def import_users @@ -158,12 +155,13 @@ class ImportScripts::NodeBB < ImportScripts::Base bio_raw: user["aboutme"], active: true, custom_fields: { - import_pass: user["password"] + import_pass: user["password"], }, - post_create_action: proc do |u| - import_profile_picture(user, u) - import_profile_background(user, u) - end + post_create_action: + proc do |u| + import_profile_picture(user, u) + import_profile_background(user, u) + end, } end end @@ -204,7 +202,7 @@ class ImportScripts::NodeBB < ImportScripts::Base end # write tmp file - file = Tempfile.new(filename, encoding: 'ascii-8bit') + file = Tempfile.new(filename, encoding: "ascii-8bit") file.write string_io.read file.rewind @@ -230,9 +228,21 @@ class ImportScripts::NodeBB < ImportScripts::Base imported_user.user_avatar.update(custom_upload_id: upload.id) imported_user.update(uploaded_avatar_id: upload.id) ensure - string_io.close rescue nil - file.close rescue nil - file.unlind rescue nil + begin + string_io.close + rescue StandardError + nil + end + begin + file.close + rescue StandardError + nil + end + begin + 
file.unlind + rescue StandardError + nil + end end def import_profile_background(old_user, imported_user) @@ -264,7 +274,7 @@ class ImportScripts::NodeBB < ImportScripts::Base end # write tmp file - file = Tempfile.new(filename, encoding: 'ascii-8bit') + file = Tempfile.new(filename, encoding: "ascii-8bit") file.write string_io.read file.rewind @@ -288,9 +298,21 @@ class ImportScripts::NodeBB < ImportScripts::Base imported_user.user_profile.upload_profile_background(upload) ensure - string_io.close rescue nil - file.close rescue nil - file.unlink rescue nil + begin + string_io.close + rescue StandardError + nil + end + begin + file.close + rescue StandardError + nil + end + begin + file.unlink + rescue StandardError + nil + end end def add_users_to_groups @@ -305,7 +327,7 @@ class ImportScripts::NodeBB < ImportScripts::Base dgroup = find_group_by_import_id(group["name"]) # do thing if we migrated this group already - next if dgroup.custom_fields['import_users_added'] + next if dgroup.custom_fields["import_users_added"] group_member_ids = group["member_ids"].map { |uid| user_id_from_imported_user_id(uid) } group_owner_ids = group["owner_ids"].map { |uid| user_id_from_imported_user_id(uid) } @@ -320,7 +342,7 @@ class ImportScripts::NodeBB < ImportScripts::Base owners = User.find(group_owner_ids) owners.each { |owner| dgroup.add_owner(owner) } - dgroup.custom_fields['import_users_added'] = true + dgroup.custom_fields["import_users_added"] = true dgroup.save progress_count += 1 @@ -357,12 +379,13 @@ class ImportScripts::NodeBB < ImportScripts::Base created_at: topic["timestamp"], views: topic["viewcount"], closed: topic["locked"] == "1", - post_create_action: proc do |p| - # keep track of this to use in import_posts - p.custom_fields["import_merged_post_id"] = topic["mainPid"] - p.save - @merged_posts_map[topic["mainPid"]] = p.id - end + post_create_action: + proc do |p| + # keep track of this to use in import_posts + p.custom_fields["import_merged_post_id"] = topic["mainPid"] + p.save + @merged_posts_map[topic["mainPid"]] = p.id + end, } data[:pinned_at] = data[:created_at] if topic["pinned"] == "1" @@ -372,7 +395,11 @@ class ImportScripts::NodeBB < ImportScripts::Base topics.each do |import_topic| topic = topic_lookup_from_imported_post_id("t#{import_topic["tid"]}") - Permalink.create(url: "/topic/#{import_topic['slug']}", topic_id: topic[:topic_id]) rescue nil + begin + Permalink.create(url: "/topic/#{import_topic["slug"]}", topic_id: topic[:topic_id]) + rescue StandardError + nil + end end end end @@ -411,21 +438,23 @@ class ImportScripts::NodeBB < ImportScripts::Base topic_id: topic[:topic_id], raw: raw, created_at: post["timestamp"], - post_create_action: proc do |p| - post["upvoted_by"].each do |upvoter_id| - user = User.new - user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID - PostActionCreator.like(user, p) - end - end + post_create_action: + proc do |p| + post["upvoted_by"].each do |upvoter_id| + user = User.new + user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID + PostActionCreator.like(user, p) + end + end, } - if post['toPid'] + if post["toPid"] # Look reply to topic - parent_id = topic_lookup_from_imported_post_id("t#{post['toPid']}").try(:[], :post_number) + parent_id = topic_lookup_from_imported_post_id("t#{post["toPid"]}").try(:[], :post_number) # Look reply post if topic is missing - parent_id ||= topic_lookup_from_imported_post_id("p#{post['toPid']}").try(:[], :post_number) + parent_id ||= + 
topic_lookup_from_imported_post_id("p#{post["toPid"]}").try(:[], :post_number) if parent_id data[:reply_to_post_number] = parent_id @@ -448,12 +477,12 @@ class ImportScripts::NodeBB < ImportScripts::Base Post.find_each do |post| begin - next if post.custom_fields['import_post_processing'] + next if post.custom_fields["import_post_processing"] new_raw = postprocess_post(post) if new_raw != post.raw post.raw = new_raw - post.custom_fields['import_post_processing'] = true + post.custom_fields["import_post_processing"] = true post.save end ensure @@ -463,7 +492,7 @@ class ImportScripts::NodeBB < ImportScripts::Base end def import_attachments - puts '', 'importing attachments...' + puts "", "importing attachments..." current = 0 max = Post.count @@ -474,7 +503,7 @@ class ImportScripts::NodeBB < ImportScripts::Base print_status(current, max, start_time) new_raw = post.raw.dup - new_raw.gsub!(/\[(.*)\]\((\/assets\/uploads\/files\/.*)\)/) do + new_raw.gsub!(%r{\[(.*)\]\((/assets/uploads/files/.*)\)}) do image_md = Regexp.last_match[0] text, filepath = $1, $2 filepath = filepath.gsub("/assets/uploads", ATTACHMENT_DIR) @@ -493,7 +522,12 @@ class ImportScripts::NodeBB < ImportScripts::Base end if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from NodeBB') + PostRevisor.new(post).revise!( + post.user, + { raw: new_raw }, + bypass_bump: true, + edit_reason: "Import attachments from NodeBB", + ) end end end @@ -502,28 +536,30 @@ class ImportScripts::NodeBB < ImportScripts::Base raw = post.raw # [link to post](/post/:id) - raw = raw.gsub(/\[(.*)\]\(\/post\/(\d+).*\)/) do - text, post_id = $1, $2 + raw = + raw.gsub(%r{\[(.*)\]\(/post/(\d+).*\)}) do + text, post_id = $1, $2 - if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}") - url = topic_lookup[:url] - "[#{text}](#{url})" - else - "/404" + if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}") + url = topic_lookup[:url] + "[#{text}](#{url})" + else + "/404" + end end - end # [link to topic](/topic/:id) - raw = raw.gsub(/\[(.*)\]\(\/topic\/(\d+).*\)/) do - text, topic_id = $1, $2 + raw = + raw.gsub(%r{\[(.*)\]\(/topic/(\d+).*\)}) do + text, topic_id = $1, $2 - if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}") - url = topic_lookup[:url] - "[#{text}](#{url})" - else - "/404" + if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}") + url = topic_lookup[:url] + "[#{text}](#{url})" + else + "/404" + end end - end raw end diff --git a/script/import_scripts/nodebb/redis.rb b/script/import_scripts/nodebb/redis.rb index f8877c5e15f..3d1f08a3f37 100644 --- a/script/import_scripts/nodebb/redis.rb +++ b/script/import_scripts/nodebb/redis.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'redis' +require "redis" module NodeBB class Redis @@ -11,7 +11,7 @@ module NodeBB end def groups - group_keys = redis.zrange('groups:visible:createtime', 0, -1) + group_keys = redis.zrange("groups:visible:createtime", 0, -1) group_keys.map { |group_key| group(group_key) } end @@ -26,7 +26,7 @@ module NodeBB end def users - user_keys = redis.zrange('users:joindate', 0, -1) + user_keys = redis.zrange("users:joindate", 0, -1) user_keys.map { |user_key| user(user_key) } end @@ -41,13 +41,13 @@ module NodeBB end def categories - category_keys = redis.zrange('categories:cid', 0, -1) + category_keys = redis.zrange("categories:cid", 0, -1) {}.tap do |categories| category_keys.each do |category_key| category = 
redis.hgetall("category:#{category_key}") - categories[category['cid']] = category + categories[category["cid"]] = category end end end @@ -59,7 +59,7 @@ module NodeBB from = offset to = page_size + offset - topic_keys = redis.zrange('topics:tid', from, to) + topic_keys = redis.zrange("topics:tid", from, to) topic_keys.map { |topic_key| topic(topic_key) } end @@ -75,7 +75,7 @@ module NodeBB end def topic_count - redis.zcard('topics:tid') + redis.zcard("topics:tid") end def posts(offset = 0, page_size = 2000) @@ -85,7 +85,7 @@ module NodeBB from = offset to = page_size + offset - post_keys = redis.zrange('posts:pid', from, to) + post_keys = redis.zrange("posts:pid", from, to) post_keys.map { |post_key| post(post_key) } end @@ -99,7 +99,7 @@ module NodeBB end def post_count - redis.zcard('posts:pid') + redis.zcard("posts:pid") end private diff --git a/script/import_scripts/phorum.rb b/script/import_scripts/phorum.rb index dc2639933ec..f03db50ea42 100644 --- a/script/import_scripts/phorum.rb +++ b/script/import_scripts/phorum.rb @@ -5,7 +5,6 @@ require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Phorum < ImportScripts::Base - PHORUM_DB = "piwik" TABLE_PREFIX = "pw_" BATCH_SIZE = 1000 @@ -13,12 +12,13 @@ class ImportScripts::Phorum < ImportScripts::Base def initialize super - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - password: "pa$$word", - database: PHORUM_DB - ) + @client = + Mysql2::Client.new( + host: "localhost", + username: "root", + password: "pa$$word", + database: PHORUM_DB, + ) end def execute @@ -29,30 +29,34 @@ class ImportScripts::Phorum < ImportScripts::Base end def import_users - puts '', "creating users" + puts "", "creating users" - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}users;").first['count'] + total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}users;").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT user_id id, username, TRIM(email) AS email, username name, date_added created_at, + results = + mysql_query( + "SELECT user_id id, username, TRIM(email) AS email, username name, date_added created_at, date_last_active last_seen_at, admin FROM #{TABLE_PREFIX}users WHERE #{TABLE_PREFIX}users.active = 1 LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 create_users(results, total: total_count, offset: offset) do |user| - next if user['username'].blank? - { id: user['id'], - email: user['email'], - username: user['username'], - name: user['name'], - created_at: Time.zone.at(user['created_at']), - last_seen_at: Time.zone.at(user['last_seen_at']), - admin: user['admin'] == 1 } + next if user["username"].blank? + { + id: user["id"], + email: user["email"], + username: user["username"], + name: user["name"], + created_at: Time.zone.at(user["created_at"]), + last_seen_at: Time.zone.at(user["last_seen_at"]), + admin: user["admin"] == 1, + } end end end @@ -60,19 +64,18 @@ class ImportScripts::Phorum < ImportScripts::Base def import_categories puts "", "importing categories..." 
- categories = mysql_query(" + categories = + mysql_query( + " SELECT forum_id id, name, description, active FROM #{TABLE_PREFIX}forums ORDER BY forum_id ASC - ").to_a + ", + ).to_a create_categories(categories) do |category| - next if category['active'] == 0 - { - id: category['id'], - name: category["name"], - description: category["description"] - } + next if category["active"] == 0 + { id: category["id"], name: category["name"], description: category["description"] } end # uncomment below lines to create permalink @@ -87,7 +90,9 @@ class ImportScripts::Phorum < ImportScripts::Base total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}messages").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT m.message_id id, m.parent_id, m.forum_id category_id, @@ -100,7 +105,8 @@ class ImportScripts::Phorum < ImportScripts::Base ORDER BY m.datestamp LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ").to_a + ", + ).to_a break if results.size < 1 @@ -108,20 +114,20 @@ class ImportScripts::Phorum < ImportScripts::Base skip = false mapped = {} - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_raw_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['created_at']) + mapped[:id] = m["id"] + mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1 + mapped[:raw] = process_raw_post(m["raw"], m["id"]) + mapped[:created_at] = Time.zone.at(m["created_at"]) - if m['parent_id'] == 0 - mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i) - mapped[:title] = CGI.unescapeHTML(m['title']) + if m["parent_id"] == 0 + mapped[:category] = category_id_from_imported_category_id(m["category_id"].to_i) + mapped[:title] = CGI.unescapeHTML(m["title"]) else - parent = topic_lookup_from_imported_post_id(m['parent_id']) + parent = topic_lookup_from_imported_post_id(m["parent_id"]) if parent mapped[:topic_id] = parent[:topic_id] else - puts "Parent post #{m['parent_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" + puts "Parent post #{m["parent_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" skip = true end end @@ -137,25 +143,24 @@ class ImportScripts::Phorum < ImportScripts::Base # end # end end - end def process_raw_post(raw, import_id) s = raw.dup # :) is encoded as :) - s.gsub!(/]+) \/>/, '\1') + s.gsub!(%r{]+) />}, '\1') # Some links look like this: http://www.onegameamonth.com - s.gsub!(/(.+)<\/a>/, '[\2](\1)') + s.gsub!(%r{(.+)}, '[\2](\1)') # Many phpbb bbcode tags have a hash attached to them. Examples: # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - s.gsub!(/:(?:\w{8})\]/, ']') + s.gsub!(/:(?:\w{8})\]/, "]") # Remove mybb video tags. - s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '') + s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "") s = CGI.unescapeHTML(s) @@ -163,50 +168,54 @@ class ImportScripts::Phorum < ImportScripts::Base # [http://answers.yahoo.com/question/index ... 
223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) # # Work around it for now: - s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[') + s.gsub!(%r{\[http(s)?://(www\.)?}, "[") # [QUOTE]...[/QUOTE] - s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" } + s.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n> #{$1}\n" } # [URL=...]...[/URL] - s.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" } + s.gsub!(%r{\[url="?(.+?)"?\](.+)\[/url\]}i) { "[#{$2}](#{$1})" } # [IMG]...[/IMG] - s.gsub!(/\[\/?img\]/i, "") + s.gsub!(%r{\[/?img\]}i, "") # convert list tags to ul and list=1 tags to ol # (basically, we're only missing list=a here...) - s.gsub!(/\[list\](.*?)\[\/list\]/m, '[ul]\1[/ul]') - s.gsub!(/\[list=1\](.*?)\[\/list\]/m, '[ol]\1[/ol]') + s.gsub!(%r{\[list\](.*?)\[/list\]}m, '[ul]\1[/ul]') + s.gsub!(%r{\[list=1\](.*?)\[/list\]}m, '[ol]\1[/ol]') # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: s.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]') # [CODE]...[/CODE] - s.gsub!(/\[\/?code\]/i, "\n```\n") + s.gsub!(%r{\[/?code\]}i, "\n```\n") # [HIGHLIGHT]...[/HIGHLIGHT] - s.gsub!(/\[\/?highlight\]/i, "\n```\n") + s.gsub!(%r{\[/?highlight\]}i, "\n```\n") # [YOUTUBE][/YOUTUBE] - s.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + s.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } # [youtube=425,350]id[/youtube] - s.gsub!(/\[youtube="?(.+?)"?\](.+)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" } + s.gsub!(%r{\[youtube="?(.+?)"?\](.+)\[/youtube\]}i) do + "\nhttps://www.youtube.com/watch?v=#{$2}\n" + end # [MEDIA=youtube]id[/MEDIA] - s.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + s.gsub!(%r{\[MEDIA=youtube\](.+?)\[/MEDIA\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } # [ame="youtube_link"]title[/ame] - s.gsub!(/\[ame="?(.+?)"?\](.+)\[\/ame\]/i) { "\n#{$1}\n" } + s.gsub!(%r{\[ame="?(.+?)"?\](.+)\[/ame\]}i) { "\n#{$1}\n" } # [VIDEO=youtube;]...[/VIDEO] - s.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + s.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) do + "\nhttps://www.youtube.com/watch?v=#{$1}\n" + end # [USER=706]@username[/USER] - s.gsub!(/\[user="?(.+?)"?\](.+)\[\/user\]/i) { $2 } + s.gsub!(%r{\[user="?(.+?)"?\](.+)\[/user\]}i) { $2 } # Remove the color tag s.gsub!(/\[color=[#a-z0-9]+\]/i, "") - s.gsub!(/\[\/color\]/i, "") + s.gsub!(%r{\[/color\]}i, "") s.gsub!(/\[hr\]/i, "


") @@ -221,7 +230,7 @@ class ImportScripts::Phorum < ImportScripts::Base end def import_attachments - puts '', 'importing attachments...' + puts "", "importing attachments..." uploads = mysql_query <<-SQL SELECT message_id, filename, FROM_BASE64(file_data) AS file_data, file_id @@ -234,26 +243,23 @@ class ImportScripts::Phorum < ImportScripts::Base total_count = uploads.count uploads.each do |upload| - # puts "*** processing file #{upload['file_id']}" - post_id = post_id_from_imported_post_id(upload['message_id']) + post_id = post_id_from_imported_post_id(upload["message_id"]) if post_id.nil? - puts "Post #{upload['message_id']} for attachment #{upload['file_id']} not found" + puts "Post #{upload["message_id"]} for attachment #{upload["file_id"]} not found" next end post = Post.find(post_id) - real_filename = upload['filename'] - real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + real_filename = upload["filename"] + real_filename.prepend SecureRandom.hex if real_filename[0] == "." - tmpfile = 'attach_' + upload['file_id'].to_s - filename = File.join('/tmp/', tmpfile) - File.open(filename, 'wb') { |f| - f.write(upload['file_data']) - } + tmpfile = "attach_" + upload["file_id"].to_s + filename = File.join("/tmp/", tmpfile) + File.open(filename, "wb") { |f| f.write(upload["file_data"]) } upl_obj = create_upload(post.user.id, filename, real_filename) @@ -265,16 +271,16 @@ class ImportScripts::Phorum < ImportScripts::Base post.raw += "\n\n#{html}\n\n" post.save! if PostUpload.where(post: post, upload: upl_obj).exists? - puts "skipping creating uploaded for previously uploaded file #{upload['file_id']}" + puts "skipping creating uploaded for previously uploaded file #{upload["file_id"]}" else PostUpload.create!(post: post, upload: upl_obj) end # PostUpload.create!(post: post, upload: upl_obj) unless PostUpload.where(post: post, upload: upl_obj).exists? else - puts "Skipping attachment #{upload['file_id']}" + puts "Skipping attachment #{upload["file_id"]}" end else - puts "Failed to upload attachment #{upload['file_id']}" + puts "Failed to upload attachment #{upload["file_id"]}" exit end @@ -282,7 +288,6 @@ class ImportScripts::Phorum < ImportScripts::Base print_status(current_count, total_count) end end - end ImportScripts::Phorum.new.perform diff --git a/script/import_scripts/phpbb3.rb b/script/import_scripts/phpbb3.rb index fb1807f911e..2c5ae75e449 100644 --- a/script/import_scripts/phpbb3.rb +++ b/script/import_scripts/phpbb3.rb @@ -4,32 +4,34 @@ # Documentation: https://meta.discourse.org/t/importing-from-phpbb3/30810 if ARGV.length != 1 || !File.exist?(ARGV[0]) - STDERR.puts '', 'Usage of phpBB3 importer:', 'bundle exec ruby phpbb3.rb ' - STDERR.puts '', "Use the settings file from #{File.expand_path('phpbb3/settings.yml', File.dirname(__FILE__))} as an example." - STDERR.puts '', 'Still having problems? Take a look at https://meta.discourse.org/t/importing-from-phpbb3/30810' + STDERR.puts "", "Usage of phpBB3 importer:", "bundle exec ruby phpbb3.rb " + STDERR.puts "", + "Use the settings file from #{File.expand_path("phpbb3/settings.yml", File.dirname(__FILE__))} as an example." + STDERR.puts "", + "Still having problems? 
Take a look at https://meta.discourse.org/t/importing-from-phpbb3/30810" exit 1 end module ImportScripts module PhpBB3 - require_relative 'phpbb3/support/settings' - require_relative 'phpbb3/database/database' + require_relative "phpbb3/support/settings" + require_relative "phpbb3/database/database" @settings = Settings.load(ARGV[0]) # We need to load the gem files for ruby-bbcode-to-md and the database adapter # (e.g. mysql2) before bundler gets initialized by the base importer. # Otherwise we get an error since those gems are not always in the Gemfile. - require 'ruby-bbcode-to-md' if @settings.use_bbcode_to_md + require "ruby-bbcode-to-md" if @settings.use_bbcode_to_md begin @database = Database.create(@settings.database) rescue UnsupportedVersionError => error - STDERR.puts '', error.message + STDERR.puts "", error.message exit 1 end - require_relative 'phpbb3/importer' + require_relative "phpbb3/importer" Importer.new(@settings, @database).perform end end diff --git a/script/import_scripts/phpbb3/database/database.rb b/script/import_scripts/phpbb3/database/database.rb index 240003edae9..c31b6bb6204 100644 --- a/script/import_scripts/phpbb3/database/database.rb +++ b/script/import_scripts/phpbb3/database/database.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'mysql2' +require "mysql2" module ImportScripts::PhpBB3 class Database @@ -19,11 +19,11 @@ module ImportScripts::PhpBB3 def create_database version = get_phpbb_version - if version.start_with?('3.0') - require_relative 'database_3_0' + if version.start_with?("3.0") + require_relative "database_3_0" Database_3_0.new(@database_client, @database_settings) - elsif version.start_with?('3.1') || version.start_with?('3.2') || version.start_with?('3.3') - require_relative 'database_3_1' + elsif version.start_with?("3.1") || version.start_with?("3.2") || version.start_with?("3.3") + require_relative "database_3_1" Database_3_1.new(@database_client, @database_settings) else raise UnsupportedVersionError, <<~TEXT @@ -42,7 +42,7 @@ module ImportScripts::PhpBB3 username: @database_settings.username, password: @database_settings.password, database: @database_settings.schema, - reconnect: true + reconnect: true, ) end diff --git a/script/import_scripts/phpbb3/database/database_3_0.rb b/script/import_scripts/phpbb3/database/database_3_0.rb index 49f042a6e75..f45b38824a5 100644 --- a/script/import_scripts/phpbb3/database/database_3_0.rb +++ b/script/import_scripts/phpbb3/database/database_3_0.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -require_relative 'database_base' -require_relative '../support/constants' +require_relative "database_base" +require_relative "../support/constants" module ImportScripts::PhpBB3 class Database_3_0 < DatabaseBase diff --git a/script/import_scripts/phpbb3/database/database_3_1.rb b/script/import_scripts/phpbb3/database/database_3_1.rb index ee666bbbc01..3255e484b1a 100644 --- a/script/import_scripts/phpbb3/database/database_3_1.rb +++ b/script/import_scripts/phpbb3/database/database_3_1.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -require_relative 'database_3_0' -require_relative '../support/constants' +require_relative "database_3_0" +require_relative "../support/constants" module ImportScripts::PhpBB3 class Database_3_1 < Database_3_0 @@ -32,14 +32,15 @@ module ImportScripts::PhpBB3 private def profile_fields_query(profile_fields) - @profile_fields_query ||= begin - if profile_fields.present? 
- columns = profile_fields.map { |field| "pf_#{field[:phpbb_field_name]}" } - ", #{columns.join(', ')}" - else - "" + @profile_fields_query ||= + begin + if profile_fields.present? + columns = profile_fields.map { |field| "pf_#{field[:phpbb_field_name]}" } + ", #{columns.join(", ")}" + else + "" + end end - end end end end diff --git a/script/import_scripts/phpbb3/database/database_base.rb b/script/import_scripts/phpbb3/database/database_base.rb index a51bcde3a5d..4419d4e78cb 100644 --- a/script/import_scripts/phpbb3/database/database_base.rb +++ b/script/import_scripts/phpbb3/database/database_base.rb @@ -39,9 +39,7 @@ module ImportScripts::PhpBB3 def find_last_row(rows) last_index = rows.size - 1 - rows.each_with_index do |row, index| - return row if index == last_index - end + rows.each_with_index { |row, index| return row if index == last_index } nil end diff --git a/script/import_scripts/phpbb3/importer.rb b/script/import_scripts/phpbb3/importer.rb index b8b84e29e69..a4f64f82c46 100644 --- a/script/import_scripts/phpbb3/importer.rb +++ b/script/import_scripts/phpbb3/importer.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true -require_relative '../base' -require_relative 'support/settings' -require_relative 'database/database' -require_relative 'importers/importer_factory' +require_relative "../base" +require_relative "support/settings" +require_relative "database/database" +require_relative "importers/importer_factory" module ImportScripts::PhpBB3 class Importer < ImportScripts::Base @@ -25,7 +25,7 @@ module ImportScripts::PhpBB3 protected def execute - puts '', "importing from phpBB #{@php_config[:phpbb_version]}" + puts "", "importing from phpBB #{@php_config[:phpbb_version]}" SiteSetting.tagging_enabled = true if @settings.tag_mappings.present? @@ -55,8 +55,14 @@ module ImportScripts::PhpBB3 settings[:max_attachment_size_kb] = [max_file_size_kb, SiteSetting.max_attachment_size_kb].max # temporarily disable validation since we want to import all existing images and attachments - SiteSetting.type_supervisor.load_setting(:max_image_size_kb, max: settings[:max_image_size_kb]) - SiteSetting.type_supervisor.load_setting(:max_attachment_size_kb, max: settings[:max_attachment_size_kb]) + SiteSetting.type_supervisor.load_setting( + :max_image_size_kb, + max: settings[:max_image_size_kb], + ) + SiteSetting.type_supervisor.load_setting( + :max_attachment_size_kb, + max: settings[:max_attachment_size_kb], + ) settings end @@ -66,7 +72,7 @@ module ImportScripts::PhpBB3 end def import_users - puts '', 'creating users' + puts "", "creating users" total_count = @database.count_users importer = @importers.user_importer last_user_id = 0 @@ -88,10 +94,10 @@ module ImportScripts::PhpBB3 end def import_anonymous_users - puts '', 'creating anonymous users' + puts "", "creating anonymous users" total_count = @database.count_anonymous_users importer = @importers.user_importer - last_username = '' + last_username = "" batches do |offset| rows, last_username = @database.fetch_anonymous_users(last_username) @@ -109,26 +115,34 @@ module ImportScripts::PhpBB3 end def import_groups - puts '', 'creating groups' + puts "", "creating groups" rows = @database.fetch_groups create_groups(rows) do |row| begin next if row[:group_type] == 3 - group_name = if @settings.site_name.present? - "#{@settings.site_name}_#{row[:group_name]}" - else - row[:group_name] - end[0..19].gsub(/[^a-zA-Z0-9\-_. ]/, '_') + group_name = + if @settings.site_name.present? 
+ "#{@settings.site_name}_#{row[:group_name]}" + else + row[:group_name] + end[ + 0..19 + ].gsub(/[^a-zA-Z0-9\-_. ]/, "_") - bio_raw = @importers.text_processor.process_raw_text(row[:group_desc]) rescue row[:group_desc] + bio_raw = + begin + @importers.text_processor.process_raw_text(row[:group_desc]) + rescue StandardError + row[:group_desc] + end { id: @settings.prefix(row[:group_id]), name: group_name, full_name: row[:group_name], - bio_raw: bio_raw + bio_raw: bio_raw, } rescue => e log_error("Failed to map group with ID #{row[:group_id]}", e) @@ -137,7 +151,7 @@ module ImportScripts::PhpBB3 end def import_user_groups - puts '', 'creating user groups' + puts "", "creating user groups" rows = @database.fetch_group_users rows.each do |row| @@ -147,7 +161,11 @@ module ImportScripts::PhpBB3 user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id])) begin - GroupUser.find_or_create_by(user_id: user_id, group_id: group_id, owner: row[:group_leader]) + GroupUser.find_or_create_by( + user_id: user_id, + group_id: group_id, + owner: row[:group_leader], + ) rescue => e log_error("Failed to add user #{row[:user_id]} to group #{row[:group_id]}", e) end @@ -155,7 +173,7 @@ module ImportScripts::PhpBB3 end def import_new_categories - puts '', 'creating new categories' + puts "", "creating new categories" create_categories(@settings.new_categories) do |row| next if row == "SKIP" @@ -163,13 +181,14 @@ module ImportScripts::PhpBB3 { id: @settings.prefix(row[:forum_id]), name: row[:name], - parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])) + parent_category_id: + @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])), } end end def import_categories - puts '', 'creating categories' + puts "", "creating categories" rows = @database.fetch_categories importer = @importers.category_importer @@ -181,7 +200,7 @@ module ImportScripts::PhpBB3 end def import_posts - puts '', 'creating topics and posts' + puts "", "creating topics and posts" total_count = @database.count_posts importer = @importers.post_importer last_post_id = 0 @@ -202,7 +221,7 @@ module ImportScripts::PhpBB3 end def import_private_messages - puts '', 'creating private messages' + puts "", "creating private messages" total_count = @database.count_messages importer = @importers.message_importer last_msg_id = 0 @@ -223,7 +242,7 @@ module ImportScripts::PhpBB3 end def import_bookmarks - puts '', 'creating bookmarks' + puts "", "creating bookmarks" total_count = @database.count_bookmarks importer = @importers.bookmark_importer last_user_id = last_topic_id = 0 @@ -243,7 +262,7 @@ module ImportScripts::PhpBB3 end def import_likes - puts '', 'importing likes' + puts "", "importing likes" total_count = @database.count_likes last_post_id = last_user_id = 0 @@ -255,7 +274,7 @@ module ImportScripts::PhpBB3 { post_id: @settings.prefix(row[:post_id]), user_id: @settings.prefix(row[:user_id]), - created_at: Time.zone.at(row[:thanks_time]) + created_at: Time.zone.at(row[:thanks_time]), } end end diff --git a/script/import_scripts/phpbb3/importers/avatar_importer.rb b/script/import_scripts/phpbb3/importers/avatar_importer.rb index bb72572c0ba..4e6e3b13bb1 100644 --- a/script/import_scripts/phpbb3/importers/avatar_importer.rb +++ b/script/import_scripts/phpbb3/importers/avatar_importer.rb @@ -49,12 +49,12 @@ module ImportScripts::PhpBB3 def get_avatar_path(avatar_type, filename) case avatar_type - when Constants::AVATAR_TYPE_UPLOADED, 
Constants::AVATAR_TYPE_STRING_UPLOADED then - filename.gsub!(/_[0-9]+\./, '.') # we need 1337.jpg, not 1337_2983745.jpg - get_uploaded_path(filename) - when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY then + when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED + filename.gsub!(/_[0-9]+\./, ".") # we need 1337.jpg, not 1337_2983745.jpg + get_uploaded_path(filename) + when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY get_gallery_path(filename) - when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE then + when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE download_avatar(filename) else puts "Invalid avatar type #{avatar_type}. Skipping..." @@ -67,12 +67,13 @@ module ImportScripts::PhpBB3 max_image_size_kb = SiteSetting.max_image_size_kb.kilobytes begin - avatar_file = FileHelper.download( - url, - max_file_size: max_image_size_kb, - tmp_file_name: 'discourse-avatar', - follow_redirect: true - ) + avatar_file = + FileHelper.download( + url, + max_file_size: max_image_size_kb, + tmp_file_name: "discourse-avatar", + follow_redirect: true, + ) rescue StandardError => err warn "Error downloading avatar: #{err.message}. Skipping..." return nil @@ -100,11 +101,11 @@ module ImportScripts::PhpBB3 def is_allowed_avatar_type?(avatar_type) case avatar_type - when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED then + when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED @settings.import_uploaded_avatars - when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE then + when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE @settings.import_remote_avatars - when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY then + when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY @settings.import_gallery_avatars else false diff --git a/script/import_scripts/phpbb3/importers/bookmark_importer.rb b/script/import_scripts/phpbb3/importers/bookmark_importer.rb index 784e6c74768..6dd2a793d8f 100644 --- a/script/import_scripts/phpbb3/importers/bookmark_importer.rb +++ b/script/import_scripts/phpbb3/importers/bookmark_importer.rb @@ -9,7 +9,7 @@ module ImportScripts::PhpBB3 def map_bookmark(row) { user_id: @settings.prefix(row[:user_id]), - post_id: @settings.prefix(row[:topic_first_post_id]) + post_id: @settings.prefix(row[:topic_first_post_id]), } end end diff --git a/script/import_scripts/phpbb3/importers/category_importer.rb b/script/import_scripts/phpbb3/importers/category_importer.rb index 6e5725b97e5..e0b95bc79b6 100644 --- a/script/import_scripts/phpbb3/importers/category_importer.rb +++ b/script/import_scripts/phpbb3/importers/category_importer.rb @@ -23,11 +23,13 @@ module ImportScripts::PhpBB3 { id: @settings.prefix(row[:forum_id]), name: CGI.unescapeHTML(row[:forum_name]), - parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])), - post_create_action: proc do |category| - update_category_description(category, row) - @permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation - end + parent_category_id: + @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])), + post_create_action: + proc do |category| + update_category_description(category, row) + @permalink_importer.create_for_category(category, row[:forum_id]) # skip 
@settings.prefix because ID is used in permalink generation + end, } end @@ -51,7 +53,16 @@ module ImportScripts::PhpBB3 end if row[:forum_desc].present? - changes = { raw: (@text_processor.process_raw_text(row[:forum_desc]) rescue row[:forum_desc]) } + changes = { + raw: + ( + begin + @text_processor.process_raw_text(row[:forum_desc]) + rescue StandardError + row[:forum_desc] + end + ), + } opts = { revised_at: post.created_at, bypass_bump: true } post.revise(Discourse.system_user, changes, opts) end diff --git a/script/import_scripts/phpbb3/importers/importer_factory.rb b/script/import_scripts/phpbb3/importers/importer_factory.rb index b02cb92ff0d..d7a3fe9c9fc 100644 --- a/script/import_scripts/phpbb3/importers/importer_factory.rb +++ b/script/import_scripts/phpbb3/importers/importer_factory.rb @@ -1,16 +1,16 @@ # frozen_string_literal: true -require_relative 'attachment_importer' -require_relative 'avatar_importer' -require_relative 'bookmark_importer' -require_relative 'category_importer' -require_relative 'message_importer' -require_relative 'poll_importer' -require_relative 'post_importer' -require_relative 'permalink_importer' -require_relative 'user_importer' -require_relative '../support/smiley_processor' -require_relative '../support/text_processor' +require_relative "attachment_importer" +require_relative "avatar_importer" +require_relative "bookmark_importer" +require_relative "category_importer" +require_relative "message_importer" +require_relative "poll_importer" +require_relative "post_importer" +require_relative "permalink_importer" +require_relative "user_importer" +require_relative "../support/smiley_processor" +require_relative "../support/text_processor" module ImportScripts::PhpBB3 class ImporterFactory @@ -36,7 +36,14 @@ module ImportScripts::PhpBB3 end def post_importer - PostImporter.new(@lookup, text_processor, attachment_importer, poll_importer, permalink_importer, @settings) + PostImporter.new( + @lookup, + text_processor, + attachment_importer, + poll_importer, + permalink_importer, + @settings, + ) end def message_importer @@ -64,7 +71,8 @@ module ImportScripts::PhpBB3 end def text_processor - @text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config) + @text_processor ||= + TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config) end def smiley_processor diff --git a/script/import_scripts/phpbb3/importers/message_importer.rb b/script/import_scripts/phpbb3/importers/message_importer.rb index 65c5874bdae..c71795b3280 100644 --- a/script/import_scripts/phpbb3/importers/message_importer.rb +++ b/script/import_scripts/phpbb3/importers/message_importer.rb @@ -20,14 +20,16 @@ module ImportScripts::PhpBB3 end def map_message(row) - user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) || Discourse.system_user.id + user_id = + @lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) || + Discourse.system_user.id attachments = import_attachments(row, user_id) mapped = { id: get_import_id(row[:msg_id]), user_id: user_id, created_at: Time.zone.at(row[:message_time]), - raw: @text_processor.process_private_msg(row[:message_text], attachments) + raw: @text_processor.process_private_msg(row[:message_text], attachments), } root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address]) @@ -43,7 +45,7 @@ module ImportScripts::PhpBB3 protected - RE_PREFIX = 're: ' + RE_PREFIX = "re: " def import_attachments(row, user_id) if @settings.import_attachments 
&& row[:attachment_count] > 0 @@ -55,7 +57,7 @@ module ImportScripts::PhpBB3 mapped[:title] = get_topic_title(row) mapped[:archetype] = Archetype.private_message mapped[:target_usernames] = get_recipient_usernames(row) - mapped[:custom_fields] = { import_user_ids: current_user_ids.join(',') } + mapped[:custom_fields] = { import_user_ids: current_user_ids.join(",") } if mapped[:target_usernames].empty? puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}" @@ -75,9 +77,9 @@ module ImportScripts::PhpBB3 # to_address looks like this: "u_91:u_1234:g_200" # If there is a "u_" prefix, the prefix is discarded and the rest is a user_id - user_ids = to_address.split(':') + user_ids = to_address.split(":") user_ids.uniq! - user_ids.map! { |u| u[2..-1].to_i if u[0..1] == 'u_' }.compact + user_ids.map! { |u| u[2..-1].to_i if u[0..1] == "u_" }.compact end def get_recipient_group_ids(to_address) @@ -85,16 +87,19 @@ module ImportScripts::PhpBB3 # to_address looks like this: "u_91:u_1234:g_200" # If there is a "g_" prefix, the prefix is discarded and the rest is a group_id - group_ids = to_address.split(':') + group_ids = to_address.split(":") group_ids.uniq! - group_ids.map! { |g| g[2..-1].to_i if g[0..1] == 'g_' }.compact + group_ids.map! { |g| g[2..-1].to_i if g[0..1] == "g_" }.compact end def get_recipient_usernames(row) import_user_ids = get_recipient_user_ids(row[:to_address]) - usernames = import_user_ids.map do |import_user_id| - @lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username) - end.compact + usernames = + import_user_ids + .map do |import_user_id| + @lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username) + end + .compact import_group_ids = get_recipient_group_ids(row[:to_address]) import_group_ids.each do |import_group_id| @@ -142,13 +147,19 @@ module ImportScripts::PhpBB3 topic_titles = [topic_title] topic_titles << topic_title[RE_PREFIX.length..-1] if topic_title.start_with?(RE_PREFIX) - Post.select(:topic_id) + Post + .select(:topic_id) .joins(:topic) .joins(:_custom_fields) - .where(["LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids", - { titles: topic_titles, user_ids: current_user_ids.join(',') }]) - .order('topics.created_at DESC') - .first.try(:topic_id) + .where( + [ + "LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids", + { titles: topic_titles, user_ids: current_user_ids.join(",") }, + ], + ) + .order("topics.created_at DESC") + .first + .try(:topic_id) end end end diff --git a/script/import_scripts/phpbb3/importers/permalink_importer.rb b/script/import_scripts/phpbb3/importers/permalink_importer.rb index 051604ba87e..5dcd9ffe60f 100644 --- a/script/import_scripts/phpbb3/importers/permalink_importer.rb +++ b/script/import_scripts/phpbb3/importers/permalink_importer.rb @@ -13,13 +13,15 @@ module ImportScripts::PhpBB3 def change_site_settings normalizations = SiteSetting.permalink_normalizations - normalizations = normalizations.blank? ? [] : normalizations.split('|') + normalizations = normalizations.blank? ? 
[] : normalizations.split("|") - add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION) if @settings.create_category_links + if @settings.create_category_links + add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION) + end add_normalization(normalizations, POST_LINK_NORMALIZATION) if @settings.create_post_links add_normalization(normalizations, TOPIC_LINK_NORMALIZATION) if @settings.create_topic_links - SiteSetting.permalink_normalizations = normalizations.join('|') + SiteSetting.permalink_normalizations = normalizations.join("|") end def create_for_category(category, import_id) @@ -50,8 +52,8 @@ module ImportScripts::PhpBB3 def add_normalization(normalizations, normalization) if @settings.normalization_prefix.present? - prefix = @settings.normalization_prefix[%r|^/?(.*?)/?$|, 1] - normalization = "/#{prefix.gsub('/', '\/')}\\#{normalization}" + prefix = @settings.normalization_prefix[%r{^/?(.*?)/?$}, 1] + normalization = "/#{prefix.gsub("/", '\/')}\\#{normalization}" end normalizations << normalization unless normalizations.include?(normalization) diff --git a/script/import_scripts/phpbb3/importers/poll_importer.rb b/script/import_scripts/phpbb3/importers/poll_importer.rb index 785fbb60b27..df4696201c3 100644 --- a/script/import_scripts/phpbb3/importers/poll_importer.rb +++ b/script/import_scripts/phpbb3/importers/poll_importer.rb @@ -49,7 +49,12 @@ module ImportScripts::PhpBB3 end def get_option_text(row) - text = @text_processor.process_raw_text(row[:poll_option_text]) rescue row[:poll_option_text] + text = + begin + @text_processor.process_raw_text(row[:poll_option_text]) + rescue StandardError + row[:poll_option_text] + end text.squish! text.gsub!(/^(\d+)\./, '\1\.') text @@ -57,7 +62,12 @@ module ImportScripts::PhpBB3 # @param poll_data [ImportScripts::PhpBB3::PollData] def get_poll_text(poll_data) - title = @text_processor.process_raw_text(poll_data.title) rescue poll_data.title + title = + begin + @text_processor.process_raw_text(poll_data.title) + rescue StandardError + poll_data.title + end text = +"#{title}\n\n" arguments = ["results=always"] @@ -69,11 +79,9 @@ module ImportScripts::PhpBB3 arguments << "type=regular" end - text << "[poll #{arguments.join(' ')}]" + text << "[poll #{arguments.join(" ")}]" - poll_data.options.each do |option| - text << "\n* #{option[:text]}" - end + poll_data.options.each { |option| text << "\n* #{option[:text]}" } text << "\n[/poll]" end @@ -104,9 +112,7 @@ module ImportScripts::PhpBB3 poll.poll_options.each_with_index do |option, index| imported_option = poll_data.options[index] - imported_option[:ids].each do |imported_id| - option_ids[imported_id] = option.id - end + imported_option[:ids].each { |imported_id| option_ids[imported_id] = option.id } end option_ids diff --git a/script/import_scripts/phpbb3/importers/post_importer.rb b/script/import_scripts/phpbb3/importers/post_importer.rb index 8f41e9ed669..4f66560e349 100644 --- a/script/import_scripts/phpbb3/importers/post_importer.rb +++ b/script/import_scripts/phpbb3/importers/post_importer.rb @@ -8,7 +8,14 @@ module ImportScripts::PhpBB3 # @param poll_importer [ImportScripts::PhpBB3::PollImporter] # @param permalink_importer [ImportScripts::PhpBB3::PermalinkImporter] # @param settings [ImportScripts::PhpBB3::Settings] - def initialize(lookup, text_processor, attachment_importer, poll_importer, permalink_importer, settings) + def initialize( + lookup, + text_processor, + attachment_importer, + poll_importer, + permalink_importer, + settings + ) @lookup = lookup @text_processor 
= text_processor @attachment_importer = attachment_importer @@ -24,7 +31,8 @@ module ImportScripts::PhpBB3 def map_post(row) return if @settings.category_mappings.dig(row[:forum_id].to_s, :skip) - imported_user_id = @settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username]) + imported_user_id = + @settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username]) user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || -1 is_first_post = row[:post_id] == row[:topic_first_post_id] @@ -35,7 +43,7 @@ module ImportScripts::PhpBB3 user_id: user_id, created_at: Time.zone.at(row[:post_time]), raw: @text_processor.process_post(row[:post_text], attachments), - import_topic_id: @settings.prefix(row[:topic_id]) + import_topic_id: @settings.prefix(row[:topic_id]), } if is_first_post @@ -58,7 +66,9 @@ module ImportScripts::PhpBB3 mapped[:category] = if category_mapping = @settings.category_mappings[row[:forum_id].to_s] category_mapping[:discourse_category_id] || - @lookup.category_id_from_imported_category_id(@settings.prefix(category_mapping[:target_category_id])) + @lookup.category_id_from_imported_category_id( + @settings.prefix(category_mapping[:target_category_id]), + ) else @lookup.category_id_from_imported_category_id(@settings.prefix(row[:forum_id])) end @@ -81,7 +91,8 @@ module ImportScripts::PhpBB3 end def map_other_post(row, mapped) - parent = @lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id])) + parent = + @lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id])) if parent.blank? puts "Parent post #{@settings.prefix(row[:topic_first_post_id])} doesn't exist. Skipping #{@settings.prefix(row[:post_id])}: #{row[:topic_title][0..40]}" diff --git a/script/import_scripts/phpbb3/importers/user_importer.rb b/script/import_scripts/phpbb3/importers/user_importer.rb index 3fa61d6e17e..6f32223232c 100644 --- a/script/import_scripts/phpbb3/importers/user_importer.rb +++ b/script/import_scripts/phpbb3/importers/user_importer.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative '../support/constants' +require_relative "../support/constants" module ImportScripts::PhpBB3 class UserImporter @@ -29,8 +29,22 @@ module ImportScripts::PhpBB3 password: @settings.import_passwords ? row[:user_password] : nil, name: @settings.username_as_name ? row[:username] : row[:name].presence, created_at: Time.zone.at(row[:user_regdate]), - last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]), - registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil), + last_seen_at: + ( + if row[:user_lastvisit] == 0 + Time.zone.at(row[:user_regdate]) + else + Time.zone.at(row[:user_lastvisit]) + end + ), + registration_ip_address: + ( + begin + IPAddr.new(row[:user_ip]) + rescue StandardError + nil + end + ), active: is_active_user, trust_level: trust_level, manual_locked_trust_level: manual_locked_trust_level, @@ -43,10 +57,11 @@ module ImportScripts::PhpBB3 location: row[:user_from], date_of_birth: parse_birthdate(row), custom_fields: custom_fields(row), - post_create_action: proc do |user| - suspend_user(user, row) - @avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present? - end + post_create_action: + proc do |user| + suspend_user(user, row) + @avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present? 
+ end, } end @@ -61,18 +76,19 @@ module ImportScripts::PhpBB3 id: @settings.prefix(username), email: "anonymous_#{SecureRandom.hex}@no-email.invalid", username: username, - name: @settings.username_as_name ? username : '', + name: @settings.username_as_name ? username : "", created_at: Time.zone.at(row[:first_post_time]), active: true, trust_level: TrustLevel[0], approved: true, approved_by_id: Discourse.system_user.id, approved_at: Time.now, - post_create_action: proc do |user| - row[:user_inactive_reason] = Constants::INACTIVE_MANUAL - row[:ban_reason] = 'Anonymous user from phpBB3' # TODO i18n - suspend_user(user, row, true) - end + post_create_action: + proc do |user| + row[:user_inactive_reason] = Constants::INACTIVE_MANUAL + row[:ban_reason] = "Anonymous user from phpBB3" # TODO i18n + suspend_user(user, row, true) + end, } end @@ -80,25 +96,32 @@ module ImportScripts::PhpBB3 def parse_birthdate(row) return nil if row[:user_birthday].blank? - birthdate = Date.strptime(row[:user_birthday].delete(' '), '%d-%m-%Y') rescue nil + birthdate = + begin + Date.strptime(row[:user_birthday].delete(" "), "%d-%m-%Y") + rescue StandardError + nil + end birthdate && birthdate.year > 0 ? birthdate : nil end def user_fields - @user_fields ||= begin - Hash[UserField.all.map { |field| [field.name, field] }] - end + @user_fields ||= + begin + Hash[UserField.all.map { |field| [field.name, field] }] + end end def field_mappings - @field_mappings ||= begin - @settings.custom_fields.map do |field| - { - phpbb_field_name: "pf_#{field[:phpbb_field_name]}".to_sym, - discourse_user_field: user_fields[field[:discourse_field_name]] - } + @field_mappings ||= + begin + @settings.custom_fields.map do |field| + { + phpbb_field_name: "pf_#{field[:phpbb_field_name]}".to_sym, + discourse_user_field: user_fields[field[:discourse_field_name]], + } + end end - end end def custom_fields(row) @@ -114,7 +137,8 @@ module ImportScripts::PhpBB3 when "confirm" value = value == 1 ? true : nil when "dropdown" - value = user_field.user_field_options.find { |option| option.value == value } ? value : nil + value = + user_field.user_field_options.find { |option| option.value == value } ? value : nil end custom_fields["user_field_#{user_field.id}"] = value if value.present? @@ -128,7 +152,8 @@ module ImportScripts::PhpBB3 if row[:user_inactive_reason] == Constants::INACTIVE_MANUAL user.suspended_at = Time.now user.suspended_till = 200.years.from_now - ban_reason = row[:ban_reason].blank? ? 'Account deactivated by administrator' : row[:ban_reason] # TODO i18n + ban_reason = + row[:ban_reason].blank? ? "Account deactivated by administrator" : row[:ban_reason] # TODO i18n elsif row[:ban_start].present? user.suspended_at = Time.zone.at(row[:ban_start]) user.suspended_till = row[:ban_end] > 0 ? Time.zone.at(row[:ban_end]) : 200.years.from_now @@ -148,7 +173,9 @@ module ImportScripts::PhpBB3 if user.save StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason) else - Rails.logger.error("Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}") + Rails.logger.error( + "Failed to suspend user #{user.username}. 
#{user.errors.try(:full_messages).try(:inspect)}", + ) end end end diff --git a/script/import_scripts/phpbb3/support/bbcode/markdown_node.rb b/script/import_scripts/phpbb3/support/bbcode/markdown_node.rb index 5a42a1bf408..c5e5048a9f1 100644 --- a/script/import_scripts/phpbb3/support/bbcode/markdown_node.rb +++ b/script/import_scripts/phpbb3/support/bbcode/markdown_node.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true -module ImportScripts; end -module ImportScripts::PhpBB3; end +module ImportScripts +end +module ImportScripts::PhpBB3 +end module ImportScripts::PhpBB3::BBCode LINEBREAK_AUTO = :auto diff --git a/script/import_scripts/phpbb3/support/bbcode/xml_to_markdown.rb b/script/import_scripts/phpbb3/support/bbcode/xml_to_markdown.rb index 7041c5923f7..004601d2471 100644 --- a/script/import_scripts/phpbb3/support/bbcode/xml_to_markdown.rb +++ b/script/import_scripts/phpbb3/support/bbcode/xml_to_markdown.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -require 'nokogiri' -require_relative 'markdown_node' +require "nokogiri" +require_relative "markdown_node" module ImportScripts::PhpBB3::BBCode class XmlToMarkdown @@ -14,7 +14,7 @@ module ImportScripts::PhpBB3::BBCode @allow_inline_code = opts.fetch(:allow_inline_code, false) @traditional_linebreaks = opts.fetch(:traditional_linebreaks, false) - @doc = Nokogiri::XML(xml) + @doc = Nokogiri.XML(xml) @list_stack = [] end @@ -28,9 +28,9 @@ module ImportScripts::PhpBB3::BBCode private - IGNORED_ELEMENTS = ["s", "e", "i"] - ELEMENTS_WITHOUT_LEADING_WHITESPACES = ["LIST", "LI"] - ELEMENTS_WITH_HARD_LINEBREAKS = ["B", "I", "U"] + IGNORED_ELEMENTS = %w[s e i] + ELEMENTS_WITHOUT_LEADING_WHITESPACES = %w[LIST LI] + ELEMENTS_WITH_HARD_LINEBREAKS = %w[B I U] EXPLICIT_LINEBREAK_THRESHOLD = 2 def preprocess_xml @@ -65,9 +65,7 @@ module ImportScripts::PhpBB3::BBCode xml_node.children.each { |xml_child| visit(xml_child, md_node || md_parent) } after_hook = "after_#{xml_node.name}" - if respond_to?(after_hook, include_all: true) - send(after_hook, xml_node, md_node) - end + send(after_hook, xml_node, md_node) if respond_to?(after_hook, include_all: true) end def create_node(xml_node, md_parent) @@ -84,19 +82,15 @@ module ImportScripts::PhpBB3::BBCode end def visit_B(xml_node, md_node) - if xml_node.parent&.name != 'B' - md_node.enclosed_with = "**" - end + md_node.enclosed_with = "**" if xml_node.parent&.name != "B" end def visit_I(xml_node, md_node) - if xml_node.parent&.name != 'I' - md_node.enclosed_with = "_" - end + md_node.enclosed_with = "_" if xml_node.parent&.name != "I" end def visit_U(xml_node, md_node) - if xml_node.parent&.name != 'U' + if xml_node.parent&.name != "U" md_node.prefix = "[u]" md_node.postfix = "[/u]" end @@ -122,10 +116,7 @@ module ImportScripts::PhpBB3::BBCode md_node.prefix_linebreaks = md_node.postfix_linebreaks = @list_stack.size == 0 ? 2 : 1 md_node.prefix_linebreak_type = LINEBREAK_HTML if @list_stack.size == 0 - @list_stack << { - unordered: xml_node.attribute('type').nil?, - item_count: 0 - } + @list_stack << { unordered: xml_node.attribute("type").nil?, item_count: 0 } end def after_LIST(xml_node, md_node) @@ -138,21 +129,21 @@ module ImportScripts::PhpBB3::BBCode list[:item_count] += 1 - indentation = ' ' * 2 * depth - symbol = list[:unordered] ? '*' : "#{list[:item_count]}." + indentation = " " * 2 * depth + symbol = list[:unordered] ? "*" : "#{list[:item_count]}." 
md_node.prefix = "#{indentation}#{symbol} " md_node.postfix_linebreaks = 1 end def visit_IMG(xml_node, md_node) - md_node.text = +"![](#{xml_node.attribute('src')})" + md_node.text = +"![](#{xml_node.attribute("src")})" md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2 md_node.skip_children end def visit_URL(xml_node, md_node) - original_url = xml_node.attribute('url').to_s + original_url = xml_node.attribute("url").to_s url = CGI.unescapeHTML(original_url) url = @url_replacement.call(url) if @url_replacement @@ -173,7 +164,8 @@ module ImportScripts::PhpBB3::BBCode def visit_br(xml_node, md_node) md_node.postfix_linebreaks += 1 - if md_node.postfix_linebreaks > 1 && ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name) + if md_node.postfix_linebreaks > 1 && + ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name) md_node.postfix_linebreak_type = LINEBREAK_HARD end end @@ -194,7 +186,8 @@ module ImportScripts::PhpBB3::BBCode def visit_QUOTE(xml_node, md_node) if post = quoted_post(xml_node) - md_node.prefix = %Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n} + md_node.prefix = + %Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n} md_node.postfix = "\n[/quote]" elsif username = quoted_username(xml_node) md_node.prefix = %Q{[quote="#{username}"]\n} @@ -242,11 +235,11 @@ module ImportScripts::PhpBB3::BBCode return if size.nil? if size.between?(1, 99) - md_node.prefix = '' - md_node.postfix = '' + md_node.prefix = "" + md_node.postfix = "" elsif size.between?(101, 200) - md_node.prefix = '' - md_node.postfix = '' + md_node.prefix = "" + md_node.postfix = "" end end @@ -267,7 +260,8 @@ module ImportScripts::PhpBB3::BBCode parent_prefix = prefix_from_parent(md_parent) - if parent_prefix && md_node.xml_node_name != "br" && (md_parent.prefix_children || !markdown.empty?) + if parent_prefix && md_node.xml_node_name != "br" && + (md_parent.prefix_children || !markdown.empty?) prefix = "#{parent_prefix}#{prefix}" end @@ -275,11 +269,21 @@ module ImportScripts::PhpBB3::BBCode text, prefix, postfix = hoist_whitespaces!(markdown, text, prefix, postfix) end - add_linebreaks!(markdown, md_node.prefix_linebreaks, md_node.prefix_linebreak_type, parent_prefix) + add_linebreaks!( + markdown, + md_node.prefix_linebreaks, + md_node.prefix_linebreak_type, + parent_prefix, + ) markdown << prefix markdown << text markdown << postfix - add_linebreaks!(markdown, md_node.postfix_linebreaks, md_node.postfix_linebreak_type, parent_prefix) + add_linebreaks!( + markdown, + md_node.postfix_linebreaks, + md_node.postfix_linebreak_type, + parent_prefix, + ) end markdown @@ -296,9 +300,7 @@ module ImportScripts::PhpBB3::BBCode end unless postfix.empty? - if ends_with_whitespace?(text) - postfix = "#{postfix}#{text[-1]}" - end + postfix = "#{postfix}#{text[-1]}" if ends_with_whitespace?(text) text = text.rstrip end @@ -319,16 +321,24 @@ module ImportScripts::PhpBB3::BBCode if linebreak_type == LINEBREAK_HTML max_linebreak_count = [existing_linebreak_count, required_linebreak_count - 1].max + 1 - required_linebreak_count = max_linebreak_count if max_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD + required_linebreak_count = max_linebreak_count if max_linebreak_count > + EXPLICIT_LINEBREAK_THRESHOLD end return if existing_linebreak_count >= required_linebreak_count rstrip!(markdown) - alternative_linebreak_start_index = required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 
1 : 2 + alternative_linebreak_start_index = + required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2 required_linebreak_count.times do |index| - linebreak = linebreak(linebreak_type, index, alternative_linebreak_start_index, required_linebreak_count) + linebreak = + linebreak( + linebreak_type, + index, + alternative_linebreak_start_index, + required_linebreak_count, + ) markdown << (linebreak == "\n" ? prefix.rstrip : prefix) if prefix && index > 0 markdown << linebreak @@ -336,18 +346,25 @@ module ImportScripts::PhpBB3::BBCode end def rstrip!(markdown) - markdown.gsub!(/\s*(?:\\?\n|
<br>\n)*\z/, '') + markdown.gsub!(/\s*(?:\\?\n|<br>
\n)*\z/, "") end - def linebreak(linebreak_type, linebreak_index, alternative_linebreak_start_index, required_linebreak_count) + def linebreak( + linebreak_type, + linebreak_index, + alternative_linebreak_start_index, + required_linebreak_count + ) use_alternative_linebreak = linebreak_index >= alternative_linebreak_start_index is_last_linebreak = linebreak_index + 1 == required_linebreak_count - return "
<br>\n" if linebreak_type == LINEBREAK_HTML && - use_alternative_linebreak && is_last_linebreak + if linebreak_type == LINEBREAK_HTML && use_alternative_linebreak && is_last_linebreak + return "<br>
\n" + end - return "\\\n" if linebreak_type == LINEBREAK_HARD || - @traditional_linebreaks || use_alternative_linebreak + if linebreak_type == LINEBREAK_HARD || @traditional_linebreaks || use_alternative_linebreak + return "\\\n" + end "\n" end diff --git a/script/import_scripts/phpbb3/support/constants.rb b/script/import_scripts/phpbb3/support/constants.rb index af8d62dc439..c832cfee8f8 100644 --- a/script/import_scripts/phpbb3/support/constants.rb +++ b/script/import_scripts/phpbb3/support/constants.rb @@ -8,8 +8,8 @@ module ImportScripts::PhpBB3 INACTIVE_MANUAL = 3 # Account deactivated by administrator INACTIVE_REMIND = 4 # Forced user account reactivation - GROUP_ADMINISTRATORS = 'ADMINISTRATORS' - GROUP_MODERATORS = 'GLOBAL_MODERATORS' + GROUP_ADMINISTRATORS = "ADMINISTRATORS" + GROUP_MODERATORS = "GLOBAL_MODERATORS" # https://wiki.phpbb.com/Table.phpbb_users USER_TYPE_NORMAL = 0 @@ -21,9 +21,9 @@ module ImportScripts::PhpBB3 AVATAR_TYPE_REMOTE = 2 AVATAR_TYPE_GALLERY = 3 - AVATAR_TYPE_STRING_UPLOADED = 'avatar.driver.upload' - AVATAR_TYPE_STRING_REMOTE = 'avatar.driver.remote' - AVATAR_TYPE_STRING_GALLERY = 'avatar.driver.local' + AVATAR_TYPE_STRING_UPLOADED = "avatar.driver.upload" + AVATAR_TYPE_STRING_REMOTE = "avatar.driver.remote" + AVATAR_TYPE_STRING_GALLERY = "avatar.driver.local" FORUM_TYPE_CATEGORY = 0 FORUM_TYPE_POST = 1 diff --git a/script/import_scripts/phpbb3/support/settings.rb b/script/import_scripts/phpbb3/support/settings.rb index b259821ab3c..e308e322cf5 100644 --- a/script/import_scripts/phpbb3/support/settings.rb +++ b/script/import_scripts/phpbb3/support/settings.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true -require 'csv' -require 'yaml' -require_relative '../../base' +require "csv" +require "yaml" +require_relative "../../base" module ImportScripts::PhpBB3 class Settings def self.load(filename) - yaml = YAML::load_file(filename) + yaml = YAML.load_file(filename) Settings.new(yaml.deep_stringify_keys.with_indifferent_access) end @@ -44,40 +44,41 @@ module ImportScripts::PhpBB3 attr_reader :database def initialize(yaml) - import_settings = yaml['import'] + import_settings = yaml["import"] - @site_name = import_settings['site_name'] + @site_name = import_settings["site_name"] - @new_categories = import_settings['new_categories'] - @category_mappings = import_settings.fetch('category_mappings', []).to_h { |m| [m[:source_category_id].to_s, m] } - @tag_mappings = import_settings['tag_mappings'] - @rank_mapping = import_settings['rank_mapping'] + @new_categories = import_settings["new_categories"] + @category_mappings = + import_settings.fetch("category_mappings", []).to_h { |m| [m[:source_category_id].to_s, m] } + @tag_mappings = import_settings["tag_mappings"] + @rank_mapping = import_settings["rank_mapping"] - @import_anonymous_users = import_settings['anonymous_users'] - @import_attachments = import_settings['attachments'] - @import_private_messages = import_settings['private_messages'] - @import_polls = import_settings['polls'] - @import_bookmarks = import_settings['bookmarks'] - @import_passwords = import_settings['passwords'] - @import_likes = import_settings['likes'] + @import_anonymous_users = import_settings["anonymous_users"] + @import_attachments = import_settings["attachments"] + @import_private_messages = import_settings["private_messages"] + @import_polls = import_settings["polls"] + @import_bookmarks = import_settings["bookmarks"] + @import_passwords = import_settings["passwords"] + @import_likes = import_settings["likes"] - avatar_settings = 
import_settings['avatars'] - @import_uploaded_avatars = avatar_settings['uploaded'] - @import_remote_avatars = avatar_settings['remote'] - @import_gallery_avatars = avatar_settings['gallery'] + avatar_settings = import_settings["avatars"] + @import_uploaded_avatars = avatar_settings["uploaded"] + @import_remote_avatars = avatar_settings["remote"] + @import_gallery_avatars = avatar_settings["gallery"] - @use_bbcode_to_md = import_settings['use_bbcode_to_md'] + @use_bbcode_to_md = import_settings["use_bbcode_to_md"] - @original_site_prefix = import_settings['site_prefix']['original'] - @new_site_prefix = import_settings['site_prefix']['new'] - @base_dir = import_settings['phpbb_base_dir'] - @permalinks = PermalinkSettings.new(import_settings['permalinks']) + @original_site_prefix = import_settings["site_prefix"]["original"] + @new_site_prefix = import_settings["site_prefix"]["new"] + @base_dir = import_settings["phpbb_base_dir"] + @permalinks = PermalinkSettings.new(import_settings["permalinks"]) - @username_as_name = import_settings['username_as_name'] - @emojis = import_settings.fetch('emojis', []) - @custom_fields = import_settings.fetch('custom_fields', []) + @username_as_name = import_settings["username_as_name"] + @emojis = import_settings.fetch("emojis", []) + @custom_fields = import_settings.fetch("custom_fields", []) - @database = DatabaseSettings.new(yaml['database']) + @database = DatabaseSettings.new(yaml["database"]) end def prefix(val) @@ -87,7 +88,7 @@ module ImportScripts::PhpBB3 def trust_level_for_posts(rank, trust_level: 0) if @rank_mapping.present? @rank_mapping.each do |key, value| - trust_level = [trust_level, key.gsub('trust_level_', '').to_i].max if rank >= value + trust_level = [trust_level, key.gsub("trust_level_", "").to_i].max if rank >= value end end @@ -106,14 +107,14 @@ module ImportScripts::PhpBB3 attr_reader :batch_size def initialize(yaml) - @type = yaml['type'] - @host = yaml['host'] - @port = yaml['port'] - @username = yaml['username'] - @password = yaml['password'] - @schema = yaml['schema'] - @table_prefix = yaml['table_prefix'] - @batch_size = yaml['batch_size'] + @type = yaml["type"] + @host = yaml["host"] + @port = yaml["port"] + @username = yaml["username"] + @password = yaml["password"] + @schema = yaml["schema"] + @table_prefix = yaml["table_prefix"] + @batch_size = yaml["batch_size"] end end @@ -124,10 +125,10 @@ module ImportScripts::PhpBB3 attr_reader :normalization_prefix def initialize(yaml) - @create_category_links = yaml['categories'] - @create_topic_links = yaml['topics'] - @create_post_links = yaml['posts'] - @normalization_prefix = yaml['prefix'] + @create_category_links = yaml["categories"] + @create_topic_links = yaml["topics"] + @create_post_links = yaml["posts"] + @normalization_prefix = yaml["prefix"] end end end diff --git a/script/import_scripts/phpbb3/support/smiley_processor.rb b/script/import_scripts/phpbb3/support/smiley_processor.rb index 618f99ddd22..4a861fc4c10 100644 --- a/script/import_scripts/phpbb3/support/smiley_processor.rb +++ b/script/import_scripts/phpbb3/support/smiley_processor.rb @@ -18,15 +18,16 @@ module ImportScripts::PhpBB3 def replace_smilies(text) # :) is encoded as :) - text.gsub!(/.*?/) do - emoji($1) - end + text.gsub!( + /.*?/, + ) { emoji($1) } end def emoji(smiley_code) @smiley_map.fetch(smiley_code) do smiley = @database.get_smiley(smiley_code) - emoji = upload_smiley(smiley_code, smiley[:smiley_url], smiley_code, smiley[:emotion]) if smiley + emoji = + upload_smiley(smiley_code, smiley[:smiley_url], 
smiley_code, smiley[:emotion]) if smiley emoji || smiley_as_text(smiley_code) end end @@ -35,37 +36,34 @@ module ImportScripts::PhpBB3 def add_default_smilies { - [':D', ':-D', ':grin:'] => ':smiley:', - [':)', ':-)', ':smile:'] => ':slight_smile:', - [';)', ';-)', ':wink:'] => ':wink:', - [':(', ':-(', ':sad:'] => ':frowning:', - [':o', ':-o', ':eek:'] => ':astonished:', - [':shock:'] => ':open_mouth:', - [':?', ':-?', ':???:'] => ':confused:', - ['8)', '8-)', ':cool:'] => ':sunglasses:', - [':lol:'] => ':laughing:', - [':x', ':-x', ':mad:'] => ':angry:', - [':P', ':-P', ':razz:'] => ':stuck_out_tongue:', - [':oops:'] => ':blush:', - [':cry:'] => ':cry:', - [':evil:'] => ':imp:', - [':twisted:'] => ':smiling_imp:', - [':roll:'] => ':unamused:', - [':!:'] => ':exclamation:', - [':?:'] => ':question:', - [':idea:'] => ':bulb:', - [':arrow:'] => ':arrow_right:', - [':|', ':-|'] => ':neutral_face:', - [':geek:'] => ':nerd:' - }.each do |smilies, emoji| - smilies.each { |smiley| @smiley_map[smiley] = emoji } - end + %w[:D :-D :grin:] => ":smiley:", + %w[:) :-) :smile:] => ":slight_smile:", + %w[;) ;-) :wink:] => ":wink:", + %w[:( :-( :sad:] => ":frowning:", + %w[:o :-o :eek:] => ":astonished:", + [":shock:"] => ":open_mouth:", + %w[:? :-? :???:] => ":confused:", + %w[8) 8-) :cool:] => ":sunglasses:", + [":lol:"] => ":laughing:", + %w[:x :-x :mad:] => ":angry:", + %w[:P :-P :razz:] => ":stuck_out_tongue:", + [":oops:"] => ":blush:", + [":cry:"] => ":cry:", + [":evil:"] => ":imp:", + [":twisted:"] => ":smiling_imp:", + [":roll:"] => ":unamused:", + [":!:"] => ":exclamation:", + [":?:"] => ":question:", + [":idea:"] => ":bulb:", + [":arrow:"] => ":arrow_right:", + %w[:| :-|] => ":neutral_face:", + [":geek:"] => ":nerd:", + }.each { |smilies, emoji| smilies.each { |smiley| @smiley_map[smiley] = emoji } } end def add_configured_smilies(emojis) emojis.each do |emoji, smilies| - Array.wrap(smilies) - .each { |smiley| @smiley_map[smiley] = ":#{emoji}:" } + Array.wrap(smilies).each { |smiley| @smiley_map[smiley] = ":#{emoji}:" } end end diff --git a/script/import_scripts/phpbb3/support/text_processor.rb b/script/import_scripts/phpbb3/support/text_processor.rb index 62547b4f156..fb788bf5370 100644 --- a/script/import_scripts/phpbb3/support/text_processor.rb +++ b/script/import_scripts/phpbb3/support/text_processor.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative 'bbcode/xml_to_markdown' +require_relative "bbcode/xml_to_markdown" module ImportScripts::PhpBB3 class TextProcessor @@ -14,7 +14,9 @@ module ImportScripts::PhpBB3 @database = database @smiley_processor = smiley_processor @he = HTMLEntities.new - @use_xml_to_markdown = phpbb_config[:phpbb_version].start_with?('3.2') || phpbb_config[:phpbb_version].start_with?('3.3') + @use_xml_to_markdown = + phpbb_config[:phpbb_version].start_with?("3.2") || + phpbb_config[:phpbb_version].start_with?("3.3") @settings = settings @new_site_prefix = settings.new_site_prefix @@ -25,24 +27,27 @@ module ImportScripts::PhpBB3 if @use_xml_to_markdown unreferenced_attachments = attachments&.dup - converter = BBCode::XmlToMarkdown.new( - raw, - username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) }, - smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup }, - quoted_post_from_post_id: lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) }, - upload_md_from_file: (lambda do |filename, index| - unreferenced_attachments[index] = nil - attachments.fetch(index, filename).dup - end if 
attachments), - url_replacement: nil, - allow_inline_code: false - ) + converter = + BBCode::XmlToMarkdown.new( + raw, + username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) }, + smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup }, + quoted_post_from_post_id: + lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) }, + upload_md_from_file: + ( + lambda do |filename, index| + unreferenced_attachments[index] = nil + attachments.fetch(index, filename).dup + end if attachments + ), + url_replacement: nil, + allow_inline_code: false, + ) text = converter.convert - text.gsub!(@short_internal_link_regexp) do |link| - replace_internal_link(link, $1, $2) - end + text.gsub!(@short_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) } add_unreferenced_attachments(text, unreferenced_attachments) else @@ -50,9 +55,7 @@ module ImportScripts::PhpBB3 text = CGI.unescapeHTML(text) clean_bbcodes(text) - if @settings.use_bbcode_to_md - text = bbcode_to_md(text) - end + text = bbcode_to_md(text) if @settings.use_bbcode_to_md process_smilies(text) process_links(text) process_lists(text) @@ -65,11 +68,19 @@ module ImportScripts::PhpBB3 end def process_post(raw, attachments) - process_raw_text(raw, attachments) rescue raw + begin + process_raw_text(raw, attachments) + rescue StandardError + raw + end end def process_private_msg(raw, attachments) - process_raw_text(raw, attachments) rescue raw + begin + process_raw_text(raw, attachments) + rescue StandardError + raw + end end protected @@ -78,10 +89,10 @@ module ImportScripts::PhpBB3 # Many phpbb bbcode tags have a hash attached to them. Examples: # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - text.gsub!(/:(?:\w{5,8})\]/, ']') + text.gsub!(/:(?:\w{5,8})\]/, "]") # remove color tags - text.gsub!(/\[\/?color(=#?[a-z0-9]*)?\]/i, "") + text.gsub!(%r{\[/?color(=#?[a-z0-9]*)?\]}i, "") end def bbcode_to_md(text) @@ -101,23 +112,19 @@ module ImportScripts::PhpBB3 # Internal forum links can have this forms: # for topics: viewtopic.php?f=26&t=3412 # for posts: viewtopic.php?p=1732#p1732 - text.gsub!(@long_internal_link_regexp) do |link| - replace_internal_link(link, $1, $2) - end + text.gsub!(@long_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) } # Some links look like this: http://www.onegameamonth.com - text.gsub!(/(.+)<\/a>/i, '[\2](\1)') + text.gsub!(%r{(.+)}i, '[\2](\1)') # Replace internal forum links that aren't in the format - text.gsub!(@short_internal_link_regexp) do |link| - replace_internal_link(link, $1, $2) - end + text.gsub!(@short_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) } # phpBB shortens link text like this, which breaks our markdown processing: # [http://answers.yahoo.com/question/index ... 
223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) # # Work around it for now: - text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[') + text.gsub!(%r{\[http(s)?://(www\.)?}i, "[") end def replace_internal_link(link, import_topic_id, import_post_id) @@ -144,19 +151,20 @@ module ImportScripts::PhpBB3 # convert list tags to ul and list=1 tags to ol # list=a is not supported, so handle it like list=1 # list=9 and list=x have the same result as list=1 and list=a - text.gsub!(/\[list\](.*?)\[\/list:u\]/mi) do - $1.gsub(/\[\*\](.*?)\[\/\*:m\]\n*/mi) { "* #{$1}\n" } + text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi) do + $1.gsub(%r{\[\*\](.*?)\[/\*:m\]\n*}mi) { "* #{$1}\n" } end - text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi) do - $1.gsub(/\[\*\](.*?)\[\/\*:m\]\n*/mi) { "1. #{$1}\n" } + text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi) do + $1.gsub(%r{\[\*\](.*?)\[/\*:m\]\n*}mi) { "1. #{$1}\n" } end end # This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse. # All attachments that haven't been referenced in the text are appended to the end of the text. def process_attachments(text, attachments) - attachment_regexp = /\[attachment=([\d])+\]([^<]+)\[\/attachment\]?/i + attachment_regexp = + %r{\[attachment=([\d])+\]([^<]+)\[/attachment\]?}i unreferenced_attachments = attachments.dup text.gsub!(attachment_regexp) do @@ -178,29 +186,34 @@ module ImportScripts::PhpBB3 end def create_internal_link_regexps(original_site_prefix) - host = original_site_prefix.gsub('.', '\.') - link_regex = "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)(?:[^\\s\\)\\]]*)" + host = original_site_prefix.gsub(".", '\.') + link_regex = + "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)(?:[^\\s\\)\\]]*)" - @long_internal_link_regexp = Regexp.new(%Q||, Regexp::IGNORECASE) + @long_internal_link_regexp = + Regexp.new( + %Q||, + Regexp::IGNORECASE, + ) @short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE) end def process_code(text) - text.gsub!(//, "\n") + text.gsub!(%r{}, "\n") text end def fix_markdown(text) - text.gsub!(/(\n*\[\/?quote.*?\]\n*)/mi) { |q| "\n#{q.strip}\n" } + text.gsub!(%r{(\n*\[/?quote.*?\]\n*)}mi) { |q| "\n#{q.strip}\n" } text.gsub!(/^!\[[^\]]*\]\([^\]]*\)$/i) { |img| "\n#{img.strip}\n" } # space out images single on line text end def process_videos(text) # [YOUTUBE][/YOUTUBE] - text.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + text.gsub(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } text end end diff --git a/script/import_scripts/punbb.rb b/script/import_scripts/punbb.rb index 64cce9bbcf4..b73e4f71281 100644 --- a/script/import_scripts/punbb.rb +++ b/script/import_scripts/punbb.rb @@ -7,19 +7,19 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb") # Call it like this: # RAILS_ENV=production bundle exec ruby script/import_scripts/punbb.rb class ImportScripts::PunBB < ImportScripts::Base - PUNBB_DB = "punbb_db" BATCH_SIZE = 1000 def initialize super - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - password: "pa$$word", - database: PUNBB_DB - ) + @client = + Mysql2::Client.new( + host: "localhost", + username: "root", + password: "pa$$word", + database: PUNBB_DB, + ) end def execute @@ -30,36 +30,41 @@ class ImportScripts::PunBB < ImportScripts::Base end def import_users - puts '', "creating users" + puts "", "creating users" - total_count = mysql_query("SELECT 
count(*) count FROM users;").first['count'] + total_count = mysql_query("SELECT count(*) count FROM users;").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT id, username, realname name, url website, email email, registered created_at, + results = + mysql_query( + "SELECT id, username, realname name, url website, email email, registered created_at, registration_ip registration_ip_address, last_visit last_visit_time, last_email_sent last_emailed_at, last_email_sent last_emailed_at, location, group_id FROM users LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 next if all_records_exist? :users, results.map { |u| u["id"].to_i } create_users(results, total: total_count, offset: offset) do |user| - { id: user['id'], - email: user['email'], - username: user['username'], - name: user['name'], - created_at: Time.zone.at(user['created_at']), - website: user['website'], - registration_ip_address: user['registration_ip_address'], - last_seen_at: Time.zone.at(user['last_visit_time']), - last_emailed_at: user['last_emailed_at'] == nil ? 0 : Time.zone.at(user['last_emailed_at']), - location: user['location'], - moderator: user['group_id'] == 4, - admin: user['group_id'] == 1 } + { + id: user["id"], + email: user["email"], + username: user["username"], + name: user["name"], + created_at: Time.zone.at(user["created_at"]), + website: user["website"], + registration_ip_address: user["registration_ip_address"], + last_seen_at: Time.zone.at(user["last_visit_time"]), + last_emailed_at: + user["last_emailed_at"] == nil ? 0 : Time.zone.at(user["last_emailed_at"]), + location: user["location"], + moderator: user["group_id"] == 4, + admin: user["group_id"] == 1, + } end end end @@ -67,33 +72,34 @@ class ImportScripts::PunBB < ImportScripts::Base def import_categories puts "", "importing top level categories..." - categories = mysql_query(" + categories = + mysql_query( + " SELECT id, cat_name name, disp_position position FROM categories ORDER BY id ASC - ").to_a + ", + ).to_a - create_categories(categories) do |category| - { - id: category["id"], - name: category["name"] - } - end + create_categories(categories) { |category| { id: category["id"], name: category["name"] } } puts "", "importing children categories..." - children_categories = mysql_query(" + children_categories = + mysql_query( + " SELECT id, forum_name name, forum_desc description, disp_position position, cat_id parent_category_id FROM forums ORDER BY id - ").to_a + ", + ).to_a create_categories(children_categories) do |category| { - id: "child##{category['id']}", + id: "child##{category["id"]}", name: category["name"], description: category["description"], - parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]) + parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]), } end end @@ -104,7 +110,9 @@ class ImportScripts::PunBB < ImportScripts::Base total_count = mysql_query("SELECT count(*) count from posts").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT p.id id, t.id topic_id, t.forum_id category_id, @@ -119,29 +127,30 @@ class ImportScripts::PunBB < ImportScripts::Base ORDER BY p.posted LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ").to_a + ", + ).to_a break if results.size < 1 - next if all_records_exist? :posts, results.map { |m| m['id'].to_i } + next if all_records_exist? 
:posts, results.map { |m| m["id"].to_i } create_posts(results, total: total_count, offset: offset) do |m| skip = false mapped = {} - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_punbb_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['created_at']) + mapped[:id] = m["id"] + mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1 + mapped[:raw] = process_punbb_post(m["raw"], m["id"]) + mapped[:created_at] = Time.zone.at(m["created_at"]) - if m['id'] == m['first_post_id'] - mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}") - mapped[:title] = CGI.unescapeHTML(m['title']) + if m["id"] == m["first_post_id"] + mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}") + mapped[:title] = CGI.unescapeHTML(m["title"]) else - parent = topic_lookup_from_imported_post_id(m['first_post_id']) + parent = topic_lookup_from_imported_post_id(m["first_post_id"]) if parent mapped[:topic_id] = parent[:topic_id] else - puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" + puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" skip = true end end @@ -152,16 +161,16 @@ class ImportScripts::PunBB < ImportScripts::Base end def suspend_users - puts '', "updating banned users" + puts "", "updating banned users" banned = 0 failed = 0 - total = mysql_query("SELECT count(*) count FROM bans").first['count'] + total = mysql_query("SELECT count(*) count FROM bans").first["count"] system_user = Discourse.system_user mysql_query("SELECT username, email FROM bans").each do |b| - user = User.find_by_email(b['email']) + user = User.find_by_email(b["email"]) if user user.suspended_at = Time.now user.suspended_till = 200.years.from_now @@ -174,7 +183,7 @@ class ImportScripts::PunBB < ImportScripts::Base failed += 1 end else - puts "Not found: #{b['email']}" + puts "Not found: #{b["email"]}" failed += 1 end @@ -189,15 +198,15 @@ s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1') # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m --> - s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)') + s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)') # Many phpbb bbcode tags have a hash attached to them. Examples: # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - s.gsub!(/:(?:\w{8})\]/, ']') + s.gsub!(/:(?:\w{8})\]/, "]") # Remove mybb video tags. - s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '') + s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "") s = CGI.unescapeHTML(s) @@ -205,7 +214,7 @@ # [http://answers.yahoo.com/question/index ...
223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) # # Work around it for now: - s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[') + s.gsub!(%r{\[http(s)?://(www\.)?}, "[") s end diff --git a/script/import_scripts/quandora/export.rb b/script/import_scripts/quandora/export.rb index e1f87b7ec32..fbbe146ca6c 100644 --- a/script/import_scripts/quandora/export.rb +++ b/script/import_scripts/quandora/export.rb @@ -1,25 +1,25 @@ # frozen_string_literal: true -require 'yaml' -require_relative 'quandora_api' +require "yaml" +require_relative "quandora_api" def load_config(file) - config = YAML::load_file(File.join(__dir__, file)) - @domain = config['domain'] - @username = config['username'] - @password = config['password'] + config = YAML.load_file(File.join(__dir__, file)) + @domain = config["domain"] + @username = config["username"] + @password = config["password"] end def export api = QuandoraApi.new @domain, @username, @password bases = api.list_bases bases.each do |base| - question_list = api.list_questions base['objectId'], 1000 + question_list = api.list_questions base["objectId"], 1000 question_list.each do |q| - question_id = q['uid'] + question_id = q["uid"] question = api.get_question question_id - File.open("output/#{question_id}.json", 'w') do |f| - puts question['title'] + File.open("output/#{question_id}.json", "w") do |f| + puts question["title"] f.write question.to_json f.close end diff --git a/script/import_scripts/quandora/import.rb b/script/import_scripts/quandora/import.rb index 7df8be302c4..a3dc5dfe290 100644 --- a/script/import_scripts/quandora/import.rb +++ b/script/import_scripts/quandora/import.rb @@ -1,10 +1,9 @@ # frozen_string_literal: true -require_relative './quandora_question.rb' +require_relative "./quandora_question.rb" require File.expand_path(File.dirname(__FILE__) + "/../base.rb") class ImportScripts::Quandora < ImportScripts::Base - JSON_FILES_DIR = "output" def initialize @@ -12,8 +11,8 @@ class ImportScripts::Quandora < ImportScripts::Base @system_user = Discourse.system_user @questions = [] Dir.foreach(JSON_FILES_DIR) do |filename| - next if filename == ('.') || filename == ('..') - question = File.read JSON_FILES_DIR + '/' + filename + next if filename == (".") || filename == ("..") + question = File.read JSON_FILES_DIR + "/" + filename @questions << question end end @@ -33,9 +32,7 @@ class ImportScripts::Quandora < ImportScripts::Base q = QuandoraQuestion.new question import_users q.users created_topic = import_topic q.topic - if created_topic - import_posts q.replies, created_topic.topic_id - end + import_posts q.replies, created_topic.topic_id if created_topic topics += 1 print_status topics, total end @@ -43,9 +40,7 @@ class ImportScripts::Quandora < ImportScripts::Base end def import_users(users) - users.each do |user| - create_user user, user[:id] - end + users.each { |user| create_user user, user[:id] } end def import_topic(topic) @@ -54,7 +49,7 @@ class ImportScripts::Quandora < ImportScripts::Base post = Post.find(post_id) # already imported this topic else topic[:user_id] = user_id_from_imported_user_id(topic[:author_id]) || -1 - topic[:category] = 'quandora-import' + topic[:category] = "quandora-import" post = create_post(topic, topic[:id]) @@ -68,9 +63,7 @@ class ImportScripts::Quandora < ImportScripts::Base end def import_posts(posts, topic_id) - posts.each do |post| - import_post post, topic_id - end + posts.each { |post| import_post post, topic_id } end def import_post(post, topic_id) @@ -91,6 +84,4 @@ class 
ImportScripts::Quandora < ImportScripts::Base end end -if __FILE__ == $0 - ImportScripts::Quandora.new.perform -end +ImportScripts::Quandora.new.perform if __FILE__ == $0 diff --git a/script/import_scripts/quandora/quandora_api.rb b/script/import_scripts/quandora/quandora_api.rb index 747473bb793..9a743087720 100644 --- a/script/import_scripts/quandora/quandora_api.rb +++ b/script/import_scripts/quandora/quandora_api.rb @@ -1,10 +1,9 @@ # frozen_string_literal: true -require 'base64' -require 'json' +require "base64" +require "json" class QuandoraApi - attr_accessor :domain, :username, :password def initialize(domain, username, password) @@ -38,18 +37,18 @@ class QuandoraApi def list_bases response = request list_bases_url - response['data'] + response["data"] end def list_questions(kb_id, limit = nil) url = list_questions_url(kb_id, limit) response = request url - response['data']['result'] + response["data"]["result"] end def get_question(question_id) url = "#{base_url @domain}/q/#{question_id}" response = request url - response['data'] + response["data"] end end diff --git a/script/import_scripts/quandora/quandora_question.rb b/script/import_scripts/quandora/quandora_question.rb index abbaaeeda6a..767dad16fc9 100644 --- a/script/import_scripts/quandora/quandora_question.rb +++ b/script/import_scripts/quandora/quandora_question.rb @@ -1,28 +1,27 @@ # frozen_string_literal: true -require 'json' -require 'cgi' -require 'time' +require "json" +require "cgi" +require "time" class QuandoraQuestion - def initialize(question_json) @question = JSON.parse question_json end def topic topic = {} - topic[:id] = @question['uid'] - topic[:author_id] = @question['author']['uid'] - topic[:title] = unescape @question['title'] - topic[:raw] = unescape @question['content'] - topic[:created_at] = Time.parse @question['created'] + topic[:id] = @question["uid"] + topic[:author_id] = @question["author"]["uid"] + topic[:title] = unescape @question["title"] + topic[:raw] = unescape @question["content"] + topic[:created_at] = Time.parse @question["created"] topic end def users users = {} - user = user_from_author @question['author'] + user = user_from_author @question["author"] users[user[:id]] = user replies.each do |reply| user = user_from_author reply[:author] @@ -32,12 +31,12 @@ class QuandoraQuestion end def user_from_author(author) - email = author['email'] - email = "#{author['uid']}@noemail.com" unless email + email = author["email"] + email = "#{author["uid"]}@noemail.com" unless email user = {} - user[:id] = author['uid'] - user[:name] = "#{author['firstName']} #{author['lastName']}" + user[:id] = author["uid"] + user[:name] = "#{author["firstName"]} #{author["lastName"]}" user[:email] = email user[:staged] = true user @@ -45,26 +44,20 @@ class QuandoraQuestion def replies posts = [] - answers = @question['answersList'] - comments = @question['comments'] - comments.each_with_index do |comment, i| - posts << post_from_comment(comment, i, @question) - end + answers = @question["answersList"] + comments = @question["comments"] + comments.each_with_index { |comment, i| posts << post_from_comment(comment, i, @question) } answers.each do |answer| posts << post_from_answer(answer) - comments = answer['comments'] - comments.each_with_index do |comment, i| - posts << post_from_comment(comment, i, answer) - end + comments = answer["comments"] + comments.each_with_index { |comment, i| posts << post_from_comment(comment, i, answer) } end order_replies posts end def order_replies(posts) posts = posts.sort_by { |p| 
p[:created_at] } - posts.each_with_index do |p, i| - p[:post_number] = i + 2 - end + posts.each_with_index { |p, i| p[:post_number] = i + 2 } posts.each do |p| parent = posts.select { |pp| pp[:id] == p[:parent_id] } p[:reply_to_post_number] = parent[0][:post_number] if parent.size > 0 @@ -74,35 +67,35 @@ class QuandoraQuestion def post_from_answer(answer) post = {} - post[:id] = answer['uid'] - post[:parent_id] = @question['uid'] - post[:author] = answer['author'] - post[:author_id] = answer['author']['uid'] - post[:raw] = unescape answer['content'] - post[:created_at] = Time.parse answer['created'] + post[:id] = answer["uid"] + post[:parent_id] = @question["uid"] + post[:author] = answer["author"] + post[:author_id] = answer["author"]["uid"] + post[:raw] = unescape answer["content"] + post[:created_at] = Time.parse answer["created"] post end def post_from_comment(comment, index, parent) - if comment['created'] - created_at = Time.parse comment['created'] + if comment["created"] + created_at = Time.parse comment["created"] else - created_at = Time.parse parent['created'] + created_at = Time.parse parent["created"] end - parent_id = parent['uid'] - parent_id = "#{parent['uid']}-#{index - 1}" if index > 0 + parent_id = parent["uid"] + parent_id = "#{parent["uid"]}-#{index - 1}" if index > 0 post = {} - id = "#{parent['uid']}-#{index}" + id = "#{parent["uid"]}-#{index}" post[:id] = id post[:parent_id] = parent_id - post[:author] = comment['author'] - post[:author_id] = comment['author']['uid'] - post[:raw] = unescape comment['text'] + post[:author] = comment["author"] + post[:author_id] = comment["author"]["uid"] + post[:raw] = unescape comment["text"] post[:created_at] = created_at post end - private + private def unescape(html) return nil unless html diff --git a/script/import_scripts/quandora/test/test_data.rb b/script/import_scripts/quandora/test/test_data.rb index 3166d6c44da..172e753e31d 100644 --- a/script/import_scripts/quandora/test/test_data.rb +++ b/script/import_scripts/quandora/test/test_data.rb @@ -1,5 +1,6 @@ - # frozen_string_literal: true - BASES = '{ +# frozen_string_literal: true +BASES = + '{ "type" : "kbase", "data" : [ { "objectId" : "90b1ccf3-35aa-4d6f-848e-e7c122d92c58", @@ -9,7 +10,8 @@ } ] }' - QUESTIONS = '{ +QUESTIONS = + '{ "type": "question-search-result", "data": { "totalSize": 445, @@ -50,7 +52,8 @@ } }' - QUESTION = '{ +QUESTION = + '{ "type" : "question", "data" : { "uid" : "de20ed0a-5fe5-48a5-9c14-d854f9af99f1", diff --git a/script/import_scripts/quandora/test/test_quandora_api.rb b/script/import_scripts/quandora/test/test_quandora_api.rb index 784ba4fb855..a167ca9ad7a 100644 --- a/script/import_scripts/quandora/test/test_quandora_api.rb +++ b/script/import_scripts/quandora/test/test_quandora_api.rb @@ -1,21 +1,20 @@ # frozen_string_literal: true -require 'minitest/autorun' -require 'yaml' -require_relative '../quandora_api.rb' -require_relative './test_data.rb' +require "minitest/autorun" +require "yaml" +require_relative "../quandora_api.rb" +require_relative "./test_data.rb" class TestQuandoraApi < Minitest::Test - DEBUG = false def initialize(args) - config = YAML::load_file(File.join(__dir__, 'config.yml')) - @domain = config['domain'] - @username = config['username'] - @password = config['password'] - @kb_id = config['kb_id'] - @question_id = config['question_id'] + config = YAML.load_file(File.join(__dir__, "config.yml")) + @domain = config["domain"] + @username = config["username"] + @password = config["password"] + @kb_id = config["kb_id"] + 
@question_id = config["question_id"] super args end @@ -30,19 +29,19 @@ class TestQuandoraApi < Minitest::Test end def test_base_url - assert_equal 'https://mydomain.quandora.com/m/json', @quandora.base_url('mydomain') + assert_equal "https://mydomain.quandora.com/m/json", @quandora.base_url("mydomain") end def test_auth_header - user = 'Aladdin' - password = 'open sesame' + user = "Aladdin" + password = "open sesame" auth_header = @quandora.auth_header user, password - assert_equal 'Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==', auth_header[:Authorization] + assert_equal "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==", auth_header[:Authorization] end def test_list_bases_element_has_expected_structure element = @quandora.list_bases[0] - expected = JSON.parse(BASES)['data'][0] + expected = JSON.parse(BASES)["data"][0] debug element check_keys expected, element end @@ -50,24 +49,24 @@ class TestQuandoraApi < Minitest::Test def test_list_questions_has_expected_structure response = @quandora.list_questions @kb_id, 1 debug response - check_keys JSON.parse(QUESTIONS)['data']['result'][0], response[0] + check_keys JSON.parse(QUESTIONS)["data"]["result"][0], response[0] end def test_get_question_has_expected_structure question = @quandora.get_question @question_id - expected = JSON.parse(QUESTION)['data'] + expected = JSON.parse(QUESTION)["data"] check_keys expected, question - expected_comment = expected['comments'][0] - actual_comment = question['comments'][0] + expected_comment = expected["comments"][0] + actual_comment = question["comments"][0] check_keys expected_comment, actual_comment - expected_answer = expected['answersList'][1] - actual_answer = question['answersList'][0] + expected_answer = expected["answersList"][1] + actual_answer = question["answersList"][0] check_keys expected_answer, actual_answer - expected_answer_comment = expected_answer['comments'][0] - actual_answer_comment = actual_answer['comments'][0] + expected_answer_comment = expected_answer["comments"][0] + actual_answer_comment = actual_answer["comments"][0] check_keys expected_answer_comment, actual_answer_comment end @@ -75,18 +74,16 @@ class TestQuandoraApi < Minitest::Test def check_keys(expected, actual) msg = "### caller[0]:\nKey not found in actual keys: #{actual.keys}\n" - expected.keys.each do |k| - assert (actual.keys.include? k), "#{k}" - end + expected.keys.each { |k| assert (actual.keys.include? 
k), "#{k}" } end def debug(message, show = false) if show || DEBUG - puts '### ' + caller[0] - puts '' + puts "### " + caller[0] + puts "" puts message - puts '' - puts '' + puts "" + puts "" end end end diff --git a/script/import_scripts/quandora/test/test_quandora_question.rb b/script/import_scripts/quandora/test/test_quandora_question.rb index 28b5dd9885c..6044951c5b8 100644 --- a/script/import_scripts/quandora/test/test_quandora_question.rb +++ b/script/import_scripts/quandora/test/test_quandora_question.rb @@ -1,47 +1,46 @@ # frozen_string_literal: true -require 'minitest/autorun' -require 'cgi' -require 'time' -require_relative '../quandora_question.rb' -require_relative './test_data.rb' +require "minitest/autorun" +require "cgi" +require "time" +require_relative "../quandora_question.rb" +require_relative "./test_data.rb" class TestQuandoraQuestion < Minitest::Test - def setup - @data = JSON.parse(QUESTION)['data'] + @data = JSON.parse(QUESTION)["data"] @question = QuandoraQuestion.new @data.to_json end def test_topic topic = @question.topic - assert_equal @data['uid'], topic[:id] - assert_equal @data['author']['uid'], topic[:author_id] - assert_equal unescape(@data['title']), topic[:title] - assert_equal unescape(@data['content']), topic[:raw] - assert_equal Time.parse(@data['created']), topic[:created_at] + assert_equal @data["uid"], topic[:id] + assert_equal @data["author"]["uid"], topic[:author_id] + assert_equal unescape(@data["title"]), topic[:title] + assert_equal unescape(@data["content"]), topic[:raw] + assert_equal Time.parse(@data["created"]), topic[:created_at] end def test_user_from_author author = {} - author['uid'] = 'uid' - author['firstName'] = 'Joe' - author['lastName'] = 'Schmoe' - author['email'] = 'joe.schmoe@mydomain.com' + author["uid"] = "uid" + author["firstName"] = "Joe" + author["lastName"] = "Schmoe" + author["email"] = "joe.schmoe@mydomain.com" user = @question.user_from_author author - assert_equal 'uid', user[:id] - assert_equal 'Joe Schmoe', user[:name] - assert_equal 'joe.schmoe@mydomain.com', user[:email] + assert_equal "uid", user[:id] + assert_equal "Joe Schmoe", user[:name] + assert_equal "joe.schmoe@mydomain.com", user[:email] assert_equal true, user[:staged] end def test_user_from_author_with_no_email author = {} - author['uid'] = 'foo' + author["uid"] = "foo" user = @question.user_from_author author - assert_equal 'foo@noemail.com', user[:email] + assert_equal "foo@noemail.com", user[:email] end def test_replies @@ -57,77 +56,77 @@ class TestQuandoraQuestion < Minitest::Test assert_equal nil, replies[2][:reply_to_post_number] assert_equal 4, replies[3][:reply_to_post_number] assert_equal 3, replies[4][:reply_to_post_number] - assert_equal '2013-01-07 04:59:56 UTC', replies[0][:created_at].to_s - assert_equal '2013-01-08 16:49:32 UTC', replies[1][:created_at].to_s - assert_equal '2016-01-20 15:38:55 UTC', replies[2][:created_at].to_s - assert_equal '2016-01-21 15:38:55 UTC', replies[3][:created_at].to_s - assert_equal '2016-01-22 15:38:55 UTC', replies[4][:created_at].to_s + assert_equal "2013-01-07 04:59:56 UTC", replies[0][:created_at].to_s + assert_equal "2013-01-08 16:49:32 UTC", replies[1][:created_at].to_s + assert_equal "2016-01-20 15:38:55 UTC", replies[2][:created_at].to_s + assert_equal "2016-01-21 15:38:55 UTC", replies[3][:created_at].to_s + assert_equal "2016-01-22 15:38:55 UTC", replies[4][:created_at].to_s end def test_post_from_answer answer = {} - answer['uid'] = 'uid' - answer['content'] = 'content' - answer['created'] = 
'2013-01-06T18:24:54.62Z' - answer['author'] = { 'uid' => 'auid' } + answer["uid"] = "uid" + answer["content"] = "content" + answer["created"] = "2013-01-06T18:24:54.62Z" + answer["author"] = { "uid" => "auid" } post = @question.post_from_answer answer - assert_equal 'uid', post[:id] + assert_equal "uid", post[:id] assert_equal @question.topic[:id], post[:parent_id] - assert_equal answer['author'], post[:author] - assert_equal 'auid', post[:author_id] - assert_equal 'content', post[:raw] - assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at] + assert_equal answer["author"], post[:author] + assert_equal "auid", post[:author_id] + assert_equal "content", post[:raw] + assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at] end def test_post_from_comment comment = {} - comment['text'] = 'text' - comment['created'] = '2013-01-06T18:24:54.62Z' - comment['author'] = { 'uid' => 'auid' } - parent = { 'uid' => 'parent-uid' } + comment["text"] = "text" + comment["created"] = "2013-01-06T18:24:54.62Z" + comment["author"] = { "uid" => "auid" } + parent = { "uid" => "parent-uid" } post = @question.post_from_comment comment, 0, parent - assert_equal 'parent-uid-0', post[:id] - assert_equal 'parent-uid', post[:parent_id] - assert_equal comment['author'], post[:author] - assert_equal 'auid', post[:author_id] - assert_equal 'text', post[:raw] - assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at] + assert_equal "parent-uid-0", post[:id] + assert_equal "parent-uid", post[:parent_id] + assert_equal comment["author"], post[:author] + assert_equal "auid", post[:author_id] + assert_equal "text", post[:raw] + assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at] end def test_post_from_comment_uses_parent_created_if_necessary comment = {} - comment['author'] = { 'uid' => 'auid' } - parent = { 'created' => '2013-01-06T18:24:54.62Z' } + comment["author"] = { "uid" => "auid" } + parent = { "created" => "2013-01-06T18:24:54.62Z" } post = @question.post_from_comment comment, 0, parent - assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at] + assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at] end def test_post_from_comment_uses_previous_comment_as_parent comment = {} - comment['author'] = { 'uid' => 'auid' } - parent = { 'uid' => 'parent-uid', 'created' => '2013-01-06T18:24:54.62Z' } + comment["author"] = { "uid" => "auid" } + parent = { "uid" => "parent-uid", "created" => "2013-01-06T18:24:54.62Z" } post = @question.post_from_comment comment, 1, parent - assert_equal 'parent-uid-1', post[:id] - assert_equal 'parent-uid-0', post[:parent_id] - assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at] + assert_equal "parent-uid-1", post[:id] + assert_equal "parent-uid-0", post[:parent_id] + assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at] end def test_users users = @question.users assert_equal 5, users.size - assert_equal 'Ida Inquisitive', users[0][:name] - assert_equal 'Harry Helpful', users[1][:name] - assert_equal 'Sam Smarty-Pants', users[2][:name] - assert_equal 'Greta Greatful', users[3][:name] - assert_equal 'Eddy Excited', users[4][:name] + assert_equal "Ida Inquisitive", users[0][:name] + assert_equal "Harry Helpful", users[1][:name] + assert_equal "Sam Smarty-Pants", users[2][:name] + assert_equal "Greta Greatful", users[3][:name] + assert_equal "Eddy Excited", users[4][:name] end private diff --git a/script/import_scripts/question2answer.rb 
b/script/import_scripts/question2answer.rb index acd5b70bebf..3820b8050bd 100644 --- a/script/import_scripts/question2answer.rb +++ b/script/import_scripts/question2answer.rb @@ -1,21 +1,21 @@ # frozen_string_literal: true -require 'mysql2' +require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'htmlentities' -require 'php_serialize' # https://github.com/jqr/php-serialize +require "htmlentities" +require "php_serialize" # https://github.com/jqr/php-serialize class ImportScripts::Question2Answer < ImportScripts::Base BATCH_SIZE = 1000 # CHANGE THESE BEFORE RUNNING THE IMPORTER - DB_HOST ||= ENV['DB_HOST'] || "localhost" - DB_NAME ||= ENV['DB_NAME'] || "qa_db" - DB_PW ||= ENV['DB_PW'] || "" - DB_USER ||= ENV['DB_USER'] || "root" - TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles" - TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "qa_" + DB_HOST ||= ENV["DB_HOST"] || "localhost" + DB_NAME ||= ENV["DB_NAME"] || "qa_db" + DB_PW ||= ENV["DB_PW"] || "" + DB_USER ||= ENV["DB_USER"] || "root" + TIMEZONE ||= ENV["TIMEZONE"] || "America/Los_Angeles" + TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "qa_" def initialize super @@ -26,12 +26,8 @@ class ImportScripts::Question2Answer < ImportScripts::Base @htmlentities = HTMLEntities.new - @client = Mysql2::Client.new( - host: DB_HOST, - username: DB_USER, - password: DB_PW, - database: DB_NAME - ) + @client = + Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME) end def execute @@ -51,11 +47,16 @@ class ImportScripts::Question2Answer < ImportScripts::Base # only import users that have posted or voted on Q2A # if you want to import all users, just leave out the WHERE and everything after it (and remove line 95 as well) - user_count = mysql_query("SELECT COUNT(userid) count FROM #{TABLE_PREFIX}users u WHERE EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.userid) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes uv WHERE u.userid=uv.userid)").first["count"] + user_count = + mysql_query( + "SELECT COUNT(userid) count FROM #{TABLE_PREFIX}users u WHERE EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.userid) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes uv WHERE u.userid=uv.userid)", + ).first[ + "count" + ] last_user_id = -1 batches(BATCH_SIZE) do |offset| - users = mysql_query(<<-SQL + users = mysql_query(<<-SQL).to_a SELECT u.userid AS id, u.email, u.handle AS username, u.created AS created_at, u.loggedin AS last_sign_in_at, u.avatarblobid FROM #{TABLE_PREFIX}users u WHERE u.userid > #{last_user_id} @@ -63,7 +64,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base ORDER BY u.userid LIMIT #{BATCH_SIZE} SQL - ).to_a break if users.empty? 
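# NOTE: the batch query above pages with a `u.userid > #{last_user_id}` cursor
# instead of OFFSET, so each round trip scans only the rows it returns; the
# cursor is advanced from the last row fetched just below.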
last_user_id = users[-1]["id"] @@ -73,18 +73,17 @@ class ImportScripts::Question2Answer < ImportScripts::Base email = user["email"].presence username = @htmlentities.decode(user["email"]).strip.split("@").first - avatar_url = "https://your_image_bucket/#{user['cdn_slug']}" if user['cdn_slug'] + avatar_url = "https://your_image_bucket/#{user["cdn_slug"]}" if user["cdn_slug"] { id: user["id"], - name: "#{user['username']}", - username: "#{user['username']}", - password: user['password'], + name: "#{user["username"]}", + username: "#{user["username"]}", + password: user["password"], email: email, created_at: user["created_at"], last_seen_at: user["last_sign_in_at"], - post_create_action: proc do |u| - @old_username_to_new_usernames[user["username"]] = u.username - end + post_create_action: + proc { |u| @old_username_to_new_usernames[user["username"]] = u.username }, } end end @@ -93,7 +92,10 @@ class ImportScripts::Question2Answer < ImportScripts::Base def import_categories puts "", "importing top level categories..." - categories = mysql_query("SELECT categoryid, parentid, title, position FROM #{TABLE_PREFIX}categories ORDER BY categoryid").to_a + categories = + mysql_query( + "SELECT categoryid, parentid, title, position FROM #{TABLE_PREFIX}categories ORDER BY categoryid", + ).to_a top_level_categories = categories.select { |c| c["parentid"].nil? } @@ -101,7 +103,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base { id: category["categoryid"], name: @htmlentities.decode(category["title"]).strip, - position: category["position"] + position: category["position"], } end @@ -122,7 +124,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base id: category["categoryid"], name: @htmlentities.decode(category["title"]).strip, position: category["position"], - parent_category_id: category_id_from_imported_category_id(category["parentid"]) + parent_category_id: category_id_from_imported_category_id(category["parentid"]), } end end @@ -130,12 +132,15 @@ class ImportScripts::Question2Answer < ImportScripts::Base def import_topics puts "", "importing topics..." - topic_count = mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}posts WHERE type = 'Q'").first["count"] + topic_count = + mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}posts WHERE type = 'Q'").first[ + "count" + ] last_topic_id = -1 batches(BATCH_SIZE) do |offset| - topics = mysql_query(<<-SQL + topics = mysql_query(<<-SQL).to_a SELECT p.postid, p.type, p.categoryid, p.closedbyid, p.userid postuserid, p.views, p.created, p.title, p.content raw FROM #{TABLE_PREFIX}posts p WHERE type = 'Q' @@ -143,7 +148,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base ORDER BY p.postid LIMIT #{BATCH_SIZE} SQL - ).to_a break if topics.empty? @@ -179,20 +183,19 @@ class ImportScripts::Question2Answer < ImportScripts::Base if topic.present? title_slugified = slugify(thread["title"], false, 50) if thread["title"].present? url_slug = "qa/#{thread["postid"]}/#{title_slugified}" if thread["title"].present? - Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present? + if url_slug.present? && topic[:topic_id].present? 
+ Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) + end end end - end end def slugify(title, ascii_only, max_length) - words = title.downcase.gsub(/[^a-zA-Z0-9\s]/, '').split(" ") + words = title.downcase.gsub(/[^a-zA-Z0-9\s]/, "").split(" ") word_lengths = {} - words.each_with_index do |word, idx| - word_lengths[idx] = word.length - end + words.each_with_index { |word, idx| word_lengths[idx] = word.length } remaining = max_length if word_lengths.inject(0) { |sum, (_, v)| sum + v } > remaining @@ -211,17 +214,16 @@ def import_posts puts "", "importing posts..." - post_count = mysql_query(<<-SQL + post_count = mysql_query(<<-SQL).first["count"] SELECT COUNT(postid) count FROM #{TABLE_PREFIX}posts p WHERE p.parentid IS NOT NULL SQL - ).first["count"] last_post_id = -1 batches(BATCH_SIZE) do |offset| - posts = mysql_query(<<-SQL + posts = mysql_query(<<-SQL).to_a SELECT p.postid, p.type, p.parentid, p.categoryid, p.closedbyid, p.userid, p.views, p.created, p.title, p.content, parent.type AS parenttype, parent.parentid AS qid FROM #{TABLE_PREFIX}posts p @@ -233,7 +235,6 @@ ORDER BY p.postid LIMIT #{BATCH_SIZE} SQL - ).to_a break if posts.empty? last_post_id = posts[-1]["postid"] @@ -250,11 +251,11 @@ # this works as long as comments can not have a comment as parent # it's always Q-A Q-C or A-C - if post['type'] == 'A' # for answers the question/topic is always the parent + if post["type"] == "A" # for answers the question/topic is always the parent topic = topic_lookup_from_imported_post_id("thread-#{post["parentid"]}") next if topic.nil? else - if post['parenttype'] == 'Q' # for comments to questions, the question/topic is the parent as well + if post["parenttype"] == "Q" # for comments to questions, the question/topic is the parent as well topic = topic_lookup_from_imported_post_id("thread-#{post["parentid"]}") next if topic.nil? else # for comments to answers, the question/topic is the parent of the parent @@ -284,7 +285,7 @@ ans = mysql_query("select postid, selchildid from qa_posts where selchildid is not null").to_a ans.each do |answer| begin - post = Post.find_by(id: post_id_from_imported_post_id("#{answer['selchildid']}")) + post = Post.find_by(id: post_id_from_imported_post_id("#{answer["selchildid"]}")) post.custom_fields["is_accepted_answer"] = "true" post.save topic = Topic.find(post.topic_id) @@ -293,20 +294,18 @@ rescue => e puts "error acting on post #{e}" end - end end def import_likes puts "", "importing likes..." - likes = mysql_query(<<-SQL + likes = mysql_query(<<-SQL).to_a SELECT postid, userid FROM #{TABLE_PREFIX}uservotes u WHERE u.vote=1 SQL - ).to_a likes.each do |like| - post = Post.find_by(id: post_id_from_imported_post_id("thread-#{like['postid']}")) + post = Post.find_by(id: post_id_from_imported_post_id("thread-#{like["postid"]}")) user = User.find_by(id: user_id_from_imported_user_id(like["userid"])) begin PostActionCreator.like(user, post) if user && post @@ -340,10 +339,10 @@ def preprocess_post_raw(raw) return "" if raw.blank? - raw.gsub!(/<a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a>/i, '[\2](\1)') - raw.gsub!(/<p>(.+?)<\/p>/im) { "#{$1}\n\n" } - raw.gsub!('<br />', "\n") - raw.gsub!(/<strong>(.*?)<\/strong>/im, '[b]\1[/b]') + raw.gsub!(%r{<a(?:.+)href="(\S+)"(?:.*)>(.+)</a>}i, '[\2](\1)') + raw.gsub!(%r{<p>(.+?)</p>}im) { "#{$1}\n\n" } + raw.gsub!("<br />", "\n") + raw.gsub!(%r{<strong>(.*?)</strong>}im, '[b]\1[/b]') # decode HTML entities raw = @htmlentities.decode(raw) @@ -355,22 +354,22 @@ class ImportScripts::Question2Answer < ImportScripts::Base # [HTML]...[/HTML] raw.gsub!(/\[html\]/i, "\n```html\n") - raw.gsub!(/\[\/html\]/i, "\n```\n") + raw.gsub!(%r{\[/html\]}i, "\n```\n") # [PHP]...[/PHP] raw.gsub!(/\[php\]/i, "\n```php\n") - raw.gsub!(/\[\/php\]/i, "\n```\n") + raw.gsub!(%r{\[/php\]}i, "\n```\n") # [HIGHLIGHT="..."] raw.gsub!(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" } # [CODE]...[/CODE] # [HIGHLIGHT]...[/HIGHLIGHT] - raw.gsub!(/\[\/?code\]/i, "\n```\n") - raw.gsub!(/\[\/?highlight\]/i, "\n```\n") + raw.gsub!(%r{\[/?code\]}i, "\n```\n") + raw.gsub!(%r{\[/?highlight\]}i, "\n```\n") # [SAMP]...[/SAMP] - raw.gsub!(/\[\/?samp\]/i, "`") + raw.gsub!(%r{\[/?samp\]}i, "`") # replace all chevrons with HTML entities # NOTE: must be done @@ -385,16 +384,16 @@ class ImportScripts::Question2Answer < ImportScripts::Base raw.gsub!("\u2603", ">") # [URL=...]...[/URL] - raw.gsub!(/\[url="?([^"]+?)"?\](.*?)\[\/url\]/im) { "[#{$2.strip}](#{$1})" } - raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/im) { "[#{$2.strip}](#{$1})" } + raw.gsub!(%r{\[url="?([^"]+?)"?\](.*?)\[/url\]}im) { "[#{$2.strip}](#{$1})" } + raw.gsub!(%r{\[url="?(.+?)"?\](.+)\[/url\]}im) { "[#{$2.strip}](#{$1})" } # [URL]...[/URL] # [MP3]...[/MP3] - raw.gsub!(/\[\/?url\]/i, "") - raw.gsub!(/\[\/?mp3\]/i, "") + raw.gsub!(%r{\[/?url\]}i, "") + raw.gsub!(%r{\[/?mp3\]}i, "") # [MENTION]<username>[/MENTION] - raw.gsub!(/\[mention\](.+?)\[\/mention\]/i) do + raw.gsub!(%r{\[mention\](.+?)\[/mention\]}i) do old_username = $1 if @old_username_to_new_usernames.has_key?(old_username) old_username = @old_username_to_new_usernames[old_username] @@ -403,31 +402,31 @@ end # [FONT=blah] and [COLOR=blah] - raw.gsub!(/\[FONT=.*?\](.*?)\[\/FONT\]/im, '\1') - raw.gsub!(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, '\1') - raw.gsub!(/\[COLOR=#.*?\](.*?)\[\/COLOR\]/im, '\1') + raw.gsub!(%r{\[FONT=.*?\](.*?)\[/FONT\]}im, '\1') + raw.gsub!(%r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, '\1') + raw.gsub!(%r{\[COLOR=#.*?\](.*?)\[/COLOR\]}im, '\1') - raw.gsub!(/\[SIZE=.*?\](.*?)\[\/SIZE\]/im, '\1') - raw.gsub!(/\[h=.*?\](.*?)\[\/h\]/im, '\1') + raw.gsub!(%r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, '\1') + raw.gsub!(%r{\[h=.*?\](.*?)\[/h\]}im, '\1') # [CENTER]...[/CENTER] - raw.gsub!(/\[CENTER\](.*?)\[\/CENTER\]/im, '\1') + raw.gsub!(%r{\[CENTER\](.*?)\[/CENTER\]}im, '\1') # [INDENT]...[/INDENT] - raw.gsub!(/\[INDENT\](.*?)\[\/INDENT\]/im, '\1') - raw.gsub!(/\[TABLE\](.*?)\[\/TABLE\]/im, '\1') - raw.gsub!(/\[TR\](.*?)\[\/TR\]/im, '\1') - raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, '\1') - raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, '\1') + raw.gsub!(%r{\[INDENT\](.*?)\[/INDENT\]}im, '\1') + raw.gsub!(%r{\[TABLE\](.*?)\[/TABLE\]}im, '\1') + raw.gsub!(%r{\[TR\](.*?)\[/TR\]}im, '\1') + raw.gsub!(%r{\[TD\](.*?)\[/TD\]}im, '\1') + raw.gsub!(%r{\[TD="?.*?"?\](.*?)\[/TD\]}im, '\1') # [QUOTE]...[/QUOTE] - raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote| - quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" } quote.gsub!(/\n(.+?)/) { "\n> #{$1}" } - } + raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote| + quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" } quote.gsub!(/\n(.+?)/) { "\n> #{$1}" } + end # [QUOTE=<username>]...[/QUOTE] - raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do + raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do old_username, quote = $1, $2 if @old_username_to_new_usernames.has_key?(old_username) old_username =
@old_username_to_new_usernames[old_username] @@ -436,31 +435,33 @@ class ImportScripts::Question2Answer < ImportScripts::Base end # [YOUTUBE]<id>[/YOUTUBE] - raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" } + raw.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\n//youtu.be/#{$1}\n" } # [VIDEO=youtube;<id>]...[/VIDEO] - raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" } + raw.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) { "\n//youtu.be/#{$1}\n" } # More Additions .... # [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler] - raw.gsub!(/\[spoiler="?(.+?)"?\](.+?)\[\/spoiler\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" } + raw.gsub!(%r{\[spoiler="?(.+?)"?\](.+?)\[/spoiler\]}im) do + "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" + end # [IMG][IMG]http://i63.tinypic.com/akga3r.jpg[/IMG][/IMG] - raw.gsub!(/\[IMG\]\[IMG\](.+?)\[\/IMG\]\[\/IMG\]/i) { "[IMG]#{$1}[/IMG]" } + raw.gsub!(%r{\[IMG\]\[IMG\](.+?)\[/IMG\]\[/IMG\]}i) { "[IMG]#{$1}[/IMG]" } # convert list tags to ul and list=1 tags to ol # (basically, we're only missing list=a here...) # (https://meta.discourse.org/t/phpbb-3-importer-old/17397) - raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]') - raw.gsub!(/\[list=1\](.*?)\[\/list\]/im, '[ol]\1[/ol]') - raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]') - raw.gsub!(/\[list=1\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]') + raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]') + raw.gsub!(%r{\[list=1\](.*?)\[/list\]}im, '[ol]\1[/ol]') + raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]') + raw.gsub!(%r{\[list=1\](.*?)\[/list:o\]}im, '[ol]\1[/ol]') # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - raw.gsub!(/\[\*\]\n/, '') - raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') + raw.gsub!(/\[\*\]\n/, "") + raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]') raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]') - raw.gsub!(/\[\*=1\]/, '') + raw.gsub!(/\[\*=1\]/, "") raw.strip!
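# NOTE: `raw` is returned explicitly on the next line because the gsub!/strip!
# calls above return nil when they make no change, so the mutated string
# itself is the only safe return value for this method.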
raw @@ -468,7 +469,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base def postprocess_post_raw(raw) # [QUOTE=<username>;<post_id>]...[/QUOTE] - raw.gsub!(/\[quote=([^;]+);(\d+)\](.+?)\[\/quote\]/im) do + raw.gsub!(%r{\[quote=([^;]+);(\d+)\](.+?)\[/quote\]}im) do old_username, post_id, quote = $1, $2, $3 if @old_username_to_new_usernames.has_key?(old_username) @@ -477,7 +478,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base if topic_lookup = topic_lookup_from_imported_post_id(post_id) post_number = topic_lookup[:post_number] - topic_id = topic_lookup[:topic_id] + topic_id = topic_lookup[:topic_id] "\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n" else "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" @@ -485,11 +486,11 @@ end # remove attachments - raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "") + raw.gsub!(%r{\[attach[^\]]*\]\d+\[/attach\]}i, "") # [THREAD]<thread_id>[/THREAD] # ==> http://my.discourse.org/t/slug/<topic_id> - raw.gsub!(/\[thread\](\d+)\[\/thread\]/i) do + raw.gsub!(%r{\[thread\](\d+)\[/thread\]}i) do thread_id = $1 if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") topic_lookup[:url] @@ -500,7 +501,7 @@ # [THREAD=<thread_id>]...[/THREAD] # ==> [...](http://my.discourse.org/t/slug/<topic_id>) - raw.gsub!(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do + raw.gsub!(%r{\[thread=(\d+)\](.+?)\[/thread\]}i) do thread_id, link = $1, $2 if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") url = topic_lookup[:url] @@ -512,7 +513,7 @@ # [POST]<post_id>[/POST] # ==> http://my.discourse.org/t/slug/<topic_id>/<post_number> - raw.gsub!(/\[post\](\d+)\[\/post\]/i) do + raw.gsub!(%r{\[post\](\d+)\[/post\]}i) do post_id = $1 if topic_lookup = topic_lookup_from_imported_post_id(post_id) topic_lookup[:url] @@ -523,7 +524,7 @@ # [POST=<post_id>]...[/POST] # ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>) - raw.gsub!(/\[post=(\d+)\](.+?)\[\/post\]/i) do + raw.gsub!(%r{\[post=(\d+)\](.+?)\[/post\]}i) do post_id, link = $1, $2 if topic_lookup = topic_lookup_from_imported_post_id(post_id) url = topic_lookup[:url] @@ -537,7 +538,7 @@ end def create_permalinks - puts '', 'Creating permalinks...' + puts "", "Creating permalinks..." # topics Topic.find_each do |topic| @@ -546,7 +547,11 @@ if tcf && tcf["import_id"] question_id = tcf["import_id"][/thread-(\d)/, 0] url = "#{question_id}" - Permalink.create(url: url, topic_id: topic.id) rescue nil + begin + Permalink.create(url: url, topic_id: topic.id) + rescue StandardError + nil + end end end @@ -555,11 +560,21 @@ # categories Category.find_each do |category| ccf = category.custom_fields if ccf && ccf["import_id"] - url = category.parent_category ?
"#{category.parent_category.slug}/#{category.slug}" : category.slug - Permalink.create(url: url, category_id: category.id) rescue nil + url = + ( + if category.parent_category + "#{category.parent_category.slug}/#{category.slug}" + else + category.slug + end + ) + begin + Permalink.create(url: url, category_id: category.id) + rescue StandardError + nil + end end end - end def parse_timestamp(timestamp) @@ -569,7 +584,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base def mysql_query(sql) @client.query(sql, cache_rows: true) end - end ImportScripts::Question2Answer.new.perform diff --git a/script/import_scripts/sfn.rb b/script/import_scripts/sfn.rb index e9270813d72..e10c9c1b0aa 100644 --- a/script/import_scripts/sfn.rb +++ b/script/import_scripts/sfn.rb @@ -8,7 +8,6 @@ require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Sfn < ImportScripts::Base - BATCH_SIZE = 100_000 MIN_CREATED_AT = "2003-11-01" @@ -96,22 +95,27 @@ class ImportScripts::Sfn < ImportScripts::Base username: email.split("@")[0], bio_raw: bio, created_at: user["created_at"], - post_create_action: proc do |newuser| - next if user["avatar"].blank? + post_create_action: + proc do |newuser| + next if user["avatar"].blank? - avatar = Tempfile.new("sfn-avatar") - avatar.write(user["avatar"].encode("ASCII-8BIT").force_encoding("UTF-8")) - avatar.rewind + avatar = Tempfile.new("sfn-avatar") + avatar.write(user["avatar"].encode("ASCII-8BIT").force_encoding("UTF-8")) + avatar.rewind - upload = UploadCreator.new(avatar, "avatar.jpg").create_for(newuser.id) - if upload.persisted? - newuser.create_user_avatar - newuser.user_avatar.update(custom_upload_id: upload.id) - newuser.update(uploaded_avatar_id: upload.id) - end + upload = UploadCreator.new(avatar, "avatar.jpg").create_for(newuser.id) + if upload.persisted? + newuser.create_user_avatar + newuser.user_avatar.update(custom_upload_id: upload.id) + newuser.update(uploaded_avatar_id: upload.id) + end - avatar.try(:close!) rescue nil - end + begin + avatar.try(:close!) + rescue StandardError + nil + end + end, } end end @@ -198,9 +202,7 @@ class ImportScripts::Sfn < ImportScripts::Base def import_categories puts "", "importing categories..." - create_categories(NEW_CATEGORIES) do |category| - { id: category, name: category } - end + create_categories(NEW_CATEGORIES) { |category| { id: category, name: category } } end def import_topics @@ -234,7 +236,7 @@ class ImportScripts::Sfn < ImportScripts::Base SQL break if topics.size < 1 - next if all_records_exist? :posts, topics.map { |t| t['id'].to_i } + next if all_records_exist? :posts, topics.map { |t| t["id"].to_i } create_posts(topics, total: topic_count, offset: offset) do |topic| next unless category_id = CATEGORY_MAPPING[topic["category_id"]] @@ -286,7 +288,7 @@ class ImportScripts::Sfn < ImportScripts::Base break if posts.size < 1 - next if all_records_exist? :posts, posts.map { |p| p['id'].to_i } + next if all_records_exist? :posts, posts.map { |p| p["id"].to_i } create_posts(posts, total: posts_count, offset: offset) do |post| next unless parent = topic_lookup_from_imported_post_id(post["topic_id"]) @@ -307,7 +309,7 @@ class ImportScripts::Sfn < ImportScripts::Base def cleanup_raw(raw) # fix some html - raw.gsub!(//i, "\n") + raw.gsub!(%r{}i, "\n") # remove "This message has been cross posted to the following eGroups: ..." 
raw.gsub!(/^This message has been cross posted to the following eGroups: .+\n-{3,}/i, "") # remove signatures @@ -320,7 +322,6 @@ class ImportScripts::Sfn < ImportScripts::Base @client ||= Mysql2::Client.new(username: "root", database: "sfn") @client.query(sql) end - end ImportScripts::Sfn.new.perform diff --git a/script/import_scripts/simplepress.rb b/script/import_scripts/simplepress.rb index 3375258790f..b0aa7f5f114 100644 --- a/script/import_scripts/simplepress.rb +++ b/script/import_scripts/simplepress.rb @@ -1,22 +1,17 @@ # frozen_string_literal: true -require 'mysql2' +require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::SimplePress < ImportScripts::Base - - SIMPLE_PRESS_DB ||= ENV['SIMPLEPRESS_DB'] || "simplepress" + SIMPLE_PRESS_DB ||= ENV["SIMPLEPRESS_DB"] || "simplepress" TABLE_PREFIX = "wp_sf" BATCH_SIZE ||= 1000 def initialize super - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - database: SIMPLE_PRESS_DB, - ) + @client = Mysql2::Client.new(host: "localhost", username: "root", database: SIMPLE_PRESS_DB) SiteSetting.max_username_length = 50 end @@ -32,10 +27,11 @@ class ImportScripts::SimplePress < ImportScripts::Base puts "", "importing users..." last_user_id = -1 - total_users = mysql_query("SELECT COUNT(*) count FROM wp_users WHERE user_email LIKE '%@%'").first["count"] + total_users = + mysql_query("SELECT COUNT(*) count FROM wp_users WHERE user_email LIKE '%@%'").first["count"] batches(BATCH_SIZE) do |offset| - users = mysql_query(<<-SQL + users = mysql_query(<<-SQL).to_a SELECT ID id, user_nicename, display_name, user_email, user_registered, user_url FROM wp_users WHERE user_email LIKE '%@%' @@ -43,7 +39,6 @@ class ImportScripts::SimplePress < ImportScripts::Base ORDER BY id LIMIT #{BATCH_SIZE} SQL - ).to_a break if users.empty? @@ -55,13 +50,12 @@ class ImportScripts::SimplePress < ImportScripts::Base user_ids_sql = user_ids.join(",") users_description = {} - mysql_query(<<-SQL + mysql_query(<<-SQL).each { |um| users_description[um["user_id"]] = um["description"] } SELECT user_id, meta_value description FROM wp_usermeta WHERE user_id IN (#{user_ids_sql}) AND meta_key = 'description' SQL - ).each { |um| users_description[um["user_id"]] = um["description"] } create_users(users, total: total_users, offset: offset) do |u| { @@ -71,7 +65,7 @@ class ImportScripts::SimplePress < ImportScripts::Base name: u["display_name"], created_at: u["user_registered"], website: u["user_url"], - bio_raw: users_description[u["id"]] + bio_raw: users_description[u["id"]], } end end @@ -80,16 +74,20 @@ class ImportScripts::SimplePress < ImportScripts::Base def import_categories puts "", "importing categories..." 
- categories = mysql_query(<<-SQL + categories = mysql_query(<<-SQL) SELECT forum_id, forum_name, forum_seq, forum_desc, parent FROM #{TABLE_PREFIX}forums ORDER BY forum_id SQL - ) create_categories(categories) do |c| - category = { id: c['forum_id'], name: CGI.unescapeHTML(c['forum_name']), description: CGI.unescapeHTML(c['forum_desc']), position: c['forum_seq'] } - if (parent_id = c['parent'].to_i) > 0 + category = { + id: c["forum_id"], + name: CGI.unescapeHTML(c["forum_name"]), + description: CGI.unescapeHTML(c["forum_desc"]), + position: c["forum_seq"], + } + if (parent_id = c["parent"].to_i) > 0 category[:parent_category_id] = category_id_from_imported_category_id(parent_id) end category @@ -99,10 +97,15 @@ class ImportScripts::SimplePress < ImportScripts::Base def import_topics puts "", "creating topics" - total_count = mysql_query("SELECT COUNT(*) count FROM #{TABLE_PREFIX}posts WHERE post_index = 1").first["count"] + total_count = + mysql_query("SELECT COUNT(*) count FROM #{TABLE_PREFIX}posts WHERE post_index = 1").first[ + "count" + ] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT p.post_id id, p.topic_id topic_id, t.forum_id category_id, @@ -119,23 +122,24 @@ class ImportScripts::SimplePress < ImportScripts::Base ORDER BY p.post_id LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ") + ", + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |m| m['id'].to_i } + next if all_records_exist? :posts, results.map { |m| m["id"].to_i } create_posts(results, total: total_count, offset: offset) do |m| - created_at = Time.zone.at(m['post_time']) + created_at = Time.zone.at(m["post_time"]) { - id: m['id'], - user_id: user_id_from_imported_user_id(m['user_id']) || -1, - raw: process_simplepress_post(m['raw'], m['id']), + id: m["id"], + user_id: user_id_from_imported_user_id(m["user_id"]) || -1, + raw: process_simplepress_post(m["raw"], m["id"]), created_at: created_at, - category: category_id_from_imported_category_id(m['category_id']), - title: CGI.unescapeHTML(m['title']), - views: m['views'], - pinned_at: m['pinned'] == 1 ? created_at : nil, + category: category_id_from_imported_category_id(m["category_id"]), + title: CGI.unescapeHTML(m["title"]), + views: m["views"], + pinned_at: m["pinned"] == 1 ? created_at : nil, } end end @@ -146,17 +150,24 @@ class ImportScripts::SimplePress < ImportScripts::Base topic_first_post_id = {} - mysql_query(" + mysql_query( + " SELECT t.topic_id, p.post_id FROM #{TABLE_PREFIX}topics t JOIN #{TABLE_PREFIX}posts p ON p.topic_id = t.topic_id WHERE p.post_index = 1 - ").each { |r| topic_first_post_id[r["topic_id"]] = r["post_id"] } + ", + ).each { |r| topic_first_post_id[r["topic_id"]] = r["post_id"] } - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}posts WHERE post_index <> 1").first["count"] + total_count = + mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}posts WHERE post_index <> 1").first[ + "count" + ] batches(BATCH_SIZE) do |offset| - results = mysql_query(" + results = + mysql_query( + " SELECT p.post_id id, p.topic_id topic_id, p.user_id user_id, @@ -169,23 +180,24 @@ class ImportScripts::SimplePress < ImportScripts::Base ORDER BY p.post_id LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ") + ", + ) break if results.size < 1 - next if all_records_exist? :posts, results.map { |m| m['id'].to_i } + next if all_records_exist? 
:posts, results.map { |m| m["id"].to_i } create_posts(results, total: total_count, offset: offset) do |m| - if parent = topic_lookup_from_imported_post_id(topic_first_post_id[m['topic_id']]) + if parent = topic_lookup_from_imported_post_id(topic_first_post_id[m["topic_id"]]) { - id: m['id'], - user_id: user_id_from_imported_user_id(m['user_id']) || -1, + id: m["id"], + user_id: user_id_from_imported_user_id(m["user_id"]) || -1, topic_id: parent[:topic_id], - raw: process_simplepress_post(m['raw'], m['id']), - created_at: Time.zone.at(m['post_time']), + raw: process_simplepress_post(m["raw"], m["id"]), + created_at: Time.zone.at(m["post_time"]), } else - puts "Parent post #{m['topic_id']} doesn't exist. Skipping #{m["id"]}" + puts "Parent post #{m["topic_id"]} doesn't exist. Skipping #{m["id"]}" nil end end @@ -196,28 +208,27 @@ def process_simplepress_post(raw, import_id) s = raw.dup # fix invalid byte sequence in UTF-8 (ArgumentError) - unless s.valid_encoding? - s.force_encoding("UTF-8") - end + s.force_encoding("UTF-8") unless s.valid_encoding? # convert the quote line - s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) { - "[quote=\"#{convert_username($1, import_id)}, " + post_id_to_post_num_and_topic($2, import_id) + '"]' - } + s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) do + "[quote=\"#{convert_username($1, import_id)}, " + + post_id_to_post_num_and_topic($2, import_id) + '"]' + end # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) --> s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1') # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m --> - s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)') + s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)') # Many phpbb bbcode tags have a hash attached to them. Examples: # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - s.gsub!(/:(?:\w{8})\]/, ']') + s.gsub!(/:(?:\w{8})\]/, "]") # Remove mybb video tags. - s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '') + s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "") s = CGI.unescapeHTML(s) @@ -225,7 +236,7 @@ # [http://answers.yahoo.com/question/index ...
223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) # # Work around it for now: - s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[') + s.gsub!(%r{\[http(s)?://(www\.)?}, "[") s end @@ -233,7 +244,6 @@ class ImportScripts::SimplePress < ImportScripts::Base def mysql_query(sql) @client.query(sql, cache_rows: false) end - end ImportScripts::SimplePress.new.perform diff --git a/script/import_scripts/smf1.rb b/script/import_scripts/smf1.rb index 90ec314ea97..5d601f2fa2e 100644 --- a/script/import_scripts/smf1.rb +++ b/script/import_scripts/smf1.rb @@ -5,21 +5,21 @@ require "htmlentities" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Smf1 < ImportScripts::Base - - BATCH_SIZE ||= 5000 + BATCH_SIZE ||= 5000 UPLOADS_DIR ||= ENV["UPLOADS_DIR"].presence - FORUM_URL ||= ENV["FORUM_URL"].presence + FORUM_URL ||= ENV["FORUM_URL"].presence def initialize - fail "UPLOADS_DIR env variable is required (example: '/path/to/attachments')" unless UPLOADS_DIR + fail "UPLOADS_DIR env variable is required (example: '/path/to/attachments')" unless UPLOADS_DIR fail "FORUM_URL env variable is required (example: 'https://domain.com/forum')" unless FORUM_URL - @client = Mysql2::Client.new( - host: ENV["DB_HOST"] || "localhost", - username: ENV["DB_USER"] || "root", - password: ENV["DB_PW"], - database: ENV["DB_NAME"], - ) + @client = + Mysql2::Client.new( + host: ENV["DB_HOST"] || "localhost", + username: ENV["DB_USER"] || "root", + password: ENV["DB_PW"], + database: ENV["DB_NAME"], + ) check_version! @@ -29,7 +29,12 @@ class ImportScripts::Smf1 < ImportScripts::Base puts "Loading existing usernames..." - @old_to_new_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("value", "users.username").to_h + @old_to_new_usernames = + UserCustomField + .joins(:user) + .where(name: "import_username") + .pluck("value", "users.username") + .to_h puts "Loading pm mapping..." @@ -41,13 +46,14 @@ class ImportScripts::Smf1 < ImportScripts::Base .where("title NOT ILIKE 'Re: %'") .group(:id) .order(:id) - .pluck("string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id") + .pluck( + "string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id", + ) .each do |users, title, topic_id| - @pm_mapping[users] ||= {} - @pm_mapping[users][title] ||= [] - @pm_mapping[users][title] << topic_id - end - + @pm_mapping[users] ||= {} + @pm_mapping[users][title] ||= [] + @pm_mapping[users][title] << topic_id + end end def execute @@ -71,7 +77,10 @@ class ImportScripts::Smf1 < ImportScripts::Base end def check_version! - version = mysql_query("SELECT value FROM smf_settings WHERE variable = 'smfVersion' LIMIT 1").first["value"] + version = + mysql_query("SELECT value FROM smf_settings WHERE variable = 'smfVersion' LIMIT 1").first[ + "value" + ] fail "Incompatible version (#{version})" unless version&.start_with?("1.") end @@ -84,10 +93,7 @@ class ImportScripts::Smf1 < ImportScripts::Base create_groups(groups) do |g| next if g["groupName"].blank? 
- { - id: g["id_group"], - full_name: g["groupName"], - } + { id: g["id_group"], full_name: g["groupName"] } end end @@ -98,7 +104,7 @@ class ImportScripts::Smf1 < ImportScripts::Base total = mysql_query("SELECT COUNT(*) count FROM smf_members").first["count"] batches(BATCH_SIZE) do |offset| - users = mysql_query(<<~SQL + users = mysql_query(<<~SQL).to_a SELECT m.id_member , memberName , dateRegistered @@ -125,7 +131,6 @@ class ImportScripts::Smf1 < ImportScripts::Base ORDER BY m.id_member LIMIT #{BATCH_SIZE} SQL - ).to_a break if users.empty? @@ -158,38 +163,45 @@ class ImportScripts::Smf1 < ImportScripts::Base ip_address: u["memberIP2"], active: u["is_activated"] == 1, approved: u["is_activated"] == 1, - post_create_action: proc do |user| - # usernames - @old_to_new_usernames[u["memberName"]] = user.username + post_create_action: + proc do |user| + # usernames + @old_to_new_usernames[u["memberName"]] = user.username - # groups - GroupUser.transaction do - group_ids.each do |gid| - (group_id = group_id_from_imported_group_id(gid)) && GroupUser.find_or_create_by(user: user, group_id: group_id) + # groups + GroupUser.transaction do + group_ids.each do |gid| + (group_id = group_id_from_imported_group_id(gid)) && + GroupUser.find_or_create_by(user: user, group_id: group_id) + end end - end - # avatar - avatar_url = nil + # avatar + avatar_url = nil - if u["avatar"].present? - if u["avatar"].start_with?("http") - avatar_url = u["avatar"] - elsif u["avatar"].start_with?("avatar_") - avatar_url = "#{FORUM_URL}/avatar-members/#{u["avatar"]}" + if u["avatar"].present? + if u["avatar"].start_with?("http") + avatar_url = u["avatar"] + elsif u["avatar"].start_with?("avatar_") + avatar_url = "#{FORUM_URL}/avatar-members/#{u["avatar"]}" + end end - end - avatar_url ||= if u["attachmentType"] == 0 && u["id_attach"].present? - "#{FORUM_URL}/index.php?action=dlattach;attach=#{u["id_attach"]};type=avatar" - elsif u["attachmentType"] == 1 && u["filename"].present? - "#{FORUM_URL}/avatar-members/#{u["filename"]}" - end + avatar_url ||= + if u["attachmentType"] == 0 && u["id_attach"].present? + "#{FORUM_URL}/index.php?action=dlattach;attach=#{u["id_attach"]};type=avatar" + elsif u["attachmentType"] == 1 && u["filename"].present? + "#{FORUM_URL}/avatar-members/#{u["filename"]}" + end - if avatar_url.present? - UserAvatar.import_url_for_user(avatar_url, user) rescue nil - end - end + if avatar_url.present? + begin + UserAvatar.import_url_for_user(avatar_url, user) + rescue StandardError + nil + end + end + end, } end end @@ -198,7 +210,7 @@ class ImportScripts::Smf1 < ImportScripts::Base def import_categories puts "", "Importing categories..." 
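# NOTE: smf_boards is imported in two passes below: rows with id_parent == 0
# become top-level categories first, and the remaining rows are then attached
# as children once their parents have Discourse ids.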
- categories = mysql_query(<<~SQL + categories = mysql_query(<<~SQL).to_a SELECT id_board , id_parent , boardOrder @@ -207,7 +219,6 @@ class ImportScripts::Smf1 < ImportScripts::Base FROM smf_boards ORDER BY id_parent, id_board SQL - ).to_a parent_categories = categories.select { |c| c["id_parent"] == 0 } children_categories = categories.select { |c| c["id_parent"] != 0 } @@ -218,9 +229,13 @@ class ImportScripts::Smf1 < ImportScripts::Base name: c["name"], description: pre_process_raw(c["description"].presence), position: c["boardOrder"], - post_create_action: proc do |category| - Permalink.find_or_create_by(url: "forums/index.php/board,#{c["id_board"]}.0.html", category_id: category.id) - end, + post_create_action: + proc do |category| + Permalink.find_or_create_by( + url: "forums/index.php/board,#{c["id_board"]}.0.html", + category_id: category.id, + ) + end, } end @@ -231,9 +246,13 @@ class ImportScripts::Smf1 < ImportScripts::Base name: c["name"], description: pre_process_raw(c["description"].presence), position: c["boardOrder"], - post_create_action: proc do |category| - Permalink.find_or_create_by(url: "forums/index.php/board,#{c["id_board"]}.0.html", category_id: category.id) - end, + post_create_action: + proc do |category| + Permalink.find_or_create_by( + url: "forums/index.php/board,#{c["id_board"]}.0.html", + category_id: category.id, + ) + end, } end end @@ -245,7 +264,7 @@ class ImportScripts::Smf1 < ImportScripts::Base total = mysql_query("SELECT COUNT(*) count FROM smf_messages").first["count"] batches(BATCH_SIZE) do |offset| - posts = mysql_query(<<~SQL + posts = mysql_query(<<~SQL).to_a SELECT m.id_msg , m.id_topic , m.id_board @@ -262,7 +281,6 @@ class ImportScripts::Smf1 < ImportScripts::Base ORDER BY m.id_msg LIMIT #{BATCH_SIZE} SQL - ).to_a break if posts.empty? @@ -287,12 +305,18 @@ class ImportScripts::Smf1 < ImportScripts::Base post[:views] = p["numViews"] post[:pinned_at] = created_at if p["isSticky"] == 1 post[:post_create_action] = proc do |pp| - Permalink.find_or_create_by(url: "forums/index.php/topic,#{p["id_topic"]}.0.html", topic_id: pp.topic_id) + Permalink.find_or_create_by( + url: "forums/index.php/topic,#{p["id_topic"]}.0.html", + topic_id: pp.topic_id, + ) end elsif parent = topic_lookup_from_imported_post_id(p["id_first_msg"]) post[:topic_id] = parent[:topic_id] post[:post_create_action] = proc do |pp| - Permalink.find_or_create_by(url: "forums/index.php/topic,#{p["id_topic"]}.msg#{p["id_msg"]}.html", post_id: pp.id) + Permalink.find_or_create_by( + url: "forums/index.php/topic,#{p["id_topic"]}.msg#{p["id_msg"]}.html", + post_id: pp.id, + ) end else next @@ -307,10 +331,15 @@ class ImportScripts::Smf1 < ImportScripts::Base puts "", "Importing personal posts..." last_post_id = -1 - total = mysql_query("SELECT COUNT(*) count FROM smf_personal_messages WHERE deletedBySender = 0").first["count"] + total = + mysql_query( + "SELECT COUNT(*) count FROM smf_personal_messages WHERE deletedBySender = 0", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| - posts = mysql_query(<<~SQL + posts = mysql_query(<<~SQL).to_a SELECT id_pm , id_member_from , msgtime @@ -323,7 +352,6 @@ class ImportScripts::Smf1 < ImportScripts::Base ORDER BY id_pm LIMIT #{BATCH_SIZE} SQL - ).to_a break if posts.empty? @@ -335,7 +363,8 @@ class ImportScripts::Smf1 < ImportScripts::Base create_posts(posts, total: total, offset: offset) do |p| next unless user_id = user_id_from_imported_user_id(p["id_member_from"]) next if p["recipients"].blank? 
- recipients = p["recipients"].split(",").map { |id| user_id_from_imported_user_id(id) }.compact.uniq + recipients = + p["recipients"].split(",").map { |id| user_id_from_imported_user_id(id) }.compact.uniq next if recipients.empty? id = "pm-#{p["id_pm"]}" @@ -385,10 +414,13 @@ class ImportScripts::Smf1 < ImportScripts::Base count = 0 last_upload_id = -1 - total = mysql_query("SELECT COUNT(*) count FROM smf_attachments WHERE id_msg IS NOT NULL").first["count"] + total = + mysql_query("SELECT COUNT(*) count FROM smf_attachments WHERE id_msg IS NOT NULL").first[ + "count" + ] batches(BATCH_SIZE) do |offset| - uploads = mysql_query(<<~SQL + uploads = mysql_query(<<~SQL).to_a SELECT id_attach , id_msg , filename @@ -399,7 +431,6 @@ class ImportScripts::Smf1 < ImportScripts::Base ORDER BY id_attach LIMIT #{BATCH_SIZE} SQL - ).to_a break if uploads.empty? @@ -408,7 +439,13 @@ class ImportScripts::Smf1 < ImportScripts::Base uploads.each do |u| count += 1 - next unless post = PostCustomField.joins(:post).find_by(name: "import_id", value: u["id_msg"].to_s)&.post + unless post = + PostCustomField + .joins(:post) + .find_by(name: "import_id", value: u["id_msg"].to_s) + &.post + next + end path = File.join(UPLOADS_DIR, "#{u["id_attach"]}_#{u["file_hash"]}") next unless File.exist?(path) && File.size(path) > 0 @@ -433,15 +470,25 @@ class ImportScripts::Smf1 < ImportScripts::Base puts "", "Importing likes..." count = 0 - total = mysql_query("SELECT COUNT(*) count FROM smf_thank_you_post WHERE thx_time > 0").first["count"] + total = + mysql_query("SELECT COUNT(*) count FROM smf_thank_you_post WHERE thx_time > 0").first["count"] like = PostActionType.types[:like] - mysql_query("SELECT id_msg, id_member, thx_time FROM smf_thank_you_post WHERE thx_time > 0 ORDER BY id_thx_post").each do |l| + mysql_query( + "SELECT id_msg, id_member, thx_time FROM smf_thank_you_post WHERE thx_time > 0 ORDER BY id_thx_post", + ).each do |l| print_status(count += 1, total, get_start_time("likes")) next unless post_id = post_id_from_imported_post_id(l["id_msg"]) next unless user_id = user_id_from_imported_user_id(l["id_member"]) - next if PostAction.where(post_action_type_id: like, post_id: post_id, user_id: user_id).exists? - PostAction.create(post_action_type_id: like, post_id: post_id, user_id: user_id, created_at: Time.at(l["thx_time"])) + if PostAction.where(post_action_type_id: like, post_id: post_id, user_id: user_id).exists? + next + end + PostAction.create( + post_action_type_id: like, + post_id: post_id, + user_id: user_id, + created_at: Time.at(l["thx_time"]), + ) end end @@ -457,7 +504,7 @@ class ImportScripts::Smf1 < ImportScripts::Base count = 0 total = mysql_query("SELECT COUNT(*) count FROM smf_feedback WHERE approved").first["count"] - mysql_query(<<~SQL + mysql_query(<<~SQL).each do |f| SELECT feedbackid , id_member , feedbackmember_id @@ -470,7 +517,6 @@ class ImportScripts::Smf1 < ImportScripts::Base WHERE approved ORDER BY feedbackid SQL - ).each do |f| print_status(count += 1, total, get_start_time("feedbacks")) next unless user_id_from = user_id_from_imported_user_id(f["feedbackmember_id"]) next unless user_id_to = user_id_from_imported_user_id(f["id_member"]) @@ -498,7 +544,10 @@ class ImportScripts::Smf1 < ImportScripts::Base puts "", "Importing banned email domains..." 
blocklist = SiteSetting.blocked_email_domains.split("|") - banned_domains = mysql_query("SELECT SUBSTRING(email_address, 3) domain FROM smf_ban_items WHERE email_address RLIKE '^%@[^%]+$' GROUP BY email_address").map { |r| r["domain"] } + banned_domains = + mysql_query( + "SELECT SUBSTRING(email_address, 3) domain FROM smf_ban_items WHERE email_address RLIKE '^%@[^%]+$' GROUP BY email_address", + ).map { |r| r["domain"] } SiteSetting.blocked_email_domains = (blocklist + banned_domains).uniq.sort.join("|") end @@ -508,7 +557,10 @@ class ImportScripts::Smf1 < ImportScripts::Base count = 0 - banned_emails = mysql_query("SELECT email_address FROM smf_ban_items WHERE email_address RLIKE '^[^%]+@[^%]+$' GROUP BY email_address").map { |r| r["email_address"] } + banned_emails = + mysql_query( + "SELECT email_address FROM smf_ban_items WHERE email_address RLIKE '^[^%]+@[^%]+$' GROUP BY email_address", + ).map { |r| r["email_address"] } banned_emails.each do |email| print_status(count += 1, banned_emails.size, get_start_time("banned_emails")) ScreenedEmail.find_or_create_by(email: email) @@ -520,7 +572,7 @@ class ImportScripts::Smf1 < ImportScripts::Base count = 0 - banned_ips = mysql_query(<<~SQL + banned_ips = mysql_query(<<~SQL).to_a SELECT CONCAT_WS('.', ip_low1, ip_low2, ip_low3, ip_low4) low , CONCAT_WS('.', ip_high1, ip_high2, ip_high3, ip_high4) high , hits FROM smf_ban_items WHERE (ip_low1 + ip_low2 + ip_low3 + ip_low4 + ip_high1 + ip_high2 + ip_high3 + ip_high4) > 0 GROUP BY low, high, hits; SQL - ).to_a banned_ips.each do |r| print_status(count += 1, banned_ips.size, get_start_time("banned_ips")) @@ -537,15 +588,15 @@ ScreenedIpAddress.create(ip_address: r["low"], match_count: r["hits"]) end else - low_values = r["low"].split(".").map(&:to_i) + low_values = r["low"].split(".").map(&:to_i) high_values = r["high"].split(".").map(&:to_i) - first_diff = low_values.zip(high_values).count { |a, b| a == b } + first_diff = low_values.zip(high_values).count { |a, b| a == b } first_diff -= 1 if low_values[first_diff] == 0 && high_values[first_diff] == 255 - prefix = low_values[0...first_diff] - suffix = [0] * (3 - first_diff) - mask = 8 * (first_diff + 1) - values = (low_values[first_diff]..high_values[first_diff]) - hits = (r["hits"] / [1, values.count].max).floor + prefix = low_values[0...first_diff] + suffix = [0] * (3 - first_diff) + mask = 8 * (first_diff + 1) + values = (low_values[first_diff]..high_values[first_diff]) + hits = (r["hits"] / [1, values.count].max).floor values.each do |v| range_values = prefix + [v] + suffix ip_address = "#{range_values.join(".")}/#{mask}" @@ -562,10 +613,28 @@ ScreenedIpAddress.roll_up end - IGNORED_BBCODE ||= %w{ - black blue center color email flash font glow green iurl left list move red - right shadown size table time white - } + IGNORED_BBCODE ||= %w[ + black + blue + center + color + email + flash + font + glow + green + iurl + left + list + move + red + right + shadown + size + table + time + white + ] def pre_process_raw(raw) return "" if raw.blank? @@ -573,59 +642,59 @@ raw = @htmlentities.decode(raw) # [acronym] - raw.gsub!(/\[acronym=([^\]]+)\](.*?)\[\/acronym\]/im) { %{<abbr title="#{$1}">#{$2}</abbr>} } + raw.gsub!(%r{\[acronym=([^\]]+)\](.*?)\[/acronym\]}im) { %{<abbr title="#{$1}">#{$2}</abbr>} } # [br] raw.gsub!(/\[br\]/i, "\n") - raw.gsub!(/<br\s*\/?>/i, "\n") + raw.gsub!(%r{<br\s*/?>}i, "\n") # [hr] raw.gsub!(/\[hr\]/i, "<hr>")
# [sub] - raw.gsub!(/\[sub\](.*?)\[\/sub\]/im) { "<sub>#{$1}</sub>" } + raw.gsub!(%r{\[sub\](.*?)\[/sub\]}im) { "<sub>#{$1}</sub>" } # [sup] - raw.gsub!(/\[sup\](.*?)\[\/sup\]/im) { "<sup>#{$1}</sup>" } + raw.gsub!(%r{\[sup\](.*?)\[/sup\]}im) { "<sup>#{$1}</sup>" } # [html] raw.gsub!(/\[html\]/i, "\n```html\n") - raw.gsub!(/\[\/html\]/i, "\n```\n") + raw.gsub!(%r{\[/html\]}i, "\n```\n") # [php] raw.gsub!(/\[php\]/i, "\n```php\n") - raw.gsub!(/\[\/php\]/i, "\n```\n") + raw.gsub!(%r{\[/php\]}i, "\n```\n") # [code] - raw.gsub!(/\[\/?code\]/i, "\n```\n") + raw.gsub!(%r{\[/?code\]}i, "\n```\n") # [pre] - raw.gsub!(/\[\/?pre\]/i, "\n```\n") + raw.gsub!(%r{\[/?pre\]}i, "\n```\n") # [tt] - raw.gsub!(/\[\/?tt\]/i, "`") + raw.gsub!(%r{\[/?tt\]}i, "`") # [ftp] raw.gsub!(/\[ftp/i, "[url") - raw.gsub!(/\[\/ftp\]/i, "[/url]") + raw.gsub!(%r{\[/ftp\]}i, "[/url]") # [me] - raw.gsub!(/\[me=([^\]]*)\](.*?)\[\/me\]/im) { "_\\* #{$1} #{$2}_" } + raw.gsub!(%r{\[me=([^\]]*)\](.*?)\[/me\]}im) { "_\\* #{$1} #{$2}_" } # [li] - raw.gsub!(/\[li\](.*?)\[\/li\]/im) { "- #{$1}" } + raw.gsub!(%r{\[li\](.*?)\[/li\]}im) { "- #{$1}" } # puts [img] on their own line - raw.gsub!(/\[img[^\]]*\](.*?)\[\/img\]/im) { "\n#{$1}\n" } + raw.gsub!(%r{\[img[^\]]*\](.*?)\[/img\]}im) { "\n#{$1}\n" } # puts [youtube] on their own line - raw.gsub!(/\[youtube\](.*?)\[\/youtube\]/im) { "\n#{$1}\n" } + raw.gsub!(%r{\[youtube\](.*?)\[/youtube\]}im) { "\n#{$1}\n" } - IGNORED_BBCODE.each { |code| raw.gsub!(/\[#{code}[^\]]*\](.*?)\[\/#{code}\]/im, '\1') } + IGNORED_BBCODE.each { |code| raw.gsub!(%r{\[#{code}[^\]]*\](.*?)\[/#{code}\]}im, '\1') } # ensure [/quote] are on their own line - raw.gsub!(/\s*\[\/quote\]\s*/im, "\n[/quote]\n") + raw.gsub!(%r{\s*\[/quote\]\s*}im, "\n[/quote]\n") # [quote] - raw.gsub!(/\s*\[quote (.+?)\]\s/im) { + raw.gsub!(/\s*\[quote (.+?)\]\s/im) do params = $1 post_id = params[/msg(\d+)/, 1] username = params[/author=(.+) link=/, 1] @@ -636,14 +705,14 @@ class ImportScripts::Smf1 < ImportScripts::Base else %{\n[quote="#{username}"]\n} end - } + end # remove tapatalk mess - raw.gsub!(/Sent from .+? using \[url=.*?\].+?\[\/url\]/i, "") + raw.gsub!(%r{Sent from .+? using \[url=.*?\].+?\[/url\]}i, "") raw.gsub!(/Sent from .+? 
using .+?\z/i, "") # clean URLs - raw.gsub!(/\[url=(.+?)\]\1\[\/url\]/i, '\1') + raw.gsub!(%r{\[url=(.+?)\]\1\[/url\]}i, '\1') raw end @@ -651,7 +720,6 @@ class ImportScripts::Smf1 < ImportScripts::Base def mysql_query(sql) @client.query(sql) end - end ImportScripts::Smf1.new.perform diff --git a/script/import_scripts/smf2.rb b/script/import_scripts/smf2.rb index a70faff4cb9..97eb20a92ba 100644 --- a/script/import_scripts/smf2.rb +++ b/script/import_scripts/smf2.rb @@ -1,18 +1,17 @@ # coding: utf-8 # frozen_string_literal: true -require 'mysql2' -require File.expand_path(File.dirname(__FILE__) + '/base.rb') +require "mysql2" +require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'htmlentities' -require 'tsort' -require 'set' -require 'optparse' -require 'etc' -require 'open3' +require "htmlentities" +require "tsort" +require "set" +require "optparse" +require "etc" +require "open3" class ImportScripts::Smf2 < ImportScripts::Base - def self.run options = Options.new begin @@ -54,9 +53,9 @@ class ImportScripts::Smf2 < ImportScripts::Base exit 1 end if options.password == :ask - require 'highline' + require "highline" $stderr.print "Enter password for MySQL database `#{options.database}`: " - options.password = HighLine.new.ask('') { |q| q.echo = false } + options.password = HighLine.new.ask("") { |q| q.echo = false } end @default_db_connection = create_db_connection @@ -68,11 +67,11 @@ class ImportScripts::Smf2 < ImportScripts::Base import_categories import_posts postprocess_posts - make_prettyurl_permalinks('/forum') + make_prettyurl_permalinks("/forum") end def import_groups - puts '', 'creating groups' + puts "", "creating groups" total = query(<<-SQL, as: :single) SELECT COUNT(*) FROM {prefix}membergroups @@ -92,7 +91,7 @@ class ImportScripts::Smf2 < ImportScripts::Base MODERATORS_GROUP = 2 def import_users - puts '', 'creating users' + puts "", "creating users" total = query("SELECT COUNT(*) FROM {prefix}members", as: :single) create_users(query(<<-SQL), total: total) do |member| @@ -103,10 +102,25 @@ class ImportScripts::Smf2 < ImportScripts::Base FROM {prefix}members AS a LEFT JOIN {prefix}attachments AS b ON a.id_member = b.id_member SQL - group_ids = [ member[:id_group], *member[:additional_groups].split(',').map(&:to_i) ] - create_time = Time.zone.at(member[:date_registered]) rescue Time.now - last_seen_time = Time.zone.at(member[:last_login]) rescue nil - ip_addr = IPAddr.new(member[:member_ip]) rescue nil + group_ids = [member[:id_group], *member[:additional_groups].split(",").map(&:to_i)] + create_time = + begin + Time.zone.at(member[:date_registered]) + rescue StandardError + Time.now + end + last_seen_time = + begin + Time.zone.at(member[:last_login]) + rescue StandardError + nil + end + ip_addr = + begin + IPAddr.new(member[:member_ip]) + rescue StandardError + nil + end { id: member[:id_member], username: member[:member_name], @@ -121,27 +135,33 @@ class ImportScripts::Smf2 < ImportScripts::Base ip_address: ip_addr, admin: group_ids.include?(ADMIN_GROUP), moderator: group_ids.include?(MODERATORS_GROUP), - - post_create_action: proc do |user| - user.update(created_at: create_time) if create_time < user.created_at - user.save - GroupUser.transaction do - group_ids.each do |gid| - (group_id = group_id_from_imported_group_id(gid)) && - GroupUser.find_or_create_by(user: user, group_id: group_id) - end - end - if options.smfroot && member[:id_attach].present? && user.uploaded_avatar_id.blank? 
- (path = find_smf_attachment_path(member[:id_attach], member[:file_hash], member[:filename])) && begin - upload = create_upload(user.id, path, member[:filename]) - if upload.persisted? - user.update(uploaded_avatar_id: upload.id) + post_create_action: + proc do |user| + user.update(created_at: create_time) if create_time < user.created_at + user.save + GroupUser.transaction do + group_ids.each do |gid| + (group_id = group_id_from_imported_group_id(gid)) && + GroupUser.find_or_create_by(user: user, group_id: group_id) end - rescue SystemCallError => err - puts "Could not import avatar: #{err.message}" end - end - end + if options.smfroot && member[:id_attach].present? && user.uploaded_avatar_id.blank? + ( + path = + find_smf_attachment_path( + member[:id_attach], + member[:file_hash], + member[:filename], + ) + ) && + begin + upload = create_upload(user.id, path, member[:filename]) + user.update(uploaded_avatar_id: upload.id) if upload.persisted? + rescue SystemCallError => err + puts "Could not import avatar: #{err.message}" + end + end + end, } end end @@ -155,38 +175,39 @@ class ImportScripts::Smf2 < ImportScripts::Base parent_id = category_id_from_imported_category_id(board[:id_parent]) if board[:id_parent] > 0 groups = (board[:member_groups] || "").split(/,/).map(&:to_i) restricted = !groups.include?(GUEST_GROUP) && !groups.include?(MEMBER_GROUP) - if Category.find_by_name(board[:name]) - board[:name] += board[:id_board].to_s - end + board[:name] += board[:id_board].to_s if Category.find_by_name(board[:name]) { id: board[:id_board], name: board[:name], description: board[:description], parent_category_id: parent_id, - post_create_action: restricted && proc do |category| - category.update(read_restricted: true) - groups.each do |imported_group_id| - (group_id = group_id_from_imported_group_id(imported_group_id)) && - CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg| - cg.permission_type = CategoryGroup.permission_types[:full] - end - end - end, + post_create_action: + restricted && + proc do |category| + category.update(read_restricted: true) + groups.each do |imported_group_id| + (group_id = group_id_from_imported_group_id(imported_group_id)) && + CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg| + cg.permission_type = CategoryGroup.permission_types[:full] + end + end + end, } end end def import_posts - puts '', 'creating posts' - spinner = %w(/ - \\ |).cycle + puts "", "creating posts" + spinner = %w[/ - \\ |].cycle total = query("SELECT COUNT(*) FROM {prefix}messages", as: :single) PostCreator.class_eval do def guardian - @guardian ||= if opts[:import_mode] - @@system_guardian ||= Guardian.new(Discourse.system_user) - else - Guardian.new(@user) - end + @guardian ||= + if opts[:import_mode] + @@system_guardian ||= Guardian.new(Discourse.system_user) + else + Guardian.new(@user) + end end end @@ -208,10 +229,12 @@ class ImportScripts::Smf2 < ImportScripts::Base id: message[:id_msg], user_id: user_id_from_imported_user_id(message[:id_member]) || -1, created_at: Time.zone.at(message[:poster_time]), - post_create_action: ignore_quotes && proc do |p| - p.custom_fields['import_rebake'] = 't' - p.save - end + post_create_action: + ignore_quotes && + proc do |p| + p.custom_fields["import_rebake"] = "t" + p.save + end, } if message[:id_msg] == message[:id_first_msg] @@ -228,31 +251,48 @@ class ImportScripts::Smf2 < ImportScripts::Base end next nil if skip - attachments = message[:attachment_count] == 0 ? 
[] : query(<<-SQL, connection: db2, as: :array) + attachments = + message[:attachment_count] == 0 ? [] : query(<<-SQL, connection: db2, as: :array) SELECT id_attach, file_hash, filename FROM {prefix}attachments WHERE attachment_type = 0 AND id_msg = #{message[:id_msg]} ORDER BY id_attach ASC SQL - attachments.map! { |a| import_attachment(post, a) rescue (puts $! ; nil) } + attachments.map! do |a| + begin + import_attachment(post, a) + rescue StandardError + ( + puts $! + nil + ) + end + end post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes) next post end end def import_attachment(post, attachment) - path = find_smf_attachment_path(attachment[:id_attach], attachment[:file_hash], attachment[:filename]) + path = + find_smf_attachment_path( + attachment[:id_attach], + attachment[:file_hash], + attachment[:filename], + ) raise "Attachment for post #{post[:id]} failed: #{attachment[:filename]}" unless path.present? upload = create_upload(post[:user_id], path, attachment[:filename]) - raise "Attachment for post #{post[:id]} failed: #{upload.errors.full_messages.join(', ')}" unless upload.persisted? + unless upload.persisted? + raise "Attachment for post #{post[:id]} failed: #{upload.errors.full_messages.join(", ")}" + end upload rescue SystemCallError => err raise "Attachment for post #{post[:id]} failed: #{err.message}" end def postprocess_posts - puts '', 'rebaking posts' + puts "", "rebaking posts" - tags = PostCustomField.where(name: 'import_rebake', value: 't') + tags = PostCustomField.where(name: "import_rebake", value: "t") tags_total = tags.count tags_done = 0 @@ -271,38 +311,47 @@ class ImportScripts::Smf2 < ImportScripts::Base private def create_db_connection - Mysql2::Client.new(host: options.host, username: options.username, - password: options.password, database: options.database) + Mysql2::Client.new( + host: options.host, + username: options.username, + password: options.password, + database: options.database, + ) end def query(sql, **opts, &block) db = opts[:connection] || @default_db_connection - return __query(db, sql).to_a if opts[:as] == :array - return __query(db, sql, as: :array).first[0] if opts[:as] == :single + return __query(db, sql).to_a if opts[:as] == :array + return __query(db, sql, as: :array).first[0] if opts[:as] == :single return __query(db, sql, stream: true).each(&block) if block_given? 
__query(db, sql, stream: true) end def __query(db, sql, **opts) - db.query(sql.gsub('{prefix}', options.prefix), - { symbolize_keys: true, cache_rows: false }.merge(opts)) + db.query( + sql.gsub("{prefix}", options.prefix), + { symbolize_keys: true, cache_rows: false }.merge(opts), + ) end - TRTR_TABLE = begin - from = "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ" - to = "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy" - from.chars.zip(to.chars) - end + TRTR_TABLE = + begin + from = "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ" + to = "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy" + from.chars.zip(to.chars) + end def find_smf_attachment_path(attachment_id, file_hash, filename) cleaned_name = filename.dup TRTR_TABLE.each { |from, to| cleaned_name.gsub!(from, to) } - cleaned_name.gsub!(/\s/, '_') - cleaned_name.gsub!(/[^\w_\.\-]/, '') - legacy_name = "#{attachment_id}_#{cleaned_name.gsub('.', '_')}#{Digest::MD5.hexdigest(cleaned_name)}" + cleaned_name.gsub!(/\s/, "_") + cleaned_name.gsub!(/[^\w_\.\-]/, "") + legacy_name = + "#{attachment_id}_#{cleaned_name.gsub(".", "_")}#{Digest::MD5.hexdigest(cleaned_name)}" - [ filename, "#{attachment_id}_#{file_hash}", legacy_name ] - .map { |name| File.join(options.smfroot, 'attachments', name) } + [filename, "#{attachment_id}_#{file_hash}", legacy_name].map do |name| + File.join(options.smfroot, "attachments", name) + end .detect { |file| File.exist?(file) } end @@ -311,16 +360,16 @@ class ImportScripts::Smf2 < ImportScripts::Base end def convert_message_body(body, attachments = [], **opts) - body = decode_entities(body.gsub(/<br\s*\/?>/, "\n")) + body = decode_entities(body.gsub(%r{<br\s*/?>}, "\n")) body.gsub!(ColorPattern, '\k<inner>') body.gsub!(ListPattern) do |s| params = parse_tag_params($~[:params]) - tag = params['type'] == 'decimal' ? 'ol' : 'ul' + tag = params["type"] == "decimal" ? "ol" : "ul" "\n[#{tag}]#{$~[:inner].strip}[/#{tag}]\n" end body.gsub!(XListPattern) do |s| r = +"\n[ul]" - s.lines.each { |l| "#{r}[li]#{l.strip.sub(/^\[x\]\s*/, '')}[/li]" } + s.lines.each { |l| "#{r}[li]#{l.strip.sub(/^\[x\]\s*/, "")}[/li]" } "#{r}[/ul]\n" end @@ -338,9 +387,7 @@ class ImportScripts::Smf2 < ImportScripts::Base if use_count.keys.length < attachments.select(&:present?).length body = "#{body}\n\n---" attachments.each_with_index do |upload, num| - if upload.present? && use_count[num] == (0) - "#{body}\n\n#{get_upload_markdown(upload)}" - end + "#{body}\n\n#{get_upload_markdown(upload)}" if upload.present? && use_count[num] == (0) end end end @@ -353,26 +400,46 @@ class ImportScripts::Smf2 < ImportScripts::Base end def convert_quotes(body) - body.to_s.gsub(QuotePattern) do |s| - inner = $~[:inner].strip - params = parse_tag_params($~[:params]) - if params['author'].present? - quote = +"\n[quote=\"#{params['author']}" - if QuoteParamsPattern =~ params['link'] - tl = topic_lookup_from_imported_post_id($~[:msg].to_i) - quote = "#{quote} post:#{tl[:post_number]}, topic:#{tl[:topic_id]}" if tl + body + .to_s + .gsub(QuotePattern) do |s| + inner = $~[:inner].strip + params = parse_tag_params($~[:params]) + if params["author"].present? + quote = +"\n[quote=\"#{params["author"]}" + if QuoteParamsPattern =~ params["link"] + tl = topic_lookup_from_imported_post_id($~[:msg].to_i) + quote = "#{quote} post:#{tl[:post_number]}, topic:#{tl[:topic_id]}" if tl + end + quote = "#{quote}\"]\n#{convert_quotes(inner)}\n[/quote]" + else + "
<blockquote>#{convert_quotes(inner)}</blockquote>
" end - quote = "#{quote}\"]\n#{convert_quotes(inner)}\n[/quote]" - else - "
<blockquote>#{convert_quotes(inner)}</blockquote>
" end - end end - IGNORED_BBCODE ||= %w{ - black blue center color email flash font glow green iurl left list move red - right shadown size table time white - } + IGNORED_BBCODE ||= %w[ + black + blue + center + color + email + flash + font + glow + green + iurl + left + list + move + red + right + shadown + size + table + time + white + ] def convert_bbcode(raw) return "" if raw.blank? @@ -380,67 +447,67 @@ class ImportScripts::Smf2 < ImportScripts::Base raw = convert_quotes(raw) # [acronym] - raw.gsub!(/\[acronym=([^\]]+)\](.*?)\[\/acronym\]/im) { %{#{$2}} } + raw.gsub!(%r{\[acronym=([^\]]+)\](.*?)\[/acronym\]}im) { %{#{$2}} } # [br] raw.gsub!(/\[br\]/i, "\n") - raw.gsub!(//i, "\n") + raw.gsub!(%r{}i, "\n") # [hr] raw.gsub!(/\[hr\]/i, "
") # [sub] - raw.gsub!(/\[sub\](.*?)\[\/sub\]/im) { "#{$1}" } + raw.gsub!(%r{\[sub\](.*?)\[/sub\]}im) { "#{$1}" } # [sup] - raw.gsub!(/\[sup\](.*?)\[\/sup\]/im) { "#{$1}" } + raw.gsub!(%r{\[sup\](.*?)\[/sup\]}im) { "#{$1}" } # [html] raw.gsub!(/\[html\]/i, "\n```html\n") - raw.gsub!(/\[\/html\]/i, "\n```\n") + raw.gsub!(%r{\[/html\]}i, "\n```\n") # [php] raw.gsub!(/\[php\]/i, "\n```php\n") - raw.gsub!(/\[\/php\]/i, "\n```\n") + raw.gsub!(%r{\[/php\]}i, "\n```\n") # [code] - raw.gsub!(/\[\/?code\]/i, "\n```\n") + raw.gsub!(%r{\[/?code\]}i, "\n```\n") # [pre] - raw.gsub!(/\[\/?pre\]/i, "\n```\n") + raw.gsub!(%r{\[/?pre\]}i, "\n```\n") # [tt] - raw.gsub!(/\[\/?tt\]/i, "`") + raw.gsub!(%r{\[/?tt\]}i, "`") # [ftp] raw.gsub!(/\[ftp/i, "[url") - raw.gsub!(/\[\/ftp\]/i, "[/url]") + raw.gsub!(%r{\[/ftp\]}i, "[/url]") # [me] - raw.gsub!(/\[me=([^\]]*)\](.*?)\[\/me\]/im) { "_\\* #{$1} #{$2}_" } + raw.gsub!(%r{\[me=([^\]]*)\](.*?)\[/me\]}im) { "_\\* #{$1} #{$2}_" } # [ul] raw.gsub!(/\[ul\]/i, "") - raw.gsub!(/\[\/ul\]/i, "") + raw.gsub!(%r{\[/ul\]}i, "") # [li] - raw.gsub!(/\[li\](.*?)\[\/li\]/im) { "- #{$1}" } + raw.gsub!(%r{\[li\](.*?)\[/li\]}im) { "- #{$1}" } # puts [img] on their own line - raw.gsub!(/\[img[^\]]*\](.*?)\[\/img\]/im) { "\n#{$1}\n" } + raw.gsub!(%r{\[img[^\]]*\](.*?)\[/img\]}im) { "\n#{$1}\n" } # puts [youtube] on their own line - raw.gsub!(/\[youtube\](.*?)\[\/youtube\]/im) { "\n#{$1}\n" } + raw.gsub!(%r{\[youtube\](.*?)\[/youtube\]}im) { "\n#{$1}\n" } - IGNORED_BBCODE.each { |code| raw.gsub!(/\[#{code}[^\]]*\](.*?)\[\/#{code}\]/im, '\1') } + IGNORED_BBCODE.each { |code| raw.gsub!(%r{\[#{code}[^\]]*\](.*?)\[/#{code}\]}im, '\1') } # ensure [/quote] are on their own line - raw.gsub!(/\s*\[\/quote\]\s*/im, "\n[/quote]\n") + raw.gsub!(%r{\s*\[/quote\]\s*}im, "\n[/quote]\n") # remove tapatalk mess - raw.gsub!(/Sent from .+? using \[url=.*?\].+?\[\/url\]/i, "") + raw.gsub!(%r{Sent from .+? using \[url=.*?\].+?\[/url\]}i, "") raw.gsub!(/Sent from .+? using .+?\z/i, "") # clean URLs - raw.gsub!(/\[url=(.+?)\]\1\[\/url\]/i, '\1') + raw.gsub!(%r{\[url=(.+?)\]\1\[/url\]}i, '\1') raw end @@ -460,8 +527,14 @@ class ImportScripts::Smf2 < ImportScripts::Base # param1=value1=still1 value1 param2=value2 ... # => {'param1' => 'value1=still1 value1', 'param2' => 'value2 ...'} def parse_tag_params(params) - params.to_s.strip.scan(/(?\w+)=(?(?:(?>\S+)|\s+(?!\w+=))*)/). - inject({}) { |h, e| h[e[0]] = e[1]; h } + params + .to_s + .strip + .scan(/(?\w+)=(?(?:(?>\S+)|\s+(?!\w+=))*)/) + .inject({}) do |h, e| + h[e[0]] = e[1] + h + end end class << self @@ -474,8 +547,8 @@ class ImportScripts::Smf2 < ImportScripts::Base # => match[:params] == 'param=value param2=value2' # match[:inner] == "\n text\n [tag nested=true]text[/tag]\n" def build_nested_tag_regex(ltag, rtag = nil) - rtag ||= '/' + ltag - %r{ + rtag ||= "/" + ltag + / \[#{ltag}(?-x:[ =](?[^\]]*))?\] # consume open tag, followed by... (?(?: (?> [^\[]+ ) # non-tags, or... 
@@ -495,40 +568,41 @@ class ImportScripts::Smf2 < ImportScripts::Base ) )*) \[#{rtag}\] - }x + /x end end QuoteParamsPattern = /^topic=(?\d+).msg(?\d+)#msg\k$/ XListPattern = /(?(?>^\[x\]\s*(?.*)$\n?)+)/ - QuotePattern = build_nested_tag_regex('quote') - ColorPattern = build_nested_tag_regex('color') - ListPattern = build_nested_tag_regex('list') + QuotePattern = build_nested_tag_regex("quote") + ColorPattern = build_nested_tag_regex("color") + ListPattern = build_nested_tag_regex("list") AttachmentPatterns = [ [/^\[attach(?:|img|url|mini)=(?\d+)\]$/, ->(u) { "\n" + get_upload_markdown(u) + "\n" }], - [/\[attach(?:|img|url|mini)=(?\d+)\]/, ->(u) { get_upload_markdown(u) }] + [/\[attach(?:|img|url|mini)=(?\d+)\]/, ->(u) { get_upload_markdown(u) }], ] # Provides command line options and parses the SMF settings file. class Options - - class Error < StandardError ; end - class SettingsError < Error ; end + class Error < StandardError + end + class SettingsError < Error + end def parse!(args = ARGV) - raise Error, 'not enough arguments' if ARGV.empty? + raise Error, "not enough arguments" if ARGV.empty? begin parser.parse!(args) rescue OptionParser::ParseError => err raise Error, err.message end - raise Error, 'too many arguments' if args.length > 1 + raise Error, "too many arguments" if args.length > 1 self.smfroot = args.first read_smf_settings if self.smfroot - self.host ||= 'localhost' + self.host ||= "localhost" self.username ||= Etc.getlogin - self.prefix ||= 'smf_' + self.prefix ||= "smf_" self.timezone ||= get_php_timezone end @@ -547,44 +621,63 @@ class ImportScripts::Smf2 < ImportScripts::Base private def get_php_timezone - phpinfo, status = Open3.capture2('php', '-i') + phpinfo, status = Open3.capture2("php", "-i") phpinfo.lines.each do |line| - key, *vals = line.split(' => ').map(&:strip) - break vals[0] if key == 'Default timezone' + key, *vals = line.split(" => ").map(&:strip) + break vals[0] if key == "Default timezone" end rescue Errno::ENOENT $stderr.puts "Error: PHP CLI executable not found" end def read_smf_settings - settings = File.join(self.smfroot, 'Settings.php') - File.readlines(settings).each do |line| - next unless m = /\$([a-z_]+)\s*=\s*['"](.+?)['"]\s*;\s*((#|\/\/).*)?$/.match(line) - case m[1] - when 'db_server' then self.host ||= m[2] - when 'db_user' then self.username ||= m[2] - when 'db_passwd' then self.password ||= m[2] - when 'db_name' then self.database ||= m[2] - when 'db_prefix' then self.prefix ||= m[2] + settings = File.join(self.smfroot, "Settings.php") + File + .readlines(settings) + .each do |line| + next unless m = %r{\$([a-z_]+)\s*=\s*['"](.+?)['"]\s*;\s*((#|//).*)?$}.match(line) + case m[1] + when "db_server" + self.host ||= m[2] + when "db_user" + self.username ||= m[2] + when "db_passwd" + self.password ||= m[2] + when "db_name" + self.database ||= m[2] + when "db_prefix" + self.prefix ||= m[2] + end end - end rescue => err raise SettingsError, err.message unless self.database end def parser - @parser ||= OptionParser.new(nil, 12) do |o| - o.banner = "Usage:\t#{File.basename($0)} [options]\n" - o.banner = "${o.banner}\t#{File.basename($0)} -d [options]" - o.on('-h HOST', :REQUIRED, "MySQL server hostname [\"#{self.host}\"]") { |s| self.host = s } - o.on('-u USER', :REQUIRED, "MySQL username [\"#{self.username}\"]") { |s| self.username = s } - o.on('-p [PASS]', :OPTIONAL, 'MySQL password. 
Without argument, reads password from STDIN.') { |s| self.password = s || :ask } - o.on('-d DBNAME', :REQUIRED, 'Name of SMF database') { |s| self.database = s } - o.on('-f PREFIX', :REQUIRED, "Table names prefix [\"#{self.prefix}\"]") { |s| self.prefix = s } - o.on('-t TIMEZONE', :REQUIRED, 'Timezone used by SMF2 [auto-detected from PHP]') { |s| self.timezone = s } - end + @parser ||= + OptionParser.new(nil, 12) do |o| + o.banner = "Usage:\t#{File.basename($0)} [options]\n" + o.banner = "${o.banner}\t#{File.basename($0)} -d [options]" + o.on("-h HOST", :REQUIRED, "MySQL server hostname [\"#{self.host}\"]") do |s| + self.host = s + end + o.on("-u USER", :REQUIRED, "MySQL username [\"#{self.username}\"]") do |s| + self.username = s + end + o.on( + "-p [PASS]", + :OPTIONAL, + "MySQL password. Without argument, reads password from STDIN.", + ) { |s| self.password = s || :ask } + o.on("-d DBNAME", :REQUIRED, "Name of SMF database") { |s| self.database = s } + o.on("-f PREFIX", :REQUIRED, "Table names prefix [\"#{self.prefix}\"]") do |s| + self.prefix = s + end + o.on("-t TIMEZONE", :REQUIRED, "Timezone used by SMF2 [auto-detected from PHP]") do |s| + self.timezone = s + end + end end - end #Options # Framework around TSort, used to build a dependency graph over messages @@ -644,10 +737,14 @@ class ImportScripts::Smf2 < ImportScripts::Base end def dependencies - @dependencies ||= Set.new.tap do |deps| - deps.merge(quoted) unless ignore_quotes? - deps << prev if prev.present? - end.to_a + @dependencies ||= + Set + .new + .tap do |deps| + deps.merge(quoted) unless ignore_quotes? + deps << prev if prev.present? + end + .to_a end def hash @@ -659,7 +756,7 @@ class ImportScripts::Smf2 < ImportScripts::Base end def inspect - "#<#{self.class.name}: id=#{id.inspect}, prev=#{safe_id(@prev)}, quoted=[#{@quoted.map(&method(:safe_id)).join(', ')}]>" + "#<#{self.class.name}: id=#{id.inspect}, prev=#{safe_id(@prev)}, quoted=[#{@quoted.map(&method(:safe_id)).join(", ")}]>" end private @@ -668,11 +765,10 @@ class ImportScripts::Smf2 < ImportScripts::Base @graph[id].present? ? 
@graph[id].id.inspect : "(#{id})" end end #Node - end #MessageDependencyGraph def make_prettyurl_permalinks(prefix) - puts 'creating permalinks for prettyurl plugin' + puts "creating permalinks for prettyurl plugin" begin serialized = query(<<-SQL, as: :single) SELECT value FROM {prefix}settings @@ -680,9 +776,7 @@ class ImportScripts::Smf2 < ImportScripts::Base SQL board_slugs = Array.new ser = /\{(.*)\}/.match(serialized)[1] - ser.scan(/i:(\d+);s:\d+:\"(.*?)\";/).each do |nv| - board_slugs[nv[0].to_i] = nv[1] - end + ser.scan(/i:(\d+);s:\d+:\"(.*?)\";/).each { |nv| board_slugs[nv[0].to_i] = nv[1] } topic_urls = query(<<-SQL, as: :array) SELECT t.id_first_msg, t.id_board,u.pretty_url FROM smf_topics t @@ -690,12 +784,14 @@ class ImportScripts::Smf2 < ImportScripts::Base SQL topic_urls.each do |url| t = topic_lookup_from_imported_post_id(url[:id_first_msg]) - Permalink.create(url: "#{prefix}/#{board_slugs[url[:id_board]]}/#{url[:pretty_url]}", topic_id: t[:topic_id]) + Permalink.create( + url: "#{prefix}/#{board_slugs[url[:id_board]]}/#{url[:pretty_url]}", + topic_id: t[:topic_id], + ) end - rescue + rescue StandardError end end - end ImportScripts::Smf2.run diff --git a/script/import_scripts/socialcast/create_title.rb b/script/import_scripts/socialcast/create_title.rb index 8af625eddb5..ea656fadb7e 100644 --- a/script/import_scripts/socialcast/create_title.rb +++ b/script/import_scripts/socialcast/create_title.rb @@ -1,9 +1,8 @@ # frozen_string_literal: true -require 'uri' +require "uri" class CreateTitle - def self.from_body(body) title = remove_mentions body title = remove_urls title @@ -24,11 +23,11 @@ class CreateTitle private def self.remove_mentions(text) - text.gsub(/@[\w]*/, '') + text.gsub(/@[\w]*/, "") end def self.remove_urls(text) - text.gsub(URI::regexp(['http', 'https', 'mailto', 'ftp', 'ldap', 'ldaps']), '') + text.gsub(URI.regexp(%w[http https mailto ftp ldap ldaps]), "") end def self.remove_stray_punctuation(text) @@ -42,7 +41,7 @@ class CreateTitle end def self.complete_sentences(text) - /(^.*[\S]{2,}[.!?:]+)\W/.match(text[0...80] + ' ') + /(^.*[\S]{2,}[.!?:]+)\W/.match(text[0...80] + " ") end def self.complete_words(text) diff --git a/script/import_scripts/socialcast/export.rb b/script/import_scripts/socialcast/export.rb index 1c44c7c5c99..ac3a4690c1e 100644 --- a/script/import_scripts/socialcast/export.rb +++ b/script/import_scripts/socialcast/export.rb @@ -1,14 +1,14 @@ # frozen_string_literal: true -require 'yaml' -require 'fileutils' -require_relative 'socialcast_api' +require "yaml" +require "fileutils" +require_relative "socialcast_api" def load_config(file) - config = YAML::load_file(File.join(__dir__, file)) - @domain = config['domain'] - @username = config['username'] - @password = config['password'] + config = YAML.load_file(File.join(__dir__, file)) + @domain = config["domain"] + @username = config["username"] + @password = config["password"] end def export @@ -23,8 +23,8 @@ def export_users(page = 1) users = @api.list_users(page: page) return if users.empty? users.each do |user| - File.open("output/users/#{user['id']}.json", 'w') do |f| - puts user['contact_info']['email'] + File.open("output/users/#{user["id"]}.json", "w") do |f| + puts user["contact_info"]["email"] f.write user.to_json f.close end @@ -36,12 +36,12 @@ def export_messages(page = 1) messages = @api.list_messages(page: page) return if messages.empty? 
messages.each do |message| - File.open("output/messages/#{message['id']}.json", 'w') do |f| - title = message['title'] - title = message['body'] if title.empty? + File.open("output/messages/#{message["id"]}.json", "w") do |f| + title = message["title"] + title = message["body"] if title.empty? title = title.split('\n')[0][0..50] unless title.empty? - puts "#{message['id']}: #{title}" + puts "#{message["id"]}: #{title}" f.write message.to_json f.close end @@ -51,9 +51,7 @@ end def create_dir(path) path = File.join(__dir__, path) - unless File.directory?(path) - FileUtils.mkdir_p(path) - end + FileUtils.mkdir_p(path) unless File.directory?(path) end load_config ARGV.shift diff --git a/script/import_scripts/socialcast/import.rb b/script/import_scripts/socialcast/import.rb index 413fd18ff81..c20237f66ce 100644 --- a/script/import_scripts/socialcast/import.rb +++ b/script/import_scripts/socialcast/import.rb @@ -1,12 +1,11 @@ # frozen_string_literal: true -require_relative './socialcast_message.rb' -require_relative './socialcast_user.rb' -require 'set' +require_relative "./socialcast_message.rb" +require_relative "./socialcast_user.rb" +require "set" require File.expand_path(File.dirname(__FILE__) + "/../base.rb") class ImportScripts::Socialcast < ImportScripts::Base - MESSAGES_DIR = "output/messages" USERS_DIR = "output/users" @@ -29,15 +28,13 @@ class ImportScripts::Socialcast < ImportScripts::Base imported = 0 total = count_files(MESSAGES_DIR) Dir.foreach(MESSAGES_DIR) do |filename| - next if filename == ('.') || filename == ('..') + next if filename == (".") || filename == ("..") topics += 1 - message_json = File.read MESSAGES_DIR + '/' + filename + message_json = File.read MESSAGES_DIR + "/" + filename message = SocialcastMessage.new(message_json) next unless message.title created_topic = import_topic message.topic - if created_topic - import_posts message.replies, created_topic.topic_id - end + import_posts message.replies, created_topic.topic_id if created_topic imported += 1 print_status topics, total end @@ -48,8 +45,8 @@ class ImportScripts::Socialcast < ImportScripts::Base users = 0 total = count_files(USERS_DIR) Dir.foreach(USERS_DIR) do |filename| - next if filename == ('.') || filename == ('..') - user_json = File.read USERS_DIR + '/' + filename + next if filename == (".") || filename == ("..") + user_json = File.read USERS_DIR + "/" + filename user = SocialcastUser.new(user_json).user create_user user, user[:id] users += 1 @@ -58,7 +55,7 @@ class ImportScripts::Socialcast < ImportScripts::Base end def count_files(path) - Dir.foreach(path).select { |f| f != '.' && f != '..' }.count + Dir.foreach(path).select { |f| f != "." && f != ".." 
}.count end def import_topic(topic) @@ -80,9 +77,7 @@ class ImportScripts::Socialcast < ImportScripts::Base end def import_posts(posts, topic_id) - posts.each do |post| - import_post post, topic_id - end + posts.each { |post| import_post post, topic_id } end def import_post(post, topic_id) @@ -95,9 +90,6 @@ class ImportScripts::Socialcast < ImportScripts::Base puts new_post.inspect end end - end -if __FILE__ == $0 - ImportScripts::Socialcast.new.perform -end +ImportScripts::Socialcast.new.perform if __FILE__ == $0 diff --git a/script/import_scripts/socialcast/socialcast_api.rb b/script/import_scripts/socialcast/socialcast_api.rb index 84fc6397703..6b080692c3e 100644 --- a/script/import_scripts/socialcast/socialcast_api.rb +++ b/script/import_scripts/socialcast/socialcast_api.rb @@ -1,10 +1,9 @@ # frozen_string_literal: true -require 'base64' -require 'json' +require "base64" +require "json" class SocialcastApi - attr_accessor :domain, :username, :password def initialize(domain, username, password) @@ -29,12 +28,12 @@ class SocialcastApi def list_users(opts = {}) page = opts[:page] ? opts[:page] : 1 response = request "#{base_url}/users?page=#{page}" - response['users'].sort { |u| u['id'] } + response["users"].sort { |u| u["id"] } end def list_messages(opts = {}) page = opts[:page] ? opts[:page] : 1 response = request "#{base_url}/messages?page=#{page}" - response['messages'].sort { |m| m['id'] } + response["messages"].sort { |m| m["id"] } end end diff --git a/script/import_scripts/socialcast/socialcast_message.rb b/script/import_scripts/socialcast/socialcast_message.rb index 4c7cf7a445d..457713983a5 100644 --- a/script/import_scripts/socialcast/socialcast_message.rb +++ b/script/import_scripts/socialcast/socialcast_message.rb @@ -1,24 +1,23 @@ # frozen_string_literal: true -require 'json' -require 'cgi' -require 'time' -require_relative 'create_title.rb' +require "json" +require "cgi" +require "time" +require_relative "create_title.rb" class SocialcastMessage - DEFAULT_CATEGORY = "Socialcast Import" DEFAULT_TAG = "socialcast-import" TAGS_AND_CATEGORIES = { "somegroupname" => { category: "Apple Stems", - tags: ["waxy", "tough"] + tags: %w[waxy tough], }, "someothergroupname" => { category: "Orange Peels", - tags: ["oily"] - } - } + tags: ["oily"], + }, + } def initialize(message_json) @parsed_json = JSON.parse message_json @@ -26,18 +25,18 @@ class SocialcastMessage def topic topic = {} - topic[:id] = @parsed_json['id'] - topic[:author_id] = @parsed_json['user']['id'] + topic[:id] = @parsed_json["id"] + topic[:author_id] = @parsed_json["user"]["id"] topic[:title] = title - topic[:raw] = @parsed_json['body'] - topic[:created_at] = Time.parse @parsed_json['created_at'] + topic[:raw] = @parsed_json["body"] + topic[:created_at] = Time.parse @parsed_json["created_at"] topic[:tags] = tags topic[:category] = category topic end def title - CreateTitle.from_body @parsed_json['body'] + CreateTitle.from_body @parsed_json["body"] end def tags @@ -55,39 +54,37 @@ class SocialcastMessage def category category = DEFAULT_CATEGORY - if group && TAGS_AND_CATEGORIES[group] - category = TAGS_AND_CATEGORIES[group][:category] - end + category = TAGS_AND_CATEGORIES[group][:category] if group && TAGS_AND_CATEGORIES[group] category end def group - @parsed_json['group']['groupname'].downcase if @parsed_json['group'] && @parsed_json['group']['groupname'] + if @parsed_json["group"] && @parsed_json["group"]["groupname"] + @parsed_json["group"]["groupname"].downcase + end end def url - @parsed_json['url'] + 
@parsed_json["url"] end def message_type - @parsed_json['message_type'] + @parsed_json["message_type"] end def replies posts = [] - comments = @parsed_json['comments'] - comments.each do |comment| - posts << post_from_comment(comment) - end + comments = @parsed_json["comments"] + comments.each { |comment| posts << post_from_comment(comment) } posts end def post_from_comment(comment) post = {} - post[:id] = comment['id'] - post[:author_id] = comment['user']['id'] - post[:raw] = comment['text'] - post[:created_at] = Time.parse comment['created_at'] + post[:id] = comment["id"] + post[:author_id] = comment["user"]["id"] + post[:raw] = comment["text"] + post[:created_at] = Time.parse comment["created_at"] post end diff --git a/script/import_scripts/socialcast/socialcast_user.rb b/script/import_scripts/socialcast/socialcast_user.rb index 1ffc93081ce..f882637f66b 100644 --- a/script/import_scripts/socialcast/socialcast_user.rb +++ b/script/import_scripts/socialcast/socialcast_user.rb @@ -1,26 +1,24 @@ # frozen_string_literal: true -require 'json' -require 'cgi' -require 'time' +require "json" +require "cgi" +require "time" class SocialcastUser - def initialize(user_json) @parsed_json = JSON.parse user_json end def user - email = @parsed_json['contact_info']['email'] - email = "#{@parsed_json['id']}@noemail.com" unless email + email = @parsed_json["contact_info"]["email"] + email = "#{@parsed_json["id"]}@noemail.com" unless email user = {} - user[:id] = @parsed_json['id'] - user[:name] = @parsed_json['name'] - user[:username] = @parsed_json['username'] + user[:id] = @parsed_json["id"] + user[:name] = @parsed_json["name"] + user[:username] = @parsed_json["username"] user[:email] = email user[:staged] = true user end - end diff --git a/script/import_scripts/socialcast/test/test_create_title.rb b/script/import_scripts/socialcast/test/test_create_title.rb index ee934a4f891..0dac092550f 100644 --- a/script/import_scripts/socialcast/test/test_create_title.rb +++ b/script/import_scripts/socialcast/test/test_create_title.rb @@ -1,26 +1,28 @@ # frozen_string_literal: true -require 'minitest/autorun' -require_relative '../create_title.rb' +require "minitest/autorun" +require_relative "../create_title.rb" class TestCreateTitle < Minitest::Test - def test_create_title_1 - body = "@GreatCheerThreading \nWhere can I find information on how GCTS stacks up against the competition? What are the key differentiators?" + body = + "@GreatCheerThreading \nWhere can I find information on how GCTS stacks up against the competition? What are the key differentiators?" expected = "Where can I find information on how GCTS stacks up against the competition?" title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_2 - body = "GCTS in 200 stores across town. How many threads per inch would you guess? @GreatCheerThreading" + body = + "GCTS in 200 stores across town. How many threads per inch would you guess? @GreatCheerThreading" expected = "GCTS in 200 stores across town. How many threads per inch would you guess?" title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_3 - body = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness running through the cotton fibers." + body = + "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness running through the cotton fibers." expected = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness..." 
title = CreateTitle.from_body body assert_equal(expected, title) @@ -34,49 +36,56 @@ class TestCreateTitle < Minitest::Test end def test_create_title_5 - body = "One sentence. Two sentence. Three sentence. Four is going to go on and on for more words than we want." + body = + "One sentence. Two sentence. Three sentence. Four is going to go on and on for more words than we want." expected = "One sentence. Two sentence. Three sentence." title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_6 - body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?\n\n//cc @RD @GreatCheerThreading" + body = + "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?\n\n//cc @RD @GreatCheerThreading" expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?" title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_6b - body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading" + body = + "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading" expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site..." title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_6c - body = "Anyone know of any invite codes for www.greatcheer.io?! (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading" + body = + "Anyone know of any invite codes for www.greatcheer.io?! (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading" expected = "Anyone know of any invite codes for www.greatcheer.io?!" title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_7 - body = "@GreatCheerThreading \n\nDoes anyone know what the plan is to move to denser 1.2 threads for GCTS?\n\nI have a customer interested in the higher thread counts offered in 1.2." + body = + "@GreatCheerThreading \n\nDoes anyone know what the plan is to move to denser 1.2 threads for GCTS?\n\nI have a customer interested in the higher thread counts offered in 1.2." expected = "Does anyone know what the plan is to move to denser 1.2 threads for GCTS?" title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_8 - body = "@GreatCheerThreading @FabricWeavingWorldwide \n\nI was just chatting with a customer, after receiving this email:\n\n\"Ours is more of a ‘conceptual’ question. We have too much fiber" + body = + "@GreatCheerThreading @FabricWeavingWorldwide \n\nI was just chatting with a customer, after receiving this email:\n\n\"Ours is more of a ‘conceptual’ question. We have too much fiber" expected = "I was just chatting with a customer, after receiving this email:" title = CreateTitle.from_body body assert_equal(expected, title) end def test_create_title_9 - body = "Hi,\n\nDoes anyone have a PPT deck on whats new in cotton (around 10 or so slides) nothing to detailed as per what we have in the current 1.x version?\n\nI am not after a what's coming in cotton 2" + body = + "Hi,\n\nDoes anyone have a PPT deck on whats new in cotton (around 10 or so slides) nothing to detailed as per what we have in the current 1.x version?\n\nI am not after a what's coming in cotton 2" expected = "Does anyone have a PPT deck on whats new in cotton (around 10 or so slides)..." 
title = CreateTitle.from_body body assert_equal(expected, title) @@ -90,7 +99,8 @@ class TestCreateTitle < Minitest::Test end def test_create_title_11 - body = "Hi Guys,\nI'm working with #gtcs and one of the things we're playing with is TC. What better tool to demo and use than our own \nhttps://greatcheerthreading.com/themostthreads/cool-stuff\n\nThis used to work great in 2013," + body = + "Hi Guys,\nI'm working with #gtcs and one of the things we're playing with is TC. What better tool to demo and use than our own \nhttps://greatcheerthreading.com/themostthreads/cool-stuff\n\nThis used to work great in 2013," expected = "I'm working with #gtcs and one of the things we're playing with is TC." title = CreateTitle.from_body body assert_equal(expected, title) @@ -104,10 +114,10 @@ class TestCreateTitle < Minitest::Test end def test_create_title_13 - body = "Embroidered TC ... http://blogs.greatcheerthreading.com/thread/embroidering-the-threads-is-just-the-beginning\n@SoftStuff @TightWeave and team hopefully can share their thoughts on this recent post." + body = + "Embroidered TC ... http://blogs.greatcheerthreading.com/thread/embroidering-the-threads-is-just-the-beginning\n@SoftStuff @TightWeave and team hopefully can share their thoughts on this recent post." expected = "and team hopefully can share their thoughts on this recent post." title = CreateTitle.from_body body assert_equal(expected, title) end - end diff --git a/script/import_scripts/socialcast/test/test_data.rb b/script/import_scripts/socialcast/test/test_data.rb index 5bdbf52cc9f..2dd018c32da 100644 --- a/script/import_scripts/socialcast/test/test_data.rb +++ b/script/import_scripts/socialcast/test/test_data.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true -USERS = '{ +USERS = + '{ "users": [ { "contact_info": { @@ -1082,7 +1083,8 @@ USERS = '{ ] }' -MESSAGES = '{ +MESSAGES = + '{ "messages": [ { "id": 426, @@ -5429,7 +5431,8 @@ MESSAGES = '{ "messages_next_page": 2 }' -MESSAGES_PG_2 = '{ +MESSAGES_PG_2 = + '{ "messages": [ { "id": 386, diff --git a/script/import_scripts/socialcast/test/test_socialcast_api.rb b/script/import_scripts/socialcast/test/test_socialcast_api.rb index 70ad038c8b4..f46e0fefd55 100644 --- a/script/import_scripts/socialcast/test/test_socialcast_api.rb +++ b/script/import_scripts/socialcast/test/test_socialcast_api.rb @@ -1,21 +1,20 @@ # frozen_string_literal: true -require 'minitest/autorun' -require 'yaml' -require_relative '../socialcast_api.rb' -require_relative './test_data.rb' +require "minitest/autorun" +require "yaml" +require_relative "../socialcast_api.rb" +require_relative "./test_data.rb" class TestSocialcastApi < Minitest::Test - DEBUG = false def initialize(args) - config = YAML::load_file(File.join(__dir__, 'config.ex.yml')) - @domain = config['domain'] - @username = config['username'] - @password = config['password'] - @kb_id = config['kb_id'] - @question_id = config['question_id'] + config = YAML.load_file(File.join(__dir__, "config.ex.yml")) + @domain = config["domain"] + @username = config["username"] + @password = config["password"] + @kb_id = config["kb_id"] + @question_id = config["question_id"] super args end @@ -30,18 +29,18 @@ class TestSocialcastApi < Minitest::Test end def test_base_url - assert_equal 'https://demo.socialcast.com/api', @socialcast.base_url + assert_equal "https://demo.socialcast.com/api", @socialcast.base_url end def test_headers headers = @socialcast.headers - assert_equal 'Basic ZW1pbHlAc29jaWFsY2FzdC5jb206ZGVtbw==', headers[:Authorization] - assert_equal 
'application/json', headers[:Accept] + assert_equal "Basic ZW1pbHlAc29jaWFsY2FzdC5jb206ZGVtbw==", headers[:Authorization] + assert_equal "application/json", headers[:Accept] end def test_list_users users = @socialcast.list_users - expected = JSON.parse(USERS)['users'].sort { |u| u['id'] } + expected = JSON.parse(USERS)["users"].sort { |u| u["id"] } assert_equal 15, users.size assert_equal expected[0], users[0] end @@ -53,14 +52,14 @@ class TestSocialcastApi < Minitest::Test def test_list_messages messages = @socialcast.list_messages - expected = JSON.parse(MESSAGES)['messages'].sort { |m| m['id'] } + expected = JSON.parse(MESSAGES)["messages"].sort { |m| m["id"] } assert_equal 20, messages.size check_keys expected[0], messages[0] end def test_messages_next_page messages = @socialcast.list_messages(page: 2) - expected = JSON.parse(MESSAGES_PG_2)['messages'].sort { |m| m['id'] } + expected = JSON.parse(MESSAGES_PG_2)["messages"].sort { |m| m["id"] } assert_equal 20, messages.size check_keys expected[0], messages[0] end @@ -69,18 +68,16 @@ class TestSocialcastApi < Minitest::Test def check_keys(expected, actual) msg = "### caller[0]:\nKey not found in actual keys: #{actual.keys}\n" - expected.keys.each do |k| - assert (actual.keys.include? k), "#{k}" - end + expected.keys.each { |k| assert (actual.keys.include? k), "#{k}" } end def debug(message, show = false) if show || DEBUG - puts '### ' + caller[0] - puts '' + puts "### " + caller[0] + puts "" puts message - puts '' - puts '' + puts "" + puts "" end end end diff --git a/script/import_scripts/socialcast/title.rb b/script/import_scripts/socialcast/title.rb index b9f0e3c8ae9..9f2c3dd82d5 100644 --- a/script/import_scripts/socialcast/title.rb +++ b/script/import_scripts/socialcast/title.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true -require_relative './socialcast_message.rb' -require_relative './socialcast_user.rb' -require 'set' +require_relative "./socialcast_message.rb" +require_relative "./socialcast_user.rb" +require "set" require File.expand_path(File.dirname(__FILE__) + "/../base.rb") MESSAGES_DIR = "output/messages" @@ -11,8 +11,8 @@ def titles topics = 0 total = count_files(MESSAGES_DIR) Dir.foreach(MESSAGES_DIR) do |filename| - next if filename == ('.') || filename == ('..') - message_json = File.read MESSAGES_DIR + '/' + filename + next if filename == (".") || filename == ("..") + message_json = File.read MESSAGES_DIR + "/" + filename message = SocialcastMessage.new(message_json) next unless message.title #puts "#{filename}, #{message.replies.size}, #{message.topic[:raw].size}, #{message.message_type}, #{message.title}" @@ -23,7 +23,7 @@ def titles end def count_files(path) - Dir.foreach(path).select { |f| f != '.' && f != '..' }.count + Dir.foreach(path).select { |f| f != "." && f != ".." }.count end titles diff --git a/script/import_scripts/sourceforge.rb b/script/import_scripts/sourceforge.rb index 7d7de0cb8c2..8d165a7fa0e 100644 --- a/script/import_scripts/sourceforge.rb +++ b/script/import_scripts/sourceforge.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative 'base.rb' +require_relative "base.rb" # Import script for SourceForge discussions. # @@ -15,10 +15,10 @@ require_relative 'base.rb' class ImportScripts::Sourceforge < ImportScripts::Base # When the URL of your project is https://sourceforge.net/projects/foo/ # than the value of PROJECT_NAME is 'foo' - PROJECT_NAME = 'project_name' + PROJECT_NAME = "project_name" # This is the path to the discussion.json that you exported from SourceForge. 
- JSON_FILE = '/path/to/discussion.json' + JSON_FILE = "/path/to/discussion.json" def initialize super @@ -27,7 +27,7 @@ class ImportScripts::Sourceforge < ImportScripts::Base end def execute - puts '', 'Importing from SourceForge...' + puts "", "Importing from SourceForge..." load_json @@ -40,25 +40,26 @@ class ImportScripts::Sourceforge < ImportScripts::Base end def import_categories - puts '', 'importing categories' + puts "", "importing categories" create_categories(@json[:forums]) do |forum| { id: forum[:shortname], name: forum[:name], - post_create_action: proc do |category| - changes = { raw: forum[:description] } - opts = { revised_at: Time.now, bypass_bump: true } + post_create_action: + proc do |category| + changes = { raw: forum[:description] } + opts = { revised_at: Time.now, bypass_bump: true } - post = category.topic.first_post - post.revise(@system_user, changes, opts) - end + post = category.topic.first_post + post.revise(@system_user, changes, opts) + end, } end end def import_topics - puts '', 'importing posts' + puts "", "importing posts" imported_post_count = 0 total_post_count = count_posts @@ -78,7 +79,7 @@ class ImportScripts::Sourceforge < ImportScripts::Base id: "#{thread[:_id]}_#{post[:slug]}", user_id: @system_user, created_at: Time.zone.parse(post[:timestamp]), - raw: process_post_text(forum, thread, post) + raw: process_post_text(forum, thread, post), } if post == first_post @@ -103,9 +104,7 @@ class ImportScripts::Sourceforge < ImportScripts::Base total_count = 0 @json[:forums].each do |forum| - forum[:threads].each do |thread| - total_count += thread[:posts].size - end + forum[:threads].each { |thread| total_count += thread[:posts].size } end total_count @@ -117,20 +116,22 @@ class ImportScripts::Sourceforge < ImportScripts::Base def process_post_text(forum, thread, post) text = post[:text] - text.gsub!(/~{3,}/, '```') # Discourse doesn't recognize ~~~ as beginning/end of code blocks + text.gsub!(/~{3,}/, "```") # Discourse doesn't recognize ~~~ as beginning/end of code blocks # SourceForge doesn't allow symbols in usernames, so we are safe here. # Well, unless it's the anonymous user, which has an evil asterisk in the JSON file... username = post[:author] - username = 'anonymous' if username == '*anonymous' + username = "anonymous" if username == "*anonymous" # anonymous and nobody are nonexistent users. Make sure we don't create links for them. - user_without_profile = username == 'anonymous' || username == 'nobody' - user_link = user_without_profile ? username : "[#{username}](https://sourceforge.net/u/#{username}/)" + user_without_profile = username == "anonymous" || username == "nobody" + user_link = + user_without_profile ? username : "[#{username}](https://sourceforge.net/u/#{username}/)" # Create a nice looking header for each imported post that links to the author's user profile and the old post. 
- post_date = Time.zone.parse(post[:timestamp]).strftime('%A, %B %d, %Y') - post_url = "https://sourceforge.net/p/#{PROJECT_NAME}/discussion/#{forum[:shortname]}/thread/#{thread[:_id]}/##{post[:slug]}" + post_date = Time.zone.parse(post[:timestamp]).strftime("%A, %B %d, %Y") + post_url = + "https://sourceforge.net/p/#{PROJECT_NAME}/discussion/#{forum[:shortname]}/thread/#{thread[:_id]}/##{post[:slug]}" "**#{user_link}** wrote on [#{post_date}](#{post_url}):\n\n#{text}" end diff --git a/script/import_scripts/stack_overflow.rb b/script/import_scripts/stack_overflow.rb index 2eca547dbca..30ddf2e551c 100644 --- a/script/import_scripts/stack_overflow.rb +++ b/script/import_scripts/stack_overflow.rb @@ -5,18 +5,18 @@ require "tiny_tds" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::StackOverflow < ImportScripts::Base - BATCH_SIZE ||= 1000 def initialize super - @client = TinyTds::Client.new( - host: ENV["DB_HOST"], - username: ENV["DB_USERNAME"], - password: ENV["DB_PASSWORD"], - database: ENV["DB_NAME"], - ) + @client = + TinyTds::Client.new( + host: ENV["DB_HOST"], + username: ENV["DB_USERNAME"], + password: ENV["DB_PASSWORD"], + database: ENV["DB_NAME"], + ) end def execute @@ -36,7 +36,7 @@ class ImportScripts::StackOverflow < ImportScripts::Base total = query("SELECT COUNT(*) count FROM Users WHERE Id > 0").first["count"] batches(BATCH_SIZE) do |offset| - users = query(<<~SQL + users = query(<<~SQL).to_a SELECT TOP #{BATCH_SIZE} Id , UserTypeId @@ -55,7 +55,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base AND Id > #{last_user_id} ORDER BY Id SQL - ).to_a break if users.empty? @@ -77,11 +76,16 @@ class ImportScripts::StackOverflow < ImportScripts::Base name: u["RealName"], location: u["Location"], date_of_birth: u["Birthday"], - post_create_action: proc do |user| - if u["ProfileImageUrl"].present? - UserAvatar.import_url_for_user(u["ProfileImageUrl"], user) rescue nil - end - end + post_create_action: + proc do |user| + if u["ProfileImageUrl"].present? + begin + UserAvatar.import_url_for_user(u["ProfileImageUrl"], user) + rescue StandardError + nil + end + end + end, } end end @@ -91,11 +95,16 @@ class ImportScripts::StackOverflow < ImportScripts::Base puts "", "Importing posts..." last_post_id = -1 - total = query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId IN (1,2,3)").first["count"] + - query("SELECT COUNT(*) count FROM PostComments WHERE PostId IN (SELECT Id FROM Posts WHERE PostTypeId IN (1,2,3))").first["count"] + total = + query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId IN (1,2,3)").first["count"] + + query( + "SELECT COUNT(*) count FROM PostComments WHERE PostId IN (SELECT Id FROM Posts WHERE PostTypeId IN (1,2,3))", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| - posts = query(<<~SQL + posts = query(<<~SQL).to_a SELECT TOP #{BATCH_SIZE} Id , PostTypeId @@ -113,14 +122,13 @@ class ImportScripts::StackOverflow < ImportScripts::Base AND Id > #{last_post_id} ORDER BY Id SQL - ).to_a break if posts.empty? 
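A pattern that repeats through this file's hunks: syntax_tree moves a call chain that used to trail the heredoc terminator up onto the opening line, so the whole expression reads in one piece before the SQL body. A minimal sketch of the two shapes, reusing this script's query helper (table and column names are illustrative):

  # old shape: the reader only meets ").to_a" after the SQL body
  posts = query(<<~SQL
    SELECT TOP 1000 Id FROM Posts ORDER BY Id
  SQL
  ).to_a

  # new shape: the full expression sits on the opening line
  posts = query(<<~SQL).to_a
    SELECT TOP 1000 Id FROM Posts ORDER BY Id
  SQL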
last_post_id = posts[-1]["Id"] post_ids = posts.map { |p| p["Id"] } - comments = query(<<~SQL + comments = query(<<~SQL).to_a SELECT CONCAT('Comment-', Id) AS Id , PostId AS ParentId , Text @@ -130,7 +138,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base WHERE PostId IN (#{post_ids.join(",")}) ORDER BY Id SQL - ).to_a posts_and_comments = (posts + comments).sort_by { |p| p["CreationDate"] } post_and_comment_ids = posts_and_comments.map { |p| p["Id"] } @@ -173,7 +180,7 @@ class ImportScripts::StackOverflow < ImportScripts::Base last_like_id = -1 batches(BATCH_SIZE) do |offset| - likes = query(<<~SQL + likes = query(<<~SQL).to_a SELECT TOP #{BATCH_SIZE} Id , PostId @@ -185,7 +192,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base AND Id > #{last_like_id} ORDER BY Id SQL - ).to_a break if likes.empty? @@ -196,17 +202,26 @@ class ImportScripts::StackOverflow < ImportScripts::Base next unless post_id = post_id_from_imported_post_id(l["PostId"]) next unless user = User.find_by(id: user_id) next unless post = Post.find_by(id: post_id) - PostActionCreator.like(user, post) rescue nil + begin + PostActionCreator.like(user, post) + rescue StandardError + nil + end end end puts "", "Importing comment likes..." last_like_id = -1 - total = query("SELECT COUNT(*) count FROM Comments2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL").first["count"] + total = + query( + "SELECT COUNT(*) count FROM Comments2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| - likes = query(<<~SQL + likes = query(<<~SQL).to_a SELECT TOP #{BATCH_SIZE} Id , CONCAT('Comment-', PostCommentId) AS PostCommentId @@ -218,7 +233,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base AND Id > #{last_like_id} ORDER BY Id SQL - ).to_a break if likes.empty? 
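The batching in these hunks is keyset pagination rather than OFFSET paging: each round selects the next TOP batch of rows whose Id exceeds the last Id already imported, so SQL Server never has to scan past skipped rows. A rough skeleton of the loop, under the names this script defines (query, BATCH_SIZE; table name as in the likes query above):

  last_id = -1
  loop do
    rows = query(<<~SQL).to_a
      SELECT TOP #{BATCH_SIZE} Id
      FROM Comments2Votes
      WHERE Id > #{last_id}
      ORDER BY Id
    SQL
    break if rows.empty?
    last_id = rows[-1]["Id"] # resume point for the next round
  end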
@@ -229,7 +243,11 @@ class ImportScripts::StackOverflow < ImportScripts::Base
       next unless post_id = post_id_from_imported_post_id(l["PostCommentId"])
       next unless user = User.find_by(id: user_id)
       next unless post = Post.find_by(id: post_id)
-      PostActionCreator.like(user, post) rescue nil
+      begin
+        PostActionCreator.like(user, post)
+      rescue StandardError
+        nil
+      end
     end
   end
 end
@@ -249,7 +267,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base
   def query(sql)
     @client.execute(sql)
   end
-
 end

 ImportScripts::StackOverflow.new.perform
diff --git a/script/import_scripts/support/convert_mysql_xml_to_mysql.rb b/script/import_scripts/support/convert_mysql_xml_to_mysql.rb
index be0e45ca2fb..070bfb41232 100644
--- a/script/import_scripts/support/convert_mysql_xml_to_mysql.rb
+++ b/script/import_scripts/support/convert_mysql_xml_to_mysql.rb
@@ -3,11 +3,10 @@
 # convert huge XML dump to mysql friendly import
 #
-require 'ox'
-require 'set'
+require "ox"
+require "set"

 class Saxy < Ox::Sax
-
   def initialize
     @stack = []
   end
@@ -32,7 +31,6 @@ class Saxy < Ox::Sax
   def cdata(val)
     @stack[-1][:text] = val
   end
-
 end

 class Convert < Saxy
@@ -59,10 +57,13 @@
   end

   def output_table_definition(data)
-    cols = data[:cols].map do |col|
-      attrs = col[:attrs]
-      "#{attrs[:Field]} #{attrs[:Type]}"
-    end.join(", ")
+    cols =
+      data[:cols]
+        .map do |col|
+          attrs = col[:attrs]
+          "#{attrs[:Field]} #{attrs[:Type]}"
+        end
+        .join(", ")

     puts "CREATE TABLE #{data[:attrs][:name]} (#{cols});"
   end
@@ -77,4 +78,4 @@ class Convert < Saxy
   end
 end

-Ox.sax_parse(Convert.new(skip_data: ['metrics2', 'user_log']), File.open(ARGV[0]))
+Ox.sax_parse(Convert.new(skip_data: %w[metrics2 user_log]), File.open(ARGV[0]))
diff --git a/script/import_scripts/telligent.rb b/script/import_scripts/telligent.rb
index 32ffda8acb1..c46be23facb 100644
--- a/script/import_scripts/telligent.rb
+++ b/script/import_scripts/telligent.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true

-require_relative 'base'
-require 'tiny_tds'
+require_relative "base"
+require "tiny_tds"

 # Import script for Telligent communities
 #
@@ -40,17 +40,19 @@ require 'tiny_tds'
 class ImportScripts::Telligent < ImportScripts::Base
   BATCH_SIZE ||= 1000

-  LOCAL_AVATAR_REGEX ||= /\A~\/.*(?<directory>communityserver-components-(?:selectable)?avatars)\/(?<path>[^\/]+)\/(?<filename>.+)/i
-  REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i
+  LOCAL_AVATAR_REGEX ||=
+    %r{\A~/.*(?<directory>communityserver-components-(?:selectable)?avatars)/(?<path>[^/]+)/(?<filename>.+)}i
+  REMOTE_AVATAR_REGEX ||= %r{\Ahttps?://}i

   ATTACHMENT_REGEXES ||= [
-    /<a[^>]*\shref="[^"]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)".*?>.*?<\/a>/i,
-    /<img[^>]*\ssrc="[^"]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)".*?>/i,
-    /\[View:[^\]]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)(?:\:[:\d\s]*?)?\]/i,
-    /\[(?<tag>img|url)\][^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)\[\/\k<tag>\]/i,
-    /\[(?<tag>img|url)=[^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)\][^\[]*?\[\/\k<tag>\]/i
+    %r{<a[^>]*\shref="[^"]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)".*?>.*?</a>}i,
+    %r{<img[^>]*\ssrc="[^"]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)".*?>}i,
+    %r{\[View:[^\]]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)(?:\:[:\d\s]*?)?\]}i,
+    %r{\[(?<tag>img|url)\][^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)\[/\k<tag>\]}i,
+    %r{\[(?<tag>img|url)=[^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)\][^\[]*?\[/\k<tag>\]}i,
  ]
   PROPERTY_NAMES_REGEX ||= /(?<name>\w+):S:(?<start>\d+):(?<length>\d+):/
-  INTERNAL_LINK_REGEX ||= /\shref=".*?\/f\/\d+(?:(\/t\/(?<topic_id>\d+))|(?:\/p\/\d+\/(?<post_id>\d+))|(?:\/p\/(?<reply_id>\d+)\/reply))\.aspx[^"]*?"/i
+  INTERNAL_LINK_REGEX ||=
+    %r{\shref=".*?/f/\d+(?:(/t/(?<topic_id>\d+))|(?:/p/\d+/(?<post_id>\d+))|(?:/p/(?<reply_id>\d+)/reply))\.aspx[^"]*?"}i

   CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1'
   TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1'
@@ -82,19 +84,20 @@ class ImportScripts::Telligent < ImportScripts::Base
     "1D20" => "”",
     "B000" => "°",
     "0003" => ["0300".to_i(16)].pack("U"),
-    "0103" => ["0301".to_i(16)].pack("U")
+    "0103" => ["0301".to_i(16)].pack("U"),
   }

   def initialize
     super()

-    @client = TinyTds::Client.new(
-      host: ENV["DB_HOST"],
-      username: ENV["DB_USERNAME"],
-      password: ENV["DB_PASSWORD"],
-      database: ENV["DB_NAME"],
-      timeout: 60 # the user query is very slow
-    )
+    @client =
+      TinyTds::Client.new(
+        host: ENV["DB_HOST"],
+        username: ENV["DB_USERNAME"],
+        password: ENV["DB_PASSWORD"],
+        database: ENV["DB_NAME"],
+        timeout: 60, # the user query is very slow
+      )

     @filestore_root_directory = ENV["FILE_BASE_DIR"]
     @files = {}
@@ -180,10 +183,11 @@ class ImportScripts::Telligent < ImportScripts::Base
         bio_raw: html_to_markdown(ap_properties["bio"]),
         location: ap_properties["location"],
         website: ap_properties["webAddress"],
-        post_create_action: proc do |user|
-          import_avatar(user, up_properties["avatarUrl"])
-          suspend_user(user, up_properties["BannedUntil"], up_properties["UserBanReason"])
-        end
+        post_create_action:
+          proc do |user|
+            import_avatar(user, up_properties["avatarUrl"])
+            suspend_user(user, up_properties["BannedUntil"], up_properties["UserBanReason"])
+          end,
       }
     end
@@ -193,13 +197,18 @@ class ImportScripts::Telligent < ImportScripts::Base
   # TODO move into base importer (create_user) and use consistent error handling
   def import_avatar(user, avatar_url)
-    return if @filestore_root_directory.blank? || avatar_url.blank? || avatar_url.include?("anonymous")
+    if @filestore_root_directory.blank? || avatar_url.blank? || avatar_url.include?("anonymous")
+      return
+    end

     if match_data = avatar_url.match(LOCAL_AVATAR_REGEX)
-      avatar_path = File.join(@filestore_root_directory,
-                              match_data[:directory].gsub("-", "."),
-                              match_data[:path].split("-"),
-                              match_data[:filename])
+      avatar_path =
+        File.join(
+          @filestore_root_directory,
+          match_data[:directory].gsub("-", "."),
+          match_data[:path].split("-"),
+          match_data[:filename],
+        )

       if File.file?(avatar_path)
         @uploader.create_avatar(user, avatar_path)
@@ -207,7 +216,11 @@ class ImportScripts::Telligent < ImportScripts::Base
         STDERR.puts "Could not find avatar: #{avatar_path}"
       end
     elsif avatar_url.match?(REMOTE_AVATAR_REGEX)
-      UserAvatar.import_url_for_user(avatar_url, user) rescue nil
+      begin
+        UserAvatar.import_url_for_user(avatar_url, user)
+      rescue StandardError
+        nil
+      end
     end
   end
@@ -224,7 +237,7 @@ class ImportScripts::Telligent < ImportScripts::Base
   end

   def import_categories
-    if ENV['CATEGORY_MAPPING']
+    if ENV["CATEGORY_MAPPING"]
       import_mapped_forums_as_categories
     else
       import_groups_and_forums_as_categories
@@ -234,7 +247,7 @@ class ImportScripts::Telligent < ImportScripts::Base
   def import_mapped_forums_as_categories
     puts "", "Importing categories..."
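The named captures restored in LOCAL_AVATAR_REGEX (directory, path, filename) are exactly what import_avatar consumes further down when it rebuilds a filesystem path. A sketch with a hypothetical avatar URL, for illustration only:

  url = "~/communityserver-components-avatars/00-11-22/avatar.png"

  if match_data = url.match(LOCAL_AVATAR_REGEX)
    match_data[:directory] # => "communityserver-components-avatars" (dashes become dots on disk)
    match_data[:path]      # => "00-11-22" (split on "-" into nested directories)
    match_data[:filename]  # => "avatar.png"
  end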
- json = JSON.parse(File.read(ENV['CATEGORY_MAPPING'])) + json = JSON.parse(File.read(ENV["CATEGORY_MAPPING"])) categories = [] @forum_ids_to_tags = {} @@ -256,7 +269,7 @@ class ImportScripts::Telligent < ImportScripts::Base id: id, name: name, parent_id: parent_id, - forum_ids: index == last_index ? forum_ids : nil + forum_ids: index == last_index ? forum_ids : nil, } parent_id = id end @@ -271,9 +284,7 @@ class ImportScripts::Telligent < ImportScripts::Base id: c[:id], name: c[:name], parent_category_id: category_id_from_imported_category_id(c[:parent_id]), - post_create_action: proc do |category| - map_forum_ids(category.id, c[:forum_ids]) - end + post_create_action: proc { |category| map_forum_ids(category.id, c[:forum_ids]) }, } end end @@ -302,10 +313,10 @@ class ImportScripts::Telligent < ImportScripts::Base create_categories(parent_categories) do |row| { - id: "G#{row['GroupID']}", + id: "G#{row["GroupID"]}", name: clean_category_name(row["Name"]), description: html_to_markdown(row["HtmlDescription"]), - position: row["SortOrder"] + position: row["SortOrder"], } end @@ -320,28 +331,31 @@ class ImportScripts::Telligent < ImportScripts::Base parent_category_id = parent_category_id_for(row) if category_id = replace_with_category_id(child_categories, parent_category_id) - add_category(row['ForumId'], Category.find_by_id(category_id)) - url = "f/#{row['ForumId']}" + add_category(row["ForumId"], Category.find_by_id(category_id)) + url = "f/#{row["ForumId"]}" Permalink.create(url: url, category_id: category_id) unless Permalink.exists?(url: url) nil else { - id: row['ForumId'], + id: row["ForumId"], parent_category_id: parent_category_id, name: clean_category_name(row["Name"]), description: html_to_markdown(row["Description"]), position: row["SortOrder"], - post_create_action: proc do |category| - url = "f/#{row['ForumId']}" - Permalink.create(url: url, category_id: category.id) unless Permalink.exists?(url: url) - end + post_create_action: + proc do |category| + url = "f/#{row["ForumId"]}" + unless Permalink.exists?(url: url) + Permalink.create(url: url, category_id: category.id) + end + end, } end end end def parent_category_id_for(row) - category_id_from_imported_category_id("G#{row['GroupId']}") if row.key?("GroupId") + category_id_from_imported_category_id("G#{row["GroupId"]}") if row.key?("GroupId") end def replace_with_category_id(child_categories, parent_category_id) @@ -351,23 +365,21 @@ class ImportScripts::Telligent < ImportScripts::Base def only_child?(child_categories, parent_category_id) count = 0 - child_categories.each do |row| - count += 1 if parent_category_id_for(row) == parent_category_id - end + child_categories.each { |row| count += 1 if parent_category_id_for(row) == parent_category_id } count == 1 end def clean_category_name(name) - CGI.unescapeHTML(name) - .strip + CGI.unescapeHTML(name).strip end def import_topics puts "", "Importing topics..." 
last_topic_id = -1 - total_count = count("SELECT COUNT(1) AS count FROM te_Forum_Threads t WHERE #{ignored_forum_sql_condition}") + total_count = + count("SELECT COUNT(1) AS count FROM te_Forum_Threads t WHERE #{ignored_forum_sql_condition}") batches do |offset| rows = query(<<~SQL) @@ -399,13 +411,16 @@ class ImportScripts::Telligent < ImportScripts::Base created_at: row["DateCreated"], closed: row["IsLocked"], views: row["TotalViews"], - post_create_action: proc do |action_post| - topic = action_post.topic - Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id) if topic.pinned_until - url = "f/#{row['ForumId']}/t/#{row['ThreadId']}" - Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url) - import_topic_views(topic, row["TopicContentId"]) - end + post_create_action: + proc do |action_post| + topic = action_post.topic + if topic.pinned_until + Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id) + end + url = "f/#{row["ForumId"]}/t/#{row["ThreadId"]}" + Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url) + import_topic_views(topic, row["TopicContentId"]) + end, } if row["StickyDate"] > Time.now @@ -446,9 +461,8 @@ class ImportScripts::Telligent < ImportScripts::Base end def ignored_forum_sql_condition - @ignored_forum_sql_condition ||= @ignored_forum_ids.present? \ - ? "t.ForumId NOT IN (#{@ignored_forum_ids.join(',')})" \ - : "1 = 1" + @ignored_forum_sql_condition ||= + @ignored_forum_ids.present? ? "t.ForumId NOT IN (#{@ignored_forum_ids.join(",")})" : "1 = 1" end def import_posts @@ -492,7 +506,8 @@ class ImportScripts::Telligent < ImportScripts::Base next if all_records_exist?(:post, rows.map { |row| row["ThreadReplyId"] }) create_posts(rows, total: total_count, offset: offset) do |row| - imported_parent_id = row["ParentReplyId"]&.nonzero? ? row["ParentReplyId"] : import_topic_id(row["ThreadId"]) + imported_parent_id = + row["ParentReplyId"]&.nonzero? ? row["ParentReplyId"] : import_topic_id(row["ThreadId"]) parent_post = topic_lookup_from_imported_post_id(imported_parent_id) user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID @@ -503,13 +518,13 @@ class ImportScripts::Telligent < ImportScripts::Base user_id: user_id, topic_id: parent_post[:topic_id], created_at: row["ThreadReplyDate"], - reply_to_post_number: parent_post[:post_number] + reply_to_post_number: parent_post[:post_number], } post[:custom_fields] = { is_accepted_answer: "true" } if row["IsFirstVerifiedAnswer"] post else - puts "Failed to import post #{row['ThreadReplyId']}. Parent was not found." + puts "Failed to import post #{row["ThreadReplyId"]}. Parent was not found." end end end @@ -565,7 +580,7 @@ class ImportScripts::Telligent < ImportScripts::Base id: row["MessageId"], raw: raw_with_attachment(row, user_id, :message), user_id: user_id, - created_at: row["DateCreated"] + created_at: row["DateCreated"], } if current_conversation_id == row["ConversationId"] @@ -574,7 +589,7 @@ class ImportScripts::Telligent < ImportScripts::Base if parent_post post[:topic_id] = parent_post[:topic_id] else - puts "Failed to import message #{row['MessageId']}. Parent was not found." + puts "Failed to import message #{row["MessageId"]}. Parent was not found." post = nil end else @@ -583,7 +598,7 @@ class ImportScripts::Telligent < ImportScripts::Base post[:target_usernames] = get_recipient_usernames(row) if post[:target_usernames].empty? - puts "Private message without recipients. 
Skipping #{row['MessageId']}" + puts "Private message without recipients. Skipping #{row["MessageId"]}" post = nil end @@ -611,7 +626,7 @@ class ImportScripts::Telligent < ImportScripts::Base def get_recipient_user_ids(participant_ids) return [] if participant_ids.blank? - user_ids = participant_ids.split(';') + user_ids = participant_ids.split(";") user_ids.uniq! user_ids.map!(&:strip) end @@ -619,9 +634,9 @@ class ImportScripts::Telligent < ImportScripts::Base def get_recipient_usernames(row) import_user_ids = get_recipient_user_ids(row["ParticipantIds"]) - import_user_ids.map! do |import_user_id| - find_user_by_import_id(import_user_id).try(:username) - end.compact + import_user_ids + .map! { |import_user_id| find_user_by_import_id(import_user_id).try(:username) } + .compact end def index_directory(root_directory) @@ -646,17 +661,16 @@ class ImportScripts::Telligent < ImportScripts::Base filename = row["FileName"] return raw if @filestore_root_directory.blank? || filename.blank? - if row["IsRemote"] - return "#{raw}\n#{filename}" - end + return "#{raw}\n#{filename}" if row["IsRemote"] - path = File.join( - "telligent.evolution.components.attachments", - "%02d" % row["ApplicationTypeId"], - "%02d" % row["ApplicationId"], - "%02d" % row["ApplicationContentTypeId"], - ("%010d" % row["ContentId"]).scan(/.{2}/) - ) + path = + File.join( + "telligent.evolution.components.attachments", + "%02d" % row["ApplicationTypeId"], + "%02d" % row["ApplicationId"], + "%02d" % row["ApplicationContentTypeId"], + ("%010d" % row["ContentId"]).scan(/.{2}/), + ) path = fix_attachment_path(path, filename) if path && !embedded_paths.include?(path) @@ -677,11 +691,11 @@ class ImportScripts::Telligent < ImportScripts::Base def print_file_not_found_error(type, path, row) case type when :topic - id = row['ThreadId'] + id = row["ThreadId"] when :post - id = row['ThreadReplyId'] + id = row["ThreadReplyId"] when :message - id = row['MessageId'] + id = row["MessageId"] end STDERR.puts "Could not find file for #{type} #{id}: #{path}" @@ -692,30 +706,31 @@ class ImportScripts::Telligent < ImportScripts::Base paths = [] upload_ids = [] - return [raw, paths, upload_ids] if @filestore_root_directory.blank? + return raw, paths, upload_ids if @filestore_root_directory.blank? ATTACHMENT_REGEXES.each do |regex| - raw = raw.gsub(regex) do - match_data = Regexp.last_match + raw = + raw.gsub(regex) do + match_data = Regexp.last_match - path = File.join(match_data[:directory], match_data[:path]) - fixed_path = fix_attachment_path(path, match_data[:filename]) + path = File.join(match_data[:directory], match_data[:path]) + fixed_path = fix_attachment_path(path, match_data[:filename]) - if fixed_path && File.file?(fixed_path) - filename = File.basename(fixed_path) - upload = @uploader.create_upload(user_id, fixed_path, filename) + if fixed_path && File.file?(fixed_path) + filename = File.basename(fixed_path) + upload = @uploader.create_upload(user_id, fixed_path, filename) - if upload.present? && upload.persisted? - paths << fixed_path - upload_ids << upload.id - @uploader.html_for_upload(upload, filename) + if upload.present? && upload.persisted? 
+            paths << fixed_path
+            upload_ids << upload.id
+            @uploader.html_for_upload(upload, filename)
+          end
+        else
+          path = File.join(path, match_data[:filename])
+          print_file_not_found_error(type, path, row)
+          match_data[0]
+        end
       end
-      end
     end

     [raw, paths, upload_ids]
@@ -806,8 +821,8 @@ class ImportScripts::Telligent < ImportScripts::Base
     md = HtmlToMarkdown.new(html).to_markdown

     md.gsub!(/\[quote.*?\]/, "\n" + '\0' + "\n")
-    md.gsub!(/(?/i, "\n```\n") .gsub(/<\/?code\s*>/i, "`")
+    raw
+      .gsub("\\n", "\n")
+      .gsub(%r{</?pre\s*>}i, "\n```\n")
+      .gsub(%r{</?code\s*>}i, "`")
       .gsub("&lt;", "<")
       .gsub("&gt;", ">")
   end
-
 end

 ImportScripts::Vanilla.new.perform
diff --git a/script/import_scripts/vanilla_body_parser.rb b/script/import_scripts/vanilla_body_parser.rb
index ba4608e3ff8..74c39583b95 100644
--- a/script/import_scripts/vanilla_body_parser.rb
+++ b/script/import_scripts/vanilla_body_parser.rb
@@ -14,9 +14,9 @@ class VanillaBodyParser
   end

   def parse
-    return clean_up(@row['Body']) unless rich?
+    return clean_up(@row["Body"]) unless rich?

-    full_text = json.each_with_index.map(&method(:parse_fragment)).join('')
+    full_text = json.each_with_index.map(&method(:parse_fragment)).join("")

     normalize full_text
   end
@@ -25,30 +25,46 @@
   private

   def clean_up(text)
     # <pre class="CodeBlock">...</pre>
-    text = text.gsub(/\<pre class="CodeBlock"\>(.*?)\<\/pre\>/im) { "\n```\n#{$1}\n```\n" }
+    text = text.gsub(%r{\<pre class="CodeBlock"\>(.*?)\</pre\>}im) { "\n```\n#{$1}\n```\n" }

     # <pre>...</pre>
-    text = text.gsub(/\<pre\>(.*?)\<\/pre\>/im) { "\n```\n#{$1}\n```\n" }
+    text = text.gsub(%r{\<pre\>(.*?)\</pre\>}im) { "\n```\n#{$1}\n```\n" }

     # <code></code>
-    text = text.gsub("\\", "").gsub(/\<code\>(.*?)\<\/code\>/im) { "#{$1}" }
+    text = text.gsub("\\", "").gsub(%r{\<code\>(.*?)\</code\>}im) { "#{$1}" }

     # <div class="Quote">...</div>
-    text = text.gsub(/\<div class="Quote"\>(.*?)\<\/div\>/im) { "\n[quote]\n#{$1}\n[/quote]\n" }
+    text = text.gsub(%r{\<div class="Quote"\>(.*?)\</div\>}im) { "\n[quote]\n#{$1}\n[/quote]\n" }

     # [code], [quote]
-    text = text.gsub(/\[\/?code\]/i, "\n```\n").gsub(/\[quote.*?\]/i, "\n" + '\0' + "\n").gsub(/\[\/quote\]/i, "\n" + '\0' + "\n")
+    text =
+      text
+        .gsub(%r{\[/?code\]}i, "\n```\n")
+        .gsub(/\[quote.*?\]/i, "\n" + '\0' + "\n")
+        .gsub(%r{\[/quote\]}i, "\n" + '\0' + "\n")

-    text.gsub(/<\/?font[^>]*>/, '').gsub(/<\/?span[^>]*>/, '').gsub(/<\/?div[^>]*>/, '').gsub(/^ +/, '').gsub(/ +/, ' ')
+    text
+      .gsub(%r{</?font[^>]*>}, "")
+      .gsub(%r{</?span[^>]*>}, "")
+      .gsub(%r{</?div[^>]*>}, "")
+      .gsub(/^ +/, "")
+      .gsub(/ +/, " ")
   end

   def rich?
-    @row['Format'].casecmp?('Rich')
+    @row["Format"].casecmp?("Rich")
   end

   def json
     return nil unless rich?
-    @json ||= JSON.parse(@row['Body']).map(&:deep_symbolize_keys)
+    @json ||= JSON.parse(@row["Body"]).map(&:deep_symbolize_keys)
   end

   def parse_fragment(fragment, index)
-    text = fragment.keys.one? && fragment[:insert].is_a?(String) ? fragment[:insert] : rich_parse(fragment)
+    text =
+      (
+        if fragment.keys.one? && fragment[:insert].is_a?(String)
+          fragment[:insert]
+        else
+          rich_parse(fragment)
+        end
+      )

     text = parse_code(text, fragment, index)
     text = parse_list(text, fragment, index)
@@ -59,16 +75,18 @@ class VanillaBodyParser
   def rich_parse(fragment)
     insert = fragment[:insert]

-    return parse_mention(insert[:mention]) if insert.respond_to?(:dig) && insert.dig(:mention, :userID)
+    if insert.respond_to?(:dig) && insert.dig(:mention, :userID)
+      return parse_mention(insert[:mention])
+    end

     return parse_formatting(fragment) if fragment[:attributes]

-    embed_type = insert.dig(:'embed-external', :data, :embedType)
+    embed_type = insert.dig(:"embed-external", :data, :embedType)

-    quoting = embed_type == 'quote'
+    quoting = embed_type == "quote"
     return parse_quote(insert) if quoting

-    embed = embed_type.in? ['image', 'link', 'file']
+    embed = embed_type.in? %w[image link file]
     parse_embed(insert, embed_type) if embed
   end
@@ -101,10 +119,10 @@ class VanillaBodyParser
   def parse_code(text, fragment, index)
     next_fragment = next_fragment(index)
-    next_code = next_fragment.dig(:attributes, :'code-block')
+    next_code = next_fragment.dig(:attributes, :"code-block")
     if next_code
       previous_fragment = previous_fragment(index)
-      previous_code = previous_fragment.dig(:attributes, :'code-block')
+      previous_code = previous_fragment.dig(:attributes, :"code-block")

       if previous_code
         text = text.gsub(/\\n(.*?)\\n/) { "\n```\n#{$1}\n```\n" }
@@ -112,7 +130,7 @@ class VanillaBodyParser
         last_pos = text.rindex(/\n/)

         if last_pos
-          array = [text[0..last_pos].strip, text[last_pos + 1 .. text.length].strip]
+          array = [text[0..last_pos].strip, text[last_pos + 1..text.length].strip]
           text = array.join("\n```\n")
         else
           text = "\n```\n#{text}"
@@ -120,10 +138,10 @@ class VanillaBodyParser
       end
     end

-    current_code = fragment.dig(:attributes, :'code-block')
+    current_code = fragment.dig(:attributes, :"code-block")
     if current_code
       second_next_fragment = second_next_fragment(index)
-      second_next_code = second_next_fragment.dig(:attributes, :'code-block')
+      second_next_code = second_next_fragment.dig(:attributes, :"code-block")

       # if current is code and 2 after is not, prepend ```
       text = "\n```\n#{text}" unless second_next_code
@@ -138,13 +156,13 @@ class VanillaBodyParser
     next_list = next_fragment.dig(:attributes, :list, :type)

     if next_list
       # if next is list, prepend <li>
-      text = '<li>' + text
+      text = "<li>" + text

       previous_fragment = previous_fragment(index)
       previous_list = previous_fragment.dig(:attributes, :list, :type)

       # if next is list and previous is not, prepend <ol> or <ul>
-      list_tag = next_list == 'ordered' ? '<ol>' : '<ul>'
+      list_tag = next_list == "ordered" ? "<ol>" : "<ul>"
       text = "\n#{list_tag}\n#{text}" unless previous_list
     end
@@ -152,13 +170,13 @@ class VanillaBodyParser
     if current_list
       # if current is list prepend </li>
-      tag_closings = '</li>'
+      tag_closings = "</li>"

       second_next_fragment = second_next_fragment(index)
       second_next_list = second_next_fragment.dig(:attributes, :list, :type)

       # if current is list and 2 after is not, prepend </ol> or </ul>
-      list_tag = current_list == 'ordered' ? '</ol>' : '</ul>'
+      list_tag = current_list == "ordered" ? "</ol>" : "</ul>"
       tag_closings = "#{tag_closings}\n#{list_tag}" unless second_next_list

       text = tag_closings + text
@@ -180,24 +198,32 @@ class VanillaBodyParser
   end

   def parse_quote(insert)
-    embed = insert.dig(:'embed-external', :data)
+    embed = insert.dig(:"embed-external", :data)
     import_post_id = "#{embed[:recordType]}##{embed[:recordID]}"
     topic = @@lookup.topic_lookup_from_imported_post_id(import_post_id)
     user = user_from_imported_id(embed.dig(:insertUser, :userID))

-    quote_info = topic && user ? "=\"#{user.username}, post: #{topic[:post_number]}, topic: #{topic[:topic_id]}\"" : ''
+    quote_info =
+      (
+        if topic && user
+          "=\"#{user.username}, post: #{topic[:post_number]}, topic: #{topic[:topic_id]}\""
+        else
+          ""
+        end
+      )

-    "[quote#{quote_info}]\n#{embed[:body]}\n[/quote]\n\n"""
+    "[quote#{quote_info}]\n#{embed[:body]}\n[/quote]\n\n" \
+      ""
   end

   def parse_embed(insert, embed_type)
-    embed = insert.dig(:'embed-external', :data)
+    embed = insert.dig(:"embed-external", :data)
     url = embed[:url]

-    if /https?\:\/\/#{@@host}\/uploads\/.*/.match?(url)
-      remote_path = url.scan(/uploads\/(.*)/)
+    if %r{https?\://#{@@host}/uploads/.*}.match?(url)
+      remote_path = url.scan(%r{uploads/(.*)})
       path = File.join(@@uploads_path, remote_path)
       upload = @@uploader.create_upload(@user_id, path, embed[:name])
@@ -206,7 +232,7 @@ class VanillaBodyParser
         return "\n" + @@uploader.html_for_upload(upload, embed[:name]) + "\n"
       else
         puts "Failed to upload #{path}"
-        puts upload.errors.full_messages.join(', ') if upload
+        puts upload.errors.full_messages.join(", ") if upload
       end
     end
@@ -222,9 +248,9 @@ class VanillaBodyParser
   def normalize(full_text)
     code_matcher = /```(.*\n)+```/
     code_block = full_text[code_matcher]
-    full_text[code_matcher] = '{{{CODE_BLOCK}}}' if code_block
+    full_text[code_matcher] = "{{{CODE_BLOCK}}}" if code_block
     full_text = double_new_lines(full_text)
-    full_text['{{{CODE_BLOCK}}}'] = code_block if code_block
+    full_text["{{{CODE_BLOCK}}}"] = code_block if code_block
     full_text
   end
diff --git a/script/import_scripts/vanilla_mysql.rb b/script/import_scripts/vanilla_mysql.rb
index d0a5e348934..72f294337ae 100644
--- a/script/import_scripts/vanilla_mysql.rb
+++ b/script/import_scripts/vanilla_mysql.rb
@@ -2,12 +2,11 @@

 require "mysql2"
 require File.expand_path(File.dirname(__FILE__) + "/base.rb")
-require 'htmlentities'
-require 'reverse_markdown'
-require_relative 'vanilla_body_parser'
+require "htmlentities"
+require "reverse_markdown"
+require_relative "vanilla_body_parser"

 class ImportScripts::VanillaSQL < ImportScripts::Base
-
   VANILLA_DB = "vanilla"
   TABLE_PREFIX = "GDN_"
   ATTACHMENTS_BASE_DIR = nil # "/absolute/path/to/attachments" set the absolute path if you have attachments
@@ -17,19 +16,15 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
   def initialize
     super
     @htmlentities = HTMLEntities.new
-    @client = Mysql2::Client.new(
-      host: "localhost",
-      username: "root",
-      database: VANILLA_DB
-    )
+    @client = Mysql2::Client.new(host: "localhost", username: "root", database: VANILLA_DB)

     # by default, don't use the body parser as it's not pertinent to all versions
     @vb_parser = false
     VanillaBodyParser.configure(
       lookup: @lookup,
       uploader: @uploader,
-      host: 'forum.example.com', # your Vanilla forum domain
-      uploads_path: 'uploads' # relative path to your vanilla uploads folder
+      host: "forum.example.com", # your Vanilla forum domain
+      uploads_path: "uploads", # relative path to your vanilla uploads folder
     )

     @import_tags = false
@@ -77,80 +72,83 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
SQL create_groups(groups) do |group| - { - id: group["RoleID"], - name: @htmlentities.decode(group["Name"]).strip - } + { id: group["RoleID"], name: @htmlentities.decode(group["Name"]).strip } end end def import_users - puts '', "creating users" + puts "", "creating users" @user_is_deleted = false @last_deleted_username = nil username = nil @last_user_id = -1 - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count'] + total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first["count"] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT UserID, Name, Title, Location, About, Email, Admin, Banned, CountComments, + results = + mysql_query( + "SELECT UserID, Name, Title, Location, About, Email, Admin, Banned, CountComments, DateInserted, DateLastActive, InsertIPAddress FROM #{TABLE_PREFIX}User WHERE UserID > #{@last_user_id} ORDER BY UserID ASC - LIMIT #{BATCH_SIZE};") + LIMIT #{BATCH_SIZE};", + ) break if results.size < 1 - @last_user_id = results.to_a.last['UserID'] - next if all_records_exist? :users, results.map { |u| u['UserID'].to_i } + @last_user_id = results.to_a.last["UserID"] + next if all_records_exist? :users, results.map { |u| u["UserID"].to_i } create_users(results, total: total_count, offset: offset) do |user| - email = user['Email'].squish + email = user["Email"].squish next if email.blank? - next if user['Name'].blank? - next if @lookup.user_id_from_imported_user_id(user['UserID']) - if user['Name'] == '[Deleted User]' + next if user["Name"].blank? + next if @lookup.user_id_from_imported_user_id(user["UserID"]) + if user["Name"] == "[Deleted User]" # EVERY deleted user record in Vanilla has the same username: [Deleted User] # Save our UserNameSuggester some pain: @user_is_deleted = true - username = @last_deleted_username || user['Name'] + username = @last_deleted_username || user["Name"] else @user_is_deleted = false - username = user['Name'] + username = user["Name"] end - banned = user['Banned'] != 0 - commented = (user['CountComments'] || 0) > 0 + banned = user["Banned"] != 0 + commented = (user["CountComments"] || 0) > 0 - { id: user['UserID'], + { + id: user["UserID"], email: email, username: username, - name: user['Name'], - created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']), - bio_raw: user['About'], - registration_ip_address: user['InsertIPAddress'], - last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']), - location: user['Location'], - admin: user['Admin'] == 1, + name: user["Name"], + created_at: user["DateInserted"] == nil ? 0 : Time.zone.at(user["DateInserted"]), + bio_raw: user["About"], + registration_ip_address: user["InsertIPAddress"], + last_seen_at: user["DateLastActive"] == nil ? 0 : Time.zone.at(user["DateLastActive"]), + location: user["Location"], + admin: user["Admin"] == 1, trust_level: !banned && commented ? 2 : 0, - post_create_action: proc do |newuser| - if @user_is_deleted - @last_deleted_username = newuser.username - end - if banned - newuser.suspended_at = Time.now - # banning on Vanilla doesn't have an end, so a thousand years seems equivalent - newuser.suspended_till = 1000.years.from_now - if newuser.save - StaffActionLogger.new(Discourse.system_user).log_user_suspend(newuser, 'Imported from Vanilla Forum') - else - puts "Failed to suspend user #{newuser.username}. 
#{newuser.errors.full_messages.join(', ')}" + post_create_action: + proc do |newuser| + @last_deleted_username = newuser.username if @user_is_deleted + if banned + newuser.suspended_at = Time.now + # banning on Vanilla doesn't have an end, so a thousand years seems equivalent + newuser.suspended_till = 1000.years.from_now + if newuser.save + StaffActionLogger.new(Discourse.system_user).log_user_suspend( + newuser, + "Imported from Vanilla Forum", + ) + else + puts "Failed to suspend user #{newuser.username}. #{newuser.errors.full_messages.join(", ")}" + end end - end - end } + end, + } end end end @@ -162,7 +160,10 @@ class ImportScripts::VanillaSQL < ImportScripts::Base User.find_each do |u| next unless u.custom_fields["import_id"] - r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first + r = + mysql_query( + "SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields["import_id"]};", + ).first next if r.nil? photo = r["photo"] next unless photo.present? @@ -175,9 +176,9 @@ class ImportScripts::VanillaSQL < ImportScripts::Base photo_real_filename = nil parts = photo.squeeze("/").split("/") if parts[0] =~ /^[a-z0-9]{2}:/ - photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/") + photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join("/")}".squeeze("/") elsif parts[0] == "~cf" - photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/") + photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join("/")}".squeeze("/") else puts "UNKNOWN FORMAT: #{photo}" next @@ -218,7 +219,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base # Otherwise, the file exists but with a prefix: # The p prefix seems to be the full file, so try to find that one first. - ['p', 't', 'n'].each do |prefix| + %w[p t n].each do |prefix| full_guess = File.join(path, "#{prefix}#{base_guess}") return full_guess if File.exist?(full_guess) end @@ -230,38 +231,43 @@ class ImportScripts::VanillaSQL < ImportScripts::Base def import_group_users puts "", "importing group users..." - group_users = mysql_query(" + group_users = + mysql_query( + " SELECT RoleID, UserID FROM #{TABLE_PREFIX}UserRole - ").to_a + ", + ).to_a group_users.each do |row| user_id = user_id_from_imported_user_id(row["UserID"]) group_id = group_id_from_imported_group_id(row["RoleID"]) - if user_id && group_id - GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) - end + GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) if user_id && group_id end end def import_categories puts "", "importing categories..." - categories = mysql_query(" + categories = + mysql_query( + " SELECT CategoryID, ParentCategoryID, Name, Description FROM #{TABLE_PREFIX}Category WHERE CategoryID > 0 ORDER BY CategoryID ASC - ").to_a + ", + ).to_a - top_level_categories = categories.select { |c| c['ParentCategoryID'].blank? || c['ParentCategoryID'] == -1 } + top_level_categories = + categories.select { |c| c["ParentCategoryID"].blank? 
|| c["ParentCategoryID"] == -1 } create_categories(top_level_categories) do |category| { - id: category['CategoryID'], - name: CGI.unescapeHTML(category['Name']), - description: CGI.unescapeHTML(category['Description']) + id: category["CategoryID"], + name: CGI.unescapeHTML(category["Name"]), + description: CGI.unescapeHTML(category["Description"]), } end @@ -272,37 +278,37 @@ class ImportScripts::VanillaSQL < ImportScripts::Base # Depth = 3 create_categories(subcategories) do |category| { - id: category['CategoryID'], - parent_category_id: category_id_from_imported_category_id(category['ParentCategoryID']), - name: CGI.unescapeHTML(category['Name']), - description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil, + id: category["CategoryID"], + parent_category_id: category_id_from_imported_category_id(category["ParentCategoryID"]), + name: CGI.unescapeHTML(category["Name"]), + description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil, } end - subcategory_ids = Set.new(subcategories.map { |c| c['CategoryID'] }) + subcategory_ids = Set.new(subcategories.map { |c| c["CategoryID"] }) # Depth 4 and 5 need to be tags categories.each do |c| - next if c['ParentCategoryID'] == -1 - next if top_level_category_ids.include?(c['CategoryID']) - next if subcategory_ids.include?(c['CategoryID']) + next if c["ParentCategoryID"] == -1 + next if top_level_category_ids.include?(c["CategoryID"]) + next if subcategory_ids.include?(c["CategoryID"]) # Find a depth 3 category for topics in this category parent = c - while !parent.nil? && !subcategory_ids.include?(parent['CategoryID']) - parent = categories.find { |subcat| subcat['CategoryID'] == parent['ParentCategoryID'] } + while !parent.nil? && !subcategory_ids.include?(parent["CategoryID"]) + parent = categories.find { |subcat| subcat["CategoryID"] == parent["ParentCategoryID"] } end if parent - tag_name = DiscourseTagging.clean_tag(c['Name']) + tag_name = DiscourseTagging.clean_tag(c["Name"]) tag = Tag.find_by_name(tag_name) || Tag.create(name: tag_name) - @category_mappings[c['CategoryID']] = { - category_id: category_id_from_imported_category_id(parent['CategoryID']), - tag: tag[:name] + @category_mappings[c["CategoryID"]] = { + category_id: category_id_from_imported_category_id(parent["CategoryID"]), + tag: tag[:name], } else - puts '', "Couldn't find a category for #{c['CategoryID']} '#{c['Name']}'!" + puts "", "Couldn't find a category for #{c["CategoryID"]} '#{c["Name"]}'!" end end end @@ -310,46 +316,66 @@ class ImportScripts::VanillaSQL < ImportScripts::Base def import_topics puts "", "importing topics..." 
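To recap the mapping built just above: Vanilla categories can nest five levels deep while Discourse stops at three, so depth-4 and depth-5 categories are walked up to their nearest depth-3 ancestor and carried over as tags (cleaned via DiscourseTagging.clean_tag). With a hypothetical chain of CategoryIDs 1 > 2 > 3 > 4 > 5, the result would be:

  # depths 1-3 become real categories; deeper levels land in @category_mappings
  @category_mappings[4] # => { category_id: <Discourse id of category 3>, tag: "cleaned-name-of-4" }
  @category_mappings[5] # => { category_id: <Discourse id of category 3>, tag: "cleaned-name-of-5" }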
- tag_names_sql = "select t.name as tag_name from GDN_Tag t, GDN_TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';" + tag_names_sql = + "select t.name as tag_name from GDN_Tag t, GDN_TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';" - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count'] + total_count = + mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first["count"] @last_topic_id = -1 batches(BATCH_SIZE) do |offset| - discussions = mysql_query( - "SELECT DiscussionID, CategoryID, Name, Body, Format, CountViews, Closed, Announce, + discussions = + mysql_query( + "SELECT DiscussionID, CategoryID, Name, Body, Format, CountViews, Closed, Announce, DateInserted, InsertUserID, DateLastComment FROM #{TABLE_PREFIX}Discussion WHERE DiscussionID > #{@last_topic_id} ORDER BY DiscussionID ASC - LIMIT #{BATCH_SIZE};") + LIMIT #{BATCH_SIZE};", + ) break if discussions.size < 1 - @last_topic_id = discussions.to_a.last['DiscussionID'] - next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s } + @last_topic_id = discussions.to_a.last["DiscussionID"] + if all_records_exist? :posts, discussions.map { |t| "discussion#" + t["DiscussionID"].to_s } + next + end create_posts(discussions, total: total_count, offset: offset) do |discussion| - user_id = user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID + user_id = + user_id_from_imported_user_id(discussion["InsertUserID"]) || Discourse::SYSTEM_USER_ID { - id: "discussion#" + discussion['DiscussionID'].to_s, + id: "discussion#" + discussion["DiscussionID"].to_s, user_id: user_id, - title: discussion['Name'], - category: category_id_from_imported_category_id(discussion['CategoryID']) || @category_mappings[discussion['CategoryID']].try(:[], :category_id), + title: discussion["Name"], + category: + category_id_from_imported_category_id(discussion["CategoryID"]) || + @category_mappings[discussion["CategoryID"]].try(:[], :category_id), raw: get_raw(discussion, user_id), - views: discussion['CountViews'] || 0, - closed: discussion['Closed'] == 1, - pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']), - pinned_globally: discussion['Announce'] == 1, - created_at: Time.zone.at(discussion['DateInserted']), - post_create_action: proc do |post| - if @import_tags - tag_names = @client.query(tag_names_sql.gsub('{discussionid}', discussion['DiscussionID'].to_s)).map { |row| row['tag_name'] } - category_tag = @category_mappings[discussion['CategoryID']].try(:[], :tag) - tag_names = category_tag ? tag_names.append(category_tag) : tag_names - DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names) - end - end + views: discussion["CountViews"] || 0, + closed: discussion["Closed"] == 1, + pinned_at: + ( + if discussion["Announce"] == 0 + nil + else + Time.zone.at(discussion["DateLastComment"] || discussion["DateInserted"]) + end + ), + pinned_globally: discussion["Announce"] == 1, + created_at: Time.zone.at(discussion["DateInserted"]), + post_create_action: + proc do |post| + if @import_tags + tag_names = + @client + .query(tag_names_sql.gsub("{discussionid}", discussion["DiscussionID"].to_s)) + .map { |row| row["tag_name"] } + category_tag = @category_mappings[discussion["CategoryID"]].try(:[], :tag) + tag_names = category_tag ? 
tag_names.append(category_tag) : tag_names + DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names) + end + end, } end end @@ -358,36 +384,42 @@ class ImportScripts::VanillaSQL < ImportScripts::Base def import_posts puts "", "importing posts..." - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count'] + total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first["count"] @last_post_id = -1 batches(BATCH_SIZE) do |offset| - comments = mysql_query( - "SELECT CommentID, DiscussionID, Body, Format, + comments = + mysql_query( + "SELECT CommentID, DiscussionID, Body, Format, DateInserted, InsertUserID, QnA FROM #{TABLE_PREFIX}Comment WHERE CommentID > #{@last_post_id} ORDER BY CommentID ASC - LIMIT #{BATCH_SIZE};") + LIMIT #{BATCH_SIZE};", + ) break if comments.size < 1 - @last_post_id = comments.to_a.last['CommentID'] - next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment['CommentID'].to_s } + @last_post_id = comments.to_a.last["CommentID"] + if all_records_exist? :posts, + comments.map { |comment| "comment#" + comment["CommentID"].to_s } + next + end create_posts(comments, total: total_count, offset: offset) do |comment| - next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s) - next if comment['Body'].blank? - user_id = user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID + unless t = topic_lookup_from_imported_post_id("discussion#" + comment["DiscussionID"].to_s) + next + end + next if comment["Body"].blank? + user_id = + user_id_from_imported_user_id(comment["InsertUserID"]) || Discourse::SYSTEM_USER_ID post = { - id: "comment#" + comment['CommentID'].to_s, + id: "comment#" + comment["CommentID"].to_s, user_id: user_id, topic_id: t[:topic_id], raw: get_raw(comment, user_id), - created_at: Time.zone.at(comment['DateInserted']) + created_at: Time.zone.at(comment["DateInserted"]), } - if comment['QnA'] == "Accepted" - post[:custom_fields] = { is_accepted_answer: true } - end + post[:custom_fields] = { is_accepted_answer: true } if comment["QnA"] == "Accepted" post end @@ -397,19 +429,22 @@ class ImportScripts::VanillaSQL < ImportScripts::Base def import_likes puts "", "importing likes..." - total_count = mysql_query("SELECT count(*) count FROM GDN_ThanksLog;").first['count'] + total_count = mysql_query("SELECT count(*) count FROM GDN_ThanksLog;").first["count"] current_count = 0 start_time = Time.now - likes = mysql_query(" + likes = + mysql_query( + " SELECT CommentID, DateInserted, InsertUserID FROM #{TABLE_PREFIX}ThanksLog ORDER BY CommentID ASC; - ") + ", + ) likes.each do |like| - post_id = post_id_from_imported_post_id("comment##{like['CommentID']}") - user_id = user_id_from_imported_user_id(like['InsertUserID']) + post_id = post_id_from_imported_post_id("comment##{like["CommentID"]}") + user_id = user_id_from_imported_user_id(like["InsertUserID"]) post = Post.find(post_id) if post_id user = User.find(user_id) if user_id @@ -428,51 +463,58 @@ class ImportScripts::VanillaSQL < ImportScripts::Base def import_messages puts "", "importing messages..." 
- total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}ConversationMessage;").first['count'] + total_count = + mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}ConversationMessage;").first["count"] @last_message_id = -1 batches(BATCH_SIZE) do |offset| - messages = mysql_query( - "SELECT m.MessageID, m.Body, m.Format, + messages = + mysql_query( + "SELECT m.MessageID, m.Body, m.Format, m.InsertUserID, m.DateInserted, m.ConversationID, c.Contributors FROM #{TABLE_PREFIX}ConversationMessage m INNER JOIN #{TABLE_PREFIX}Conversation c on c.ConversationID = m.ConversationID WHERE m.MessageID > #{@last_message_id} ORDER BY m.MessageID ASC - LIMIT #{BATCH_SIZE};") + LIMIT #{BATCH_SIZE};", + ) break if messages.size < 1 - @last_message_id = messages.to_a.last['MessageID'] - next if all_records_exist? :posts, messages.map { |t| "message#" + t['MessageID'].to_s } + @last_message_id = messages.to_a.last["MessageID"] + next if all_records_exist? :posts, messages.map { |t| "message#" + t["MessageID"].to_s } create_posts(messages, total: total_count, offset: offset) do |message| - user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID + user_id = + user_id_from_imported_user_id(message["InsertUserID"]) || Discourse::SYSTEM_USER_ID body = get_raw(message, user_id) common = { user_id: user_id, raw: body, - created_at: Time.zone.at(message['DateInserted']), + created_at: Time.zone.at(message["DateInserted"]), custom_fields: { - conversation_id: message['ConversationID'], - participants: message['Contributors'], - message_id: message['MessageID'] - } + conversation_id: message["ConversationID"], + participants: message["Contributors"], + message_id: message["MessageID"], + }, } - conversation_id = "conversation#" + message['ConversationID'].to_s - message_id = "message#" + message['MessageID'].to_s + conversation_id = "conversation#" + message["ConversationID"].to_s + message_id = "message#" + message["MessageID"].to_s imported_conversation = topic_lookup_from_imported_post_id(conversation_id) if imported_conversation.present? common.merge(id: message_id, topic_id: imported_conversation[:topic_id]) else - user_ids = (message['Contributors'] || '').scan(/\"(\d+)\"/).flatten.map(&:to_i) - usernames = user_ids.map { |id| @lookup.find_user_by_import_id(id).try(:username) }.compact - usernames = [@lookup.find_user_by_import_id(message['InsertUserID']).try(:username)].compact if usernames.empty? + user_ids = (message["Contributors"] || "").scan(/\"(\d+)\"/).flatten.map(&:to_i) + usernames = + user_ids.map { |id| @lookup.find_user_by_import_id(id).try(:username) }.compact + usernames = [ + @lookup.find_user_by_import_id(message["InsertUserID"]).try(:username), + ].compact if usernames.empty? 
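The Contributors handling above depends only on quoted digit runs, so the column's exact serialization format doesn't matter. A quick sketch with a made-up value:

  contributors = '["7","12"]' # hypothetical raw column value
  contributors.scan(/\"(\d+)\"/).flatten.map(&:to_i) # => [7, 12]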
title = body.truncate(40) { @@ -487,8 +529,8 @@ class ImportScripts::VanillaSQL < ImportScripts::Base end def get_raw(record, user_id) - format = (record['Format'] || "").downcase - body = record['Body'] + format = (record["Format"] || "").downcase + body = record["Body"] case format when "html" @@ -507,7 +549,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base raw = @htmlentities.decode(raw) # convert user profile links to user mentions - raw.gsub!(/(@\S+?)<\/a>/) { $1 } + raw.gsub!(%r{(@\S+?)}) { $1 } raw = ReverseMarkdown.convert(raw) unless skip_reverse_markdown @@ -526,14 +568,21 @@ class ImportScripts::VanillaSQL < ImportScripts::Base end def create_permalinks - puts '', 'Creating redirects...', '' + puts "", "Creating redirects...", "" User.find_each do |u| ucf = u.custom_fields if ucf && ucf["import_id"] && ucf["import_username"] - encoded_username = CGI.escape(ucf['import_username']).gsub('+', '%20') - Permalink.create(url: "profile/#{ucf['import_id']}/#{encoded_username}", external_url: "/users/#{u.username}") rescue nil - print '.' + encoded_username = CGI.escape(ucf["import_username"]).gsub("+", "%20") + begin + Permalink.create( + url: "profile/#{ucf["import_id"]}/#{encoded_username}", + external_url: "/users/#{u.username}", + ) + rescue StandardError + nil + end + print "." end end @@ -541,14 +590,22 @@ class ImportScripts::VanillaSQL < ImportScripts::Base pcf = post.custom_fields if pcf && pcf["import_id"] topic = post.topic - id = pcf["import_id"].split('#').last + id = pcf["import_id"].split("#").last if post.post_number == 1 slug = Slug.for(topic.title) # probably matches what vanilla would do... - Permalink.create(url: "discussion/#{id}/#{slug}", topic_id: topic.id) rescue nil + begin + Permalink.create(url: "discussion/#{id}/#{slug}", topic_id: topic.id) + rescue StandardError + nil + end else - Permalink.create(url: "discussion/comment/#{id}", post_id: post.id) rescue nil + begin + Permalink.create(url: "discussion/comment/#{id}", post_id: post.id) + rescue StandardError + nil + end end - print '.' + print "." end end end @@ -561,75 +618,86 @@ class ImportScripts::VanillaSQL < ImportScripts::Base count = 0 # https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg - cdn_regex = /https:\/\/us.v-cdn.net\/1234567\/uploads\/(\S+\/(\w|-)+.\w+)/i + cdn_regex = %r{https://us.v-cdn.net/1234567/uploads/(\S+/(\w|-)+.\w+)}i # [attachment=10109:Screen Shot 2012-04-01 at 3.47.35 AM.png] attachment_regex = /\[attachment=(\d+):(.*?)\]/i - Post.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'").find_each do |post| - count += 1 - print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] - new_raw = post.raw.dup + Post + .where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'") + .find_each do |post| + count += 1 + print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] + new_raw = post.raw.dup - new_raw.gsub!(attachment_regex) do |s| - matches = attachment_regex.match(s) - attachment_id = matches[1] - file_name = matches[2] - next unless attachment_id + new_raw.gsub!(attachment_regex) do |s| + matches = attachment_regex.match(s) + attachment_id = matches[1] + file_name = matches[2] + next unless attachment_id - r = mysql_query("SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};").first - next if r.nil? - path = r["Path"] - name = r["Name"] - next unless path.present? + r = + mysql_query( + "SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};", + ).first + next if r.nil? 
+ path = r["Path"] + name = r["Name"] + next unless path.present? - path.gsub!("s3://content/", "") - path.gsub!("s3://uploads/", "") - file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}" + path.gsub!("s3://content/", "") + path.gsub!("s3://uploads/", "") + file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}" - if File.exist?(file_path) - upload = create_upload(post.user.id, file_path, File.basename(file_path)) - if upload && upload.errors.empty? - # upload.url - filename = name || file_name || File.basename(file_path) - html_for_upload(upload, normalize_text(filename)) + if File.exist?(file_path) + upload = create_upload(post.user.id, file_path, File.basename(file_path)) + if upload && upload.errors.empty? + # upload.url + filename = name || file_name || File.basename(file_path) + html_for_upload(upload, normalize_text(filename)) + else + puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + end else - puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + puts "Couldn't find file for #{attachment_id}. Skipping." + next end - else - puts "Couldn't find file for #{attachment_id}. Skipping." - next end - end - new_raw.gsub!(cdn_regex) do |s| - matches = cdn_regex.match(s) - attachment_id = matches[1] + new_raw.gsub!(cdn_regex) do |s| + matches = cdn_regex.match(s) + attachment_id = matches[1] - file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}" + file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}" - if File.exist?(file_path) - upload = create_upload(post.user.id, file_path, File.basename(file_path)) - if upload && upload.errors.empty? - upload.url + if File.exist?(file_path) + upload = create_upload(post.user.id, file_path, File.basename(file_path)) + if upload && upload.errors.empty? + upload.url + else + puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + end else - puts "Error: Upload did not persist for #{post.id} #{attachment_id}!" + puts "Couldn't find file for #{attachment_id}. Skipping." + next end - else - puts "Couldn't find file for #{attachment_id}. Skipping." - next end - end - if new_raw != post.raw - begin - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, skip_revision: true, skip_validations: true, bypass_bump: true) - rescue - puts "PostRevisor error for #{post.id}" - post.raw = new_raw - post.save(validate: false) + if new_raw != post.raw + begin + PostRevisor.new(post).revise!( + post.user, + { raw: new_raw }, + skip_revision: true, + skip_validations: true, + bypass_bump: true, + ) + rescue StandardError + puts "PostRevisor error for #{post.id}" + post.raw = new_raw + post.save(validate: false) + end end end - end end end diff --git a/script/import_scripts/vbulletin.rb b/script/import_scripts/vbulletin.rb index 8b3de80bdc3..f534b43848d 100644 --- a/script/import_scripts/vbulletin.rb +++ b/script/import_scripts/vbulletin.rb @@ -1,14 +1,14 @@ # frozen_string_literal: true -require 'mysql2' +require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'htmlentities' +require "htmlentities" begin - require 'php_serialize' # https://github.com/jqr/php-serialize + require "php_serialize" # https://github.com/jqr/php-serialize rescue LoadError puts - puts 'php_serialize not found.' - puts 'Add to Gemfile, like this: ' + puts "php_serialize not found." 
+ puts "Add to Gemfile, like this: " puts puts "echo gem \\'php-serialize\\' >> Gemfile" puts "bundle install" @@ -23,13 +23,13 @@ class ImportScripts::VBulletin < ImportScripts::Base # CHANGE THESE BEFORE RUNNING THE IMPORTER - DB_HOST ||= ENV['DB_HOST'] || "localhost" - DB_NAME ||= ENV['DB_NAME'] || "vbulletin" - DB_PW ||= ENV['DB_PW'] || "" - DB_USER ||= ENV['DB_USER'] || "root" - TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles" - TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "vb_" - ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/path/to/your/attachment/folder' + DB_HOST ||= ENV["DB_HOST"] || "localhost" + DB_NAME ||= ENV["DB_NAME"] || "vbulletin" + DB_PW ||= ENV["DB_PW"] || "" + DB_USER ||= ENV["DB_USER"] || "root" + TIMEZONE ||= ENV["TIMEZONE"] || "America/Los_Angeles" + TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "vb_" + ATTACHMENT_DIR ||= ENV["ATTACHMENT_DIR"] || "/path/to/your/attachment/folder" puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}" @@ -44,16 +44,12 @@ class ImportScripts::VBulletin < ImportScripts::Base @htmlentities = HTMLEntities.new - @client = Mysql2::Client.new( - host: DB_HOST, - username: DB_USER, - password: DB_PW, - database: DB_NAME - ) - rescue Exception => e - puts '=' * 50 - puts e.message - puts <<~TEXT + @client = + Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME) + rescue Exception => e + puts "=" * 50 + puts e.message + puts <<~TEXT Cannot connect in to database. Hostname: #{DB_HOST} @@ -72,11 +68,15 @@ class ImportScripts::VBulletin < ImportScripts::Base Exiting. TEXT - exit + exit end def execute - mysql_query("CREATE INDEX firstpostid_index ON #{TABLE_PREFIX}thread (firstpostid)") rescue nil + begin + mysql_query("CREATE INDEX firstpostid_index ON #{TABLE_PREFIX}thread (firstpostid)") + rescue StandardError + nil + end import_groups import_users @@ -104,10 +104,7 @@ class ImportScripts::VBulletin < ImportScripts::Base SQL create_groups(groups) do |group| - { - id: group["usergroupid"], - name: @htmlentities.decode(group["title"]).strip - } + { id: group["usergroupid"], name: @htmlentities.decode(group["title"]).strip } end end @@ -127,7 +124,7 @@ class ImportScripts::VBulletin < ImportScripts::Base last_user_id = -1 batches(BATCH_SIZE) do |offset| - users = mysql_query(<<-SQL + users = mysql_query(<<-SQL).to_a SELECT userid , username , homepage @@ -142,7 +139,6 @@ class ImportScripts::VBulletin < ImportScripts::Base ORDER BY userid LIMIT #{BATCH_SIZE} SQL - ).to_a break if users.empty? 
@@ -169,15 +165,21 @@ class ImportScripts::VBulletin < ImportScripts::Base primary_group_id: group_id_from_imported_group_id(user["usergroupid"].to_i), created_at: parse_timestamp(user["joindate"]), last_seen_at: parse_timestamp(user["lastvisit"]), - post_create_action: proc do |u| - import_profile_picture(user, u) - import_profile_background(user, u) - end + post_create_action: + proc do |u| + import_profile_picture(user, u) + import_profile_background(user, u) + end, } end end - @usernames = UserCustomField.joins(:user).where(name: 'import_username').pluck('user_custom_fields.value', 'users.username').to_h + @usernames = + UserCustomField + .joins(:user) + .where(name: "import_username") + .pluck("user_custom_fields.value", "users.username") + .to_h end def create_groups_membership @@ -190,7 +192,10 @@ class ImportScripts::VBulletin < ImportScripts::Base next if GroupUser.where(group_id: group.id).count > 0 user_ids_in_group = User.where(primary_group_id: group.id).pluck(:id).to_a next if user_ids_in_group.size == 0 - values = user_ids_in_group.map { |user_id| "(#{group.id}, #{user_id}, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)" }.join(",") + values = + user_ids_in_group + .map { |user_id| "(#{group.id}, #{user_id}, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)" } + .join(",") DB.exec <<~SQL INSERT INTO group_users (group_id, user_id, created_at, updated_at) VALUES #{values} @@ -230,8 +235,16 @@ class ImportScripts::VBulletin < ImportScripts::Base imported_user.user_avatar.update(custom_upload_id: upload.id) imported_user.update(uploaded_avatar_id: upload.id) ensure - file.close rescue nil - file.unlind rescue nil + begin + file.close + rescue StandardError + nil + end + begin + file.unlind + rescue StandardError + nil + end end def import_profile_background(old_user, imported_user) @@ -258,14 +271,25 @@ class ImportScripts::VBulletin < ImportScripts::Base imported_user.user_profile.upload_profile_background(upload) ensure - file.close rescue nil - file.unlink rescue nil + begin + file.close + rescue StandardError + nil + end + begin + file.unlink + rescue StandardError + nil + end end def import_categories puts "", "importing top level categories..." - categories = mysql_query("SELECT forumid, title, description, displayorder, parentid FROM #{TABLE_PREFIX}forum ORDER BY forumid").to_a + categories = + mysql_query( + "SELECT forumid, title, description, displayorder, parentid FROM #{TABLE_PREFIX}forum ORDER BY forumid", + ).to_a top_level_categories = categories.select { |c| c["parentid"] == -1 } @@ -274,7 +298,7 @@ class ImportScripts::VBulletin < ImportScripts::Base id: category["forumid"], name: @htmlentities.decode(category["title"]).strip, position: category["displayorder"], - description: @htmlentities.decode(category["description"]).strip + description: @htmlentities.decode(category["description"]).strip, } end @@ -296,7 +320,7 @@ class ImportScripts::VBulletin < ImportScripts::Base name: @htmlentities.decode(category["title"]).strip, position: category["displayorder"], description: @htmlentities.decode(category["description"]).strip, - parent_category_id: category_id_from_imported_category_id(category["parentid"]) + parent_category_id: category_id_from_imported_category_id(category["parentid"]), } end end @@ -304,12 +328,13 @@ class ImportScripts::VBulletin < ImportScripts::Base def import_topics puts "", "importing topics..." 
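The most pervasive transformation in this diff is heredoc chaining: stree moves trailing calls such as `.to_a` or `.first["count"]` from after the closing `SQL` delimiter onto the line that opens the heredoc. Both forms parse identically, because the heredoc body only begins on the following line. A self-contained sketch (`fake_query` is a made-up stand-in):

    def fake_query(sql)
      sql.lines # stand-in for a mysql_query call returning rows
    end

    rows = fake_query(<<~SQL).map(&:strip)
      SELECT userid
      FROM vb_user
    SQL
    rows # => ["SELECT userid", "FROM vb_user"]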
- topic_count = mysql_query("SELECT COUNT(threadid) count FROM #{TABLE_PREFIX}thread").first["count"] + topic_count = + mysql_query("SELECT COUNT(threadid) count FROM #{TABLE_PREFIX}thread").first["count"] last_topic_id = -1 batches(BATCH_SIZE) do |offset| - topics = mysql_query(<<-SQL + topics = mysql_query(<<-SQL).to_a SELECT t.threadid threadid, t.title title, forumid, open, postuserid, t.dateline dateline, views, t.visible visible, sticky, p.pagetext raw FROM #{TABLE_PREFIX}thread t @@ -318,7 +343,6 @@ class ImportScripts::VBulletin < ImportScripts::Base ORDER BY t.threadid LIMIT #{BATCH_SIZE} SQL - ).to_a break if topics.empty? @@ -326,7 +350,12 @@ class ImportScripts::VBulletin < ImportScripts::Base topics.reject! { |t| @lookup.post_already_imported?("thread-#{t["threadid"]}") } create_posts(topics, total: topic_count, offset: offset) do |topic| - raw = preprocess_post_raw(topic["raw"]) rescue nil + raw = + begin + preprocess_post_raw(topic["raw"]) + rescue StandardError + nil + end next if raw.blank? topic_id = "thread-#{topic["threadid"]}" t = { @@ -351,28 +380,28 @@ class ImportScripts::VBulletin < ImportScripts::Base topic = topic_lookup_from_imported_post_id(topic_id) if topic.present? url_slug = "thread/#{thread["threadid"]}" if thread["title"].present? - Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present? + if url_slug.present? && topic[:topic_id].present? + Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) + end end end - end end def import_posts puts "", "importing posts..." - post_count = mysql_query(<<-SQL + post_count = mysql_query(<<-SQL).first["count"] SELECT COUNT(postid) count FROM #{TABLE_PREFIX}post p JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid WHERE t.firstpostid <> p.postid SQL - ).first["count"] last_post_id = -1 batches(BATCH_SIZE) do |offset| - posts = mysql_query(<<-SQL + posts = mysql_query(<<-SQL).to_a SELECT p.postid, p.userid, p.threadid, p.pagetext raw, p.dateline, p.visible, p.parentid FROM #{TABLE_PREFIX}post p JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid @@ -381,7 +410,6 @@ class ImportScripts::VBulletin < ImportScripts::Base ORDER BY p.postid LIMIT #{BATCH_SIZE} SQL - ).to_a break if posts.empty? @@ -389,7 +417,12 @@ class ImportScripts::VBulletin < ImportScripts::Base posts.reject! { |p| @lookup.post_already_imported?(p["postid"].to_i) } create_posts(posts, total: post_count, offset: offset) do |post| - raw = preprocess_post_raw(post["raw"]) rescue nil + raw = + begin + preprocess_post_raw(post["raw"]) + rescue StandardError + nil + end next if raw.blank? 
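Another recurring change in the surrounding hunks: single-quoted hash keys inside string interpolation become double-quoted, as in `"thread-#{topic["threadid"]}"`. That is valid Ruby, not an escaping bug -- `#{}` re-enters expression context, so the inner quotes do not terminate the outer string:

    row = { "threadid" => 42 }
    id_old = "thread-#{row['threadid']}" # pre-format spelling
    id_new = "thread-#{row["threadid"]}" # post-format spelling, equally valid
    id_old == id_new # => true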
next unless topic = topic_lookup_from_imported_post_id("thread-#{post["threadid"]}") p = { @@ -410,7 +443,8 @@ class ImportScripts::VBulletin < ImportScripts::Base # find the uploaded file information from the db def find_upload(post, attachment_id) - sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename, + sql = + "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename, LENGTH(fd.filedata) AS dbsize, filedata, a.caption caption FROM #{TABLE_PREFIX}attachment a LEFT JOIN #{TABLE_PREFIX}filedata fd ON fd.filedataid = a.filedataid @@ -418,25 +452,24 @@ class ImportScripts::VBulletin < ImportScripts::Base results = mysql_query(sql) unless row = results.first - puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}" + puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}" return end - filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach") - real_filename = row['filename'] - real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + filename = + File.join(ATTACHMENT_DIR, row["user_id"].to_s.split("").join("/"), "#{row["file_id"]}.attach") + real_filename = row["filename"] + real_filename.prepend SecureRandom.hex if real_filename[0] == "." unless File.exist?(filename) - if row['dbsize'].to_i == 0 - puts "Attachment file #{row['filedataid']} doesn't exist" + if row["dbsize"].to_i == 0 + puts "Attachment file #{row["filedataid"]} doesn't exist" return nil end - tmpfile = 'attach_' + row['filedataid'].to_s - filename = File.join('/tmp/', tmpfile) - File.open(filename, 'wb') { |f| - f.write(row['filedata']) - } + tmpfile = "attach_" + row["filedataid"].to_s + filename = File.join("/tmp/", tmpfile) + File.open(filename, "wb") { |f| f.write(row["filedata"]) } end upload = create_upload(post.user.id, filename, real_filename) @@ -457,24 +490,24 @@ class ImportScripts::VBulletin < ImportScripts::Base def import_private_messages puts "", "importing private messages..." - topic_count = mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"] + topic_count = + mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"] last_private_message_id = -1 batches(BATCH_SIZE) do |offset| - private_messages = mysql_query(<<-SQL + private_messages = mysql_query(<<-SQL).to_a SELECT pmtextid, fromuserid, title, message, touserarray, dateline FROM #{TABLE_PREFIX}pmtext WHERE pmtextid > #{last_private_message_id} ORDER BY pmtextid LIMIT #{BATCH_SIZE} SQL - ).to_a break if private_messages.empty? last_private_message_id = private_messages[-1]["pmtextid"] - private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm['pmtextid']}") } + private_messages.reject! 
{ |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") } title_username_of_pm_first_post = {} @@ -482,11 +515,16 @@ class ImportScripts::VBulletin < ImportScripts::Base skip = false mapped = {} - mapped[:id] = "pm-#{m['pmtextid']}" - mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID - mapped[:raw] = preprocess_post_raw(m['message']) rescue nil - mapped[:created_at] = Time.zone.at(m['dateline']) - title = @htmlentities.decode(m['title']).strip[0...255] + mapped[:id] = "pm-#{m["pmtextid"]}" + mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) || + Discourse::SYSTEM_USER_ID + mapped[:raw] = begin + preprocess_post_raw(m["message"]) + rescue StandardError + nil + end + mapped[:created_at] = Time.zone.at(m["dateline"]) + title = @htmlentities.decode(m["title"]).strip[0...255] topic_id = nil next if mapped[:raw].blank? @@ -495,9 +533,9 @@ class ImportScripts::VBulletin < ImportScripts::Base target_usernames = [] target_userids = [] begin - to_user_array = PHP.unserialize(m['touserarray']) - rescue - puts "#{m['pmtextid']} -- #{m['touserarray']}" + to_user_array = PHP.unserialize(m["touserarray"]) + rescue StandardError + puts "#{m["pmtextid"]} -- #{m["touserarray"]}" skip = true end @@ -517,8 +555,8 @@ class ImportScripts::VBulletin < ImportScripts::Base target_usernames << username if username end end - rescue - puts "skipping pm-#{m['pmtextid']} `to_user_array` is not properly serialized -- #{to_user_array.inspect}" + rescue StandardError + puts "skipping pm-#{m["pmtextid"]} `to_user_array` is not properly serialized -- #{to_user_array.inspect}" skip = true end @@ -526,18 +564,18 @@ class ImportScripts::VBulletin < ImportScripts::Base participants << mapped[:user_id] begin participants.sort! - rescue + rescue StandardError puts "one of the participant's id is nil -- #{participants.inspect}" end if title =~ /^Re:/ - - parent_id = title_username_of_pm_first_post[[title[3..-1], participants]] || - title_username_of_pm_first_post[[title[4..-1], participants]] || - title_username_of_pm_first_post[[title[5..-1], participants]] || - title_username_of_pm_first_post[[title[6..-1], participants]] || - title_username_of_pm_first_post[[title[7..-1], participants]] || - title_username_of_pm_first_post[[title[8..-1], participants]] + parent_id = + title_username_of_pm_first_post[[title[3..-1], participants]] || + title_username_of_pm_first_post[[title[4..-1], participants]] || + title_username_of_pm_first_post[[title[5..-1], participants]] || + title_username_of_pm_first_post[[title[6..-1], participants]] || + title_username_of_pm_first_post[[title[7..-1], participants]] || + title_username_of_pm_first_post[[title[8..-1], participants]] if parent_id if t = topic_lookup_from_imported_post_id("pm-#{parent_id}") @@ -545,18 +583,18 @@ class ImportScripts::VBulletin < ImportScripts::Base end end else - title_username_of_pm_first_post[[title, participants]] ||= m['pmtextid'] + title_username_of_pm_first_post[[title, participants]] ||= m["pmtextid"] end unless topic_id mapped[:title] = title mapped[:archetype] = Archetype.private_message - mapped[:target_usernames] = target_usernames.join(',') + mapped[:target_usernames] = target_usernames.join(",") if mapped[:target_usernames].size < 1 # pm with yourself? 
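For the private-message hunk above: vBulletin stores PM recipients as a PHP-serialized array, which is why the script needs the php-serialize gem. A tiny illustration of what `PHP.unserialize` yields -- the payload below is fabricated, and real `touserarray` values vary in shape, which is why the surrounding code wraps both the unserialize and the iteration in rescues:

    require "php_serialize" # https://github.com/jqr/php-serialize

    to_user_array = PHP.unserialize('a:2:{i:3;s:5:"alice";i:7;s:3:"bob";}')
    to_user_array      # => {3=>"alice", 7=>"bob"}
    to_user_array.keys # the imported ids fed to user_id_from_imported_user_id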
# skip = true mapped[:target_usernames] = "system" - puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})" + puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})" end else mapped[:topic_id] = topic_id @@ -568,25 +606,24 @@ class ImportScripts::VBulletin < ImportScripts::Base end def import_attachments - puts '', 'importing attachments...' + puts "", "importing attachments..." mapping = {} - attachments = mysql_query(<<-SQL + attachments = mysql_query(<<-SQL) SELECT a.attachmentid, a.contentid as postid, p.threadid FROM #{TABLE_PREFIX}attachment a, #{TABLE_PREFIX}post p WHERE a.contentid = p.postid AND contenttypeid = 1 AND state = 'visible' SQL - ) attachments.each do |attachment| - post_id = post_id_from_imported_post_id(attachment['postid']) - post_id = post_id_from_imported_post_id("thread-#{attachment['threadid']}") unless post_id + post_id = post_id_from_imported_post_id(attachment["postid"]) + post_id = post_id_from_imported_post_id("thread-#{attachment["threadid"]}") unless post_id if post_id.nil? - puts "Post for attachment #{attachment['attachmentid']} not found" + puts "Post for attachment #{attachment["attachmentid"]} not found" next end mapping[post_id] ||= [] - mapping[post_id] << attachment['attachmentid'].to_i + mapping[post_id] << attachment["attachmentid"].to_i end current_count = 0 @@ -594,7 +631,7 @@ class ImportScripts::VBulletin < ImportScripts::Base success_count = 0 fail_count = 0 - attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i + attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i Post.find_each do |post| current_count += 1 @@ -605,9 +642,7 @@ class ImportScripts::VBulletin < ImportScripts::Base matches = attachment_regex.match(s) attachment_id = matches[1] - unless mapping[post.id].nil? - mapping[post.id].delete(attachment_id.to_i) - end + mapping[post.id].delete(attachment_id.to_i) unless mapping[post.id].nil? upload, filename = find_upload(post, attachment_id) unless upload @@ -621,13 +656,12 @@ class ImportScripts::VBulletin < ImportScripts::Base # make resumed imports faster if new_raw == post.raw unless mapping[post.id].nil? || mapping[post.id].empty? 
- imported_text = mysql_query(<<-SQL + imported_text = mysql_query(<<-SQL).first["pagetext"] SELECT p.pagetext FROM #{TABLE_PREFIX}attachment a, #{TABLE_PREFIX}post p WHERE a.contentid = p.postid AND a.attachmentid = #{mapping[post.id][0]} SQL - ).first["pagetext"] imported_text.scan(attachment_regex) do |match| attachment_id = match[0] @@ -646,14 +680,17 @@ class ImportScripts::VBulletin < ImportScripts::Base # internal upload deduplication will make sure that we do not import attachments again html = html_for_upload(upload, filename) - if !new_raw[html] - new_raw += "\n\n#{html}\n\n" - end + new_raw += "\n\n#{html}\n\n" if !new_raw[html] end end if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin') + PostRevisor.new(post).revise!( + post.user, + { raw: new_raw }, + bypass_bump: true, + edit_reason: "Import attachments from vBulletin", + ) end success_count += 1 @@ -728,22 +765,22 @@ class ImportScripts::VBulletin < ImportScripts::Base # [HTML]...[/HTML] raw.gsub!(/\[html\]/i, "\n```html\n") - raw.gsub!(/\[\/html\]/i, "\n```\n") + raw.gsub!(%r{\[/html\]}i, "\n```\n") # [PHP]...[/PHP] raw.gsub!(/\[php\]/i, "\n```php\n") - raw.gsub!(/\[\/php\]/i, "\n```\n") + raw.gsub!(%r{\[/php\]}i, "\n```\n") # [HIGHLIGHT="..."] raw.gsub!(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" } # [CODE]...[/CODE] # [HIGHLIGHT]...[/HIGHLIGHT] - raw.gsub!(/\[\/?code\]/i, "\n```\n") - raw.gsub!(/\[\/?highlight\]/i, "\n```\n") + raw.gsub!(%r{\[/?code\]}i, "\n```\n") + raw.gsub!(%r{\[/?highlight\]}i, "\n```\n") # [SAMP]...[/SAMP] - raw.gsub!(/\[\/?samp\]/i, "`") + raw.gsub!(%r{\[/?samp\]}i, "`") # replace all chevrons with HTML entities # NOTE: must be done @@ -758,96 +795,99 @@ class ImportScripts::VBulletin < ImportScripts::Base raw.gsub!("\u2603", ">") # [URL=...]...[/URL] - raw.gsub!(/\[url="?([^"]+?)"?\](.*?)\[\/url\]/im) { "[#{$2.strip}](#{$1})" } - raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/im) { "[#{$2.strip}](#{$1})" } + raw.gsub!(%r{\[url="?([^"]+?)"?\](.*?)\[/url\]}im) { "[#{$2.strip}](#{$1})" } + raw.gsub!(%r{\[url="?(.+?)"?\](.+)\[/url\]}im) { "[#{$2.strip}](#{$1})" } # [URL]...[/URL] # [MP3]...[/MP3] - raw.gsub!(/\[\/?url\]/i, "") - raw.gsub!(/\[\/?mp3\]/i, "") + raw.gsub!(%r{\[/?url\]}i, "") + raw.gsub!(%r{\[/?mp3\]}i, "") # [MENTION][/MENTION] - raw.gsub!(/\[mention\](.+?)\[\/mention\]/i) do + raw.gsub!(%r{\[mention\](.+?)\[/mention\]}i) do new_username = get_username_for_old_username($1) "@#{new_username}" end # [FONT=blah] and [COLOR=blah] - raw.gsub! /\[FONT=.*?\](.*?)\[\/FONT\]/im, '\1' - raw.gsub! /\[COLOR=.*?\](.*?)\[\/COLOR\]/im, '\1' - raw.gsub! /\[COLOR=#.*?\](.*?)\[\/COLOR\]/im, '\1' + raw.gsub! %r{\[FONT=.*?\](.*?)\[/FONT\]}im, '\1' + raw.gsub! %r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, '\1' + raw.gsub! %r{\[COLOR=#.*?\](.*?)\[/COLOR\]}im, '\1' - raw.gsub! /\[SIZE=.*?\](.*?)\[\/SIZE\]/im, '\1' - raw.gsub! /\[SUP\](.*?)\[\/SUP\]/im, '\1' - raw.gsub! /\[h=.*?\](.*?)\[\/h\]/im, '\1' + raw.gsub! %r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, '\1' + raw.gsub! %r{\[SUP\](.*?)\[/SUP\]}im, '\1' + raw.gsub! %r{\[h=.*?\](.*?)\[/h\]}im, '\1' # [CENTER]...[/CENTER] - raw.gsub! /\[CENTER\](.*?)\[\/CENTER\]/im, '\1' + raw.gsub! %r{\[CENTER\](.*?)\[/CENTER\]}im, '\1' # [INDENT]...[/INDENT] - raw.gsub! /\[INDENT\](.*?)\[\/INDENT\]/im, '\1' + raw.gsub! 
%r{\[INDENT\](.*?)\[/INDENT\]}im, '\1' # Tables to MD - raw.gsub!(/\[TABLE.*?\](.*?)\[\/TABLE\]/im) { |t| - rows = $1.gsub!(/\s*\[TR\](.*?)\[\/TR\]\s*/im) { |r| - cols = $1.gsub! /\s*\[TD.*?\](.*?)\[\/TD\]\s*/im, '|\1' - "#{cols}|\n" - } + raw.gsub!(%r{\[TABLE.*?\](.*?)\[/TABLE\]}im) do |t| + rows = + $1.gsub!(%r{\s*\[TR\](.*?)\[/TR\]\s*}im) do |r| + cols = $1.gsub! %r{\s*\[TD.*?\](.*?)\[/TD\]\s*}im, '|\1' + "#{cols}|\n" + end header, rest = rows.split "\n", 2 c = header.count "|" sep = "|---" * (c - 1) "#{header}\n#{sep}|\n#{rest}\n" - } + end # [QUOTE]...[/QUOTE] - raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote| - quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" } + raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote| + quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" } quote.gsub!(/\n(.+?)/) { "\n> #{$1}" } - } + end # [QUOTE=]...[/QUOTE] - raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do + raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do old_username, quote = $1, $2 new_username = get_username_for_old_username(old_username) "\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n" end # [YOUTUBE][/YOUTUBE] - raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" } + raw.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\n//youtu.be/#{$1}\n" } # [VIDEO=youtube;]...[/VIDEO] - raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" } + raw.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) { "\n//youtu.be/#{$1}\n" } # Fix uppercase B U and I tags - raw.gsub!(/(\[\/?[BUI]\])/i) { $1.downcase } + raw.gsub!(%r{(\[/?[BUI]\])}i) { $1.downcase } # More Additions .... # [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler] - raw.gsub!(/\[spoiler="?(.+?)"?\](.+?)\[\/spoiler\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" } + raw.gsub!(%r{\[spoiler="?(.+?)"?\](.+?)\[/spoiler\]}im) do + "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" + end # [IMG][IMG]http://i63.tinypic.com/akga3r.jpg[/IMG][/IMG] - raw.gsub!(/\[IMG\]\[IMG\](.+?)\[\/IMG\]\[\/IMG\]/i) { "[IMG]#{$1}[/IMG]" } + raw.gsub!(%r{\[IMG\]\[IMG\](.+?)\[/IMG\]\[/IMG\]}i) { "[IMG]#{$1}[/IMG]" } # convert list tags to ul and list=1 tags to ol # (basically, we're only missing list=a here...) 
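The list conversions just below are the standard BBCode-to-Markdown shim: `[list]` blocks become `[ul]`/`[ol]`, and each `[*]` item becomes `[li]...[/li]` so the bbcode-to-md pass can finish the job. A worked example using the same two regexes:

    raw = "[list][*]one\n[*]two\n[/list]"
    raw = raw.gsub(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]')
    raw = raw.gsub(/\[\*\](.*?)\n/, '[li]\1[/li]')
    raw # => "[ul][li]one[/li][li]two[/li][/ul]"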
# (https://meta.discourse.org/t/phpbb-3-importer-old/17397) - raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]') - raw.gsub!(/\[list=1\](.*?)\[\/list\]/im, '[ol]\1[/ol]') - raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]') - raw.gsub!(/\[list=1\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]') + raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]') + raw.gsub!(%r{\[list=1\](.*?)\[/list\]}im, '[ol]\1[/ol]') + raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]') + raw.gsub!(%r{\[list=1\](.*?)\[/list:o\]}im, '[ol]\1[/ol]') # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - raw.gsub!(/\[\*\]\n/, '') - raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') + raw.gsub!(/\[\*\]\n/, "") + raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]') raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]') - raw.gsub!(/\[\*=1\]/, '') + raw.gsub!(/\[\*=1\]/, "") raw end def postprocess_post_raw(raw) # [QUOTE=;]...[/QUOTE] - raw.gsub!(/\[quote=([^;]+);(\d+)\](.+?)\[\/quote\]/im) do + raw.gsub!(%r{\[quote=([^;]+);(\d+)\](.+?)\[/quote\]}im) do old_username, post_id, quote = $1, $2, $3 new_username = get_username_for_old_username(old_username) @@ -859,7 +899,7 @@ class ImportScripts::VBulletin < ImportScripts::Base if topic_lookup = topic_lookup_from_imported_post_id(post_id) post_number = topic_lookup[:post_number] - topic_id = topic_lookup[:topic_id] + topic_id = topic_lookup[:topic_id] "\n[quote=\"#{new_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n" else "\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n" @@ -867,11 +907,11 @@ class ImportScripts::VBulletin < ImportScripts::Base end # remove attachments - raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "") + raw.gsub!(%r{\[attach[^\]]*\]\d+\[/attach\]}i, "") # [THREAD][/THREAD] # ==> http://my.discourse.org/t/slug/ - raw.gsub!(/\[thread\](\d+)\[\/thread\]/i) do + raw.gsub!(%r{\[thread\](\d+)\[/thread\]}i) do thread_id = $1 if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") topic_lookup[:url] @@ -882,7 +922,7 @@ class ImportScripts::VBulletin < ImportScripts::Base # [THREAD=]...[/THREAD] # ==> [...](http://my.discourse.org/t/slug/) - raw.gsub!(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do + raw.gsub!(%r{\[thread=(\d+)\](.+?)\[/thread\]}i) do thread_id, link = $1, $2 if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") url = topic_lookup[:url] @@ -894,7 +934,7 @@ class ImportScripts::VBulletin < ImportScripts::Base # [POST][/POST] # ==> http://my.discourse.org/t/slug// - raw.gsub!(/\[post\](\d+)\[\/post\]/i) do + raw.gsub!(%r{\[post\](\d+)\[/post\]}i) do post_id = $1 if topic_lookup = topic_lookup_from_imported_post_id(post_id) topic_lookup[:url] @@ -905,7 +945,7 @@ class ImportScripts::VBulletin < ImportScripts::Base # [POST=]...[/POST] # ==> [...](http://my.discourse.org/t///) - raw.gsub!(/\[post=(\d+)\](.+?)\[\/post\]/i) do + raw.gsub!(%r{\[post=(\d+)\](.+?)\[/post\]}i) do post_id, link = $1, $2 if topic_lookup = topic_lookup_from_imported_post_id(post_id) url = topic_lookup[:url] @@ -919,14 +959,14 @@ class ImportScripts::VBulletin < ImportScripts::Base end def create_permalink_file - puts '', 'Creating Permalink File...', '' + puts "", "Creating Permalink File...", "" id_mapping = [] Topic.listable_topics.find_each do |topic| pcf = topic.first_post.custom_fields if pcf && pcf["import_id"] - id = pcf["import_id"].split('-').last + id = pcf["import_id"].split("-").last id_mapping.push("XXX#{id} YYY#{topic.id}") end end @@ -940,24 +980,21 @@ class ImportScripts::VBulletin < 
ImportScripts::Base # end CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv| - id_mapping.each do |value| - csv << [value] - end + id_mapping.each { |value| csv << [value] } end - end def suspend_users - puts '', "updating banned users" + puts "", "updating banned users" banned = 0 failed = 0 - total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}userban").first['count'] + total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}userban").first["count"] system_user = Discourse.system_user mysql_query("SELECT userid, bandate FROM #{TABLE_PREFIX}userban").each do |b| - user = User.find_by_id(user_id_from_imported_user_id(b['userid'])) + user = User.find_by_id(user_id_from_imported_user_id(b["userid"])) if user user.suspended_at = parse_timestamp(user["bandate"]) user.suspended_till = 200.years.from_now @@ -970,7 +1007,7 @@ class ImportScripts::VBulletin < ImportScripts::Base failed += 1 end else - puts "Not found: #{b['userid']}" + puts "Not found: #{b["userid"]}" failed += 1 end @@ -985,7 +1022,6 @@ class ImportScripts::VBulletin < ImportScripts::Base def mysql_query(sql) @client.query(sql, cache_rows: true) end - end ImportScripts::VBulletin.new.perform diff --git a/script/import_scripts/vbulletin5.rb b/script/import_scripts/vbulletin5.rb index 5e5696e4f0f..af62c0a6bb2 100644 --- a/script/import_scripts/vbulletin5.rb +++ b/script/import_scripts/vbulletin5.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true -require 'mysql2' +require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'htmlentities' +require "htmlentities" class ImportScripts::VBulletin < ImportScripts::Base BATCH_SIZE = 1000 @@ -11,14 +11,14 @@ class ImportScripts::VBulletin < ImportScripts::Base # override these using environment vars - URL_PREFIX ||= ENV['URL_PREFIX'] || "forum/" - DB_PREFIX ||= ENV['DB_PREFIX'] || "vb_" - DB_HOST ||= ENV['DB_HOST'] || "localhost" - DB_NAME ||= ENV['DB_NAME'] || "vbulletin" - DB_PASS ||= ENV['DB_PASS'] || "password" - DB_USER ||= ENV['DB_USER'] || "username" - ATTACH_DIR ||= ENV['ATTACH_DIR'] || "/home/discourse/vbulletin/attach" - AVATAR_DIR ||= ENV['AVATAR_DIR'] || "/home/discourse/vbulletin/avatars" + URL_PREFIX ||= ENV["URL_PREFIX"] || "forum/" + DB_PREFIX ||= ENV["DB_PREFIX"] || "vb_" + DB_HOST ||= ENV["DB_HOST"] || "localhost" + DB_NAME ||= ENV["DB_NAME"] || "vbulletin" + DB_PASS ||= ENV["DB_PASS"] || "password" + DB_USER ||= ENV["DB_USER"] || "username" + ATTACH_DIR ||= ENV["ATTACH_DIR"] || "/home/discourse/vbulletin/attach" + AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/home/discourse/vbulletin/avatars" def initialize super @@ -29,16 +29,21 @@ class ImportScripts::VBulletin < ImportScripts::Base @htmlentities = HTMLEntities.new - @client = Mysql2::Client.new( - host: DB_HOST, - username: DB_USER, - database: DB_NAME, - password: DB_PASS - ) + @client = + Mysql2::Client.new(host: DB_HOST, username: DB_USER, database: DB_NAME, password: DB_PASS) - @forum_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").first['contenttypeid'] - @channel_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").first['contenttypeid'] - @text_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").first['contenttypeid'] + @forum_typeid = + mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").first[ + "contenttypeid" + ] + @channel_typeid = + mysql_query("SELECT contenttypeid FROM 
#{DB_PREFIX}contenttype WHERE class='Channel'").first[ + "contenttypeid" + ] + @text_typeid = + mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").first[ + "contenttypeid" + ] end def execute @@ -64,10 +69,7 @@ class ImportScripts::VBulletin < ImportScripts::Base SQL create_groups(groups) do |group| - { - id: group["usergroupid"], - name: @htmlentities.decode(group["title"]).strip - } + { id: group["usergroupid"], name: @htmlentities.decode(group["title"]).strip } end end @@ -102,17 +104,18 @@ class ImportScripts::VBulletin < ImportScripts::Base name: username, username: username, email: user["email"].presence || fake_email, - admin: user['admin'] == 1, + admin: user["admin"] == 1, password: user["password"], website: user["homepage"].strip, title: @htmlentities.decode(user["usertitle"]).strip, primary_group_id: group_id_from_imported_group_id(user["usergroupid"]), created_at: parse_timestamp(user["joindate"]), - post_create_action: proc do |u| - @old_username_to_new_usernames[user["username"]] = u.username - import_profile_picture(user, u) - # import_profile_background(user, u) - end + post_create_action: + proc do |u| + @old_username_to_new_usernames[user["username"]] = u.username + import_profile_picture(user, u) + # import_profile_background(user, u) + end, } end end @@ -131,18 +134,18 @@ class ImportScripts::VBulletin < ImportScripts::Base return if picture.nil? - if picture['filedata'] + if picture["filedata"] file = Tempfile.new("profile-picture") file.write(picture["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8")) file.rewind upload = UploadCreator.new(file, picture["filename"]).create_for(imported_user.id) else - filename = File.join(AVATAR_DIR, picture['filename']) + filename = File.join(AVATAR_DIR, picture["filename"]) unless File.exist?(filename) puts "Avatar file doesn't exist: #{filename}" return nil end - upload = create_upload(imported_user.id, filename, picture['filename']) + upload = create_upload(imported_user.id, filename, picture["filename"]) end return if !upload.persisted? @@ -151,8 +154,16 @@ class ImportScripts::VBulletin < ImportScripts::Base imported_user.user_avatar.update(custom_upload_id: upload.id) imported_user.update(uploaded_avatar_id: upload.id) ensure - file.close rescue nil - file.unlind rescue nil + begin + file.close + rescue StandardError + nil + end + begin + file.unlind + rescue StandardError + nil + end end def import_profile_background(old_user, imported_user) @@ -178,21 +189,32 @@ class ImportScripts::VBulletin < ImportScripts::Base imported_user.user_profile.upload_profile_background(upload) ensure - file.close rescue nil - file.unlink rescue nil + begin + file.close + rescue StandardError + nil + end + begin + file.unlink + rescue StandardError + nil + end end def import_categories puts "", "importing top level categories..." 
- categories = mysql_query("SELECT nodeid AS forumid, title, description, displayorder, parentid + categories = + mysql_query( + "SELECT nodeid AS forumid, title, description, displayorder, parentid FROM #{DB_PREFIX}node WHERE parentid=#{ROOT_NODE} UNION SELECT nodeid, title, description, displayorder, parentid FROM #{DB_PREFIX}node WHERE contenttypeid = #{@channel_typeid} - AND parentid IN (SELECT nodeid FROM #{DB_PREFIX}node WHERE parentid=#{ROOT_NODE})").to_a + AND parentid IN (SELECT nodeid FROM #{DB_PREFIX}node WHERE parentid=#{ROOT_NODE})", + ).to_a top_level_categories = categories.select { |c| c["parentid"] == ROOT_NODE } @@ -201,7 +223,7 @@ class ImportScripts::VBulletin < ImportScripts::Base id: category["forumid"], name: @htmlentities.decode(category["title"]).strip, position: category["displayorder"], - description: @htmlentities.decode(category["description"]).strip + description: @htmlentities.decode(category["description"]).strip, } end @@ -223,7 +245,7 @@ class ImportScripts::VBulletin < ImportScripts::Base name: @htmlentities.decode(category["title"]).strip, position: category["displayorder"], description: @htmlentities.decode(category["description"]).strip, - parent_category_id: category_id_from_imported_category_id(category["parentid"]) + parent_category_id: category_id_from_imported_category_id(category["parentid"]), } end end @@ -234,13 +256,17 @@ class ImportScripts::VBulletin < ImportScripts::Base # keep track of closed topics @closed_topic_ids = [] - topic_count = mysql_query("SELECT COUNT(nodeid) cnt + topic_count = + mysql_query( + "SELECT COUNT(nodeid) cnt FROM #{DB_PREFIX}node WHERE (unpublishdate = 0 OR unpublishdate IS NULL) AND (approved = 1 AND showapproved = 1) AND parentid IN ( - SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};" - ).first["cnt"] + SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};", + ).first[ + "cnt" + ] batches(BATCH_SIZE) do |offset| topics = mysql_query <<-SQL @@ -265,7 +291,12 @@ class ImportScripts::VBulletin < ImportScripts::Base # next if all_records_exist? :posts, topics.map {|t| "thread-#{topic["threadid"]}" } create_posts(topics, total: topic_count, offset: offset) do |topic| - raw = preprocess_post_raw(topic["raw"]) rescue nil + raw = + begin + preprocess_post_raw(topic["raw"]) + rescue StandardError + nil + end next if raw.blank? topic_id = "thread-#{topic["threadid"]}" @closed_topic_ids << topic_id if topic["open"] == "0" @@ -291,11 +322,16 @@ class ImportScripts::VBulletin < ImportScripts::Base # make sure `firstpostid` is indexed begin mysql_query("CREATE INDEX firstpostid_index ON thread (firstpostid)") - rescue + rescue StandardError end - post_count = mysql_query("SELECT COUNT(nodeid) cnt FROM #{DB_PREFIX}node WHERE parentid NOT IN ( - SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};").first["cnt"] + post_count = + mysql_query( + "SELECT COUNT(nodeid) cnt FROM #{DB_PREFIX}node WHERE parentid NOT IN ( + SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};", + ).first[ + "cnt" + ] batches(BATCH_SIZE) do |offset| posts = mysql_query <<-SQL @@ -338,10 +374,14 @@ class ImportScripts::VBulletin < ImportScripts::Base end def import_attachments - puts '', 'importing attachments...' + puts "", "importing attachments..." 
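Just below, the vBulletin 5 attachment importer widens Discourse's upload allow-list before creating uploads: it collects the distinct extensions present in `filedata` and folds them into the pipe-delimited `authorized_extensions` setting. The merge itself, with fabricated values:

    current = "jpg|jpeg|png|gif" # stand-in for SiteSetting.authorized_extensions
    from_db = %w[pdf png zip]    # stand-in for the GROUP_CONCAT(DISTINCT(extension)) result
    (current.split("|") + from_db).uniq.join("|")
    # => "jpg|jpeg|png|gif|pdf|zip"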
- ext = mysql_query("SELECT GROUP_CONCAT(DISTINCT(extension)) exts FROM #{DB_PREFIX}filedata").first['exts'].split(',') - SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + ext).uniq.join("|") + ext = + mysql_query("SELECT GROUP_CONCAT(DISTINCT(extension)) exts FROM #{DB_PREFIX}filedata").first[ + "exts" + ].split(",") + SiteSetting.authorized_extensions = + (SiteSetting.authorized_extensions.split("|") + ext).uniq.join("|") uploads = mysql_query <<-SQL SELECT n.parentid nodeid, a.filename, fd.userid, LENGTH(fd.filedata) AS dbsize, filedata, fd.filedataid @@ -354,32 +394,43 @@ class ImportScripts::VBulletin < ImportScripts::Base total_count = uploads.count uploads.each do |upload| - post_id = PostCustomField.where(name: 'import_id').where(value: upload['nodeid']).first&.post_id - post_id = PostCustomField.where(name: 'import_id').where(value: "thread-#{upload['nodeid']}").first&.post_id unless post_id + post_id = + PostCustomField.where(name: "import_id").where(value: upload["nodeid"]).first&.post_id + post_id = + PostCustomField + .where(name: "import_id") + .where(value: "thread-#{upload["nodeid"]}") + .first + &.post_id unless post_id if post_id.nil? - puts "Post for #{upload['nodeid']} not found" + puts "Post for #{upload["nodeid"]} not found" next end post = Post.find(post_id) - filename = File.join(ATTACH_DIR, upload['userid'].to_s.split('').join('/'), "#{upload['filedataid']}.attach") - real_filename = upload['filename'] - real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + filename = + File.join( + ATTACH_DIR, + upload["userid"].to_s.split("").join("/"), + "#{upload["filedataid"]}.attach", + ) + real_filename = upload["filename"] + real_filename.prepend SecureRandom.hex if real_filename[0] == "." unless File.exist?(filename) # attachments can be on filesystem or in database # try to retrieve from database if the file did not exist on filesystem - if upload['dbsize'].to_i == 0 - puts "Attachment file #{upload['filedataid']} doesn't exist" + if upload["dbsize"].to_i == 0 + puts "Attachment file #{upload["filedataid"]} doesn't exist" next end - tmpfile = 'attach_' + upload['filedataid'].to_s - filename = File.join('/tmp/', tmpfile) - File.open(filename, 'wb') { |f| + tmpfile = "attach_" + upload["filedataid"].to_s + filename = File.join("/tmp/", tmpfile) + File.open(filename, "wb") do |f| #f.write(PG::Connection.unescape_bytea(row['filedata'])) - f.write(upload['filedata']) - } + f.write(upload["filedata"]) + end end upl_obj = create_upload(post.user.id, filename, real_filename) @@ -388,7 +439,9 @@ class ImportScripts::VBulletin < ImportScripts::Base if !post.raw[html] post.raw += "\n\n#{html}\n\n" post.save! - PostUpload.create!(post: post, upload: upl_obj) unless PostUpload.where(post: post, upload: upl_obj).exists? + unless PostUpload.where(post: post, upload: upl_obj).exists? 
+ PostUpload.create!(post: post, upload: upl_obj) + end end else puts "Fail" @@ -447,170 +500,177 @@ class ImportScripts::VBulletin < ImportScripts::Base raw = @htmlentities.decode(raw) # fix whitespaces - raw = raw.gsub(/(\\r)?\\n/, "\n") - .gsub("\\t", "\t") + raw = raw.gsub(/(\\r)?\\n/, "\n").gsub("\\t", "\t") # [HTML]...[/HTML] - raw = raw.gsub(/\[html\]/i, "\n```html\n") - .gsub(/\[\/html\]/i, "\n```\n") + raw = raw.gsub(/\[html\]/i, "\n```html\n").gsub(%r{\[/html\]}i, "\n```\n") # [PHP]...[/PHP] - raw = raw.gsub(/\[php\]/i, "\n```php\n") - .gsub(/\[\/php\]/i, "\n```\n") + raw = raw.gsub(/\[php\]/i, "\n```php\n").gsub(%r{\[/php\]}i, "\n```\n") # [HIGHLIGHT="..."] raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" } # [CODE]...[/CODE] # [HIGHLIGHT]...[/HIGHLIGHT] - raw = raw.gsub(/\[\/?code\]/i, "\n```\n") - .gsub(/\[\/?highlight\]/i, "\n```\n") + raw = raw.gsub(%r{\[/?code\]}i, "\n```\n").gsub(%r{\[/?highlight\]}i, "\n```\n") # [SAMP]...[/SAMP] - raw = raw.gsub(/\[\/?samp\]/i, "`") + raw = raw.gsub(%r{\[/?samp\]}i, "`") # replace all chevrons with HTML entities # NOTE: must be done # - AFTER all the "code" processing # - BEFORE the "quote" processing - raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" } - .gsub("<", "<") - .gsub("\u2603", "<") + raw = + raw + .gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" } + .gsub("<", "<") + .gsub("\u2603", "<") - raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" } - .gsub(">", ">") - .gsub("\u2603", ">") + raw = + raw + .gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" } + .gsub(">", ">") + .gsub("\u2603", ">") # [URL=...]...[/URL] - raw.gsub!(/\[url="?(.+?)"?\](.+?)\[\/url\]/i) { "#{$2}" } + raw.gsub!(%r{\[url="?(.+?)"?\](.+?)\[/url\]}i) { "#{$2}" } # [URL]...[/URL] # [MP3]...[/MP3] - raw = raw.gsub(/\[\/?url\]/i, "") - .gsub(/\[\/?mp3\]/i, "") + raw = raw.gsub(%r{\[/?url\]}i, "").gsub(%r{\[/?mp3\]}i, "") # [MENTION][/MENTION] - raw = raw.gsub(/\[mention\](.+?)\[\/mention\]/i) do - old_username = $1 - if @old_username_to_new_usernames.has_key?(old_username) - old_username = @old_username_to_new_usernames[old_username] + raw = + raw.gsub(%r{\[mention\](.+?)\[/mention\]}i) do + old_username = $1 + if @old_username_to_new_usernames.has_key?(old_username) + old_username = @old_username_to_new_usernames[old_username] + end + "@#{old_username}" end - "@#{old_username}" - end # [USER=][/USER] - raw = raw.gsub(/\[user="?(\d+)"?\](.+?)\[\/user\]/i) do - user_id, old_username = $1, $2 - if @old_username_to_new_usernames.has_key?(old_username) - new_username = @old_username_to_new_usernames[old_username] - else - new_username = old_username + raw = + raw.gsub(%r{\[user="?(\d+)"?\](.+?)\[/user\]}i) do + user_id, old_username = $1, $2 + if @old_username_to_new_usernames.has_key?(old_username) + new_username = @old_username_to_new_usernames[old_username] + else + new_username = old_username + end + "@#{new_username}" end - "@#{new_username}" - end # [FONT=blah] and [COLOR=blah] # no idea why the /i is not matching case insensitive.. - raw.gsub! /\[color=.*?\](.*?)\[\/color\]/im, '\1' - raw.gsub! /\[COLOR=.*?\](.*?)\[\/COLOR\]/im, '\1' - raw.gsub! /\[font=.*?\](.*?)\[\/font\]/im, '\1' - raw.gsub! /\[FONT=.*?\](.*?)\[\/FONT\]/im, '\1' + raw.gsub! %r{\[color=.*?\](.*?)\[/color\]}im, '\1' + raw.gsub! %r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, '\1' + raw.gsub! %r{\[font=.*?\](.*?)\[/font\]}im, '\1' + raw.gsub! %r{\[FONT=.*?\](.*?)\[/FONT\]}im, '\1' # [CENTER]...[/CENTER] - raw.gsub! 
/\[CENTER\](.*?)\[\/CENTER\]/im, '\1' + raw.gsub! %r{\[CENTER\](.*?)\[/CENTER\]}im, '\1' # fix LIST - raw.gsub! /\[LIST\](.*?)\[\/LIST\]/im, '
      \1
    ' - raw.gsub! /\[\*\]/im, '
  • ' + raw.gsub! %r{\[LIST\](.*?)\[/LIST\]}im, '
      \1
    ' + raw.gsub! /\[\*\]/im, "
  • " # [QUOTE]...[/QUOTE] - raw = raw.gsub(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" } + raw = raw.gsub(%r{\[quote\](.+?)\[/quote\]}im) { "\n> #{$1}\n" } # [QUOTE=]...[/QUOTE] - raw = raw.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do - old_username, quote = $1, $2 + raw = + raw.gsub(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do + old_username, quote = $1, $2 - if @old_username_to_new_usernames.has_key?(old_username) - old_username = @old_username_to_new_usernames[old_username] + if @old_username_to_new_usernames.has_key?(old_username) + old_username = @old_username_to_new_usernames[old_username] + end + "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" end - "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" - end # [YOUTUBE][/YOUTUBE] - raw = raw.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" } + raw = raw.gsub(%r{\[youtube\](.+?)\[/youtube\]}i) { "\n//youtu.be/#{$1}\n" } # [VIDEO=youtube;]...[/VIDEO] - raw = raw.gsub(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" } + raw = raw.gsub(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) { "\n//youtu.be/#{$1}\n" } raw end def postprocess_post_raw(raw) # [QUOTE=;]...[/QUOTE] - raw = raw.gsub(/\[quote=([^;]+);n(\d+)\](.+?)\[\/quote\]/im) do - old_username, post_id, quote = $1, $2, $3 + raw = + raw.gsub(%r{\[quote=([^;]+);n(\d+)\](.+?)\[/quote\]}im) do + old_username, post_id, quote = $1, $2, $3 - if @old_username_to_new_usernames.has_key?(old_username) - old_username = @old_username_to_new_usernames[old_username] - end + if @old_username_to_new_usernames.has_key?(old_username) + old_username = @old_username_to_new_usernames[old_username] + end - if topic_lookup = topic_lookup_from_imported_post_id(post_id) - post_number = topic_lookup[:post_number] - topic_id = topic_lookup[:topic_id] - "\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n" - else - "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" + if topic_lookup = topic_lookup_from_imported_post_id(post_id) + post_number = topic_lookup[:post_number] + topic_id = topic_lookup[:topic_id] + "\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n" + else + "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" + end end - end # remove attachments - raw = raw.gsub(/\[attach[^\]]*\]\d+\[\/attach\]/i, "") + raw = raw.gsub(%r{\[attach[^\]]*\]\d+\[/attach\]}i, "") # [THREAD][/THREAD] # ==> http://my.discourse.org/t/slug/ - raw = raw.gsub(/\[thread\](\d+)\[\/thread\]/i) do - thread_id = $1 - if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") - topic_lookup[:url] - else - $& + raw = + raw.gsub(%r{\[thread\](\d+)\[/thread\]}i) do + thread_id = $1 + if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") + topic_lookup[:url] + else + $& + end end - end # [THREAD=]...[/THREAD] # ==> [...](http://my.discourse.org/t/slug/) - raw = raw.gsub(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do - thread_id, link = $1, $2 - if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") - url = topic_lookup[:url] - "[#{link}](#{url})" - else - $& + raw = + raw.gsub(%r{\[thread=(\d+)\](.+?)\[/thread\]}i) do + thread_id, link = $1, $2 + if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}") + url = topic_lookup[:url] + "[#{link}](#{url})" + else + $& + end end - end # [POST][/POST] # ==> http://my.discourse.org/t/slug// - raw = raw.gsub(/\[post\](\d+)\[\/post\]/i) do - post_id = $1 - if topic_lookup = 
topic_lookup_from_imported_post_id(post_id) - topic_lookup[:url] - else - $& + raw = + raw.gsub(%r{\[post\](\d+)\[/post\]}i) do + post_id = $1 + if topic_lookup = topic_lookup_from_imported_post_id(post_id) + topic_lookup[:url] + else + $& + end end - end # [POST=]...[/POST] # ==> [...](http://my.discourse.org/t///) - raw = raw.gsub(/\[post=(\d+)\](.+?)\[\/post\]/i) do - post_id, link = $1, $2 - if topic_lookup = topic_lookup_from_imported_post_id(post_id) - url = topic_lookup[:url] - "[#{link}](#{url})" - else - $& + raw = + raw.gsub(%r{\[post=(\d+)\](.+?)\[/post\]}i) do + post_id, link = $1, $2 + if topic_lookup = topic_lookup_from_imported_post_id(post_id) + url = topic_lookup[:url] + "[#{link}](#{url})" + else + $& + end end - end raw end @@ -619,13 +679,17 @@ class ImportScripts::VBulletin < ImportScripts::Base puts "", "creating permalinks..." current_count = 0 - total_count = mysql_query("SELECT COUNT(nodeid) cnt + total_count = + mysql_query( + "SELECT COUNT(nodeid) cnt FROM #{DB_PREFIX}node WHERE (unpublishdate = 0 OR unpublishdate IS NULL) AND (approved = 1 AND showapproved = 1) AND parentid IN ( - SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};" - ).first["cnt"] + SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};", + ).first[ + "cnt" + ] batches(BATCH_SIZE) do |offset| topics = mysql_query <<-SQL @@ -647,12 +711,16 @@ class ImportScripts::VBulletin < ImportScripts::Base topics.each do |topic| current_count += 1 print_status current_count, total_count - disc_topic = topic_lookup_from_imported_post_id("thread-#{topic['nodeid']}") + disc_topic = topic_lookup_from_imported_post_id("thread-#{topic["nodeid"]}") - Permalink.create( - url: "#{URL_PREFIX}#{topic['p1']}/#{topic['p2']}/#{topic['nodeid']}-#{topic['p3']}", - topic_id: disc_topic[:topic_id] - ) rescue nil + begin + Permalink.create( + url: "#{URL_PREFIX}#{topic["p1"]}/#{topic["p2"]}/#{topic["nodeid"]}-#{topic["p3"]}", + topic_id: disc_topic[:topic_id], + ) + rescue StandardError + nil + end end end @@ -664,8 +732,13 @@ class ImportScripts::VBulletin < ImportScripts::Base AND parentid=#{ROOT_NODE}; SQL cats.each do |c| - category_id = CategoryCustomField.where(name: 'import_id').where(value: c['nodeid']).first.category_id - Permalink.create(url: "#{URL_PREFIX}#{c['urlident']}", category_id: category_id) rescue nil + category_id = + CategoryCustomField.where(name: "import_id").where(value: c["nodeid"]).first.category_id + begin + Permalink.create(url: "#{URL_PREFIX}#{c["urlident"]}", category_id: category_id) + rescue StandardError + nil + end end # subcats @@ -677,8 +750,13 @@ class ImportScripts::VBulletin < ImportScripts::Base AND n1.contenttypeid=#{@channel_typeid}; SQL subcats.each do |sc| - category_id = CategoryCustomField.where(name: 'import_id').where(value: sc['nodeid']).first.category_id - Permalink.create(url: "#{URL_PREFIX}#{sc['p1']}/#{sc['p2']}", category_id: category_id) rescue nil + category_id = + CategoryCustomField.where(name: "import_id").where(value: sc["nodeid"]).first.category_id + begin + Permalink.create(url: "#{URL_PREFIX}#{sc["p1"]}/#{sc["p2"]}", category_id: category_id) + rescue StandardError + nil + end end end @@ -689,7 +767,7 @@ class ImportScripts::VBulletin < ImportScripts::Base SiteSetting.max_tags_per_topic = 100 staff_guardian = Guardian.new(Discourse.system_user) - records = mysql_query(<<~SQL + records = mysql_query(<<~SQL).to_a SELECT nodeid, GROUP_CONCAT(tagtext) 
tags FROM #{DB_PREFIX}tag t LEFT JOIN #{DB_PREFIX}tagnode tn ON tn.tagid = t.tagid @@ -697,7 +775,6 @@ class ImportScripts::VBulletin < ImportScripts::Base AND tn.nodeid IS NOT NULL GROUP BY nodeid SQL - ).to_a current_count = 0 total_count = records.count @@ -705,11 +782,11 @@ class ImportScripts::VBulletin < ImportScripts::Base records.each do |rec| current_count += 1 print_status current_count, total_count - tl = topic_lookup_from_imported_post_id("thread-#{rec['nodeid']}") - next if tl.nil? # topic might have been deleted + tl = topic_lookup_from_imported_post_id("thread-#{rec["nodeid"]}") + next if tl.nil? # topic might have been deleted topic = Topic.find(tl[:topic_id]) - tag_names = rec['tags'].force_encoding("UTF-8").split(',') + tag_names = rec["tags"].force_encoding("UTF-8").split(",") DiscourseTagging.tag_topic_by_names(topic, staff_guardian, tag_names) end end diff --git a/script/import_scripts/xenforo.rb b/script/import_scripts/xenforo.rb index 38e45342c65..1c9aaaeb774 100755 --- a/script/import_scripts/xenforo.rb +++ b/script/import_scripts/xenforo.rb @@ -3,11 +3,11 @@ require "mysql2" begin - require 'php_serialize' # https://github.com/jqr/php-serialize + require "php_serialize" # https://github.com/jqr/php-serialize rescue LoadError puts - puts 'php_serialize not found.' - puts 'Add to Gemfile, like this: ' + puts "php_serialize not found." + puts "Add to Gemfile, like this: " puts puts "echo gem \\'php-serialize\\' >> Gemfile" puts "bundle install" @@ -19,20 +19,20 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb") # Call it like this: # RAILS_ENV=production bundle exec ruby script/import_scripts/xenforo.rb class ImportScripts::XenForo < ImportScripts::Base - XENFORO_DB = "xenforo_db" TABLE_PREFIX = "xf_" BATCH_SIZE = 1000 - ATTACHMENT_DIR = '/tmp/attachments' + ATTACHMENT_DIR = "/tmp/attachments" def initialize super - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - password: "pa$$word", - database: XENFORO_DB - ) + @client = + Mysql2::Client.new( + host: "localhost", + username: "root", + password: "pa$$word", + database: XENFORO_DB, + ) @category_mappings = {} @prefix_as_category = false @@ -47,10 +47,8 @@ class ImportScripts::XenForo < ImportScripts::Base end def import_avatar(id, imported_user) - filename = File.join(AVATAR_DIR, 'l', (id / 1000).to_s, "#{id}.jpg") - unless File.exist?(filename) - return nil - end + filename = File.join(AVATAR_DIR, "l", (id / 1000).to_s, "#{id}.jpg") + return nil unless File.exist?(filename) upload = create_upload(imported_user.id, filename, "avatar_#{id}") return if !upload.persisted? 
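The avatar lookup above encodes XenForo's on-disk sharding: large avatars live under `l/<id/1000>/<id>.jpg`. With a made-up `AVATAR_DIR` (the real one is configured elsewhere in the script):

    AVATAR_DIR = "/tmp/avatars" # assumed location for the example
    id = 12_345
    File.join(AVATAR_DIR, "l", (id / 1000).to_s, "#{id}.jpg")
    # => "/tmp/avatars/l/12/12345.jpg"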
imported_user.create_user_avatar @@ -59,36 +57,42 @@ class ImportScripts::XenForo < ImportScripts::Base end def import_users - puts '', "creating users" + puts "", "creating users" - total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}user WHERE user_state = 'valid' AND is_banned = 0;").first['count'] + total_count = + mysql_query( + "SELECT count(*) count FROM #{TABLE_PREFIX}user WHERE user_state = 'valid' AND is_banned = 0;", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT user_id id, username, email, custom_title title, register_date created_at, + results = + mysql_query( + "SELECT user_id id, username, email, custom_title title, register_date created_at, last_activity last_visit_time, user_group_id, is_moderator, is_admin, is_staff FROM #{TABLE_PREFIX}user WHERE user_state = 'valid' AND is_banned = 0 LIMIT #{BATCH_SIZE} - OFFSET #{offset};") + OFFSET #{offset};", + ) break if results.size < 1 next if all_records_exist? :users, results.map { |u| u["id"].to_i } create_users(results, total: total_count, offset: offset) do |user| - next if user['username'].blank? - { id: user['id'], - email: user['email'], - username: user['username'], - title: user['title'], - created_at: Time.zone.at(user['created_at']), - last_seen_at: Time.zone.at(user['last_visit_time']), - moderator: user['is_moderator'] == 1 || user['is_staff'] == 1, - admin: user['is_admin'] == 1, - post_create_action: proc do |u| - import_avatar(user['id'], u) - end + next if user["username"].blank? + { + id: user["id"], + email: user["email"], + username: user["username"], + title: user["title"], + created_at: Time.zone.at(user["created_at"]), + last_seen_at: Time.zone.at(user["last_visit_time"]), + moderator: user["is_moderator"] == 1 || user["is_staff"] == 1, + admin: user["is_admin"] == 1, + post_create_action: proc { |u| import_avatar(user["id"], u) }, } end end @@ -97,7 +101,9 @@ class ImportScripts::XenForo < ImportScripts::Base def import_categories puts "", "importing categories..." 
- categories = mysql_query(" + categories = + mysql_query( + " SELECT node_id id, title, description, @@ -106,20 +112,22 @@ class ImportScripts::XenForo < ImportScripts::Base display_order FROM #{TABLE_PREFIX}node ORDER BY parent_node_id, display_order - ").to_a + ", + ).to_a top_level_categories = categories.select { |c| c["parent_node_id"] == 0 } create_categories(top_level_categories) do |c| { - id: c['id'], - name: c['title'], - description: c['description'], - position: c['display_order'], - post_create_action: proc do |category| - url = "board/#{c['node_name']}" - Permalink.find_or_create_by(url: url, category_id: category.id) - end + id: c["id"], + name: c["title"], + description: c["description"], + position: c["display_order"], + post_create_action: + proc do |category| + url = "board/#{c["node_name"]}" + Permalink.find_or_create_by(url: url, category_id: category.id) + end, } end @@ -129,40 +137,41 @@ class ImportScripts::XenForo < ImportScripts::Base create_categories(subcategories) do |c| { - id: c['id'], - name: c['title'], - description: c['description'], - position: c['display_order'], - parent_category_id: category_id_from_imported_category_id(c['parent_node_id']), - post_create_action: proc do |category| - url = "board/#{c['node_name']}" - Permalink.find_or_create_by(url: url, category_id: category.id) - end + id: c["id"], + name: c["title"], + description: c["description"], + position: c["display_order"], + parent_category_id: category_id_from_imported_category_id(c["parent_node_id"]), + post_create_action: + proc do |category| + url = "board/#{c["node_name"]}" + Permalink.find_or_create_by(url: url, category_id: category.id) + end, } end - subcategory_ids = Set.new(subcategories.map { |c| c['id'] }) + subcategory_ids = Set.new(subcategories.map { |c| c["id"] }) # deeper categories need to be tags categories.each do |c| - next if c['parent_node_id'] == 0 - next if top_level_category_ids.include?(c['id']) - next if subcategory_ids.include?(c['id']) + next if c["parent_node_id"] == 0 + next if top_level_category_ids.include?(c["id"]) + next if subcategory_ids.include?(c["id"]) # Find a subcategory for topics in this category parent = c - while !parent.nil? && !subcategory_ids.include?(parent['id']) - parent = categories.find { |subcat| subcat['id'] == parent['parent_node_id'] } + while !parent.nil? && !subcategory_ids.include?(parent["id"]) + parent = categories.find { |subcat| subcat["id"] == parent["parent_node_id"] } end if parent - tag_name = DiscourseTagging.clean_tag(c['title']) - @category_mappings[c['id']] = { - category_id: category_id_from_imported_category_id(parent['id']), - tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name) + tag_name = DiscourseTagging.clean_tag(c["title"]) + @category_mappings[c["id"]] = { + category_id: category_id_from_imported_category_id(parent["id"]), + tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name), } else - puts '', "Couldn't find a category for #{c['id']} '#{c['title']}'!" + puts "", "Couldn't find a category for #{c["id"]} '#{c["title"]}'!" end end end @@ -172,40 +181,46 @@ class ImportScripts::XenForo < ImportScripts::Base def import_categories_from_thread_prefixes puts "", "importing categories..." 
- categories = mysql_query(" + categories = + mysql_query( + " SELECT prefix_id id FROM #{TABLE_PREFIX}thread_prefix ORDER BY prefix_id ASC - ").to_a + ", + ).to_a create_categories(categories) do |category| - { - id: category["id"], - name: "Category-#{category["id"]}" - } + { id: category["id"], name: "Category-#{category["id"]}" } end @prefix_as_category = true end def import_likes - puts '', 'importing likes' - total_count = mysql_query("SELECT COUNT(*) AS count FROM #{TABLE_PREFIX}liked_content WHERE content_type = 'post'").first["count"] + puts "", "importing likes" + total_count = + mysql_query( + "SELECT COUNT(*) AS count FROM #{TABLE_PREFIX}liked_content WHERE content_type = 'post'", + ).first[ + "count" + ] batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT like_id, content_id, like_user_id, like_date + results = + mysql_query( + "SELECT like_id, content_id, like_user_id, like_date FROM #{TABLE_PREFIX}liked_content WHERE content_type = 'post' ORDER BY like_id LIMIT #{BATCH_SIZE} - OFFSET #{offset};" - ) + OFFSET #{offset};", + ) break if results.size < 1 create_likes(results, total: total_count, offset: offset) do |row| { - post_id: row['content_id'], - user_id: row['like_user_id'], - created_at: Time.zone.at(row['like_date']) + post_id: row["content_id"], + user_id: row["like_user_id"], + created_at: Time.zone.at(row["like_date"]), } end end @@ -216,10 +231,11 @@ class ImportScripts::XenForo < ImportScripts::Base total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}post").first["count"] - posts_sql = " + posts_sql = + " SELECT p.post_id id, t.thread_id topic_id, - #{@prefix_as_category ? 't.prefix_id' : 't.node_id'} category_id, + #{@prefix_as_category ? "t.prefix_id" : "t.node_id"} category_id, t.title title, t.first_post_id first_post_id, t.view_count, @@ -238,35 +254,35 @@ class ImportScripts::XenForo < ImportScripts::Base results = mysql_query("#{posts_sql} OFFSET #{offset};").to_a break if results.size < 1 - next if all_records_exist? :posts, results.map { |p| p['id'] } + next if all_records_exist? :posts, results.map { |p| p["id"] } create_posts(results, total: total_count, offset: offset) do |m| skip = false mapped = {} - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_xenforo_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['created_at']) + mapped[:id] = m["id"] + mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1 + mapped[:raw] = process_xenforo_post(m["raw"], m["id"]) + mapped[:created_at] = Time.zone.at(m["created_at"]) - if m['id'] == m['first_post_id'] - if m['category_id'].to_i == 0 || m['category_id'].nil? + if m["id"] == m["first_post_id"] + if m["category_id"].to_i == 0 || m["category_id"].nil? 
           mapped[:category] = SiteSetting.uncategorized_category_id
         else
-          mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i) ||
-            @category_mappings[m['category_id']].try(:[], :category_id)
+          mapped[:category] = category_id_from_imported_category_id(m["category_id"].to_i) ||
+            @category_mappings[m["category_id"]].try(:[], :category_id)
         end
-        mapped[:title] = CGI.unescapeHTML(m['title'])
-        mapped[:views] = m['view_count']
+        mapped[:title] = CGI.unescapeHTML(m["title"])
+        mapped[:views] = m["view_count"]
         mapped[:post_create_action] = proc do |pp|
-          Permalink.find_or_create_by(url: "threads/#{m['topic_id']}", topic_id: pp.topic_id)
+          Permalink.find_or_create_by(url: "threads/#{m["topic_id"]}", topic_id: pp.topic_id)
         end
       else
-        parent = topic_lookup_from_imported_post_id(m['first_post_id'])
+        parent = topic_lookup_from_imported_post_id(m["first_post_id"])
         if parent
           mapped[:topic_id] = parent[:topic_id]
         else
-          puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
+          puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
           skip = true
         end
       end
@@ -281,16 +297,15 @@ class ImportScripts::XenForo < ImportScripts::Base
       break if results.size < 1

       results.each do |m|
-        next unless m['id'] == m['first_post_id'] && m['category_id'].to_i > 0
-        next unless tag = @category_mappings[m['category_id']].try(:[], :tag)
-        next unless topic_mapping = topic_lookup_from_imported_post_id(m['id'])
+        next unless m["id"] == m["first_post_id"] && m["category_id"].to_i > 0
+        next unless tag = @category_mappings[m["category_id"]].try(:[], :tag)
+        next unless topic_mapping = topic_lookup_from_imported_post_id(m["id"])
         topic = Topic.find_by_id(topic_mapping[:topic_id])
         topic.tags = [tag] if topic
       end
     end
-
   end

   def import_private_messages
@@ -318,29 +333,29 @@ class ImportScripts::XenForo < ImportScripts::Base
           user_id: user_id,
           raw: raw,
           created_at: Time.zone.at(post["message_date"].to_i),
-          import_mode: true
+          import_mode: true,
         }

         unless post["topic_id"] > 0
           msg[:title] = post["title"]
           msg[:archetype] = Archetype.private_message
-          to_user_array = PHP.unserialize(post['recipients'])
+          to_user_array = PHP.unserialize(post["recipients"])
           if to_user_array.size > 0
             discourse_user_ids = to_user_array.keys.map { |id| user_id_from_imported_user_id(id) }
             usernames = User.where(id: [discourse_user_ids]).pluck(:username)
-            msg[:target_usernames] = usernames.join(',')
+            msg[:target_usernames] = usernames.join(",")
           end
         else
           topic_id = post["topic_id"]
           if t = topic_lookup_from_imported_post_id("pm_#{topic_id}")
             msg[:topic_id] = t[:topic_id]
           else
-            puts "Topic ID #{topic_id} not found, skipping post #{post['message_id']} from #{post['user_id']}"
+            puts "Topic ID #{topic_id} not found, skipping post #{post["message_id"]} from #{post["user_id"]}"
             next
           end
         end
         msg
       else
-        puts "Empty message, skipping post #{post['message_id']}"
+        puts "Empty message, skipping post #{post["message_id"]}"
         next
       end
     end
@@ -351,18 +366,18 @@ class ImportScripts::XenForo < ImportScripts::Base
     s = raw.dup

     # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
-    s.gsub!(/<!-- s(\S+) --><img (?:[^>]+) \/>/, '\1')
+    s.gsub!(%r{<!-- s(\S+) --><img (?:[^>]+) />}, '\1')

     # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
-    s.gsub!(/<!-- \w --><a(?:.+?)href="(\S+)"(?:.*?)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
+    s.gsub!(%r{<!-- \w --><a(?:.+?)href="(\S+)"(?:.*?)>(.+)</a><!-- \w -->}, '[\2](\1)')

     # Many phpbb bbcode tags have a hash attached to them.
Examples: # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - s.gsub!(/:(?:\w{8})\]/, ']') + s.gsub!(/:(?:\w{8})\]/, "]") # Remove mybb video tags. - s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '') + s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "") s = CGI.unescapeHTML(s) @@ -370,18 +385,16 @@ class ImportScripts::XenForo < ImportScripts::Base # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) # #Fix for the error: xenforo.rb: 160: in `gsub!': invalid byte sequence in UTF-8 (ArgumentError) - if ! s.valid_encoding? - s = s.encode("UTF-16be", invalid: :replace, replace: "?").encode('UTF-8') - end + s = s.encode("UTF-16be", invalid: :replace, replace: "?").encode("UTF-8") if !s.valid_encoding? # Work around it for now: - s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[') + s.gsub!(%r{\[http(s)?://(www\.)?}, "[") # [QUOTE]...[/QUOTE] - s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" } + s.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n> #{$1}\n" } # Nested Quotes - s.gsub!(/(\[\/?QUOTE.*?\])/mi) { |q| "\n#{q}\n" } + s.gsub!(%r{(\[/?QUOTE.*?\])}mi) { |q| "\n#{q}\n" } # [QUOTE="username, post: 28662, member: 1283"] s.gsub!(/\[quote="(\w+), post: (\d*), member: (\d*)"\]/i) do @@ -397,48 +410,52 @@ class ImportScripts::XenForo < ImportScripts::Base end # [URL=...]...[/URL] - s.gsub!(/\[url="?(.+?)"?\](.+?)\[\/url\]/i) { "[#{$2}](#{$1})" } + s.gsub!(%r{\[url="?(.+?)"?\](.+?)\[/url\]}i) { "[#{$2}](#{$1})" } # [URL]...[/URL] - s.gsub!(/\[url\](.+?)\[\/url\]/i) { " #{$1} " } + s.gsub!(%r{\[url\](.+?)\[/url\]}i) { " #{$1} " } # [IMG]...[/IMG] - s.gsub!(/\[\/?img\]/i, "") + s.gsub!(%r{\[/?img\]}i, "") # convert list tags to ul and list=1 tags to ol # (basically, we're only missing list=a here...) 
- s.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]') - s.gsub!(/\[list=1\](.*?)\[\/list\]/im, '[ol]\1[/ol]') - s.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]') - s.gsub!(/\[list=1\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]') + s.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]') + s.gsub!(%r{\[list=1\](.*?)\[/list\]}im, '[ol]\1[/ol]') + s.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]') + s.gsub!(%r{\[list=1\](.*?)\[/list:o\]}im, '[ol]\1[/ol]') # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - s.gsub!(/\[\*\]\n/, '') - s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') + s.gsub!(/\[\*\]\n/, "") + s.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]') s.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]') - s.gsub!(/\[\*=1\]/, '') + s.gsub!(/\[\*=1\]/, "") # [YOUTUBE][/YOUTUBE] - s.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + s.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } # [youtube=425,350]id[/youtube] - s.gsub!(/\[youtube="?(.+?)"?\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" } + s.gsub!(%r{\[youtube="?(.+?)"?\](.+?)\[/youtube\]}i) do + "\nhttps://www.youtube.com/watch?v=#{$2}\n" + end # [MEDIA=youtube]id[/MEDIA] - s.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + s.gsub!(%r{\[MEDIA=youtube\](.+?)\[/MEDIA\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } # [ame="youtube_link"]title[/ame] - s.gsub!(/\[ame="?(.+?)"?\](.+?)\[\/ame\]/i) { "\n#{$1}\n" } + s.gsub!(%r{\[ame="?(.+?)"?\](.+?)\[/ame\]}i) { "\n#{$1}\n" } # [VIDEO=youtube;]...[/VIDEO] - s.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" } + s.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) do + "\nhttps://www.youtube.com/watch?v=#{$1}\n" + end # [USER=706]@username[/USER] - s.gsub!(/\[user="?(.+?)"?\](.+?)\[\/user\]/i) { $2 } + s.gsub!(%r{\[user="?(.+?)"?\](.+?)\[/user\]}i) { $2 } # Remove the color tag s.gsub!(/\[color=[#a-z0-9]+\]/i, "") - s.gsub!(/\[\/color\]/i, "") + s.gsub!(%r{\[/color\]}i, "") if Dir.exist? ATTACHMENT_DIR s = process_xf_attachments(:gallery, s, import_id) @@ -454,7 +471,8 @@ class ImportScripts::XenForo < ImportScripts::Base # not all attachments have an [ATTACH=] tag so we need to get the other ID's from the xf_attachment table if xf_type == :attachment && import_id > 0 - sql = "SELECT attachment_id FROM #{TABLE_PREFIX}attachment WHERE content_id=#{import_id} and content_type='post';" + sql = + "SELECT attachment_id FROM #{TABLE_PREFIX}attachment WHERE content_id=#{import_id} and content_type='post';" ids.merge(mysql_query(sql).to_a.map { |v| v["attachment_id"].to_i }) end @@ -464,18 +482,22 @@ class ImportScripts::XenForo < ImportScripts::Base results = mysql_query(sql) if results.size < 1 # Strip attachment - s.gsub!(get_xf_regexp(xf_type, id), '') + s.gsub!(get_xf_regexp(xf_type, id), "") STDERR.puts "#{xf_type.capitalize} id #{id} not found in source database. Stripping." next end - original_filename = results.first['filename'] + original_filename = results.first["filename"] result = results.first - upload = import_xf_attachment(result['data_id'], result['file_hash'], result['user_id'], original_filename) + upload = + import_xf_attachment( + result["data_id"], + result["file_hash"], + result["user_id"], + original_filename, + ) if upload && upload.present? && upload.persisted? 
html = @uploader.html_for_upload(upload, original_filename) - unless s.gsub!(get_xf_regexp(xf_type, id), html) - s = s + "\n\n#{html}\n\n" - end + s = s + "\n\n#{html}\n\n" unless s.gsub!(get_xf_regexp(xf_type, id), html) else STDERR.puts "Could not process upload: #{original_filename}. Skipping attachment id #{id}" end @@ -503,7 +525,7 @@ class ImportScripts::XenForo < ImportScripts::Base when :gallery Regexp.new(/\[GALLERY=media,\s#{id ? id : '(\d+)'}\].+?\]/i) when :attachment - Regexp.new(/\[ATTACH(?>=\w+)?\]#{id ? id : '(\d+)'}\[\/ATTACH\]/i) + Regexp.new(%r{\[ATTACH(?>=\w+)?\]#{id ? id : '(\d+)'}\[/ATTACH\]}i) end end diff --git a/script/import_scripts/yahoogroup.rb b/script/import_scripts/yahoogroup.rb index 93651e5d7f9..4c307f506e6 100644 --- a/script/import_scripts/yahoogroup.rb +++ b/script/import_scripts/yahoogroup.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require File.expand_path(File.dirname(__FILE__) + "/base.rb") -require 'mongo' +require "mongo" # Import YahooGroups data as exported into MongoDB by: # https://github.com/jonbartlett/yahoo-groups-export @@ -13,14 +13,13 @@ require 'mongo' # =end class ImportScripts::YahooGroup < ImportScripts::Base - - MONGODB_HOST = '192.168.10.1:27017' - MONGODB_DB = 'syncro' + MONGODB_HOST = "192.168.10.1:27017" + MONGODB_DB = "syncro" def initialize super - client = Mongo::Client.new([ MONGODB_HOST ], database: MONGODB_DB) + client = Mongo::Client.new([MONGODB_HOST], database: MONGODB_DB) db = client.database Mongo::Logger.logger.level = Logger::FATAL puts "connected to db...." @@ -28,7 +27,6 @@ class ImportScripts::YahooGroup < ImportScripts::Base @collection = client[:posts] @user_profile_map = {} - end def execute @@ -41,43 +39,42 @@ class ImportScripts::YahooGroup < ImportScripts::Base end def import_users - - puts '', "Importing users" + puts "", "Importing users" # fetch distinct list of Yahoo "profile" names - profiles = @collection.aggregate( - [ - { "$group": { "_id": { profile: "$ygData.profile" } } } - ] - ) + profiles = @collection.aggregate([{ "$group": { _id: { profile: "$ygData.profile" } } }]) user_id = 0 create_users(profiles.to_a) do |u| - user_id = user_id + 1 # fetch last message for profile to pickup latest user info as this may have changed - user_info = @collection.find("ygData.profile": u["_id"]["profile"]).sort("ygData.msgId": -1).limit(1).to_a[0] + user_info = + @collection + .find("ygData.profile": u["_id"]["profile"]) + .sort("ygData.msgId": -1) + .limit(1) + .to_a[ + 0 + ] # Store user_id to profile lookup @user_profile_map.store(user_info["ygData"]["profile"], user_id) puts "User created: #{user_info["ygData"]["profile"]}" - user = - { - id: user_id, # yahoo "userId" sequence appears to have changed mid forum life so generate this + user = { + id: user_id, # yahoo "userId" sequence appears to have changed mid forum life so generate this username: user_info["ygData"]["profile"], name: user_info["ygData"]["authorName"], email: user_info["ygData"]["from"], # mandatory - created_at: Time.now + created_at: Time.now, } user end puts "#{user_id} users created" - end def import_discussions @@ -86,21 +83,16 @@ class ImportScripts::YahooGroup < ImportScripts::Base topics_count = 0 posts_count = 0 - topics = @collection.aggregate( - [ - { "$group": { "_id": { topicId: "$ygData.topicId" } } } - ] - ).to_a + topics = @collection.aggregate([{ "$group": { _id: { topicId: "$ygData.topicId" } } }]).to_a # for each distinct topicId found topics.each_with_index do |t, tidx| - # create "topic" post first. 
# fetch topic document topic_post = @collection.find("ygData.msgId": t["_id"]["topicId"]).to_a[0] next if topic_post.nil? - puts "Topic: #{tidx + 1} / #{topics.count()} (#{sprintf('%.2f', ((tidx + 1).to_f / topics.count().to_f) * 100)}%) Subject: #{topic_post["ygData"]["subject"]}" + puts "Topic: #{tidx + 1} / #{topics.count()} (#{sprintf("%.2f", ((tidx + 1).to_f / topics.count().to_f) * 100)}%) Subject: #{topic_post["ygData"]["subject"]}" if topic_post["ygData"]["subject"].to_s.empty? topic_title = "No Subject" @@ -115,8 +107,10 @@ class ImportScripts::YahooGroup < ImportScripts::Base created_at: Time.at(topic_post["ygData"]["postDate"].to_i), cook_method: Post.cook_methods[:raw_html], title: topic_title, - category: ENV['CATEGORY_ID'], - custom_fields: { import_id: topic_post["ygData"]["msgId"] } + category: ENV["CATEGORY_ID"], + custom_fields: { + import_id: topic_post["ygData"]["msgId"], + }, } topics_count += 1 @@ -128,34 +122,31 @@ class ImportScripts::YahooGroup < ImportScripts::Base posts = @collection.find("ygData.topicId": topic_post["ygData"]["topicId"]).to_a posts.each_with_index do |p, pidx| - # skip over first post as this is created by topic above next if p["ygData"]["msgId"] == topic_post["ygData"]["topicId"] puts " Post: #{pidx + 1} / #{posts.count()}" post = { - id: pidx + 1, - topic_id: parent_post[:topic_id], - user_id: @user_profile_map[p["ygData"]["profile"]] || -1, - raw: p["ygData"]["messageBody"], - created_at: Time.at(p["ygData"]["postDate"].to_i), - cook_method: Post.cook_methods[:raw_html], - custom_fields: { import_id: p["ygData"]["msgId"] } + id: pidx + 1, + topic_id: parent_post[:topic_id], + user_id: @user_profile_map[p["ygData"]["profile"]] || -1, + raw: p["ygData"]["messageBody"], + created_at: Time.at(p["ygData"]["postDate"].to_i), + cook_method: Post.cook_methods[:raw_html], + custom_fields: { + import_id: p["ygData"]["msgId"], + }, } child_post = create_post(post, post[:id]) posts_count += 1 - end - end puts "", "Imported #{topics_count} topics with #{topics_count + posts_count} posts." 
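Both `import_users` and `import_discussions` in this script lean on the same MongoDB idiom: a single-field `$group` stage serves as a `DISTINCT`. A standalone sketch of that query, reusing the host, database, and collection names from the constants above:

```ruby
require "mongo"

client = Mongo::Client.new(["192.168.10.1:27017"], database: "syncro")

# One result document per distinct ygData.topicId -- exactly the set of
# topic roots the import loop walks afterwards.
distinct_topic_ids =
  client[:posts]
    .aggregate([{ "$group" => { "_id" => { topicId: "$ygData.topicId" } } }])
    .map { |doc| doc["_id"]["topicId"] }
```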
- end - end ImportScripts::YahooGroup.new.perform diff --git a/script/import_scripts/zendesk.rb b/script/import_scripts/zendesk.rb index a8e44f5ffd8..a8b5ef59ac5 100644 --- a/script/import_scripts/zendesk.rb +++ b/script/import_scripts/zendesk.rb @@ -9,10 +9,10 @@ # - posts.csv (posts in Zendesk are topics in Discourse) # - comments.csv (comments in Zendesk are posts in Discourse) -require 'csv' -require 'reverse_markdown' -require_relative 'base' -require_relative 'base/generic_database' +require "csv" +require "reverse_markdown" +require_relative "base" +require_relative "base/generic_database" # Call it like this: # RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk.rb DIRNAME @@ -45,7 +45,7 @@ class ImportScripts::Zendesk < ImportScripts::Base name: row[:name], description: row[:description], position: row[:position], - url: row[:htmlurl] + url: row[:htmlurl], ) end @@ -56,7 +56,7 @@ class ImportScripts::Zendesk < ImportScripts::Base name: row[:name], created_at: parse_datetime(row[:createdat]), last_seen_at: parse_datetime(row[:lastloginat]), - active: true + active: true, ) end @@ -69,7 +69,7 @@ class ImportScripts::Zendesk < ImportScripts::Base closed: row[:closed] == "TRUE", user_id: row[:authorid], created_at: parse_datetime(row[:createdat]), - url: row[:htmlurl] + url: row[:htmlurl], ) end @@ -80,7 +80,7 @@ class ImportScripts::Zendesk < ImportScripts::Base topic_id: row[:postid], user_id: row[:authorid], created_at: parse_datetime(row[:createdat]), - url: row[:htmlurl] + url: row[:htmlurl], ) end @@ -99,14 +99,15 @@ class ImportScripts::Zendesk < ImportScripts::Base create_categories(rows) do |row| { - id: row['id'], - name: row['name'], - description: row['description'], - position: row['position'], - post_create_action: proc do |category| - url = remove_domain(row['url']) - Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url) - end + id: row["id"], + name: row["name"], + description: row["description"], + position: row["position"], + post_create_action: + proc do |category| + url = remove_domain(row["url"]) + Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url) + end, } end end @@ -118,22 +119,22 @@ class ImportScripts::Zendesk < ImportScripts::Base def import_users puts "", "creating users" total_count = @db.count_users - last_id = '' + last_id = "" batches do |offset| rows, last_id = @db.fetch_users(last_id) break if rows.empty? - next if all_records_exist?(:users, rows.map { |row| row['id'] }) + next if all_records_exist?(:users, rows.map { |row| row["id"] }) create_users(rows, total: total_count, offset: offset) do |row| { - id: row['id'], - email: row['email'], - name: row['name'], - created_at: row['created_at'], - last_seen_at: row['last_seen_at'], - active: row['active'] == 1 + id: row["id"], + email: row["email"], + name: row["name"], + created_at: row["created_at"], + last_seen_at: row["last_seen_at"], + active: row["active"] == 1, } end end @@ -142,27 +143,28 @@ class ImportScripts::Zendesk < ImportScripts::Base def import_topics puts "", "creating topics" total_count = @db.count_topics - last_id = '' + last_id = "" batches do |offset| rows, last_id = @db.fetch_topics(last_id) break if rows.empty? - next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row['id']) }) + next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row["id"]) }) create_posts(rows, total: total_count, offset: offset) do |row| { - id: import_topic_id(row['id']), - title: row['title'].present? 
? row['title'].strip[0...255] : "Topic title missing", - raw: normalize_raw(row['raw']), - category: category_id_from_imported_category_id(row['category_id']), - user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id, - created_at: row['created_at'], - closed: row['closed'] == 1, - post_create_action: proc do |post| - url = remove_domain(row['url']) - Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url) - end + id: import_topic_id(row["id"]), + title: row["title"].present? ? row["title"].strip[0...255] : "Topic title missing", + raw: normalize_raw(row["raw"]), + category: category_id_from_imported_category_id(row["category_id"]), + user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, + created_at: row["created_at"], + closed: row["closed"] == 1, + post_create_action: + proc do |post| + url = remove_domain(row["url"]) + Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url) + end, } end end @@ -181,34 +183,35 @@ class ImportScripts::Zendesk < ImportScripts::Base rows, last_row_id = @db.fetch_sorted_posts(last_row_id) break if rows.empty? - next if all_records_exist?(:posts, rows.map { |row| row['id'] }) + next if all_records_exist?(:posts, rows.map { |row| row["id"] }) create_posts(rows, total: total_count, offset: offset) do |row| - topic = topic_lookup_from_imported_post_id(import_topic_id(row['topic_id'])) + topic = topic_lookup_from_imported_post_id(import_topic_id(row["topic_id"])) if topic.nil? - p "MISSING TOPIC #{row['topic_id']}" + p "MISSING TOPIC #{row["topic_id"]}" p row next end { - id: row['id'], - raw: normalize_raw(row['raw']), - user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id, + id: row["id"], + raw: normalize_raw(row["raw"]), + user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, topic_id: topic[:topic_id], - created_at: row['created_at'], - post_create_action: proc do |post| - url = remove_domain(row['url']) - Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url) - end + created_at: row["created_at"], + post_create_action: + proc do |post| + url = remove_domain(row["url"]) + Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url) + end, } end end end def normalize_raw(raw) - raw = raw.gsub('\n', '') + raw = raw.gsub('\n', "") raw = ReverseMarkdown.convert(raw) raw end @@ -222,11 +225,13 @@ class ImportScripts::Zendesk < ImportScripts::Base end def csv_parse(table_name) - CSV.foreach(File.join(@path, "#{table_name}.csv"), - headers: true, - header_converters: :symbol, - skip_blanks: true, - encoding: 'bom|utf-8') { |row| yield row } + CSV.foreach( + File.join(@path, "#{table_name}.csv"), + headers: true, + header_converters: :symbol, + skip_blanks: true, + encoding: "bom|utf-8", + ) { |row| yield row } end end diff --git a/script/import_scripts/zendesk_api.rb b/script/import_scripts/zendesk_api.rb index 9237a764423..d76ff1652dc 100644 --- a/script/import_scripts/zendesk_api.rb +++ b/script/import_scripts/zendesk_api.rb @@ -4,10 +4,10 @@ # # This one uses their API. 
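The importer below pages through every Zendesk collection by following the `next_page` URL each JSON response carries, authenticating with the `email/token:token` Basic scheme. A minimal sketch of that fetch loop (retries and status printing stripped out; the method name here is illustrative):

```ruby
require "net/http"
require "json"
require "base64"

def each_api_row(start_url, array_name, auth_email, auth_token)
  auth = "Basic #{Base64.strict_encode64("#{auth_email}/token:#{auth_token}")}"
  url = start_url

  while url
    uri = URI.parse(url)
    request = Net::HTTP::Get.new(uri)
    request["Authorization"] = auth

    response =
      Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
        http.request(request)
      end

    json = JSON.parse(response.body)
    json[array_name].each { |row| yield row }

    url = json["next_page"] # nil on the last page, which ends the loop
  end
end
```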
-require 'open-uri' -require 'reverse_markdown' -require_relative 'base' -require_relative 'base/generic_database' +require "open-uri" +require "reverse_markdown" +require_relative "base" +require_relative "base/generic_database" # Call it like this: # RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN @@ -23,7 +23,7 @@ class ImportScripts::ZendeskApi < ImportScripts::Base Net::ProtocolError, Timeout::Error, OpenURI::HTTPError, - OpenSSL::SSL::SSLError + OpenSSL::SSL::SSLError, ] MAX_RETRIES = 5 @@ -62,66 +62,72 @@ class ImportScripts::ZendeskApi < ImportScripts::Base end def fetch_categories - puts '', 'fetching categories...' + puts "", "fetching categories..." - get_from_api('/api/v2/community/topics.json', 'topics', show_status: true) do |row| + get_from_api("/api/v2/community/topics.json", "topics", show_status: true) do |row| @db.insert_category( - id: row['id'], - name: row['name'], - description: row['description'], - position: row['position'], - url: row['html_url'] + id: row["id"], + name: row["name"], + description: row["description"], + position: row["position"], + url: row["html_url"], ) end end def fetch_topics - puts '', 'fetching topics...' + puts "", "fetching topics..." - get_from_api('/api/v2/community/posts.json', 'posts', show_status: true) do |row| - if row['vote_count'] > 0 - like_user_ids = fetch_likes("/api/v2/community/posts/#{row['id']}/votes.json") + get_from_api("/api/v2/community/posts.json", "posts", show_status: true) do |row| + if row["vote_count"] > 0 + like_user_ids = fetch_likes("/api/v2/community/posts/#{row["id"]}/votes.json") end @db.insert_topic( - id: row['id'], - title: row['title'], - raw: row['details'], - category_id: row['topic_id'], - closed: row['closed'], - user_id: row['author_id'], - created_at: row['created_at'], - url: row['html_url'], - like_user_ids: like_user_ids + id: row["id"], + title: row["title"], + raw: row["details"], + category_id: row["topic_id"], + closed: row["closed"], + user_id: row["author_id"], + created_at: row["created_at"], + url: row["html_url"], + like_user_ids: like_user_ids, ) end end def fetch_posts - puts '', 'fetching posts...' + puts "", "fetching posts..." current_count = 0 total_count = @db.count_topics start_time = Time.now - last_id = '' + last_id = "" batches do |offset| rows, last_id = @db.fetch_topics(last_id) break if rows.empty? rows.each do |topic_row| - get_from_api("/api/v2/community/posts/#{topic_row['id']}/comments.json", 'comments') do |row| - if row['vote_count'] > 0 - like_user_ids = fetch_likes("/api/v2/community/posts/#{topic_row['id']}/comments/#{row['id']}/votes.json") + get_from_api( + "/api/v2/community/posts/#{topic_row["id"]}/comments.json", + "comments", + ) do |row| + if row["vote_count"] > 0 + like_user_ids = + fetch_likes( + "/api/v2/community/posts/#{topic_row["id"]}/comments/#{row["id"]}/votes.json", + ) end @db.insert_post( - id: row['id'], - raw: row['body'], - topic_id: topic_row['id'], - user_id: row['author_id'], - created_at: row['created_at'], - url: row['html_url'], - like_user_ids: like_user_ids + id: row["id"], + raw: row["body"], + topic_id: topic_row["id"], + user_id: row["author_id"], + created_at: row["created_at"], + url: row["html_url"], + like_user_ids: like_user_ids, ) end @@ -132,9 +138,9 @@ class ImportScripts::ZendeskApi < ImportScripts::Base end def fetch_users - puts '', 'fetching users...' + puts "", "fetching users..." 
- user_ids = @db.execute_sql(<<~SQL).map { |row| row['user_id'] } + user_ids = @db.execute_sql(<<~SQL).map { |row| row["user_id"] } SELECT user_id FROM topic UNION SELECT user_id FROM post @@ -147,15 +153,18 @@ class ImportScripts::ZendeskApi < ImportScripts::Base start_time = Time.now while !user_ids.empty? - get_from_api("/api/v2/users/show_many.json?ids=#{user_ids.shift(50).join(',')}", 'users') do |row| + get_from_api( + "/api/v2/users/show_many.json?ids=#{user_ids.shift(50).join(",")}", + "users", + ) do |row| @db.insert_user( - id: row['id'], - email: row['email'], - name: row['name'], - created_at: row['created_at'], - last_seen_at: row['last_login_at'], - active: row['active'], - avatar_path: row['photo'].present? ? row['photo']['content_url'] : nil + id: row["id"], + email: row["email"], + name: row["name"], + created_at: row["created_at"], + last_seen_at: row["last_login_at"], + active: row["active"], + avatar_path: row["photo"].present? ? row["photo"]["content_url"] : nil, ) current_count += 1 @@ -167,10 +176,8 @@ class ImportScripts::ZendeskApi < ImportScripts::Base def fetch_likes(url) user_ids = [] - get_from_api(url, 'votes') do |row| - if row['id'].present? && row['value'] == 1 - user_ids << row['user_id'] - end + get_from_api(url, "votes") do |row| + user_ids << row["user_id"] if row["id"].present? && row["value"] == 1 end user_ids @@ -182,14 +189,15 @@ class ImportScripts::ZendeskApi < ImportScripts::Base create_categories(rows) do |row| { - id: row['id'], - name: row['name'], - description: row['description'], - position: row['position'], - post_create_action: proc do |category| - url = remove_domain(row['url']) - Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url) - end + id: row["id"], + name: row["name"], + description: row["description"], + position: row["position"], + post_create_action: + proc do |category| + url = remove_domain(row["url"]) + Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url) + end, } end end @@ -197,27 +205,32 @@ class ImportScripts::ZendeskApi < ImportScripts::Base def import_users puts "", "creating users" total_count = @db.count_users - last_id = '' + last_id = "" batches do |offset| rows, last_id = @db.fetch_users(last_id) break if rows.empty? - next if all_records_exist?(:users, rows.map { |row| row['id'] }) + next if all_records_exist?(:users, rows.map { |row| row["id"] }) create_users(rows, total: total_count, offset: offset) do |row| { - id: row['id'], - email: row['email'], - name: row['name'], - created_at: row['created_at'], - last_seen_at: row['last_seen_at'], - active: row['active'] == 1, - post_create_action: proc do |user| - if row['avatar_path'].present? - UserAvatar.import_url_for_user(row['avatar_path'], user) rescue nil - end - end + id: row["id"], + email: row["email"], + name: row["name"], + created_at: row["created_at"], + last_seen_at: row["last_seen_at"], + active: row["active"] == 1, + post_create_action: + proc do |user| + if row["avatar_path"].present? + begin + UserAvatar.import_url_for_user(row["avatar_path"], user) + rescue StandardError + nil + end + end + end, } end end @@ -226,27 +239,32 @@ class ImportScripts::ZendeskApi < ImportScripts::Base def import_topics puts "", "creating topics" total_count = @db.count_topics - last_id = '' + last_id = "" batches do |offset| rows, last_id = @db.fetch_topics(last_id) break if rows.empty? 
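Note the paging style used from here on: instead of the MySQL importers' `LIMIT`/`OFFSET`, the staged SQLite reads page by keyset, threading the last seen id into the next `fetch_*` call. A hedged sketch of what such a fetch plausibly looks like (the real query lives in `base/generic_database.rb`, which this patch does not show):

```ruby
require "sqlite3"

# Keyset pagination: "WHERE id > ? ORDER BY id" jumps straight to the next
# page, avoiding both the O(n) skip and the shifting rows of a growing OFFSET.
def fetch_topics(db, last_id, batch_size = 1000)
  rows = db.execute(<<~SQL, [last_id, batch_size])
    SELECT * FROM topic WHERE id > ? ORDER BY id LIMIT ?
  SQL
  [rows, rows.empty? ? last_id : rows.last["id"]]
end

db = SQLite3::Database.new("zendesk.db")
db.results_as_hash = true # so rows.last["id"] works
rows, last_id = fetch_topics(db, "")
```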
- next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row['id']) }) + next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row["id"]) }) create_posts(rows, total: total_count, offset: offset) do |row| { - id: import_topic_id(row['id']), - title: row['title'].present? ? row['title'].strip[0...255] : "Topic title missing", - raw: normalize_raw(row['raw'], user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id), - category: category_id_from_imported_category_id(row['category_id']), - user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id, - created_at: row['created_at'], - closed: row['closed'] == 1, - post_create_action: proc do |post| - url = remove_domain(row['url']) - Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url) - end + id: import_topic_id(row["id"]), + title: row["title"].present? ? row["title"].strip[0...255] : "Topic title missing", + raw: + normalize_raw( + row["raw"], + user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, + ), + category: category_id_from_imported_category_id(row["category_id"]), + user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, + created_at: row["created_at"], + closed: row["closed"] == 1, + post_create_action: + proc do |post| + url = remove_domain(row["url"]) + Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url) + end, } end end @@ -266,24 +284,29 @@ class ImportScripts::ZendeskApi < ImportScripts::Base break if rows.empty? create_posts(rows, total: total_count, offset: offset) do |row| - topic = topic_lookup_from_imported_post_id(import_topic_id(row['topic_id'])) + topic = topic_lookup_from_imported_post_id(import_topic_id(row["topic_id"])) if topic.nil? - p "MISSING TOPIC #{row['topic_id']}" + p "MISSING TOPIC #{row["topic_id"]}" p row next end { - id: row['id'], - raw: normalize_raw(row['raw'], user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id), - user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id, + id: row["id"], + raw: + normalize_raw( + row["raw"], + user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, + ), + user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, topic_id: topic[:topic_id], - created_at: row['created_at'], - post_create_action: proc do |post| - url = remove_domain(row['url']) - Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url) - end + created_at: row["created_at"], + post_create_action: + proc do |post| + url = remove_domain(row["url"]) + Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url) + end, } end end @@ -301,9 +324,9 @@ class ImportScripts::ZendeskApi < ImportScripts::Base break if rows.empty? rows.each do |row| - import_id = row['topic_id'] ? import_topic_id(row['topic_id']) : row['post_id'] + import_id = row["topic_id"] ? 
import_topic_id(row["topic_id"]) : row["post_id"]
           post = Post.find_by(id: post_id_from_imported_post_id(import_id)) if import_id
-          user = User.find_by(id: user_id_from_imported_user_id(row['user_id']))
+          user = User.find_by(id: user_id_from_imported_user_id(row["user_id"]))

           if post && user
             begin
@@ -312,7 +335,7 @@
               puts "error acting on post #{e}"
             end
           else
-            puts "Skipping Like from #{row['user_id']} on topic #{row['topic_id']} / post #{row['post_id']}"
+            puts "Skipping Like from #{row["user_id"]} on topic #{row["topic_id"]} / post #{row["post_id"]}"
           end

           current_count += 1
@@ -322,23 +345,23 @@
   end

   def normalize_raw(raw, user_id)
-    raw = raw.gsub('\n', '')
+    raw = raw.gsub('\n', "")
     raw = ReverseMarkdown.convert(raw)

     # Process images, after the ReverseMarkdown they look like
     # ![](https://<subdomain>.zendesk.com/<filename>.<extension>)
-    raw.gsub!(/!\[\]\((https:\/\/#{SUBDOMAIN}\.zendesk\.com\/hc\/user_images\/([^).]+\.[^)]+))\)/i) do
+    raw.gsub!(%r{!\[\]\((https://#{SUBDOMAIN}\.zendesk\.com/hc/user_images/([^).]+\.[^)]+))\)}i) do
       image_url = $1
       filename = $2
       attempts = 0

       begin
-        URI.parse(image_url).open do |image|
-          # IMAGE_DOWNLOAD_PATH is whatever image, it will be replaced with the downloaded image
-          File.open(IMAGE_DOWNLOAD_PATH, "wb") do |file|
-            file.write(image.read)
+        URI
+          .parse(image_url)
+          .open do |image|
+            # IMAGE_DOWNLOAD_PATH is whatever image, it will be replaced with the downloaded image
+            File.open(IMAGE_DOWNLOAD_PATH, "wb") { |file| file.write(image.read) }
           end
-        end
       rescue *HTTP_ERRORS => e
         if attempts < MAX_RETRIES
           attempts += 1
@@ -374,23 +397,25 @@
   end

   def connection
-    @_connection ||= begin
-      connect_uri = URI.parse(@source_url)
+    @_connection ||=
+      begin
+        connect_uri = URI.parse(@source_url)

-      http = Net::HTTP.new(connect_uri.host, connect_uri.port)
-      http.open_timeout = 30
-      http.read_timeout = 30
-      http.use_ssl = connect_uri.scheme == "https"
+        http = Net::HTTP.new(connect_uri.host, connect_uri.port)
+        http.open_timeout = 30
+        http.read_timeout = 30
+        http.use_ssl = connect_uri.scheme == "https"

-      http
-    end
+        http
+      end
   end

   def authorization
-    @_authorization ||= begin
-      auth_str = "#{@auth_email}/token:#{@auth_token}"
-      "Basic #{Base64.strict_encode64(auth_str)}"
-    end
+    @_authorization ||=
+      begin
+        auth_str = "#{@auth_email}/token:#{@auth_token}"
+        "Basic #{Base64.strict_encode64(auth_str)}"
+      end
   end

   def get_from_api(path, array_name, show_status: false)
@@ -399,8 +424,8 @@

     while url
       get = Net::HTTP::Get.new(url)
-      get['User-Agent'] = 'Discourse Zendesk Importer'
-      get['Authorization'] = authorization
+      get["User-Agent"] = "Discourse Zendesk Importer"
+      get["Authorization"] = authorization

       retry_count = 0

@@ -420,26 +445,27 @@

       json = JSON.parse(response.body)

-      json[array_name].each do |row|
-        yield row
-      end
+      json[array_name].each { |row| yield row }

-      url = json['next_page']
+      url = json["next_page"]

       if show_status
-        if json['page'] && json['page_count']
-          print_status(json['page'], json['page_count'], start_time)
+        if json["page"] && json["page_count"]
+          print_status(json["page"], json["page_count"], start_time)
         else
-          print '.'
+          print "."
end end end end - end unless ARGV.length == 4 && Dir.exist?(ARGV[1]) - puts "", "Usage:", "", "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN", "" + puts "", + "Usage:", + "", + "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN", + "" exit 1 end diff --git a/script/import_scripts/zoho.rb b/script/import_scripts/zoho.rb index e354b79f8dc..b05debeb7e2 100644 --- a/script/import_scripts/zoho.rb +++ b/script/import_scripts/zoho.rb @@ -21,14 +21,13 @@ # full names instead of usernames. This may cause duplicate users with slightly different # usernames to be created. -require 'csv' +require "csv" require File.expand_path(File.dirname(__FILE__) + "/base.rb") require File.expand_path(File.dirname(__FILE__) + "/base/csv_helper.rb") # Call it like this: # bundle exec ruby script/import_scripts/zoho.rb class ImportScripts::Zoho < ImportScripts::Base - include ImportScripts::CsvHelper BATCH_SIZE = 1000 @@ -50,19 +49,14 @@ class ImportScripts::Zoho < ImportScripts::Base end def cleanup_zoho_username(s) - s.strip.gsub(/[^A-Za-z0-9_\.\-]/, '') + s.strip.gsub(/[^A-Za-z0-9_\.\-]/, "") end def import_users puts "", "Importing users" - create_users(CSV.parse(File.read(File.join(@path, 'users.csv')))) do |u| + create_users(CSV.parse(File.read(File.join(@path, "users.csv")))) do |u| username = cleanup_zoho_username(u[0]) - { - id: username, - username: username, - email: u[1], - created_at: Time.zone.now - } + { id: username, username: username, email: u[1], created_at: Time.zone.now } end end @@ -83,9 +77,7 @@ class ImportScripts::Zoho < ImportScripts::Base csv_parse(File.join(@path, "posts.csv")) do |row| @all_posts << row.dup - if @categories[row.forum_name].nil? - @categories[row.forum_name] = [] - end + @categories[row.forum_name] = [] if @categories[row.forum_name].nil? unless @categories[row.forum_name].include?(row.category_name) @categories[row.forum_name] << row.category_name @@ -105,56 +97,61 @@ class ImportScripts::Zoho < ImportScripts::Base puts "", "Creating topics and posts" - created, skipped = create_posts(@all_posts, total: @all_posts.size) do |row| - @current_row = row + created, skipped = + create_posts(@all_posts, total: @all_posts.size) do |row| + @current_row = row - # fetch user - username = cleanup_zoho_username(row.author) + # fetch user + username = cleanup_zoho_username(row.author) - next if username.blank? # no author for this post, so skip + next if username.blank? # no author for this post, so skip - user_id = user_id_from_imported_user_id(username) + user_id = user_id_from_imported_user_id(username) - if user_id.nil? - # user CSV file didn't have a user with this username. create it now with an invalid email address. - u = create_user( - { id: username, - username: username, - email: "#{username}@example.com", - created_at: Time.zone.parse(row.posted_time) }, - username - ) - user_id = u.id - end - - if @topic_mapping[row.permalink].nil? - category_id = nil - if row.category_name != "Uncategorized" && row.category_name != "Uncategorised" - category_id = category_id_from_imported_category_id("#{row.forum_name}:#{row.category_name}") - else - category_id = category_id_from_imported_category_id(row.forum_name) + if user_id.nil? + # user CSV file didn't have a user with this username. create it now with an invalid email address. 
+ u = + create_user( + { + id: username, + username: username, + email: "#{username}@example.com", + created_at: Time.zone.parse(row.posted_time), + }, + username, + ) + user_id = u.id end - # create topic - { - id: import_post_id(row), - user_id: user_id, - category: category_id, - title: CGI.unescapeHTML(row.topic_title), - raw: cleanup_post(row.content), - created_at: Time.zone.parse(row.posted_time) - } - # created_post callback will be called - else - { - id: import_post_id(row), - user_id: user_id, - raw: cleanup_post(row.content), - created_at: Time.zone.parse(row.posted_time), - topic_id: @topic_mapping[row.permalink] - } + if @topic_mapping[row.permalink].nil? + category_id = nil + if row.category_name != "Uncategorized" && row.category_name != "Uncategorised" + category_id = + category_id_from_imported_category_id("#{row.forum_name}:#{row.category_name}") + else + category_id = category_id_from_imported_category_id(row.forum_name) + end + + # create topic + { + id: import_post_id(row), + user_id: user_id, + category: category_id, + title: CGI.unescapeHTML(row.topic_title), + raw: cleanup_post(row.content), + created_at: Time.zone.parse(row.posted_time), + } + # created_post callback will be called + else + { + id: import_post_id(row), + user_id: user_id, + raw: cleanup_post(row.content), + created_at: Time.zone.parse(row.posted_time), + topic_id: @topic_mapping[row.permalink], + } + end end - end puts "" puts "Created: #{created}" @@ -176,31 +173,30 @@ class ImportScripts::Zoho < ImportScripts::Base STYLE_ATTR = /(\s)*style="(.)*"/ def cleanup_post(raw) - # Check if Zoho's most common form of a code block is present. # If so, don't clean up the post as much because we can't tell which markup # is inside the code block. These posts will look worse than others. has_code_block = !!(raw =~ ZOHO_CODE_BLOCK_START) - x = raw.gsub(STYLE_ATTR, '') + x = raw.gsub(STYLE_ATTR, "") if has_code_block # We have to assume all lists in this post are meant to be code blocks # to make it somewhat readable. x.gsub!(/( )*
<ol>(\s)*<li>/, "")
-      x.gsub!(/( )*<\/ol>/, "")
-      x.gsub!('<li>', '')
-      x.gsub!('</li>', '')
+      x.gsub!(%r{( )*<ol>(\s)*<li>}, "")
+      x.gsub!(%r{( )*</ol>}, "")
+      x.gsub!("<li>", "")
+      x.gsub!("</li>", "")
     else
       # No code block (probably...) so clean up more aggressively.
       x.gsub!("\n", " ")
-      x.gsub!('<br />', "\n\n")
-      x.gsub('<br />', ' ')
+      x.gsub!("<br />", "\n\n")
+      x.gsub("<br />", " ")
       x.gsub!("<div>", "\n")
-      x.gsub!('<ol>', '')
-      x.gsub!('</ol>', '')
-      x.gsub!(/<li(\s[^>]*)>/, '')
-      x.gsub!('</li>', '')
+      x.gsub!("<ol>", "")
+      x.gsub!("</ol>", "")
+      x.gsub!(/<li(\s[^>]*)>/, "")
+      x.gsub!("</li>", "")
     end

     x.gsub!(TOO_MANY_LINE_BREAKS, "\n\n")
@@ -213,13 +209,10 @@
     # The posted_time seems to be the same for all posts in a topic, so we can't use that.
     Digest::SHA1.hexdigest "#{row.permalink}:#{row.content}"
   end
-
 end

 unless ARGV[0] && Dir.exist?(ARGV[0])
-  if ARGV[0] && !Dir.exist?(ARGV[0])
-    puts "", "ERROR! Dir #{ARGV[0]} not found.", ""
-  end
+  puts "", "ERROR! Dir #{ARGV[0]} not found.", "" if ARGV[0] && !Dir.exist?(ARGV[0])
   puts "", "Usage:", "", "  bundle exec ruby script/import_scripts/zoho.rb DIRNAME", ""
   exit 1
diff --git a/script/measure.rb b/script/measure.rb
index 9711206ae62..1dc391735be 100644
--- a/script/measure.rb
+++ b/script/measure.rb
@@ -1,38 +1,36 @@
 # frozen_string_literal: true

 # using this script to try figure out why Ruby 2 is slower than 1.9
-require 'flamegraph'
+require "flamegraph"

-Flamegraph.generate('test.html', fidelity: 2) do
+Flamegraph.generate("test.html", fidelity: 2) do
   require File.expand_path("../../config/environment", __FILE__)
 end
 exit

-require 'memory_profiler'
+require "memory_profiler"

-result = MemoryProfiler.report do
-  require File.expand_path("../../config/environment", __FILE__)
-end
+result = MemoryProfiler.report { require File.expand_path("../../config/environment", __FILE__) }
 result.pretty_print
 exit

-require 'benchmark'
+require "benchmark"

 def profile_allocations(name)
   GC.disable
   initial_size = ObjectSpace.count_objects
   yield
   changes = ObjectSpace.count_objects
-  changes.each do |k, _|
-    changes[k] -= initial_size[k]
-  end
+  changes.each { |k, _| changes[k] -= initial_size[k] }

   puts "#{name} changes"
-  changes.sort { |a, b| b[1] <=> a[1] }.each do |a, b|
-    next if b <= 0
-    # 1 extra hash for tracking
-    puts "#{a} #{a == :T_HASH ? b - 1 : b}"
-  end
+  changes
+    .sort { |a, b| b[1] <=> a[1] }
+    .each do |a, b|
+      next if b <= 0
+      # 1 extra hash for tracking
+      puts "#{a} #{a == :T_HASH ?
b - 1 : b}" + end GC.enable end @@ -47,9 +45,7 @@ def profile(name, &block) ObjectSpace.trace_object_allocations do block.call - ObjectSpace.each_object do |o| - objs << o - end + ObjectSpace.each_object { |o| objs << o } objs.each do |o| g = ObjectSpace.allocation_generation(o) @@ -63,9 +59,10 @@ def profile(name, &block) end end - items.group_by { |x| x }.sort { |a, b| b[1].length <=> a[1].length }.each do |row, group| - puts "#{row} x #{group.length}" - end + items + .group_by { |x| x } + .sort { |a, b| b[1].length <=> a[1].length } + .each { |row, group| puts "#{row} x #{group.length}" } GC.enable profile_allocations(name, &block) diff --git a/script/memstats.rb b/script/memstats.rb index 998d8d525f7..4ca1f531dc7 100755 --- a/script/memstats.rb +++ b/script/memstats.rb @@ -28,7 +28,7 @@ #------------------------------------------------------------------------------ class Mapping - FIELDS = %w[ size rss shared_clean shared_dirty private_clean private_dirty swap pss ] + FIELDS = %w[size rss shared_clean shared_dirty private_clean private_dirty swap pss] attr_reader :address_start attr_reader :address_end attr_reader :perms @@ -48,15 +48,10 @@ class Mapping attr_accessor :pss def initialize(lines) - - FIELDS.each do |field| - self.public_send("#{field}=", 0) - end + FIELDS.each { |field| self.public_send("#{field}=", 0) } parse_first_line(lines.shift) - lines.each do |l| - parse_field_line(l) - end + lines.each { |l| parse_field_line(l) } end def parse_first_line(line) @@ -71,7 +66,7 @@ class Mapping def parse_field_line(line) parts = line.strip.split - field = parts[0].downcase.sub(':', '') + field = parts[0].downcase.sub(":", "") if respond_to? "#{field}=" value = Float(parts[1]).to_i self.public_send("#{field}=", value) @@ -82,26 +77,21 @@ end def consume_mapping(map_lines, totals) m = Mapping.new(map_lines) - Mapping::FIELDS.each do |field| - totals[field] += m.public_send(field) - end + Mapping::FIELDS.each { |field| totals[field] += m.public_send(field) } m end def create_memstats_not_available(totals) - Mapping::FIELDS.each do |field| - totals[field] += Float::NAN - end + Mapping::FIELDS.each { |field| totals[field] += Float::NAN } end -abort 'usage: memstats [pid]' unless ARGV.first +abort "usage: memstats [pid]" unless ARGV.first pid = ARGV.shift.to_i totals = Hash.new(0) mappings = [] begin File.open("/proc/#{pid}/smaps") do |smaps| - map_lines = [] loop do @@ -111,9 +101,7 @@ begin when /\w+:\s+/ map_lines << line when /[0-9a-f]+:[0-9a-f]+\s+/ - if map_lines.size > 0 then - mappings << consume_mapping(map_lines, totals) - end + mappings << consume_mapping(map_lines, totals) if map_lines.size > 0 map_lines.clear map_lines << line else @@ -121,7 +109,7 @@ begin end end end -rescue +rescue StandardError create_memstats_not_available(totals) end @@ -132,23 +120,19 @@ end def get_commandline(pid) commandline = File.read("/proc/#{pid}/cmdline").split("\0") - if commandline.first =~ /java$/ then + if commandline.first =~ /java$/ loop { break if commandline.shift == "-jar" } return "[java] #{commandline.shift}" end - commandline.join(' ') + commandline.join(" ") end -if ARGV.include? '--yaml' - require 'yaml' - puts Hash[*totals.map do |k, v| - [k + '_kb', v] - end.flatten].to_yaml +if ARGV.include? 
"--yaml" + require "yaml" + puts Hash[*totals.map do |k, v| [k + "_kb", v] end.flatten].to_yaml else puts "#{"Process:".ljust(20)} #{pid}" puts "#{"Command Line:".ljust(20)} #{get_commandline(pid)}" puts "Memory Summary:" - totals.keys.sort.each do |k| - puts " #{k.ljust(20)} #{format_number(totals[k]).rjust(12)} kB" - end + totals.keys.sort.each { |k| puts " #{k.ljust(20)} #{format_number(totals[k]).rjust(12)} kB" } end diff --git a/script/micro_bench.rb b/script/micro_bench.rb index 51ea6c89128..7ba5dceffe8 100644 --- a/script/micro_bench.rb +++ b/script/micro_bench.rb @@ -1,32 +1,20 @@ # frozen_string_literal: true -require 'benchmark/ips' +require "benchmark/ips" require File.expand_path("../../config/environment", __FILE__) conn = ActiveRecord::Base.connection.raw_connection Benchmark.ips do |b| - b.report("simple") do - User.first.name - end + b.report("simple") { User.first.name } - b.report("simple with select") do - User.select("name").first.name - end + b.report("simple with select") { User.select("name").first.name } - b.report("pluck with first") do - User.pluck(:name).first - end + b.report("pluck with first") { User.pluck(:name).first } - b.report("pluck with limit") do - User.limit(1).pluck(:name).first - end + b.report("pluck with limit") { User.limit(1).pluck(:name).first } - b.report("pluck with pluck_first") do - User.pluck_first(:name) - end + b.report("pluck with pluck_first") { User.pluck_first(:name) } - b.report("raw") do - conn.exec("SELECT name FROM users LIMIT 1").getvalue(0, 0) - end + b.report("raw") { conn.exec("SELECT name FROM users LIMIT 1").getvalue(0, 0) } end diff --git a/script/profile_db_generator.rb b/script/profile_db_generator.rb index c49ef6f6278..8222c008096 100644 --- a/script/profile_db_generator.rb +++ b/script/profile_db_generator.rb @@ -5,7 +5,7 @@ # we want our script to generate a consistent output, to do so # we monkey patch array sample so it always uses the same rng class Array - RNG = Random.new(1098109928029800) + RNG = Random.new(1_098_109_928_029_800) def sample self[RNG.rand(size)] @@ -16,9 +16,7 @@ end def unbundled_require(gem) if defined?(::Bundler) spec_path = Dir.glob("#{Gem.dir}/specifications/#{gem}-*.gemspec").last - if spec_path.nil? - raise LoadError - end + raise LoadError if spec_path.nil? spec = Gem::Specification.load spec_path spec.activate @@ -30,13 +28,14 @@ def unbundled_require(gem) end def sentence - @gabbler ||= Gabbler.new.tap do |gabbler| - story = File.read(File.dirname(__FILE__) + "/alice.txt") - gabbler.learn(story) - end + @gabbler ||= + Gabbler.new.tap do |gabbler| + story = File.read(File.dirname(__FILE__) + "/alice.txt") + gabbler.learn(story) + end sentence = +"" - until sentence.length > 800 do + until sentence.length > 800 sentence << @gabbler.sentence sentence << "\n" end @@ -74,13 +73,13 @@ if User.count > 2 exit end -require 'optparse' +require "optparse" begin - unbundled_require 'gabbler' + unbundled_require "gabbler" rescue LoadError puts "installing gabbler gem" puts `gem install gabbler` - unbundled_require 'gabbler' + unbundled_require "gabbler" end number_of_users = 100 @@ -98,41 +97,61 @@ users = User.human_users.all puts puts "Creating 10 categories" -categories = 10.times.map do |i| - putc "." - Category.create(name: "category#{i}", text_color: "ffffff", color: "000000", user: admin_user) -end +categories = + 10.times.map do |i| + putc "." 
+ Category.create(name: "category#{i}", text_color: "ffffff", color: "000000", user: admin_user) + end puts puts "Creating 100 topics" -topic_ids = 100.times.map do - post = PostCreator.create(admin_user, raw: sentence, title: sentence[0..50].strip, category: categories.sample.id, skip_validations: true) - putc "." - post.topic_id -end +topic_ids = + 100.times.map do + post = + PostCreator.create( + admin_user, + raw: sentence, + title: sentence[0..50].strip, + category: categories.sample.id, + skip_validations: true, + ) + putc "." + post.topic_id + end puts puts "Creating 2000 replies" 2000.times do putc "." - PostCreator.create(users.sample, raw: sentence, topic_id: topic_ids.sample, skip_validations: true) + PostCreator.create( + users.sample, + raw: sentence, + topic_id: topic_ids.sample, + skip_validations: true, + ) end puts puts "creating perf test topic" -first_post = PostCreator.create( - users.sample, - raw: sentence, - title: "I am a topic used for perf tests", - category: categories.sample.id, - skip_validations: true -) +first_post = + PostCreator.create( + users.sample, + raw: sentence, + title: "I am a topic used for perf tests", + category: categories.sample.id, + skip_validations: true, + ) puts puts "Creating 100 replies for perf test topic" 100.times do putc "." - PostCreator.create(users.sample, raw: sentence, topic_id: first_post.topic_id, skip_validations: true) + PostCreator.create( + users.sample, + raw: sentence, + topic_id: first_post.topic_id, + skip_validations: true, + ) end # no sidekiq so update some stuff diff --git a/script/redis_memory.rb b/script/redis_memory.rb index 442463bc415..b191112cefb 100644 --- a/script/redis_memory.rb +++ b/script/redis_memory.rb @@ -24,7 +24,10 @@ stats = {} end puts "Top 100 keys" -stats.sort { |a, b| b[1][0] <=> a[1][0] }.first(50).each do |k, (len, type, elems)| - elems = " [#{elems}]" if elems - puts "#{k} #{type} #{len}#{elems}" -end +stats + .sort { |a, b| b[1][0] <=> a[1][0] } + .first(50) + .each do |k, (len, type, elems)| + elems = " [#{elems}]" if elems + puts "#{k} #{type} #{len}#{elems}" + end diff --git a/script/require_profiler.rb b/script/require_profiler.rb index a0135d08b75..b8f3437efb6 100644 --- a/script/require_profiler.rb +++ b/script/require_profiler.rb @@ -5,12 +5,11 @@ # This is a rudimentary script that allows us to # quickly determine if any gems are slowing down startup -require 'benchmark' -require 'fileutils' +require "benchmark" +require "fileutils" module RequireProfiler class << self - attr_accessor :stats def profiling_enabled? 
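The `require_profiler.rb` hunks below reformat the script's `Kernel`-plus-singleton alias dance; the underlying technique is simply wrapping `require` so every call gets timed. On modern Ruby the same effect can be sketched with `Module#prepend` (module and constant names here are illustrative, not from the script):

```ruby
module RequireTimer
  TIMES = Hash.new(0.0)

  def require(path)
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    super
  ensure
    TIMES[path] += Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  end
end

# Object includes Kernel, so prepending here puts the timer ahead of
# Kernel#require in the lookup chain for top-level require calls.
Object.prepend(RequireTimer)

require "set"
puts format("require 'set' took %.5fs", RequireTimer::TIMES["set"])
```

The script keeps alias pairs instead because it also needs to restore the original `require` in `stop`; a prepended module cannot be removed again.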
@@ -25,10 +24,19 @@ module RequireProfiler def start(tmp_options = {}) @start_time = Time.now - [ ::Kernel, (class << ::Kernel; self; end) ].each do |klass| + [ + ::Kernel, + ( + class << ::Kernel + self + end + ), + ].each do |klass| klass.class_eval do def require_with_profiling(path, *args) - RequireProfiler.measure(path, caller, :require) { require_without_profiling(path, *args) } + RequireProfiler.measure(path, caller, :require) do + require_without_profiling(path, *args) + end end alias require_without_profiling require alias require require_with_profiling @@ -47,7 +55,14 @@ module RequireProfiler def stop @stop_time = Time.now - [ ::Kernel, (class << ::Kernel; self; end) ].each do |klass| + [ + ::Kernel, + ( + class << ::Kernel + self + end + ), + ].each do |klass| klass.class_eval do alias require require_without_profiling alias load load_without_profiling @@ -63,21 +78,20 @@ module RequireProfiler @stack ||= [] self.stats ||= {} - stat = self.stats.fetch(path) { |key| self.stats[key] = { calls: 0, time: 0, parent_time: 0 } } + stat = + self.stats.fetch(path) { |key| self.stats[key] = { calls: 0, time: 0, parent_time: 0 } } @stack << stat time = Time.now begin - output = yield # do the require or load here + output = yield # do the require or load here ensure delta = Time.now - time stat[:time] += delta stat[:calls] += 1 @stack.pop - @stack.each do |frame| - frame[:parent_time] += delta - end + @stack.each { |frame| frame[:parent_time] += delta } end output @@ -102,7 +116,6 @@ module RequireProfiler puts "GC duration: #{gc_duration_finish}" puts "GC impact: #{gc_duration_finish - gc_duration_start}" end - end end @@ -122,8 +135,9 @@ RequireProfiler.profile do end end -sorted = RequireProfiler.stats.to_a.sort { |a, b| b[1][:time] - b[1][:parent_time] <=> a[1][:time] - a[1][:parent_time] } +sorted = + RequireProfiler.stats.to_a.sort do |a, b| + b[1][:time] - b[1][:parent_time] <=> a[1][:time] - a[1][:parent_time] + end -sorted[0..120].each do |k, v| - puts "#{k} : time #{v[:time] - v[:parent_time]} " -end +sorted[0..120].each { |k, v| puts "#{k} : time #{v[:time] - v[:parent_time]} " } diff --git a/script/spawn_backup_restore.rb b/script/spawn_backup_restore.rb index 4af4f667c63..7ab3721d5db 100644 --- a/script/spawn_backup_restore.rb +++ b/script/spawn_backup_restore.rb @@ -15,11 +15,8 @@ fork do BackupRestore::Restorer.new( user_id: user_id, filename: opts[:filename], - factory: BackupRestore::Factory.new( - user_id: user_id, - client_id: opts[:client_id] - ), - disable_emails: opts.fetch(:disable_emails, true) + factory: BackupRestore::Factory.new(user_id: user_id, client_id: opts[:client_id]), + disable_emails: opts.fetch(:disable_emails, true), ).run end diff --git a/script/test_email_settings.rb b/script/test_email_settings.rb index 9ef0fc8b93d..c4c9272e432 100755 --- a/script/test_email_settings.rb +++ b/script/test_email_settings.rb @@ -1,16 +1,16 @@ #!/usr/bin/env ruby # frozen_string_literal: true -require 'action_mailer' +require "action_mailer" # Make this your email address. 
 YOUR_EMAIL = "nobody@example.com"

 # Change these to be the same settings as your Discourse environment
-DISCOURSE_SMTP_ADDRESS = "smtp.example.com"    # (mandatory)
-@DISCOURSE_SMTP_PORT = 587                     # (optional)
-@DISCOURSE_SMTP_USER_NAME = "username"         # (optional)
-@DISCOURSE_SMTP_PASSWORD = "blah"              # (optional)
+DISCOURSE_SMTP_ADDRESS = "smtp.example.com" # (mandatory)
+@DISCOURSE_SMTP_PORT = 587 # (optional)
+@DISCOURSE_SMTP_USER_NAME = "username" # (optional)
+@DISCOURSE_SMTP_PASSWORD = "blah" # (optional)
 #@DISCOURSE_SMTP_OPENSSL_VERIFY_MODE = "none" # (optional) none|peer|client_once|fail_if_no_peer_cert

 # Note that DISCOURSE_SMTP_ADDRESS should NOT BE ALLOWED to relay mail to
@@ -24,16 +24,18 @@ $delivery_options = {
   password: @DISCOURSE_SMTP_PASSWORD || nil,
   address: DISCOURSE_SMTP_ADDRESS,
   port: @DISCOURSE_SMTP_PORT || nil,
-  openssl_verify_mode: @DISCOURSE_SMTP_OPENSSL_VERIFY_MODE || nil
+  openssl_verify_mode: @DISCOURSE_SMTP_OPENSSL_VERIFY_MODE || nil,
 }

 class EmailTestMailer < ActionMailer::Base
   def email_test(mailfrom, mailto)
-    mail(from: mailfrom,
-         to: mailto,
-         body: "Testing email settings",
-         subject: "Discourse email settings test",
-         delivery_method_options: $delivery_options)
+    mail(
+      from: mailfrom,
+      to: mailto,
+      body: "Testing email settings",
+      subject: "Discourse email settings test",
+      delivery_method_options: $delivery_options,
+    )
   end
 end
diff --git a/script/test_mem.rb b/script/test_mem.rb
index dd64ea2de37..284e99993f3 100644
--- a/script/test_mem.rb
+++ b/script/test_mem.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true

 start = Time.now
-require 'objspace'
+require "objspace"

 require File.expand_path("../../config/environment", __FILE__)

 # preload stuff
@@ -9,11 +9,19 @@ I18n.t(:posts)

 # load up all models and schema
 (ActiveRecord::Base.connection.tables - %w[schema_migrations]).each do |table|
-  table.classify.constantize.first rescue nil
+  begin
+    table.classify.constantize.first
+  rescue StandardError
+    nil
+  end
 end

 # router warm up
-Rails.application.routes.recognize_path('abc') rescue nil
+begin
+  Rails.application.routes.recognize_path("abc")
+rescue StandardError
+  nil
+end

 puts "Ruby version #{RUBY_VERSION} p#{RUBY_PATCHLEVEL}"

@@ -23,8 +31,11 @@ GC.start

 puts "RSS: #{`ps -o rss -p #{$$}`.chomp.split("\n").last.to_i} KB"

-s = ObjectSpace.each_object(String).map do |o|
-  ObjectSpace.memsize_of(o) + 40 # rvalue size on x64
-end
+s =
+  ObjectSpace
+    .each_object(String)
+    .map do |o|
+      ObjectSpace.memsize_of(o) + 40 # rvalue size on x64
+    end

 puts "Total strings: #{s.count} space used: #{s.sum} bytes"
diff --git a/script/test_memory_leak.rb b/script/test_memory_leak.rb
index 3ecf01c39e4..a426d367841 100644
--- a/script/test_memory_leak.rb
+++ b/script/test_memory_leak.rb
@@ -5,25 +5,21 @@
 # this performs a trivial operation walking all multisites and grabbing first topic / localizing
 # the expectation is that RSS will remain static no matter how many iterations run

-if ENV['RAILS_ENV'] != "production"
-  exec "RAILS_ENV=production ruby #{__FILE__}"
-end
+exec "RAILS_ENV=production ruby #{__FILE__}" if ENV["RAILS_ENV"] != "production"

-if !ENV['LD_PRELOAD']
-  exec "LD_PRELOAD=/usr/lib/libjemalloc.so.1 ruby #{__FILE__}"
-end
+exec "LD_PRELOAD=/usr/lib/libjemalloc.so.1 ruby #{__FILE__}" if !ENV["LD_PRELOAD"]

-if ENV['LD_PRELOAD'].include?("jemalloc")
+if ENV["LD_PRELOAD"].include?("jemalloc")
   # for 3.6.0 we need a patch jemal 1.1.0 gem (1.1.1 does not support 3.6.0)
   # however ffi is a problem so we need to patch the gem
-  require 'jemal'
+  require "jemal"

   $jemalloc = true
 end

-if ENV['LD_PRELOAD'].include?("mwrap")
+if ENV["LD_PRELOAD"].include?("mwrap")
   $mwrap = true
-  require 'mwrap'
+  require "mwrap"
 end

 def bin_diff(current)
@@ -39,7 +35,11 @@ end

 require File.expand_path("../../config/environment", __FILE__)

-Rails.application.routes.recognize_path('abc') rescue nil
+begin
+  Rails.application.routes.recognize_path("abc")
+rescue StandardError
+  nil
+end
 I18n.t(:posts)

 def rss
@@ -47,9 +47,7 @@ def rss
 end

 def loop_sites
-  RailsMultisite::ConnectionManagement.each_connection do
-    yield
-  end
+  RailsMultisite::ConnectionManagement.each_connection { yield }
 end

 def biggest_klass(klass)
@@ -57,7 +55,10 @@ end

 def iter(warmup: false)
-  loop_sites { Topic.first; I18n.t('too_late_to_edit') }
+  loop_sites do
+    Topic.first
+    I18n.t("too_late_to_edit")
+  end

   if !warmup
     GC.start(full_mark: true, immediate_sweep: true)
@@ -75,24 +76,17 @@ def iter(warmup: false)
     array_delta = biggest_klass(Array).length - $biggest_array_length
     puts "rss: #{rss} (#{rss_delta}) #{mwrap_delta}#{jedelta} heap_delta: #{GC.stat[:heap_live_slots] - $baseline_slots} array_delta: #{array_delta}"
-    if $jemalloc
-      bin_diff(jemal_stats)
-    end
+    bin_diff(jemal_stats) if $jemalloc
   end
-
 end

 iter(warmup: true)
-4.times do
-  GC.start(full_mark: true, immediate_sweep: true)
-end
+4.times { GC.start(full_mark: true, immediate_sweep: true) }

 if $jemalloc
   $baseline = Jemal.stats
   $baseline_jemalloc_active = $baseline[:active]
-  4.times do
-    GC.start(full_mark: true, immediate_sweep: true)
-  end
+  4.times { GC.start(full_mark: true, immediate_sweep: true) }
 end

 def render_table(array)
@@ -102,33 +96,33 @@ def render_table(array)
   cols = array[0].length

   array.each do |row|
-    row.each_with_index do |val, i|
-      width[i] = [width[i].to_i, val.to_s.length].max
-    end
+    row.each_with_index { |val, i| width[i] = [width[i].to_i, val.to_s.length].max }
   end

   array[0].each_with_index do |col, i|
-    buffer << col.to_s.ljust(width[i], ' ')
+    buffer << col.to_s.ljust(width[i], " ")
     if i == cols - 1
       buffer << "\n"
     else
-      buffer << ' | '
+      buffer << " | "
     end
   end

   buffer << ("-" * (width.sum + width.length))
   buffer << "\n"

-  array.drop(1).each do |row|
-    row.each_with_index do |val, i|
-      buffer << val.to_s.ljust(width[i], ' ')
-      if i == cols - 1
-        buffer << "\n"
-      else
-        buffer << ' | '
+  array
+    .drop(1)
+    .each do |row|
+      row.each_with_index do |val, i|
+        buffer << val.to_s.ljust(width[i], " ")
+        if i == cols - 1
+          buffer << "\n"
+        else
+          buffer << " | "
+        end
       end
     end
-  end

   buffer
 end
@@ -141,14 +135,20 @@ def mwrap_log
   report << "\n"

   table = []
-  Mwrap.each(200000) do |loc, total, allocations, frees, age_sum, max_life|
-    table << [total, allocations - frees, frees == 0 ? -1 : (age_sum / frees.to_f).round(2), max_life, loc]
+  Mwrap.each(200_000) do |loc, total, allocations, frees, age_sum, max_life|
+    table << [
+      total,
+      allocations - frees,
+      frees == 0 ? -1 : (age_sum / frees.to_f).round(2),
+      max_life,
+      loc,
+    ]
   end

   table.sort! { |a, b| b[1] <=> a[1] }
   table = table[0..50]
-  table.prepend(["total", "delta", "mean_life", "max_life", "location"])
+  table.prepend(%w[total delta mean_life max_life location])

   report << render_table(table)
 end
@@ -158,15 +158,13 @@ end

 Mwrap.clear

-if $mwrap
-  $mwrap_baseline = Mwrap.total_bytes_allocated - Mwrap.total_bytes_freed
-end
+$mwrap_baseline = Mwrap.total_bytes_allocated - Mwrap.total_bytes_freed if $mwrap

 $baseline_slots = GC.stat[:heap_live_slots]
 $baseline_rss = rss
 $biggest_array_length = biggest_klass(Array).length

-100000.times do
+100_000.times do
   iter
   if $mwrap
     puts mwrap_log
diff --git a/script/test_pretty_text.rb b/script/test_pretty_text.rb
index 29839d96293..02e8644a9eb 100644
--- a/script/test_pretty_text.rb
+++ b/script/test_pretty_text.rb
@@ -9,7 +9,7 @@ puts PrettyText.cook "test"

   # JS
   PrettyText.cook "test"
-  PrettyText.v8.eval('gc()')
+  PrettyText.v8.eval("gc()")

   # if i % 500 == 0
   #p PrettyText.v8.heap_stats
diff --git a/script/thread_detective.rb b/script/thread_detective.rb
index c654158edef..83e886d085c 100644
--- a/script/thread_detective.rb
+++ b/script/thread_detective.rb
@@ -9,13 +9,9 @@ class ThreadDetective
     Thread.new { sleep 1 }
   end

   def self.start(max_threads)
-    @thread ||= Thread.new do
-      self.new.monitor(max_threads)
-    end
+    @thread ||= Thread.new { self.new.monitor(max_threads) }

-    @trace = TracePoint.new(:thread_begin) do |tp|
-      Thread.current.origin = Thread.current.inspect
-    end
+    @trace = TracePoint.new(:thread_begin) { |tp| Thread.current.origin = Thread.current.inspect }
     @trace.enable
   end
@@ -52,5 +48,4 @@ class ThreadDetective
       sleep 1
     end
   end
-
 end
diff --git a/script/user_simulator.rb b/script/user_simulator.rb
index fb7853ac71c..562c559ed1c 100644
--- a/script/user_simulator.rb
+++ b/script/user_simulator.rb
@@ -4,31 +4,32 @@
 #
 # by default 1 new topic every 30 sec, 1 reply to last topic every 30 secs

-require 'optparse'
-require 'gabbler'
+require "optparse"
+require "gabbler"

 user_id = nil

 def sentence
-  @gabbler ||= Gabbler.new.tap do |gabbler|
-    story = File.read(File.dirname(__FILE__) + "/alice.txt")
-    gabbler.learn(story)
-  end
+  @gabbler ||=
+    Gabbler.new.tap do |gabbler|
+      story = File.read(File.dirname(__FILE__) + "/alice.txt")
+      gabbler.learn(story)
+    end

   sentence = +""
-  until sentence.length > 800 do
+  until sentence.length > 800
     sentence << @gabbler.sentence
     sentence << "\n"
   end

   sentence
 end

-OptionParser.new do |opts|
-  opts.banner = "Usage: ruby user_simulator.rb [options]"
-  opts.on("-u", "--user NUMBER", "user id") do |u|
-    user_id = u.to_i
+OptionParser
+  .new do |opts|
+    opts.banner = "Usage: ruby user_simulator.rb [options]"
+    opts.on("-u", "--user NUMBER", "user id") { |u| user_id = u.to_i }
   end
-end.parse!
+  .parse!

 unless user_id
   puts "user must be specified"
@@ -37,19 +38,19 @@ end

 require File.expand_path(File.dirname(__FILE__) + "/../config/environment")

-unless ["profile", "development"].include? Rails.env
+unless %w[profile development].include? Rails.env
   puts "Bad idea to run a script that inserts random posts in any non development environment"
   exit
 end

 user = User.find(user_id)

-last_topics = Topic.order('id desc').limit(10).pluck(:id)
+last_topics = Topic.order("id desc").limit(10).pluck(:id)

 puts "Simulating activity for user id #{user.id}: #{user.name}"

 while true
   puts "Creating a random topic"
-  category = Category.where(read_restricted: false).order('random()').first
+  category = Category.where(read_restricted: false).order("random()").first
   PostCreator.create(user, raw: sentence, title: sentence[0..50].strip, category: category.id)

   puts "creating random reply"