DEV: Apply syntax_tree formatting to script/*

This commit is contained in:
David Taylor
2023-01-07 11:53:14 +00:00
parent ff508d1ae5
commit 436b3b392b
143 changed files with 8905 additions and 7353 deletions

View File

@ -21,14 +21,13 @@
# full names instead of usernames. This may cause duplicate users with slightly different
# usernames to be created.
require 'csv'
require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require File.expand_path(File.dirname(__FILE__) + "/base/csv_helper.rb")
# Call it like this:
# bundle exec ruby script/import_scripts/zoho.rb <path-to-csv-files>
class ImportScripts::Zoho < ImportScripts::Base
include ImportScripts::CsvHelper
BATCH_SIZE = 1000
@ -50,19 +49,14 @@ class ImportScripts::Zoho < ImportScripts::Base
end
# Normalizes a raw Zoho author/username value into a username made only of
# ASCII letters, digits, "_", "." and "-".
#
# @param s [String, nil] raw value taken from a CSV column
# @return [String] the value with surrounding whitespace stripped and every
#   other character removed; "" when +s+ is nil
def cleanup_zoho_username(s)
  # to_s keeps a missing (nil) author from raising NoMethodError; the result
  # is then blank, which the post import already treats as "no author, skip".
  s.to_s.strip.gsub(/[^A-Za-z0-9_\.\-]/, "")
end
# Imports the users listed in users.csv inside the directory held in @path.
#
# Rows are read positionally: column 0 is passed through
# cleanup_zoho_username and used as both the import id and the username,
# column 1 is used as the email address. created_at is set to the time the
# import runs.
def import_users
  puts "", "Importing users"

  users_csv = File.join(@path, "users.csv")

  create_users(CSV.parse(File.read(users_csv))) do |u|
    username = cleanup_zoho_username(u[0])
    { id: username, username: username, email: u[1], created_at: Time.zone.now }
  end
end
@ -83,9 +77,7 @@ class ImportScripts::Zoho < ImportScripts::Base
csv_parse(File.join(@path, "posts.csv")) do |row|
@all_posts << row.dup
if @categories[row.forum_name].nil?
@categories[row.forum_name] = []
end
@categories[row.forum_name] = [] if @categories[row.forum_name].nil?
unless @categories[row.forum_name].include?(row.category_name)
@categories[row.forum_name] << row.category_name
@ -105,56 +97,61 @@ class ImportScripts::Zoho < ImportScripts::Base
puts "", "Creating topics and posts"
created, skipped = create_posts(@all_posts, total: @all_posts.size) do |row|
@current_row = row
created, skipped =
create_posts(@all_posts, total: @all_posts.size) do |row|
@current_row = row
# fetch user
username = cleanup_zoho_username(row.author)
# fetch user
username = cleanup_zoho_username(row.author)
next if username.blank? # no author for this post, so skip
next if username.blank? # no author for this post, so skip
user_id = user_id_from_imported_user_id(username)
user_id = user_id_from_imported_user_id(username)
if user_id.nil?
# user CSV file didn't have a user with this username. create it now with an invalid email address.
u = create_user(
{ id: username,
username: username,
email: "#{username}@example.com",
created_at: Time.zone.parse(row.posted_time) },
username
)
user_id = u.id
end
if @topic_mapping[row.permalink].nil?
category_id = nil
if row.category_name != "Uncategorized" && row.category_name != "Uncategorised"
category_id = category_id_from_imported_category_id("#{row.forum_name}:#{row.category_name}")
else
category_id = category_id_from_imported_category_id(row.forum_name)
if user_id.nil?
# user CSV file didn't have a user with this username. create it now with an invalid email address.
u =
create_user(
{
id: username,
username: username,
email: "#{username}@example.com",
created_at: Time.zone.parse(row.posted_time),
},
username,
)
user_id = u.id
end
# create topic
{
id: import_post_id(row),
user_id: user_id,
category: category_id,
title: CGI.unescapeHTML(row.topic_title),
raw: cleanup_post(row.content),
created_at: Time.zone.parse(row.posted_time)
}
# created_post callback will be called
else
{
id: import_post_id(row),
user_id: user_id,
raw: cleanup_post(row.content),
created_at: Time.zone.parse(row.posted_time),
topic_id: @topic_mapping[row.permalink]
}
if @topic_mapping[row.permalink].nil?
category_id = nil
if row.category_name != "Uncategorized" && row.category_name != "Uncategorised"
category_id =
category_id_from_imported_category_id("#{row.forum_name}:#{row.category_name}")
else
category_id = category_id_from_imported_category_id(row.forum_name)
end
# create topic
{
id: import_post_id(row),
user_id: user_id,
category: category_id,
title: CGI.unescapeHTML(row.topic_title),
raw: cleanup_post(row.content),
created_at: Time.zone.parse(row.posted_time),
}
# created_post callback will be called
else
{
id: import_post_id(row),
user_id: user_id,
raw: cleanup_post(row.content),
created_at: Time.zone.parse(row.posted_time),
topic_id: @topic_mapping[row.permalink],
}
end
end
end
puts ""
puts "Created: #{created}"
@ -176,31 +173,30 @@ class ImportScripts::Zoho < ImportScripts::Base
# Matches an inline style="..." attribute together with any whitespace in
# front of it, so it can be stripped from post markup.
# The value is matched as [^"]* instead of a greedy .* so the match ends at
# the attribute's own closing quote; the greedy form ran on to the last double
# quote on the line and removed later attributes and even text between tags.
STYLE_ATTR = /\s*style="[^"]*"/
def cleanup_post(raw)
# Check if Zoho's most common form of a code block is present.
# If so, don't clean up the post as much because we can't tell which markup
# is inside the code block. These posts will look worse than others.
has_code_block = !!(raw =~ ZOHO_CODE_BLOCK_START)
x = raw.gsub(STYLE_ATTR, '')
x = raw.gsub(STYLE_ATTR, "")
if has_code_block
# We have to assume all lists in this post are meant to be code blocks
# to make it somewhat readable.
x.gsub!(/( )*<ol>(\s)*/, "")
x.gsub!(/( )*<\/ol>/, "")
x.gsub!('<li>', '')
x.gsub!('</li>', '')
x.gsub!(%r{( )*</ol>}, "")
x.gsub!("<li>", "")
x.gsub!("</li>", "")
else
# No code block (probably...) so clean up more aggressively.
x.gsub!("\n", " ")
x.gsub!('<div>', "\n\n")
x.gsub('</div>', ' ')
x.gsub!("<div>", "\n\n")
x.gsub("</div>", " ")
x.gsub!("<br />", "\n")
x.gsub!('<span>', '')
x.gsub!('</span>', '')
x.gsub!(/<font ([^>]*)>/, '')
x.gsub!('</font>', '')
x.gsub!("<span>", "")
x.gsub!("</span>", "")
x.gsub!(/<font ([^>]*)>/, "")
x.gsub!("</font>", "")
end
x.gsub!(TOO_MANY_LINE_BREAKS, "\n\n")
@ -213,13 +209,10 @@ class ImportScripts::Zoho < ImportScripts::Base
# The posted_time seems to be the same for all posts in a topic, so we can't use that.
Digest::SHA1.hexdigest "#{row.permalink}:#{row.content}"
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])
if ARGV[0] && !Dir.exist?(ARGV[0])
puts "", "ERROR! Dir #{ARGV[0]} not found.", ""
end
puts "", "ERROR! Dir #{ARGV[0]} not found.", "" if ARGV[0] && !Dir.exist?(ARGV[0])
puts "", "Usage:", "", " bundle exec ruby script/import_scripts/zoho.rb DIRNAME", ""
exit 1