DEV: Apply syntax_tree formatting to script/*

This commit is contained in:
David Taylor
2023-01-07 11:53:14 +00:00
parent ff508d1ae5
commit 436b3b392b
143 changed files with 8905 additions and 7353 deletions

View File

@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative '../base'
require_relative 'support/database'
require_relative 'support/indexer'
require_relative 'support/settings'
require_relative "../base"
require_relative "support/database"
require_relative "support/indexer"
require_relative "support/settings"
module ImportScripts::Mbox
class Importer < ImportScripts::Base
@ -38,44 +38,44 @@ module ImportScripts::Mbox
end
# Prints a progress header, then builds an Indexer from @database and @settings
# and runs it.
# NOTE(review): this is a diff rendering; the two `puts` lines are the removed
# (single-quoted) and added (double-quoted) forms of the same statement.
def index_messages
puts '', 'creating index'
puts "", "creating index"
indexer = Indexer.new(@database, @settings)
indexer.execute
end
# Prints a progress header, fetches the category rows from @database and hands
# them to create_categories, mapping each row to an attribute hash.
# NOTE(review): diff rendering; each changed line appears twice, first in its
# old single-quoted form and then in its new double-quoted form.
def import_categories
puts '', 'creating categories'
puts "", "creating categories"
rows = @database.fetch_categories
create_categories(rows) do |row|
{
# The category name is used both as the import id and as the display name.
id: row['name'],
name: row['name'],
parent_category_id: row['parent_category_id'].presence,
id: row["name"],
name: row["name"],
# presumably ActiveSupport's `presence` (blank value becomes nil) -- TODO confirm
parent_category_id: row["parent_category_id"].presence,
}
end
end
def import_users
puts '', 'creating users'
puts "", "creating users"
total_count = @database.count_users
last_email = ''
last_email = ""
batches do |offset|
rows, last_email = @database.fetch_users(last_email)
break if rows.empty?
next if all_records_exist?(:users, rows.map { |row| row['email'] })
next if all_records_exist?(:users, rows.map { |row| row["email"] })
create_users(rows, total: total_count, offset: offset) do |row|
{
id: row['email'],
email: row['email'],
name: row['name'],
id: row["email"],
email: row["email"],
name: row["name"],
trust_level: @settings.trust_level,
staged: @settings.staged,
active: !@settings.staged,
created_at: to_time(row['date_of_first_message'])
created_at: to_time(row["date_of_first_message"]),
}
end
end
@ -86,7 +86,7 @@ module ImportScripts::Mbox
end
def import_posts
puts '', 'creating topics and posts'
puts "", "creating topics and posts"
total_count = @database.count_messages
last_row_id = 0
@ -94,47 +94,45 @@ module ImportScripts::Mbox
rows, last_row_id = @database.fetch_messages(last_row_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| row['msg_id'] })
next if all_records_exist?(:posts, rows.map { |row| row["msg_id"] })
create_posts(rows, total: total_count, offset: offset) do |row|
begin
if row['email_date'].blank?
puts "Date is missing. Skipping #{row['msg_id']}"
if row["email_date"].blank?
puts "Date is missing. Skipping #{row["msg_id"]}"
nil
elsif row['in_reply_to'].blank?
elsif row["in_reply_to"].blank?
map_first_post(row)
else
map_reply(row)
end
rescue => e
puts "Failed to map post for #{row['msg_id']}", e, e.backtrace.join("\n")
puts "Failed to map post for #{row["msg_id"]}", e, e.backtrace.join("\n")
end
end
end
end
# Builds the attribute hash for one indexed message row.
# The author is resolved from the row's "from_email"; when no imported user is
# found, Discourse::SYSTEM_USER_ID is used instead.
# The :post_create_action proc calls create_incoming_email with the post it
# receives and this row.
# NOTE(review): diff rendering; changed lines appear as old/new pairs, and the
# three-line `proc do ... end` is the old form of the one-line `proc { ... }`.
def map_post(row)
user_id = user_id_from_imported_user_id(row['from_email']) || Discourse::SYSTEM_USER_ID
user_id = user_id_from_imported_user_id(row["from_email"]) || Discourse::SYSTEM_USER_ID
{
id: row['msg_id'],
id: row["msg_id"],
user_id: user_id,
created_at: to_time(row['email_date']),
created_at: to_time(row["email_date"]),
raw: format_raw(row, user_id),
raw_email: row['raw_message'],
raw_email: row["raw_message"],
via_email: true,
post_create_action: proc do |post|
create_incoming_email(post, row)
end
post_create_action: proc { |post| create_incoming_email(post, row) },
}
end
def format_raw(row, user_id)
body = row['body'] || ''
elided = row['elided']
body = row["body"] || ""
elided = row["elided"]
if row['attachment_count'].positive?
receiver = Email::Receiver.new(row['raw_message'])
if row["attachment_count"].positive?
receiver = Email::Receiver.new(row["raw_message"])
user = User.find(user_id)
body = receiver.add_attachments(body, user)
end
@ -147,21 +145,21 @@ module ImportScripts::Mbox
end
# Maps a message row to the attributes of a topic's first post: the result of
# map_post plus :category, :title and (only when present) :tags.
# NOTE(review): diff rendering; changed lines appear as old/new pairs.
def map_first_post(row)
subject = row['subject']
subject = row["subject"]
# remove_tags! is called before the title is built from `subject`.
# NOTE(review): it appears to strip tag prefixes from `subject` in place -- confirm.
tags = remove_tags!(subject)
mapped = map_post(row)
mapped[:category] = category_id_from_imported_category_id(row['category'])
mapped[:category] = category_id_from_imported_category_id(row["category"])
# Title is the stripped subject, limited to its first 255 characters.
mapped[:title] = subject.strip[0...255]
mapped[:tags] = tags if tags.present?
mapped
end
def map_reply(row)
parent = @lookup.topic_lookup_from_imported_post_id(row['in_reply_to'])
parent = @lookup.topic_lookup_from_imported_post_id(row["in_reply_to"])
if parent.blank?
puts "Parent message #{row['in_reply_to']} doesn't exist. Skipping #{row['msg_id']}: #{row['subject'][0..40]}"
puts "Parent message #{row["in_reply_to"]} doesn't exist. Skipping #{row["msg_id"]}: #{row["subject"][0..40]}"
return nil
end
@ -178,9 +176,7 @@ module ImportScripts::Mbox
old_length = subject.length
@settings.tags.each do |tag|
if subject.sub!(tag[:regex], "") && tag[:name].present?
tag_names << tag[:name]
end
tag_names << tag[:name] if subject.sub!(tag[:regex], "") && tag[:name].present?
end
remove_prefixes!(subject) if subject.length != old_length
@ -203,13 +199,13 @@ module ImportScripts::Mbox
# Creates an IncomingEmail record from the message row (message id, raw
# message, subject, sender address) and the given post (user, topic and post ids).
# NOTE(review): the result of `IncomingEmail.create` is not checked here.
# NOTE(review): diff rendering; changed lines appear as old/new pairs (the
# final pair differs only by the added trailing comma).
def create_incoming_email(post, row)
IncomingEmail.create(
message_id: row['msg_id'],
raw: row['raw_message'],
subject: row['subject'],
from_address: row['from_email'],
message_id: row["msg_id"],
raw: row["raw_message"],
subject: row["subject"],
from_address: row["from_email"],
user_id: post.user_id,
topic_id: post.topic_id,
post_id: post.id
post_id: post.id,
)
end

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'sqlite3'
require "sqlite3"
module ImportScripts::Mbox
class Database
@ -23,8 +23,7 @@ module ImportScripts::Mbox
@db.transaction
yield self
@db.commit
rescue
rescue StandardError
@db.rollback
end
@ -60,9 +59,7 @@ module ImportScripts::Mbox
SQL
@db.prepare(sql) do |stmt|
reply_message_ids.each do |in_reply_to|
stmt.execute(msg_id, in_reply_to)
end
reply_message_ids.each { |in_reply_to| stmt.execute(msg_id, in_reply_to) }
end
end
@ -95,7 +92,7 @@ module ImportScripts::Mbox
end
def sort_emails_by_date_and_reply_level
@db.execute 'DELETE FROM email_order'
@db.execute "DELETE FROM email_order"
@db.execute <<-SQL
WITH RECURSIVE
@ -117,7 +114,7 @@ module ImportScripts::Mbox
end
def sort_emails_by_subject
@db.execute 'DELETE FROM email_order'
@db.execute "DELETE FROM email_order"
@db.execute <<-SQL
INSERT INTO email_order (msg_id)
@ -128,7 +125,7 @@ module ImportScripts::Mbox
end
def fill_users_from_emails
@db.execute 'DELETE FROM user'
@db.execute "DELETE FROM user"
@db.execute <<-SQL
INSERT INTO user (email, name, date_of_first_message)
@ -172,7 +169,7 @@ module ImportScripts::Mbox
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'email')
add_last_column_value(rows, "email")
end
def count_messages
@ -193,14 +190,14 @@ module ImportScripts::Mbox
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
private
# Issues two PRAGMA statements on @db: journal_mode = OFF and
# locking_mode = EXCLUSIVE.
# NOTE(review): the SQLite docs say ROLLBACK is undefined with the rollback
# journal off -- confirm this is acceptable where this class rolls back.
# NOTE(review): diff rendering; each PRAGMA appears in its old single-quoted
# form and then in its new double-quoted form.
def configure_database
@db.execute 'PRAGMA journal_mode = OFF'
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
@db.execute "PRAGMA journal_mode = OFF"
@db.execute "PRAGMA locking_mode = EXCLUSIVE"
end
def upgrade_schema_version
@ -260,10 +257,10 @@ module ImportScripts::Mbox
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)'
@db.execute "CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)"
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS email_order (
@ -282,7 +279,7 @@ module ImportScripts::Mbox
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)'
@db.execute "CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)"
end
def create_table_for_users

View File

@ -1,8 +1,8 @@
# frozen_string_literal: true
require_relative 'database'
require 'json'
require 'yaml'
require_relative "database"
require "json"
require "yaml"
module ImportScripts::Mbox
class Indexer
@ -15,7 +15,7 @@ module ImportScripts::Mbox
end
def execute
directories = Dir.glob(File.join(@settings.data_dir, '*'))
directories = Dir.glob(File.join(@settings.data_dir, "*"))
directories.select! { |f| File.directory?(f) }
directories.sort!
@ -25,7 +25,7 @@ module ImportScripts::Mbox
index_emails(directory, category[:name])
end
puts '', 'indexing replies and users'
puts "", "indexing replies and users"
if @settings.group_messages_by_subject
@database.sort_emails_by_subject
@database.update_in_reply_to_by_email_subject
@ -39,24 +39,24 @@ module ImportScripts::Mbox
private
METADATA_FILENAME = 'metadata.yml'
IGNORED_FILE_EXTENSIONS = ['.dbindex', '.dbnames', '.digest', '.subjects', '.yml']
METADATA_FILENAME = "metadata.yml"
IGNORED_FILE_EXTENSIONS = %w[.dbindex .dbnames .digest .subjects .yml]
def index_category(directory)
metadata_file = File.join(directory, METADATA_FILENAME)
if File.exist?(metadata_file)
# workaround for YML files that contain classname in file header
yaml = File.read(metadata_file).sub(/^--- !.*$/, '---')
yaml = File.read(metadata_file).sub(/^--- !.*$/, "---")
metadata = YAML.safe_load(yaml)
else
metadata = {}
end
category = {
name: metadata['name'].presence || File.basename(directory),
description: metadata['description'],
parent_category_id: metadata['parent_category_id'].presence,
name: metadata["name"].presence || File.basename(directory),
description: metadata["description"],
parent_category_id: metadata["parent_category_id"].presence,
}
@database.insert_category(category)
@ -75,7 +75,7 @@ module ImportScripts::Mbox
# Detect cases like this and attempt to get actual sender from other headers:
# From: Jane Smith via ListName <ListName@lists.example.com>
if receiver.mail['X-Mailman-Version'] && from_display_name =~ /\bvia \S+$/i
if receiver.mail["X-Mailman-Version"] && from_display_name =~ /\bvia \S+$/i
email_from_from_line = opts[:from_line].scan(/From (\S+)/).flatten.first
a = Mail::Address.new(email_from_from_line)
from_email = a.address
@ -88,7 +88,7 @@ module ImportScripts::Mbox
end
end
from_email = from_email.sub(/^(.*)=/, '') if @settings.elide_equals_in_addresses
from_email = from_email.sub(/^(.*)=/, "") if @settings.elide_equals_in_addresses
body, elided, format = receiver.select_body
reply_message_ids = extract_reply_message_ids(parsed_email)
@ -109,7 +109,7 @@ module ImportScripts::Mbox
filename: File.basename(filename),
first_line_number: opts[:first_line_number],
last_line_number: opts[:last_line_number],
index_duration: (monotonic_time - opts[:start_time]).round(4)
index_duration: (monotonic_time - opts[:start_time]).round(4),
}
@database.transaction do |db|
@ -132,8 +132,8 @@ module ImportScripts::Mbox
# Returns a hash mapping the basename of each row's "filename" to its
# "checksum", built from @database.fetch_imported_files(category_name).
# NOTE(review): diff rendering; the two lines inside the block appear as
# old/new pairs.
def imported_file_checksums(category_name)
rows = @database.fetch_imported_files(category_name)
rows.each_with_object({}) do |row, hash|
filename = File.basename(row['filename'])
hash[filename] = row['checksum']
filename = File.basename(row["filename"])
hash[filename] = row["checksum"]
end
end
@ -171,14 +171,14 @@ module ImportScripts::Mbox
imported_file = {
category: category_name,
filename: File.basename(filename),
checksum: calc_checksum(filename)
checksum: calc_checksum(filename),
}
@database.insert_imported_file(imported_file)
end
def each_mail(filename)
raw_message = +''
raw_message = +""
first_line_number = 1
last_line_number = 0
@ -188,7 +188,7 @@ module ImportScripts::Mbox
if line.scrub =~ @split_regex
if last_line_number > 0
yield raw_message, first_line_number, last_line_number, from_line
raw_message = +''
raw_message = +""
first_line_number = last_line_number + 1
end
@ -204,12 +204,10 @@ module ImportScripts::Mbox
end
# Yields every line of the file to the caller's block. Files whose name ends
# in ".gz" are read through Zlib::GzipReader; all others are read directly.
# The `ensure` clause closes the underlying File even if the block raises.
# NOTE(review): only raw_file is closed; the GzipReader wrapper is not closed
# explicitly.
# NOTE(review): diff rendering; changed lines appear as old/new pairs, and the
# three-line `each_line do ... end` is the old form of the one-line brace block.
def each_line(filename)
raw_file = File.open(filename, 'r')
text_file = filename.end_with?('.gz') ? Zlib::GzipReader.new(raw_file) : raw_file
raw_file = File.open(filename, "r")
text_file = filename.end_with?(".gz") ? Zlib::GzipReader.new(raw_file) : raw_file
text_file.each_line do |line|
yield line
end
text_file.each_line { |line| yield line }
ensure
# raw_file is nil if File.open itself raised.
raw_file.close if raw_file
end
@ -220,7 +218,9 @@ module ImportScripts::Mbox
end
# Wraps a raw message string in an Email::Receiver (convert_plaintext: true,
# skip_trimming: false). Returns nil when raw_message is blank, because the
# `unless` has no else branch.
# NOTE(review): diff rendering; the one-line modifier-`unless` is the old form
# of the three-line `unless ... end` that follows it.
def read_mail_from_string(raw_message)
Email::Receiver.new(raw_message, convert_plaintext: true, skip_trimming: false) unless raw_message.blank?
unless raw_message.blank?
Email::Receiver.new(raw_message, convert_plaintext: true, skip_trimming: false)
end
end
def extract_reply_message_ids(mail)
@ -229,14 +229,13 @@ module ImportScripts::Mbox
# Returns the receiver's subject with surrounding whitespace stripped and each
# run of tab characters replaced by a single space, or nil when it is blank.
# NOTE(review): `list_name` is not used in the visible body.
# NOTE(review): diff rendering; the ternary appears in its old single-quoted
# form and then in its new double-quoted form.
def extract_subject(receiver, list_name)
subject = receiver.subject
subject.blank? ? nil : subject.strip.gsub(/\t+/, ' ')
subject.blank? ? nil : subject.strip.gsub(/\t+/, " ")
end
# Decides whether a file should be skipped during indexing. True when the
# basename starts with ".", equals METADATA_FILENAME, has an extension listed
# in IGNORED_FILE_EXTENSIONS, or fully_indexed? is truthy for it.
# The `||` chain short-circuits, so fully_indexed? is only consulted when none
# of the name-based checks match.
# NOTE(review): diff rendering; the first two condition lines (single-quoted)
# are the old form of the joined double-quoted line that follows them.
def ignored_file?(path, checksums)
filename = File.basename(path)
filename.start_with?('.') ||
filename == METADATA_FILENAME ||
filename.start_with?(".") || filename == METADATA_FILENAME ||
IGNORED_FILE_EXTENSIONS.include?(File.extname(filename)) ||
fully_indexed?(path, filename, checksums)
end

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'yaml'
require "yaml"
module ImportScripts::Mbox
class Settings
@ -25,32 +25,32 @@ module ImportScripts::Mbox
attr_reader :elide_equals_in_addresses
def initialize(yaml)
@data_dir = yaml['data_dir']
@split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty?
@data_dir = yaml["data_dir"]
@split_regex = Regexp.new(yaml["split_regex"]) unless yaml["split_regex"].empty?
@batch_size = 1000 # no need to make this actually configurable at the moment
@trust_level = yaml['default_trust_level']
@prefer_html = yaml['prefer_html']
@staged = yaml['staged']
@index_only = yaml['index_only']
@group_messages_by_subject = yaml['group_messages_by_subject']
@trust_level = yaml["default_trust_level"]
@prefer_html = yaml["prefer_html"]
@staged = yaml["staged"]
@index_only = yaml["index_only"]
@group_messages_by_subject = yaml["group_messages_by_subject"]
if yaml['remove_subject_prefixes'].present?
prefix_regexes = yaml['remove_subject_prefixes'].map { |p| Regexp.new(p) }
if yaml["remove_subject_prefixes"].present?
prefix_regexes = yaml["remove_subject_prefixes"].map { |p| Regexp.new(p) }
@subject_prefix_regex = /^#{Regexp.union(prefix_regexes).source}/i
end
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
@show_trimmed_content = yaml['show_trimmed_content']
@fix_mailman_via_addresses = yaml['fix_mailman_via_addresses']
@elide_equals_in_addresses = yaml['elide_equals_in_addresses']
@automatically_remove_list_name_prefix = yaml["automatically_remove_list_name_prefix"]
@show_trimmed_content = yaml["show_trimmed_content"]
@fix_mailman_via_addresses = yaml["fix_mailman_via_addresses"]
@elide_equals_in_addresses = yaml["elide_equals_in_addresses"]
@tags = []
if yaml['tags'].present?
yaml['tags'].each do |tag_name, value|
if yaml["tags"].present?
yaml["tags"].each do |tag_name, value|
prefixes = Regexp.union(value).source
@tags << {
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
name: tag_name
name: tag_name,
}
end
end