mirror of
https://github.com/discourse/discourse.git
synced 2025-05-22 18:11:11 +08:00
DEV: Apply syntax_tree formatting to script/*
This commit is contained in:
@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require_relative '../base'
|
||||
require_relative 'support/database'
|
||||
require_relative 'support/indexer'
|
||||
require_relative 'support/settings'
|
||||
require_relative "../base"
|
||||
require_relative "support/database"
|
||||
require_relative "support/indexer"
|
||||
require_relative "support/settings"
|
||||
|
||||
module ImportScripts::Mbox
|
||||
class Importer < ImportScripts::Base
|
||||
@ -38,44 +38,44 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def index_messages
|
||||
puts '', 'creating index'
|
||||
puts "", "creating index"
|
||||
indexer = Indexer.new(@database, @settings)
|
||||
indexer.execute
|
||||
end
|
||||
|
||||
def import_categories
|
||||
puts '', 'creating categories'
|
||||
puts "", "creating categories"
|
||||
rows = @database.fetch_categories
|
||||
|
||||
create_categories(rows) do |row|
|
||||
{
|
||||
id: row['name'],
|
||||
name: row['name'],
|
||||
parent_category_id: row['parent_category_id'].presence,
|
||||
id: row["name"],
|
||||
name: row["name"],
|
||||
parent_category_id: row["parent_category_id"].presence,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def import_users
|
||||
puts '', 'creating users'
|
||||
puts "", "creating users"
|
||||
total_count = @database.count_users
|
||||
last_email = ''
|
||||
last_email = ""
|
||||
|
||||
batches do |offset|
|
||||
rows, last_email = @database.fetch_users(last_email)
|
||||
break if rows.empty?
|
||||
|
||||
next if all_records_exist?(:users, rows.map { |row| row['email'] })
|
||||
next if all_records_exist?(:users, rows.map { |row| row["email"] })
|
||||
|
||||
create_users(rows, total: total_count, offset: offset) do |row|
|
||||
{
|
||||
id: row['email'],
|
||||
email: row['email'],
|
||||
name: row['name'],
|
||||
id: row["email"],
|
||||
email: row["email"],
|
||||
name: row["name"],
|
||||
trust_level: @settings.trust_level,
|
||||
staged: @settings.staged,
|
||||
active: !@settings.staged,
|
||||
created_at: to_time(row['date_of_first_message'])
|
||||
created_at: to_time(row["date_of_first_message"]),
|
||||
}
|
||||
end
|
||||
end
|
||||
@ -86,7 +86,7 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def import_posts
|
||||
puts '', 'creating topics and posts'
|
||||
puts "", "creating topics and posts"
|
||||
total_count = @database.count_messages
|
||||
last_row_id = 0
|
||||
|
||||
@ -94,47 +94,45 @@ module ImportScripts::Mbox
|
||||
rows, last_row_id = @database.fetch_messages(last_row_id)
|
||||
break if rows.empty?
|
||||
|
||||
next if all_records_exist?(:posts, rows.map { |row| row['msg_id'] })
|
||||
next if all_records_exist?(:posts, rows.map { |row| row["msg_id"] })
|
||||
|
||||
create_posts(rows, total: total_count, offset: offset) do |row|
|
||||
begin
|
||||
if row['email_date'].blank?
|
||||
puts "Date is missing. Skipping #{row['msg_id']}"
|
||||
if row["email_date"].blank?
|
||||
puts "Date is missing. Skipping #{row["msg_id"]}"
|
||||
nil
|
||||
elsif row['in_reply_to'].blank?
|
||||
elsif row["in_reply_to"].blank?
|
||||
map_first_post(row)
|
||||
else
|
||||
map_reply(row)
|
||||
end
|
||||
rescue => e
|
||||
puts "Failed to map post for #{row['msg_id']}", e, e.backtrace.join("\n")
|
||||
puts "Failed to map post for #{row["msg_id"]}", e, e.backtrace.join("\n")
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def map_post(row)
|
||||
user_id = user_id_from_imported_user_id(row['from_email']) || Discourse::SYSTEM_USER_ID
|
||||
user_id = user_id_from_imported_user_id(row["from_email"]) || Discourse::SYSTEM_USER_ID
|
||||
|
||||
{
|
||||
id: row['msg_id'],
|
||||
id: row["msg_id"],
|
||||
user_id: user_id,
|
||||
created_at: to_time(row['email_date']),
|
||||
created_at: to_time(row["email_date"]),
|
||||
raw: format_raw(row, user_id),
|
||||
raw_email: row['raw_message'],
|
||||
raw_email: row["raw_message"],
|
||||
via_email: true,
|
||||
post_create_action: proc do |post|
|
||||
create_incoming_email(post, row)
|
||||
end
|
||||
post_create_action: proc { |post| create_incoming_email(post, row) },
|
||||
}
|
||||
end
|
||||
|
||||
def format_raw(row, user_id)
|
||||
body = row['body'] || ''
|
||||
elided = row['elided']
|
||||
body = row["body"] || ""
|
||||
elided = row["elided"]
|
||||
|
||||
if row['attachment_count'].positive?
|
||||
receiver = Email::Receiver.new(row['raw_message'])
|
||||
if row["attachment_count"].positive?
|
||||
receiver = Email::Receiver.new(row["raw_message"])
|
||||
user = User.find(user_id)
|
||||
body = receiver.add_attachments(body, user)
|
||||
end
|
||||
@ -147,21 +145,21 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def map_first_post(row)
|
||||
subject = row['subject']
|
||||
subject = row["subject"]
|
||||
tags = remove_tags!(subject)
|
||||
|
||||
mapped = map_post(row)
|
||||
mapped[:category] = category_id_from_imported_category_id(row['category'])
|
||||
mapped[:category] = category_id_from_imported_category_id(row["category"])
|
||||
mapped[:title] = subject.strip[0...255]
|
||||
mapped[:tags] = tags if tags.present?
|
||||
mapped
|
||||
end
|
||||
|
||||
def map_reply(row)
|
||||
parent = @lookup.topic_lookup_from_imported_post_id(row['in_reply_to'])
|
||||
parent = @lookup.topic_lookup_from_imported_post_id(row["in_reply_to"])
|
||||
|
||||
if parent.blank?
|
||||
puts "Parent message #{row['in_reply_to']} doesn't exist. Skipping #{row['msg_id']}: #{row['subject'][0..40]}"
|
||||
puts "Parent message #{row["in_reply_to"]} doesn't exist. Skipping #{row["msg_id"]}: #{row["subject"][0..40]}"
|
||||
return nil
|
||||
end
|
||||
|
||||
@ -178,9 +176,7 @@ module ImportScripts::Mbox
|
||||
old_length = subject.length
|
||||
|
||||
@settings.tags.each do |tag|
|
||||
if subject.sub!(tag[:regex], "") && tag[:name].present?
|
||||
tag_names << tag[:name]
|
||||
end
|
||||
tag_names << tag[:name] if subject.sub!(tag[:regex], "") && tag[:name].present?
|
||||
end
|
||||
|
||||
remove_prefixes!(subject) if subject.length != old_length
|
||||
@ -203,13 +199,13 @@ module ImportScripts::Mbox
|
||||
|
||||
def create_incoming_email(post, row)
|
||||
IncomingEmail.create(
|
||||
message_id: row['msg_id'],
|
||||
raw: row['raw_message'],
|
||||
subject: row['subject'],
|
||||
from_address: row['from_email'],
|
||||
message_id: row["msg_id"],
|
||||
raw: row["raw_message"],
|
||||
subject: row["subject"],
|
||||
from_address: row["from_email"],
|
||||
user_id: post.user_id,
|
||||
topic_id: post.topic_id,
|
||||
post_id: post.id
|
||||
post_id: post.id,
|
||||
)
|
||||
end
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require 'sqlite3'
|
||||
require "sqlite3"
|
||||
|
||||
module ImportScripts::Mbox
|
||||
class Database
|
||||
@ -23,8 +23,7 @@ module ImportScripts::Mbox
|
||||
@db.transaction
|
||||
yield self
|
||||
@db.commit
|
||||
|
||||
rescue
|
||||
rescue StandardError
|
||||
@db.rollback
|
||||
end
|
||||
|
||||
@ -60,9 +59,7 @@ module ImportScripts::Mbox
|
||||
SQL
|
||||
|
||||
@db.prepare(sql) do |stmt|
|
||||
reply_message_ids.each do |in_reply_to|
|
||||
stmt.execute(msg_id, in_reply_to)
|
||||
end
|
||||
reply_message_ids.each { |in_reply_to| stmt.execute(msg_id, in_reply_to) }
|
||||
end
|
||||
end
|
||||
|
||||
@ -95,7 +92,7 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def sort_emails_by_date_and_reply_level
|
||||
@db.execute 'DELETE FROM email_order'
|
||||
@db.execute "DELETE FROM email_order"
|
||||
|
||||
@db.execute <<-SQL
|
||||
WITH RECURSIVE
|
||||
@ -117,7 +114,7 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def sort_emails_by_subject
|
||||
@db.execute 'DELETE FROM email_order'
|
||||
@db.execute "DELETE FROM email_order"
|
||||
|
||||
@db.execute <<-SQL
|
||||
INSERT INTO email_order (msg_id)
|
||||
@ -128,7 +125,7 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def fill_users_from_emails
|
||||
@db.execute 'DELETE FROM user'
|
||||
@db.execute "DELETE FROM user"
|
||||
|
||||
@db.execute <<-SQL
|
||||
INSERT INTO user (email, name, date_of_first_message)
|
||||
@ -172,7 +169,7 @@ module ImportScripts::Mbox
|
||||
LIMIT #{@batch_size}
|
||||
SQL
|
||||
|
||||
add_last_column_value(rows, 'email')
|
||||
add_last_column_value(rows, "email")
|
||||
end
|
||||
|
||||
def count_messages
|
||||
@ -193,14 +190,14 @@ module ImportScripts::Mbox
|
||||
LIMIT #{@batch_size}
|
||||
SQL
|
||||
|
||||
add_last_column_value(rows, 'rowid')
|
||||
add_last_column_value(rows, "rowid")
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def configure_database
|
||||
@db.execute 'PRAGMA journal_mode = OFF'
|
||||
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
|
||||
@db.execute "PRAGMA journal_mode = OFF"
|
||||
@db.execute "PRAGMA locking_mode = EXCLUSIVE"
|
||||
end
|
||||
|
||||
def upgrade_schema_version
|
||||
@ -260,10 +257,10 @@ module ImportScripts::Mbox
|
||||
)
|
||||
SQL
|
||||
|
||||
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)'
|
||||
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)'
|
||||
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)'
|
||||
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)'
|
||||
@db.execute "CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)"
|
||||
@db.execute "CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)"
|
||||
@db.execute "CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)"
|
||||
@db.execute "CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)"
|
||||
|
||||
@db.execute <<-SQL
|
||||
CREATE TABLE IF NOT EXISTS email_order (
|
||||
@ -282,7 +279,7 @@ module ImportScripts::Mbox
|
||||
)
|
||||
SQL
|
||||
|
||||
@db.execute 'CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)'
|
||||
@db.execute "CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)"
|
||||
end
|
||||
|
||||
def create_table_for_users
|
||||
|
@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require_relative 'database'
|
||||
require 'json'
|
||||
require 'yaml'
|
||||
require_relative "database"
|
||||
require "json"
|
||||
require "yaml"
|
||||
|
||||
module ImportScripts::Mbox
|
||||
class Indexer
|
||||
@ -15,7 +15,7 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def execute
|
||||
directories = Dir.glob(File.join(@settings.data_dir, '*'))
|
||||
directories = Dir.glob(File.join(@settings.data_dir, "*"))
|
||||
directories.select! { |f| File.directory?(f) }
|
||||
directories.sort!
|
||||
|
||||
@ -25,7 +25,7 @@ module ImportScripts::Mbox
|
||||
index_emails(directory, category[:name])
|
||||
end
|
||||
|
||||
puts '', 'indexing replies and users'
|
||||
puts "", "indexing replies and users"
|
||||
if @settings.group_messages_by_subject
|
||||
@database.sort_emails_by_subject
|
||||
@database.update_in_reply_to_by_email_subject
|
||||
@ -39,24 +39,24 @@ module ImportScripts::Mbox
|
||||
|
||||
private
|
||||
|
||||
METADATA_FILENAME = 'metadata.yml'
|
||||
IGNORED_FILE_EXTENSIONS = ['.dbindex', '.dbnames', '.digest', '.subjects', '.yml']
|
||||
METADATA_FILENAME = "metadata.yml"
|
||||
IGNORED_FILE_EXTENSIONS = %w[.dbindex .dbnames .digest .subjects .yml]
|
||||
|
||||
def index_category(directory)
|
||||
metadata_file = File.join(directory, METADATA_FILENAME)
|
||||
|
||||
if File.exist?(metadata_file)
|
||||
# workaround for YML files that contain classname in file header
|
||||
yaml = File.read(metadata_file).sub(/^--- !.*$/, '---')
|
||||
yaml = File.read(metadata_file).sub(/^--- !.*$/, "---")
|
||||
metadata = YAML.safe_load(yaml)
|
||||
else
|
||||
metadata = {}
|
||||
end
|
||||
|
||||
category = {
|
||||
name: metadata['name'].presence || File.basename(directory),
|
||||
description: metadata['description'],
|
||||
parent_category_id: metadata['parent_category_id'].presence,
|
||||
name: metadata["name"].presence || File.basename(directory),
|
||||
description: metadata["description"],
|
||||
parent_category_id: metadata["parent_category_id"].presence,
|
||||
}
|
||||
|
||||
@database.insert_category(category)
|
||||
@ -75,7 +75,7 @@ module ImportScripts::Mbox
|
||||
# Detect cases like this and attempt to get actual sender from other headers:
|
||||
# From: Jane Smith via ListName <ListName@lists.example.com>
|
||||
|
||||
if receiver.mail['X-Mailman-Version'] && from_display_name =~ /\bvia \S+$/i
|
||||
if receiver.mail["X-Mailman-Version"] && from_display_name =~ /\bvia \S+$/i
|
||||
email_from_from_line = opts[:from_line].scan(/From (\S+)/).flatten.first
|
||||
a = Mail::Address.new(email_from_from_line)
|
||||
from_email = a.address
|
||||
@ -88,7 +88,7 @@ module ImportScripts::Mbox
|
||||
end
|
||||
end
|
||||
|
||||
from_email = from_email.sub(/^(.*)=/, '') if @settings.elide_equals_in_addresses
|
||||
from_email = from_email.sub(/^(.*)=/, "") if @settings.elide_equals_in_addresses
|
||||
|
||||
body, elided, format = receiver.select_body
|
||||
reply_message_ids = extract_reply_message_ids(parsed_email)
|
||||
@ -109,7 +109,7 @@ module ImportScripts::Mbox
|
||||
filename: File.basename(filename),
|
||||
first_line_number: opts[:first_line_number],
|
||||
last_line_number: opts[:last_line_number],
|
||||
index_duration: (monotonic_time - opts[:start_time]).round(4)
|
||||
index_duration: (monotonic_time - opts[:start_time]).round(4),
|
||||
}
|
||||
|
||||
@database.transaction do |db|
|
||||
@ -132,8 +132,8 @@ module ImportScripts::Mbox
|
||||
def imported_file_checksums(category_name)
|
||||
rows = @database.fetch_imported_files(category_name)
|
||||
rows.each_with_object({}) do |row, hash|
|
||||
filename = File.basename(row['filename'])
|
||||
hash[filename] = row['checksum']
|
||||
filename = File.basename(row["filename"])
|
||||
hash[filename] = row["checksum"]
|
||||
end
|
||||
end
|
||||
|
||||
@ -171,14 +171,14 @@ module ImportScripts::Mbox
|
||||
imported_file = {
|
||||
category: category_name,
|
||||
filename: File.basename(filename),
|
||||
checksum: calc_checksum(filename)
|
||||
checksum: calc_checksum(filename),
|
||||
}
|
||||
|
||||
@database.insert_imported_file(imported_file)
|
||||
end
|
||||
|
||||
def each_mail(filename)
|
||||
raw_message = +''
|
||||
raw_message = +""
|
||||
first_line_number = 1
|
||||
last_line_number = 0
|
||||
|
||||
@ -188,7 +188,7 @@ module ImportScripts::Mbox
|
||||
if line.scrub =~ @split_regex
|
||||
if last_line_number > 0
|
||||
yield raw_message, first_line_number, last_line_number, from_line
|
||||
raw_message = +''
|
||||
raw_message = +""
|
||||
first_line_number = last_line_number + 1
|
||||
end
|
||||
|
||||
@ -204,12 +204,10 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def each_line(filename)
|
||||
raw_file = File.open(filename, 'r')
|
||||
text_file = filename.end_with?('.gz') ? Zlib::GzipReader.new(raw_file) : raw_file
|
||||
raw_file = File.open(filename, "r")
|
||||
text_file = filename.end_with?(".gz") ? Zlib::GzipReader.new(raw_file) : raw_file
|
||||
|
||||
text_file.each_line do |line|
|
||||
yield line
|
||||
end
|
||||
text_file.each_line { |line| yield line }
|
||||
ensure
|
||||
raw_file.close if raw_file
|
||||
end
|
||||
@ -220,7 +218,9 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def read_mail_from_string(raw_message)
|
||||
Email::Receiver.new(raw_message, convert_plaintext: true, skip_trimming: false) unless raw_message.blank?
|
||||
unless raw_message.blank?
|
||||
Email::Receiver.new(raw_message, convert_plaintext: true, skip_trimming: false)
|
||||
end
|
||||
end
|
||||
|
||||
def extract_reply_message_ids(mail)
|
||||
@ -229,14 +229,13 @@ module ImportScripts::Mbox
|
||||
|
||||
def extract_subject(receiver, list_name)
|
||||
subject = receiver.subject
|
||||
subject.blank? ? nil : subject.strip.gsub(/\t+/, ' ')
|
||||
subject.blank? ? nil : subject.strip.gsub(/\t+/, " ")
|
||||
end
|
||||
|
||||
def ignored_file?(path, checksums)
|
||||
filename = File.basename(path)
|
||||
|
||||
filename.start_with?('.') ||
|
||||
filename == METADATA_FILENAME ||
|
||||
filename.start_with?(".") || filename == METADATA_FILENAME ||
|
||||
IGNORED_FILE_EXTENSIONS.include?(File.extname(filename)) ||
|
||||
fully_indexed?(path, filename, checksums)
|
||||
end
|
||||
|
@ -1,6 +1,6 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require 'yaml'
|
||||
require "yaml"
|
||||
|
||||
module ImportScripts::Mbox
|
||||
class Settings
|
||||
@ -25,32 +25,32 @@ module ImportScripts::Mbox
|
||||
attr_reader :elide_equals_in_addresses
|
||||
|
||||
def initialize(yaml)
|
||||
@data_dir = yaml['data_dir']
|
||||
@split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty?
|
||||
@data_dir = yaml["data_dir"]
|
||||
@split_regex = Regexp.new(yaml["split_regex"]) unless yaml["split_regex"].empty?
|
||||
@batch_size = 1000 # no need to make this actually configurable at the moment
|
||||
@trust_level = yaml['default_trust_level']
|
||||
@prefer_html = yaml['prefer_html']
|
||||
@staged = yaml['staged']
|
||||
@index_only = yaml['index_only']
|
||||
@group_messages_by_subject = yaml['group_messages_by_subject']
|
||||
@trust_level = yaml["default_trust_level"]
|
||||
@prefer_html = yaml["prefer_html"]
|
||||
@staged = yaml["staged"]
|
||||
@index_only = yaml["index_only"]
|
||||
@group_messages_by_subject = yaml["group_messages_by_subject"]
|
||||
|
||||
if yaml['remove_subject_prefixes'].present?
|
||||
prefix_regexes = yaml['remove_subject_prefixes'].map { |p| Regexp.new(p) }
|
||||
if yaml["remove_subject_prefixes"].present?
|
||||
prefix_regexes = yaml["remove_subject_prefixes"].map { |p| Regexp.new(p) }
|
||||
@subject_prefix_regex = /^#{Regexp.union(prefix_regexes).source}/i
|
||||
end
|
||||
|
||||
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
|
||||
@show_trimmed_content = yaml['show_trimmed_content']
|
||||
@fix_mailman_via_addresses = yaml['fix_mailman_via_addresses']
|
||||
@elide_equals_in_addresses = yaml['elide_equals_in_addresses']
|
||||
@automatically_remove_list_name_prefix = yaml["automatically_remove_list_name_prefix"]
|
||||
@show_trimmed_content = yaml["show_trimmed_content"]
|
||||
@fix_mailman_via_addresses = yaml["fix_mailman_via_addresses"]
|
||||
@elide_equals_in_addresses = yaml["elide_equals_in_addresses"]
|
||||
|
||||
@tags = []
|
||||
if yaml['tags'].present?
|
||||
yaml['tags'].each do |tag_name, value|
|
||||
if yaml["tags"].present?
|
||||
yaml["tags"].each do |tag_name, value|
|
||||
prefixes = Regexp.union(value).source
|
||||
@tags << {
|
||||
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
|
||||
name: tag_name
|
||||
name: tag_name,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
Reference in New Issue
Block a user