mirror of
https://github.com/discourse/discourse.git
synced 2025-05-30 23:58:26 +08:00
improvements to the mbox import script
* ignores dot-files and empty emails
* new setting to prefer HTML over plaintext emails during import
* restores original site settings at the end of import
* fixes elided content of HTML mails not being put inside a details block
This commit is contained in:
@ -102,10 +102,12 @@ module ImportScripts::Mbox
|
||||
|
||||
if @split_regex.present?
|
||||
each_mail(filename) do |raw_message, first_line_number, last_line_number|
|
||||
yield read_mail_from_string(raw_message), filename, first_line_number, last_line_number
|
||||
receiver = read_mail_from_string(raw_message)
|
||||
yield receiver, filename, first_line_number, last_line_number if receiver.present?
|
||||
end
|
||||
else
|
||||
yield read_mail_from_file(filename), filename
|
||||
receiver = read_mail_from_file(filename)
|
||||
yield receiver, filename if receiver.present?
|
||||
end
|
||||
|
||||
mark_as_fully_indexed(category_name, filename)
|
||||
@ -161,7 +163,7 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def read_mail_from_string(raw_message)
|
||||
Email::Receiver.new(raw_message)
|
||||
Email::Receiver.new(raw_message) unless raw_message.blank?
|
||||
end
|
||||
|
||||
def extract_reply_message_ids(mail)
|
||||
@ -208,7 +210,12 @@ module ImportScripts::Mbox
|
||||
end
|
||||
|
||||
def ignored_file?(filename, checksums)
|
||||
File.directory?(filename) || metadata_file?(filename) || fully_indexed?(filename, checksums)
|
||||
File.directory?(filename) || hidden_file?(filename) ||
|
||||
metadata_file?(filename) || fully_indexed?(filename, checksums)
|
||||
end
|
||||
|
||||
def hidden_file?(filename)
|
||||
File.basename(filename).start_with?('.')
|
||||
end
|
||||
|
||||
def metadata_file?(filename)
|
||||
|
@ -11,12 +11,14 @@ module ImportScripts::Mbox
|
||||
attr_reader :split_regex
|
||||
attr_reader :batch_size
|
||||
attr_reader :trust_level
|
||||
attr_reader :prefer_html
|
||||
|
||||
def initialize(yaml)
|
||||
@data_dir = yaml['data_dir']
|
||||
@split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty?
|
||||
@batch_size = 1000 # no need to make this actually configurable at the moment
|
||||
@trust_level = yaml['default_trust_level']
|
||||
@prefer_html = yaml['prefer_html']
|
||||
end
|
||||
end
|
||||
end
|
||||
|
Reference in New Issue
Block a user