mirror of
https://github.com/discourse/discourse.git
synced 2025-06-07 19:07:19 +08:00
DEV: Improve mbox import script
* Better documentation of settings * Add option to exclude trimmed parts of emails (enabled by default) to not reveal email addresses
This commit is contained in:
@ -8,11 +8,7 @@ end
|
|||||||
|
|
||||||
module ImportScripts
|
module ImportScripts
|
||||||
module Mbox
|
module Mbox
|
||||||
require_relative 'mbox/support/settings'
|
|
||||||
|
|
||||||
@settings = Settings.load(ARGV[0])
|
|
||||||
|
|
||||||
require_relative 'mbox/importer'
|
require_relative 'mbox/importer'
|
||||||
Importer.new(@settings).perform
|
Importer.new(ARGV[0]).perform
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -7,9 +7,8 @@ require_relative 'support/settings'
|
|||||||
|
|
||||||
module ImportScripts::Mbox
|
module ImportScripts::Mbox
|
||||||
class Importer < ImportScripts::Base
|
class Importer < ImportScripts::Base
|
||||||
# @param settings [ImportScripts::Mbox::Settings]
|
def initialize(settings_filename)
|
||||||
def initialize(settings)
|
@settings = Settings.load(settings_filename)
|
||||||
@settings = settings
|
|
||||||
super()
|
super()
|
||||||
|
|
||||||
@database = Database.new(@settings.data_dir, @settings.batch_size)
|
@database = Database.new(@settings.data_dir, @settings.batch_size)
|
||||||
@ -139,7 +138,10 @@ module ImportScripts::Mbox
|
|||||||
body = receiver.add_attachments(body, user)
|
body = receiver.add_attachments(body, user)
|
||||||
end
|
end
|
||||||
|
|
||||||
body = "#{body}#{Email::Receiver.elided_html(elided)}" if elided.present?
|
if elided.present? && @settings.show_trimmed_content
|
||||||
|
body = "#{body}#{Email::Receiver.elided_html(elided)}"
|
||||||
|
end
|
||||||
|
|
||||||
body
|
body
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1,22 +1,38 @@
|
|||||||
|
# Directory where all emails and mbox files are stored.
|
||||||
data_dir: /shared/import/data
|
data_dir: /shared/import/data
|
||||||
|
|
||||||
# mbox files
|
# Regular expression for splitting emails in mbox files.
|
||||||
|
# Choose one of the following examples that works for you or add your own regular expression.
|
||||||
split_regex: "^From .+@.+"
|
split_regex: "^From .+@.+"
|
||||||
#split_regex: "^From .+@example.com.+"
|
#split_regex: "^From .+@example.com.+"
|
||||||
|
|
||||||
# individual emails
|
|
||||||
#split_regex: ""
|
|
||||||
|
|
||||||
# Listserv files
|
# Listserv files
|
||||||
#split_regex: "^========================================================================="
|
#split_regex: "^========================================================================="
|
||||||
|
|
||||||
default_trust_level: 1
|
# When each file contains only one email
|
||||||
|
#split_regex: ""
|
||||||
|
|
||||||
|
# Prefer the HTML part of emails instead of the plain text part.
|
||||||
prefer_html: true
|
prefer_html: true
|
||||||
|
|
||||||
|
# The trust level of users created by the import script.
|
||||||
|
default_trust_level: 1
|
||||||
|
|
||||||
|
# Create staged users instead of regular users.
|
||||||
staged: true
|
staged: true
|
||||||
|
|
||||||
|
# You can enable this option if the script should stop after indexing all emails
|
||||||
|
# instead of executing the import right away. That's useful if you need to make some changes
|
||||||
|
# to the indexed data in the `index.db` created during the indexing step.
|
||||||
index_only: false
|
index_only: false
|
||||||
|
|
||||||
|
# Only enable this option when the import script fails to group messages with
|
||||||
|
# the `In-Reply-To` and `References` headers.
|
||||||
group_messages_by_subject: false
|
group_messages_by_subject: false
|
||||||
|
|
||||||
|
# Always show trimmed part of emails. WARNING: This might reveal email addresses.
|
||||||
|
show_trimmed_content: false
|
||||||
|
|
||||||
# Remove prefixes like [FOO] or (BAR) from topic titles and replace them with tags.
|
# Remove prefixes like [FOO] or (BAR) from topic titles and replace them with tags.
|
||||||
# You can map one or more case-insensitive prefixes to the same tag in Discourse.
|
# You can map one or more case-insensitive prefixes to the same tag in Discourse.
|
||||||
# "Tag name in Discourse": "foo"
|
# "Tag name in Discourse": "foo"
|
||||||
|
@ -19,6 +19,7 @@ module ImportScripts::Mbox
|
|||||||
attr_reader :group_messages_by_subject
|
attr_reader :group_messages_by_subject
|
||||||
attr_reader :subject_prefix_regex
|
attr_reader :subject_prefix_regex
|
||||||
attr_reader :automatically_remove_list_name_prefix
|
attr_reader :automatically_remove_list_name_prefix
|
||||||
|
attr_reader :show_trimmed_content
|
||||||
attr_reader :tags
|
attr_reader :tags
|
||||||
|
|
||||||
def initialize(yaml)
|
def initialize(yaml)
|
||||||
@ -31,20 +32,23 @@ module ImportScripts::Mbox
|
|||||||
@index_only = yaml['index_only']
|
@index_only = yaml['index_only']
|
||||||
@group_messages_by_subject = yaml['group_messages_by_subject']
|
@group_messages_by_subject = yaml['group_messages_by_subject']
|
||||||
|
|
||||||
unless yaml['remove_subject_prefixes'].empty?
|
if yaml['remove_subject_prefixes'].present?
|
||||||
prefix_regexes = yaml['remove_subject_prefixes'].map { |p| Regexp.new(p) }
|
prefix_regexes = yaml['remove_subject_prefixes'].map { |p| Regexp.new(p) }
|
||||||
@subject_prefix_regex = /^#{Regexp.union(prefix_regexes).source}/i
|
@subject_prefix_regex = /^#{Regexp.union(prefix_regexes).source}/i
|
||||||
end
|
end
|
||||||
|
|
||||||
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
|
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
|
||||||
|
@show_trimmed_content = yaml['show_trimmed_content']
|
||||||
|
|
||||||
@tags = []
|
if yaml['tags'].present?
|
||||||
yaml['tags'].each do |tag_name, value|
|
@tags = []
|
||||||
prefixes = Regexp.union(value).source
|
yaml['tags'].each do |tag_name, value|
|
||||||
@tags << {
|
prefixes = Regexp.union(value).source
|
||||||
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
|
@tags << {
|
||||||
name: tag_name
|
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
|
||||||
}
|
name: tag_name
|
||||||
|
}
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Reference in New Issue
Block a user