mirror of
https://github.com/discourse/discourse.git
synced 2025-05-22 22:43:33 +08:00
Improve Telligent import script
* Detects mostly all attachments and it's a lot faster * Parses user properties in Ruby instead of the DB, because that's less errorprone * Imports user avatars * Imports topic views by users * Better handling of quotes and YouTube links
This commit is contained in:
@ -42,11 +42,43 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
BATCH_SIZE ||= 1000
|
BATCH_SIZE ||= 1000
|
||||||
LOCAL_AVATAR_REGEX ||= /\A~\/.*(?<directory>communityserver-components-(?:selectable)?avatars)\/(?<path>[^\/]+)\/(?<filename>.+)/i
|
LOCAL_AVATAR_REGEX ||= /\A~\/.*(?<directory>communityserver-components-(?:selectable)?avatars)\/(?<path>[^\/]+)\/(?<filename>.+)/i
|
||||||
REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i
|
REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i
|
||||||
EMBEDDED_ATTACHMENT_REGEX ||= /<a href="\/cfs-file(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename1>.+?)".*?>(?<filename2>.*?)<\/a>/i
|
EMBEDDED_ATTACHMENT_REGEX ||= /<a href="\/cfs-file(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)".*?>.*?<\/a>/i
|
||||||
|
EMBEDDED_VIEW_ATTACHMENT_REGEX ||= /\[View:~\/cfs-file(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)(?:\:[:\d\s]*?)\]/i
|
||||||
|
PROPERTY_NAMES_REGEX ||= /(?<name>\w+):S:(?<start>\d+):(?<length>\d+):/
|
||||||
|
|
||||||
CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1'
|
CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1'
|
||||||
TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1'
|
TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1'
|
||||||
|
|
||||||
|
UNICODE_REPLACEMENTS = {
|
||||||
|
"5F00" => "_",
|
||||||
|
"2800" => "(",
|
||||||
|
"2900" => ")",
|
||||||
|
"2D00" => "-",
|
||||||
|
"2C00" => ",",
|
||||||
|
"2700" => "'",
|
||||||
|
"5B00" => "[",
|
||||||
|
"5D00" => "]",
|
||||||
|
"3D00" => "=",
|
||||||
|
"2600" => "&",
|
||||||
|
"2100" => "!",
|
||||||
|
"2300" => "#",
|
||||||
|
"7E00" => "~",
|
||||||
|
"2500" => "%",
|
||||||
|
"2E00" => ".",
|
||||||
|
"4000" => "@",
|
||||||
|
"2B00" => "+",
|
||||||
|
"2400" => "$",
|
||||||
|
"1920" => "’",
|
||||||
|
"E900" => "é",
|
||||||
|
"E000" => "à",
|
||||||
|
"F300" => "ó",
|
||||||
|
"1C20" => "“",
|
||||||
|
"1D20" => "”",
|
||||||
|
"B000" => "°",
|
||||||
|
"0003" => ["0300".to_i(16)].pack("U"),
|
||||||
|
"0103" => ["0301".to_i(16)].pack("U")
|
||||||
|
}
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super()
|
super()
|
||||||
|
|
||||||
@ -58,11 +90,16 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
timeout: 60 # the user query is very slow
|
timeout: 60 # the user query is very slow
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@filestore_root_directory = ENV["FILE_BASE_DIR"]
|
||||||
|
@files = {}
|
||||||
|
|
||||||
SiteSetting.tagging_enabled = true
|
SiteSetting.tagging_enabled = true
|
||||||
end
|
end
|
||||||
|
|
||||||
def execute
|
def execute
|
||||||
add_permalink_normalizations
|
add_permalink_normalizations
|
||||||
|
index_filestore
|
||||||
|
|
||||||
import_categories
|
import_categories
|
||||||
import_users
|
import_users
|
||||||
import_topics
|
import_topics
|
||||||
@ -70,6 +107,11 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
mark_topics_as_solved
|
mark_topics_as_solved
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def index_filestore
|
||||||
|
puts "", "Indexing filestore..."
|
||||||
|
index_directory(@filestore_root_directory)
|
||||||
|
end
|
||||||
|
|
||||||
def import_users
|
def import_users
|
||||||
puts "", "Importing users..."
|
puts "", "Importing users..."
|
||||||
|
|
||||||
@ -94,32 +136,14 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
|
|
||||||
loop do
|
loop do
|
||||||
rows = query(<<~SQL)
|
rows = query(<<~SQL)
|
||||||
SELECT *
|
SELECT TOP #{BATCH_SIZE}
|
||||||
FROM (
|
u.UserID, u.Email, u.UserName, u.CreateDate,
|
||||||
SELECT TOP #{BATCH_SIZE}
|
ap.PropertyNames AP_PropertyNames, ap.PropertyValuesString AS AP_PropertyValues,
|
||||||
u.UserID, u.Email, u.UserName,u.CreateDate, p.PropertyName, p.PropertyValue
|
up.PropertyNames UP_PropertyNames, up.PropertyValues AS UP_PropertyValues
|
||||||
FROM cs_Users u
|
FROM cs_Users u
|
||||||
LEFT OUTER JOIN (
|
LEFT OUTER JOIN aspnet_Profile ap ON ap.UserId = u.MembershipID
|
||||||
SELECT NULL AS UserID, ap.UserId AS MembershipID, x.PropertyName, x.PropertyValue
|
LEFT OUTER JOIN cs_UserProfile up ON up.UserID = u.UserID
|
||||||
FROM aspnet_Profile ap
|
WHERE u.UserID > #{last_user_id} AND #{user_conditions}
|
||||||
CROSS APPLY dbo.GetProperties(ap.PropertyNames, ap.PropertyValuesString) x
|
|
||||||
WHERE ap.PropertyNames NOT LIKE '%:-1%' AND
|
|
||||||
x.PropertyName IN ('bio', 'commonName', 'location', 'webAddress')
|
|
||||||
UNION
|
|
||||||
SELECT up.UserID, NULL AS MembershipID, x.PropertyName, CAST(x.PropertyValue AS NVARCHAR) AS PropertyValue
|
|
||||||
FROM cs_UserProfile up
|
|
||||||
CROSS APPLY dbo.GetProperties(up.PropertyNames, up.PropertyValues) x
|
|
||||||
WHERE up.PropertyNames NOT LIKE '%:-1%' AND
|
|
||||||
x.PropertyName IN ('avatarUrl', 'BannedUntil', 'UserBanReason')
|
|
||||||
) p ON p.UserID = u.UserID OR p.MembershipID = u.MembershipID
|
|
||||||
WHERE u.UserID > #{last_user_id} AND #{user_conditions}
|
|
||||||
ORDER BY u.UserID
|
|
||||||
) x
|
|
||||||
PIVOT (
|
|
||||||
MAX(PropertyValue)
|
|
||||||
FOR PropertyName
|
|
||||||
IN (bio, commonName, location, webAddress, avatarUrl, BannedUntil, UserBanReason)
|
|
||||||
) Y
|
|
||||||
ORDER BY UserID
|
ORDER BY UserID
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
@ -132,18 +156,21 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
create_users(rows, total: total_count, offset: import_count) do |row|
|
create_users(rows, total: total_count, offset: import_count) do |row|
|
||||||
|
ap_properties = parse_properties(row["AP_PropertyNames"], row["AP_PropertyValues"])
|
||||||
|
up_properties = parse_properties(row["UP_PropertyNames"], row["UP_PropertyValues"])
|
||||||
|
|
||||||
{
|
{
|
||||||
id: row["UserID"],
|
id: row["UserID"],
|
||||||
email: row["Email"],
|
email: row["Email"],
|
||||||
username: row["UserName"],
|
username: row["UserName"],
|
||||||
name: row["commonName"],
|
name: ap_properties["commonName"],
|
||||||
created_at: row["CreateDate"],
|
created_at: row["CreateDate"],
|
||||||
bio_raw: html_to_markdown(row["bio"]),
|
bio_raw: html_to_markdown(ap_properties["bio"]),
|
||||||
location: row["location"],
|
location: ap_properties["location"],
|
||||||
website: row["webAddress"],
|
website: ap_properties["webAddress"],
|
||||||
post_create_action: proc do |user|
|
post_create_action: proc do |user|
|
||||||
import_avatar(user, row["avatarUrl"])
|
import_avatar(user, up_properties["avatarUrl"])
|
||||||
suspend_user(user, row["BannedUntil"], row["UserBanReason"])
|
suspend_user(user, up_properties["BannedUntil"], up_properties["UserBanReason"])
|
||||||
end
|
end
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
@ -154,10 +181,10 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
|
|
||||||
# TODO move into base importer (create_user) and use consistent error handling
|
# TODO move into base importer (create_user) and use consistent error handling
|
||||||
def import_avatar(user, avatar_url)
|
def import_avatar(user, avatar_url)
|
||||||
return if ENV["FILE_BASE_DIR"].blank? || avatar_url.blank? || avatar_url.include?("anonymous")
|
return if @filestore_root_directory.blank? || avatar_url.blank? || avatar_url.include?("anonymous")
|
||||||
|
|
||||||
if match_data = avatar_url.match(LOCAL_AVATAR_REGEX)
|
if match_data = avatar_url.match(LOCAL_AVATAR_REGEX)
|
||||||
avatar_path = File.join(ENV["FILE_BASE_DIR"],
|
avatar_path = File.join(@filestore_root_directory,
|
||||||
match_data[:directory].gsub("-", "."),
|
match_data[:directory].gsub("-", "."),
|
||||||
match_data[:path].split("-"),
|
match_data[:path].split("-"),
|
||||||
match_data[:filename])
|
match_data[:filename])
|
||||||
@ -333,7 +360,7 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
batches do |offset|
|
batches do |offset|
|
||||||
rows = query(<<~SQL)
|
rows = query(<<~SQL)
|
||||||
SELECT TOP #{BATCH_SIZE}
|
SELECT TOP #{BATCH_SIZE}
|
||||||
t.ThreadId, t.ForumId, t.UserId, t.TotalViews,
|
t.ThreadId, t.ForumId, t.UserId, t.TotalViews, t.ContentID AS TopicContentId,
|
||||||
t.Subject, t.Body, t.DateCreated, t.IsLocked, t.StickyDate,
|
t.Subject, t.Body, t.DateCreated, t.IsLocked, t.StickyDate,
|
||||||
a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName, a.IsRemote
|
a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName, a.IsRemote
|
||||||
FROM te_Forum_Threads t
|
FROM te_Forum_Threads t
|
||||||
@ -365,6 +392,7 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id) if topic.pinned_until
|
Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id) if topic.pinned_until
|
||||||
url = "f/#{row['ForumId']}/t/#{row['ThreadId']}"
|
url = "f/#{row['ForumId']}/t/#{row['ThreadId']}"
|
||||||
Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url)
|
Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url)
|
||||||
|
import_topic_views(topic, row["TopicContentId"])
|
||||||
end
|
end
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -382,6 +410,29 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
"T#{topic_id}"
|
"T#{topic_id}"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def import_topic_views(topic, content_id)
|
||||||
|
last_user_id = -1
|
||||||
|
|
||||||
|
batches do |_|
|
||||||
|
rows = query(<<~SQL)
|
||||||
|
SELECT TOP #{BATCH_SIZE}
|
||||||
|
UserId, MAX(CreatedUtcDate) AS ViewDate
|
||||||
|
FROM te_Content_Views
|
||||||
|
WHERE ContentId = '#{content_id}' AND UserId > #{last_user_id}
|
||||||
|
GROUP BY UserId
|
||||||
|
ORDER BY UserId
|
||||||
|
SQL
|
||||||
|
|
||||||
|
break if rows.blank?
|
||||||
|
last_user_id = rows[-1]["UserId"]
|
||||||
|
|
||||||
|
rows.each do |row|
|
||||||
|
user_id = user_id_from_imported_user_id(row["UserId"])
|
||||||
|
TopicViewItem.add(topic.id, "127.0.0.1", user_id, row["ViewDate"], true) if user_id
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def ignored_forum_sql_condition
|
def ignored_forum_sql_condition
|
||||||
@ignored_forum_sql_condition ||= @ignored_forum_ids.present? \
|
@ignored_forum_sql_condition ||= @ignored_forum_ids.present? \
|
||||||
? "t.ForumId NOT IN (#{@ignored_forum_ids.join(',')})" \
|
? "t.ForumId NOT IN (#{@ignored_forum_ids.join(',')})" \
|
||||||
@ -452,28 +503,42 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def index_directory(root_directory)
|
||||||
|
Dir.foreach(root_directory) do |directory_name|
|
||||||
|
next if directory_name == "." || directory_name == ".."
|
||||||
|
|
||||||
|
path = File.join(root_directory, directory_name)
|
||||||
|
if File.directory?(path)
|
||||||
|
index_directory(path)
|
||||||
|
else
|
||||||
|
path.delete_prefix!(@filestore_root_directory)
|
||||||
|
path.delete_prefix!("/")
|
||||||
|
@files[path.downcase] = path
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def raw_with_attachment(row, user_id, type)
|
def raw_with_attachment(row, user_id, type)
|
||||||
raw, embedded_paths, upload_ids = replace_embedded_attachments(row["Body"], user_id)
|
raw, embedded_paths, upload_ids = replace_embedded_attachments(row["Body"], user_id)
|
||||||
raw = html_to_markdown(raw) || ""
|
raw = html_to_markdown(raw) || ""
|
||||||
|
|
||||||
filename = row["FileName"]
|
filename = row["FileName"]
|
||||||
return raw if ENV["FILE_BASE_DIR"].blank? || filename.blank?
|
return raw if @filestore_root_directory.blank? || filename.blank?
|
||||||
|
|
||||||
if row["IsRemote"]
|
if row["IsRemote"]
|
||||||
return "#{raw}\n#{filename}"
|
return "#{raw}\n#{filename}"
|
||||||
end
|
end
|
||||||
|
|
||||||
path = File.join(
|
path = File.join(
|
||||||
ENV["FILE_BASE_DIR"],
|
|
||||||
"telligent.evolution.components.attachments",
|
"telligent.evolution.components.attachments",
|
||||||
"%02d" % row["ApplicationTypeId"],
|
"%02d" % row["ApplicationTypeId"],
|
||||||
"%02d" % row["ApplicationId"],
|
"%02d" % row["ApplicationId"],
|
||||||
"%02d" % row["ApplicationContentTypeId"],
|
"%02d" % row["ApplicationContentTypeId"],
|
||||||
("%010d" % row["ContentId"]).scan(/.{2}/),
|
("%010d" % row["ContentId"]).scan(/.{2}/)
|
||||||
clean_filename(filename)
|
|
||||||
)
|
)
|
||||||
|
path = fix_attachment_path(path, filename)
|
||||||
|
|
||||||
unless embedded_paths.include?(path)
|
if path && !embedded_paths.include?(path)
|
||||||
if File.file?(path)
|
if File.file?(path)
|
||||||
upload = @uploader.create_upload(user_id, path, filename)
|
upload = @uploader.create_upload(user_id, path, filename)
|
||||||
|
|
||||||
@ -493,91 +558,110 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
paths = []
|
paths = []
|
||||||
upload_ids = []
|
upload_ids = []
|
||||||
|
|
||||||
return [raw, paths, upload_ids] if ENV["FILE_BASE_DIR"].blank?
|
return [raw, paths, upload_ids] if @filestore_root_directory.blank?
|
||||||
|
|
||||||
raw = raw.gsub(EMBEDDED_ATTACHMENT_REGEX) do
|
[EMBEDDED_ATTACHMENT_REGEX, EMBEDDED_VIEW_ATTACHMENT_REGEX].each do |regex|
|
||||||
filename, path = attachment_path(Regexp.last_match)
|
raw = raw.gsub(regex) do
|
||||||
|
match_data = Regexp.last_match
|
||||||
|
|
||||||
if File.file?(path)
|
path = File.join(match_data[:directory], match_data[:path])
|
||||||
upload = @uploader.create_upload(user_id, path, filename)
|
fixed_path = fix_attachment_path(path, match_data[:filename])
|
||||||
|
|
||||||
if upload.present? && upload.persisted?
|
if fixed_path && File.file?(fixed_path)
|
||||||
paths << path
|
filename = File.basename(fixed_path)
|
||||||
upload_ids << upload.id
|
upload = @uploader.create_upload(user_id, fixed_path, filename)
|
||||||
@uploader.html_for_upload(upload, filename)
|
|
||||||
|
if upload.present? && upload.persisted?
|
||||||
|
paths << fixed_path
|
||||||
|
upload_ids << upload.id
|
||||||
|
@uploader.html_for_upload(upload, filename)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
path = File.join(path, match_data[:filename])
|
||||||
|
STDERR.puts "Could not find file: #{path}"
|
||||||
end
|
end
|
||||||
else
|
|
||||||
STDERR.puts "Could not find file: #{path}"
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
[raw, paths, upload_ids]
|
[raw, paths, upload_ids]
|
||||||
end
|
end
|
||||||
|
|
||||||
def clean_filename(filename)
|
def fix_attachment_path(base_path, filename)
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
base_path.downcase!
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
filename = CGI.unescapeHTML(filename)
|
filename = CGI.unescapeHTML(filename)
|
||||||
filename.gsub!(/[\x00\/\\:\*\?\"<>\|]/, '_')
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
%w|( ) # % - _ [ ] = , ' ~ ! + { } & @ #|.each do |c|
|
filename.gsub!("-", " ")
|
||||||
number = "#{c.ord.to_s(16).upcase}00"
|
filename.strip!
|
||||||
filename.gsub!("_#{number}_", c)
|
path = find_correct_path(base_path, filename)
|
||||||
filename.gsub!("_#{number}#{number}_", c * 2)
|
return path if attachment_exists?(path)
|
||||||
end
|
|
||||||
|
|
||||||
filename
|
directories = base_path.split(File::SEPARATOR)
|
||||||
|
first_directory = directories.shift
|
||||||
|
first_directory.gsub!("-", ".")
|
||||||
|
base_path = File.join(first_directory, directories)
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
directories.map! { |d| File.join(d.split(/[\.\-]/).map(&:strip)) }
|
||||||
|
base_path = File.join(first_directory, directories)
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
directories = base_path.split(File::SEPARATOR)
|
||||||
|
directories.map! { |d| d.gsub("+", " ").strip }
|
||||||
|
base_path = File.join(directories)
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
replace_codes!(filename)
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
replace_codes!(base_path)
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
filename.gsub!(/(?:\:\d+)+$/, "")
|
||||||
|
path = find_correct_path(base_path, filename)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
path = File.join(base_path, filename)
|
||||||
|
path_regex = Regexp.new("^#{Regexp.escape(path)}-\\d+x\\d+\\.\\w+$", Regexp::IGNORECASE)
|
||||||
|
path = find_correct_path_with_regex(path_regex)
|
||||||
|
return path if attachment_exists?(path)
|
||||||
|
|
||||||
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def attachment_path(match_data)
|
def find_correct_path(base_path, filename)
|
||||||
filename, path = join_attachment_path(match_data, filename_index: 2)
|
path = File.join(base_path, filename)
|
||||||
filename, path = join_attachment_path(match_data, filename_index: 1) unless File.file?(path)
|
path = @files[path.downcase]
|
||||||
[filename, path]
|
path ? File.join(@filestore_root_directory, path) : nil
|
||||||
end
|
end
|
||||||
|
|
||||||
# filenames are a total mess - try to guess the correct filename
|
def find_correct_path_with_regex(regex)
|
||||||
# works for 70% of all files
|
keys = @files.keys.filter { |key| regex =~ key }
|
||||||
def join_attachment_path(match_data, filename_index:)
|
keys.size == 1 ? File.join(@filestore_root_directory, @files[keys.first]) : nil
|
||||||
filename = clean_filename(match_data[:"filename#{filename_index}"])
|
end
|
||||||
base_path = File.join(
|
|
||||||
ENV["FILE_BASE_DIR"],
|
|
||||||
match_data[:directory].gsub("-", ".").downcase,
|
|
||||||
match_data[:path].gsub("+", " ").gsub(/_\h{4}_/, "?").split(/[\.\-]/).map(&:strip)
|
|
||||||
)
|
|
||||||
|
|
||||||
original_filename = filename.dup
|
def attachment_exists?(path)
|
||||||
|
path.present? && File.file?(path)
|
||||||
|
end
|
||||||
|
|
||||||
filename.gsub!(/[_\- ]/, "?")
|
def replace_codes!(text)
|
||||||
path = File.join(base_path, filename)
|
text.gsub!(/_(\h{4}+)_/i) do
|
||||||
paths = Dir.glob(path, File::FNM_CASEFOLD)
|
codes = Regexp.last_match[1].upcase.scan(/.{4}/)
|
||||||
if (path = paths.first) && paths.size == 1
|
mapped_codes = codes.map { |c| UNICODE_REPLACEMENTS[c] }
|
||||||
filename = File.basename(path)
|
mapped_codes.any? { |c| c.nil? } ? Regexp.last_match[0] : mapped_codes.join("")
|
||||||
return [filename, path]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
filename = original_filename.dup
|
|
||||||
filename.gsub!(/_\h{2}00_/, "?")
|
|
||||||
filename.gsub!(/_\h{2}00\h{2}00_/, "??")
|
|
||||||
filename.gsub!(/_\h{2}00\h{2}00\h{2}00_/, "???")
|
|
||||||
filename.gsub!(/[_\- ]/, "?")
|
|
||||||
path = File.join(base_path, filename)
|
|
||||||
paths = Dir.glob(path, File::FNM_CASEFOLD)
|
|
||||||
if (path = paths.first) && paths.size == 1
|
|
||||||
filename = File.basename(path)
|
|
||||||
return [filename, path]
|
|
||||||
end
|
|
||||||
|
|
||||||
filename = original_filename.dup
|
|
||||||
filename.gsub!(/_\h{4}_/, "?")
|
|
||||||
filename.gsub!(/_\h{4}\h{4}_/, "??")
|
|
||||||
filename.gsub!(/_\h{4}\h{4}\h{4}_/, "???")
|
|
||||||
filename.gsub!(/[_\- ]/, "?")
|
|
||||||
path = File.join(base_path, filename)
|
|
||||||
paths = Dir.glob(path, File::FNM_CASEFOLD)
|
|
||||||
if (path = paths.first) && paths.size == 1
|
|
||||||
filename = File.basename(path)
|
|
||||||
return [filename, path]
|
|
||||||
end
|
|
||||||
|
|
||||||
[original_filename, File.join(base_path, original_filename)]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def html_to_markdown(html)
|
def html_to_markdown(html)
|
||||||
@ -586,10 +670,27 @@ class ImportScripts::Telligent < ImportScripts::Base
|
|||||||
md = HtmlToMarkdown.new(html).to_markdown
|
md = HtmlToMarkdown.new(html).to_markdown
|
||||||
md.gsub!(/\[quote.*?\]/, "\n" + '\0' + "\n")
|
md.gsub!(/\[quote.*?\]/, "\n" + '\0' + "\n")
|
||||||
md.gsub!(/(?<!^)\[\/quote\]/, "\n[/quote]\n")
|
md.gsub!(/(?<!^)\[\/quote\]/, "\n[/quote]\n")
|
||||||
|
md.gsub!(/\[\/quote\](?!$)/, "\n[/quote]\n")
|
||||||
|
md.gsub!(/\[View:(http.*?)[:\d\s]*?(?:\]|\z)/i, '\1')
|
||||||
md.strip!
|
md.strip!
|
||||||
md
|
md
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def parse_properties(names, values)
|
||||||
|
properties = {}
|
||||||
|
return properties if names.blank? || values.blank?
|
||||||
|
|
||||||
|
names.scan(PROPERTY_NAMES_REGEX).each do |property|
|
||||||
|
name = property[0]
|
||||||
|
start_index = property[1].to_i
|
||||||
|
end_index = start_index + property[2].to_i - 1
|
||||||
|
|
||||||
|
properties[name] = values[start_index..end_index]
|
||||||
|
end
|
||||||
|
|
||||||
|
properties
|
||||||
|
end
|
||||||
|
|
||||||
def mark_topics_as_solved
|
def mark_topics_as_solved
|
||||||
puts "", "Marking topics as solved..."
|
puts "", "Marking topics as solved..."
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user