mirror of
https://github.com/discourse/discourse.git
synced 2025-05-22 07:53:49 +08:00
Update Disqus importer.
This commit is contained in:
198
script/import_scripts/disqus.rb
Normal file
198
script/import_scripts/disqus.rb
Normal file
@ -0,0 +1,198 @@
|
||||
require 'nokogiri'
|
||||
require 'optparse'
|
||||
require File.expand_path(File.dirname(__FILE__) + "/base")
|
||||
|
||||
class ImportScripts::Disqus < ImportScripts::Base
|
||||
def initialize(options)
|
||||
verify_file(options[:file])
|
||||
@post_as_user = get_post_as_user(options[:post_as])
|
||||
@dry_run = options[:dry_run]
|
||||
@parser = DisqusSAX.new(options[:strip])
|
||||
doc = Nokogiri::XML::SAX::Parser.new(@parser)
|
||||
doc.parse_file(options[:file])
|
||||
@parser.normalize
|
||||
super()
|
||||
end
|
||||
|
||||
def execute
|
||||
@parser.threads.each do |id, t|
|
||||
puts "Creating #{t[:title]}... (#{t[:posts].size} posts)"
|
||||
|
||||
if !@dry_run
|
||||
post = TopicEmbed.import_remote(@post_as_user, t[:link], title: t[:title])
|
||||
|
||||
if post.present?
|
||||
t[:posts].each do |p|
|
||||
post_user = @post_as_user
|
||||
|
||||
if p[:author_email]
|
||||
post_user = create_user({ id: nil, email: p[:author_email] }, nil)
|
||||
end
|
||||
|
||||
attrs = {
|
||||
user_id: post_user.id,
|
||||
topic_id: post.topic_id,
|
||||
raw: p[:cooked],
|
||||
cooked: p[:cooked],
|
||||
created_at: Date.parse(p[:created_at])
|
||||
}
|
||||
|
||||
if p[:parent_id]
|
||||
parent = @parser.posts[p[:parent_id]]
|
||||
|
||||
if parent && parent[:discourse_number]
|
||||
attrs[:reply_to_post_number] = parent[:discourse_number]
|
||||
end
|
||||
end
|
||||
|
||||
post = create_post(attrs, p[:id])
|
||||
p[:discourse_number] = post.post_number
|
||||
end
|
||||
|
||||
TopicFeaturedUsers.new(post.topic).choose
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def verify_file(file)
|
||||
abort("File '#{file}' not found") if !File.exist?(file)
|
||||
end
|
||||
|
||||
def get_post_as_user(username)
|
||||
user = User.find_by_username_lower(username.downcase)
|
||||
abort("No user found named: '#{username}'") if user.nil?
|
||||
user
|
||||
end
|
||||
end
|
||||
|
||||
class DisqusSAX < Nokogiri::XML::SAX::Document
|
||||
attr_accessor :posts, :threads
|
||||
|
||||
def initialize(strip)
|
||||
@inside = {}
|
||||
@posts = {}
|
||||
@threads = {}
|
||||
@strip = strip
|
||||
end
|
||||
|
||||
def start_element(name, attrs = [])
|
||||
case name
|
||||
when 'post'
|
||||
@post = {}
|
||||
@post[:id] = Hash[attrs]['dsq:id'] if @post
|
||||
when 'thread'
|
||||
id = Hash[attrs]['dsq:id']
|
||||
if @post
|
||||
thread = @threads[id]
|
||||
thread[:posts] << @post
|
||||
else
|
||||
@thread = {id: id, posts: []}
|
||||
end
|
||||
when 'parent'
|
||||
if @post
|
||||
id = Hash[attrs]['dsq:id']
|
||||
@post[:parent_id] = id
|
||||
end
|
||||
end
|
||||
|
||||
@inside[name] = true
|
||||
end
|
||||
|
||||
def end_element(name)
|
||||
case name
|
||||
when 'post'
|
||||
@posts[@post[:id]] = @post
|
||||
@post = nil
|
||||
when 'thread'
|
||||
if @post.nil?
|
||||
@threads[@thread[:id]] = @thread
|
||||
@thread = nil
|
||||
end
|
||||
end
|
||||
|
||||
@inside[name] = false
|
||||
end
|
||||
|
||||
def characters(str)
|
||||
record(@post, :author_email, str, 'author', 'email')
|
||||
record(@post, :author_name, str, 'author', 'name')
|
||||
record(@post, :author_anonymous, str, 'author', 'isAnonymous')
|
||||
record(@post, :created_at, str, 'createdAt')
|
||||
|
||||
record(@thread, :link, str, 'link')
|
||||
record(@thread, :title, str, 'title')
|
||||
record(@thread, :created_at, str, 'createdAt')
|
||||
end
|
||||
|
||||
def cdata_block(str)
|
||||
record(@post, :cooked, str, 'message')
|
||||
end
|
||||
|
||||
def record(target, sym, str, *params)
|
||||
return if target.nil?
|
||||
|
||||
if inside?(*params)
|
||||
target[sym] ||= ""
|
||||
target[sym] << str
|
||||
end
|
||||
end
|
||||
|
||||
def inside?(*params)
|
||||
return !params.find{|p| !@inside[p]}
|
||||
end
|
||||
|
||||
def normalize
|
||||
@threads.each do |id, t|
|
||||
if t[:posts].size == 0
|
||||
# Remove any threads that have no posts
|
||||
@threads.delete(id)
|
||||
else
|
||||
# Normalize titles
|
||||
t[:title].gsub!(@strip, '') if @strip.present?
|
||||
t[:title].strip!
|
||||
end
|
||||
end
|
||||
|
||||
# Merge any threads that have the same title
|
||||
existing_title = {}
|
||||
@threads.each do |id, t|
|
||||
existing = existing_title[t[:title]]
|
||||
if existing.nil?
|
||||
existing_title[t[:title]] = t
|
||||
else
|
||||
existing[:posts] << t[:posts]
|
||||
existing[:posts].flatten!
|
||||
@threads.delete(t[:id])
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
options = {
|
||||
dry_run: false
|
||||
}
|
||||
|
||||
OptionParser.new do |opts|
|
||||
opts.banner = 'Usage: RAILS_ENV=production ruby disqus.rb [OPTIONS]'
|
||||
|
||||
opts.on('-f', '--file=FILE_PATH', 'The disqus XML file to import') do |value|
|
||||
options[:file] = value
|
||||
end
|
||||
|
||||
opts.on('-d', '--dry_run', 'Just output what will be imported rather than doing it') do
|
||||
options[:dry_run] = true
|
||||
end
|
||||
|
||||
opts.on('-p', '--post_as=USERNAME', 'The Discourse username to post as') do |value|
|
||||
options[:post_as] = value
|
||||
end
|
||||
|
||||
opts.on('-s', '--strip=TEXT', 'Text to strip from titles') do |value|
|
||||
options[:strip] = value
|
||||
end
|
||||
end.parse!
|
||||
|
||||
ImportScripts::Disqus.new(options).perform
|
Reference in New Issue
Block a user