mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-30 10:58:31 -05:00
Lots of upgrades to the Disqus importer script
This commit is contained in:
parent
6af4e6bd05
commit
72d7c055f4
2 changed files with 100 additions and 80 deletions
|
@ -274,7 +274,7 @@ class ImportScripts::Base
|
||||||
merge = opts.delete(:merge)
|
merge = opts.delete(:merge)
|
||||||
post_create_action = opts.delete(:post_create_action)
|
post_create_action = opts.delete(:post_create_action)
|
||||||
|
|
||||||
existing = User.where(email: opts[:email].downcase, username: opts[:username]).first
|
existing = User.where("email = ? OR username = ?", opts[:email].downcase, opts[:username]).first
|
||||||
return existing if existing && (merge || existing.custom_fields["import_id"].to_i == import_id.to_i)
|
return existing if existing && (merge || existing.custom_fields["import_id"].to_i == import_id.to_i)
|
||||||
|
|
||||||
bio_raw = opts.delete(:bio_raw)
|
bio_raw = opts.delete(:bio_raw)
|
||||||
|
|
|
@ -3,53 +3,101 @@ require 'optparse'
|
||||||
require File.expand_path(File.dirname(__FILE__) + "/base")
|
require File.expand_path(File.dirname(__FILE__) + "/base")
|
||||||
|
|
||||||
class ImportScripts::Disqus < ImportScripts::Base
|
class ImportScripts::Disqus < ImportScripts::Base
|
||||||
def initialize(options)
|
# CHANGE THESE BEFORE RUNNING THE IMPORTER
|
||||||
verify_file(options[:file])
|
|
||||||
@post_as_user = get_post_as_user(options[:post_as])
|
IMPORT_FILE = File.expand_path("~/import/site/export.xml")
|
||||||
@dry_run = options[:dry_run]
|
IMPORT_CATEGORY = "Front page"
|
||||||
@parser = DisqusSAX.new(options[:strip], options[:no_deleted])
|
|
||||||
|
def initialize
|
||||||
|
abort("File '#{IMPORT_FILE}' not found") if !File.exist?(IMPORT_FILE)
|
||||||
|
|
||||||
|
@category = Category.where(name: IMPORT_CATEGORY).first
|
||||||
|
abort("Category #{IMPORT_CATEGORY} not found") if @category.blank?
|
||||||
|
|
||||||
|
@parser = DisqusSAX.new
|
||||||
doc = Nokogiri::XML::SAX::Parser.new(@parser)
|
doc = Nokogiri::XML::SAX::Parser.new(@parser)
|
||||||
doc.parse_file(options[:file])
|
doc.parse_file(IMPORT_FILE)
|
||||||
@parser.normalize
|
@parser.normalize
|
||||||
super()
|
|
||||||
|
super
|
||||||
end
|
end
|
||||||
|
|
||||||
def execute
|
def execute
|
||||||
|
import_users
|
||||||
|
import_topics_and_posts
|
||||||
|
end
|
||||||
|
|
||||||
|
def import_users
|
||||||
|
puts "", "importing users..."
|
||||||
|
|
||||||
|
by_email = {}
|
||||||
|
|
||||||
|
@parser.posts.each do |id, p|
|
||||||
|
next if p[:is_spam] == 'true' || p[:is_deleted] == 'true'
|
||||||
|
by_email[p[:author_email]] = { name: p[:author_name], username: p[:author_username] }
|
||||||
|
end
|
||||||
|
|
||||||
@parser.threads.each do |id, t|
|
@parser.threads.each do |id, t|
|
||||||
puts "Creating #{t[:title]}... (#{t[:posts].size} posts)"
|
by_email[t[:author_email]] = { name: t[:author_name], username: t[:author_username] }
|
||||||
|
end
|
||||||
|
|
||||||
if !@dry_run
|
create_users(by_email.keys) do |email|
|
||||||
post = TopicEmbed.import_remote(@post_as_user, t[:link], title: t[:title])
|
user = by_email[email]
|
||||||
|
{
|
||||||
|
id: email,
|
||||||
|
email: email,
|
||||||
|
username: user[:username],
|
||||||
|
name: user[:name],
|
||||||
|
merge: true
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
if post.present?
|
def import_topics_and_posts
|
||||||
t[:posts].each do |p|
|
puts "", "importing topics..."
|
||||||
post_user = @post_as_user
|
|
||||||
|
|
||||||
if p[:author_email]
|
@parser.threads.each do |id, t|
|
||||||
post_user = create_user({ id: nil, email: p[:author_email] }, nil)
|
|
||||||
|
title = t[:title]
|
||||||
|
title.gsub!(/“/, '"')
|
||||||
|
title.gsub!(/”/, '"')
|
||||||
|
title.gsub!(/’/, "'")
|
||||||
|
title.gsub!(/—/, "--")
|
||||||
|
title.gsub!(/–/, "-")
|
||||||
|
|
||||||
|
puts "Creating #{title}... (#{t[:posts].size} posts)"
|
||||||
|
|
||||||
|
topic_user = User.where('email = ? OR username = ?', t[:author_email].downcase, t[:author_username]).first
|
||||||
|
begin
|
||||||
|
post = TopicEmbed.import_remote(topic_user, t[:link], title: title)
|
||||||
|
post.topic.update_column(:category_id, @category.id)
|
||||||
|
rescue OpenURI::HTTPError
|
||||||
|
post = nil
|
||||||
|
end
|
||||||
|
|
||||||
|
if post.present? && post.topic.posts_count <= 1
|
||||||
|
(t[:posts] || []).each do |p|
|
||||||
|
post_user = User.where('email = ? OR username = ?', (p[:author_email] || '').downcase, p[:author_username]).first
|
||||||
|
next unless post_user.present?
|
||||||
|
|
||||||
|
attrs = {
|
||||||
|
user_id: post_user.id,
|
||||||
|
topic_id: post.topic_id,
|
||||||
|
raw: p[:cooked],
|
||||||
|
cooked: p[:cooked],
|
||||||
|
created_at: Date.parse(p[:created_at])
|
||||||
|
}
|
||||||
|
|
||||||
|
if p[:parent_id]
|
||||||
|
parent = @parser.posts[p[:parent_id]]
|
||||||
|
|
||||||
|
if parent && parent[:discourse_number]
|
||||||
|
attrs[:reply_to_post_number] = parent[:discourse_number]
|
||||||
end
|
end
|
||||||
|
|
||||||
attrs = {
|
|
||||||
user_id: post_user.id,
|
|
||||||
topic_id: post.topic_id,
|
|
||||||
raw: p[:cooked],
|
|
||||||
cooked: p[:cooked],
|
|
||||||
created_at: Date.parse(p[:created_at])
|
|
||||||
}
|
|
||||||
|
|
||||||
if p[:parent_id]
|
|
||||||
parent = @parser.posts[p[:parent_id]]
|
|
||||||
|
|
||||||
if parent && parent[:discourse_number]
|
|
||||||
attrs[:reply_to_post_number] = parent[:discourse_number]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
post = create_post(attrs, p[:id])
|
|
||||||
p[:discourse_number] = post.post_number
|
|
||||||
end
|
end
|
||||||
|
|
||||||
TopicFeaturedUsers.new(post.topic).choose
|
post = create_post(attrs, p[:id])
|
||||||
|
p[:discourse_number] = post.post_number
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -57,10 +105,6 @@ class ImportScripts::Disqus < ImportScripts::Base
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def verify_file(file)
|
|
||||||
abort("File '#{file}' not found") if !File.exist?(file)
|
|
||||||
end
|
|
||||||
|
|
||||||
def get_post_as_user(username)
|
def get_post_as_user(username)
|
||||||
user = User.find_by_username_lower(username.downcase)
|
user = User.find_by_username_lower(username.downcase)
|
||||||
abort("No user found named: '#{username}'") if user.nil?
|
abort("No user found named: '#{username}'") if user.nil?
|
||||||
|
@ -69,26 +113,25 @@ class ImportScripts::Disqus < ImportScripts::Base
|
||||||
end
|
end
|
||||||
|
|
||||||
class DisqusSAX < Nokogiri::XML::SAX::Document
|
class DisqusSAX < Nokogiri::XML::SAX::Document
|
||||||
attr_accessor :posts, :threads
|
attr_accessor :posts, :threads, :users
|
||||||
|
|
||||||
def initialize(strip, no_deleted = false)
|
def initialize
|
||||||
@inside = {}
|
@inside = {}
|
||||||
@posts = {}
|
@posts = {}
|
||||||
@threads = {}
|
@threads = {}
|
||||||
@no_deleted = no_deleted
|
@users = {}
|
||||||
@strip = strip
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def start_element(name, attrs = [])
|
def start_element(name, attrs = [])
|
||||||
|
|
||||||
|
hashed = Hash[attrs]
|
||||||
case name
|
case name
|
||||||
when 'post'
|
when 'post'
|
||||||
@post = {}
|
@post = {}
|
||||||
@post[:id] = Hash[attrs]['dsq:id'] if @post
|
@post[:id] = hashed['dsq:id'] if @post
|
||||||
when 'thread'
|
when 'thread'
|
||||||
id = Hash[attrs]['dsq:id']
|
id = hashed['dsq:id']
|
||||||
if @post
|
if @post
|
||||||
# Skip this post if it's deleted and no_deleted is true
|
|
||||||
return if @no_deleted && @post[:is_deleted].to_s == 'true'
|
|
||||||
thread = @threads[id]
|
thread = @threads[id]
|
||||||
thread[:posts] << @post
|
thread[:posts] << @post
|
||||||
else
|
else
|
||||||
|
@ -96,7 +139,7 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
|
||||||
end
|
end
|
||||||
when 'parent'
|
when 'parent'
|
||||||
if @post
|
if @post
|
||||||
id = Hash[attrs]['dsq:id']
|
id = hashed['dsq:id']
|
||||||
@post[:parent_id] = id
|
@post[:parent_id] = id
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -122,13 +165,19 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
|
||||||
def characters(str)
|
def characters(str)
|
||||||
record(@post, :author_email, str, 'author', 'email')
|
record(@post, :author_email, str, 'author', 'email')
|
||||||
record(@post, :author_name, str, 'author', 'name')
|
record(@post, :author_name, str, 'author', 'name')
|
||||||
|
record(@post, :author_username, str, 'author', 'username')
|
||||||
record(@post, :author_anonymous, str, 'author', 'isAnonymous')
|
record(@post, :author_anonymous, str, 'author', 'isAnonymous')
|
||||||
record(@post, :created_at, str, 'createdAt')
|
record(@post, :created_at, str, 'createdAt')
|
||||||
record(@post, :is_deleted, str, 'isDeleted')
|
record(@post, :is_deleted, str, 'isDeleted')
|
||||||
|
record(@post, :is_spam, str, 'isSpam')
|
||||||
|
|
||||||
record(@thread, :link, str, 'link')
|
record(@thread, :link, str, 'link')
|
||||||
record(@thread, :title, str, 'title')
|
record(@thread, :title, str, 'title')
|
||||||
record(@thread, :created_at, str, 'createdAt')
|
record(@thread, :created_at, str, 'createdAt')
|
||||||
|
record(@thread, :author_email, str, 'author', 'email')
|
||||||
|
record(@thread, :author_name, str, 'author', 'name')
|
||||||
|
record(@thread, :author_username, str, 'author', 'username')
|
||||||
|
record(@thread, :author_anonymous, str, 'author', 'isAnonymous')
|
||||||
end
|
end
|
||||||
|
|
||||||
def cdata_block(str)
|
def cdata_block(str)
|
||||||
|
@ -154,8 +203,7 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
|
||||||
# Remove any threads that have no posts
|
# Remove any threads that have no posts
|
||||||
@threads.delete(id)
|
@threads.delete(id)
|
||||||
else
|
else
|
||||||
# Normalize titles
|
t[:posts].delete_if {|p| p[:is_spam] == 'true' || p[:is_deleted] == 'true'}
|
||||||
t[:title] = [:title].gsub(@strip, '').strip if @strip.present?
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -174,32 +222,4 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
options = {
|
ImportScripts::Disqus.new.perform
|
||||||
dry_run: false
|
|
||||||
}
|
|
||||||
|
|
||||||
OptionParser.new do |opts|
|
|
||||||
opts.banner = 'Usage: RAILS_ENV=production ruby disqus.rb [OPTIONS]'
|
|
||||||
|
|
||||||
opts.on('-f', '--file=FILE_PATH', 'The disqus XML file to import') do |value|
|
|
||||||
options[:file] = value
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on('-d', '--dry_run', 'Just output what will be imported rather than doing it') do
|
|
||||||
options[:dry_run] = true
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on('-p', '--post_as=USERNAME', 'The Discourse username to post as') do |value|
|
|
||||||
options[:post_as] = value
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on('-D', '--no_deleted', 'Do not post deleted comments') do
|
|
||||||
options[:no_deleted] = true
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on('-s', '--strip=TEXT', 'Text to strip from titles') do |value|
|
|
||||||
options[:strip] = value
|
|
||||||
end
|
|
||||||
end.parse!
|
|
||||||
|
|
||||||
ImportScripts::Disqus.new(options).perform
|
|
||||||
|
|
Loading…
Reference in a new issue