2013-12-12 15:35:55 -05:00
require 'nokogiri'
2015-07-01 09:16:52 +08:00
require 'optparse'
require File . expand_path ( File . dirname ( __FILE__ ) + " /base " )
# Imports the threads and comments of a Disqus XML export.
#
# The export is parsed up front (in the constructor) by DisqusSAX; #execute
# then walks the parsed threads, creating an embed topic per thread and one
# post per comment.
class ImportScripts::Disqus < ImportScripts::Base
  # @param options [Hash] command-line options. Reads :file (path to the XML
  #   export), :post_as (username to post as), :dry_run, :strip and
  #   :no_deleted (the last two are handed to DisqusSAX).
  # Aborts the process if the file does not exist or the user is not found.
  def initialize(options)
    verify_file(options[:file])
    @post_as_user = get_post_as_user(options[:post_as])
    @dry_run = options[:dry_run]
    @parser = DisqusSAX.new(options[:strip], options[:no_deleted])
    doc = Nokogiri::XML::SAX::Parser.new(@parser)
    doc.parse_file(options[:file])
    @parser.normalize
    super()
  end

  # Prints one line per parsed thread and, unless this is a dry run, imports
  # the thread and its comments.
  def execute
    @parser.threads.each do |_id, thread|
      puts " Creating #{thread[:title]} ... ( #{thread[:posts].size} posts) "
      next if @dry_run

      topic_post = TopicEmbed.import_remote(@post_as_user, thread[:link], title: thread[:title])
      next unless topic_post.present?

      thread[:posts].each { |comment| import_comment(topic_post, comment) }
      TopicFeaturedUsers.new(topic_post.topic).choose
    end
  end

  private

  # Creates one post for a parsed Disqus comment in the topic of +topic_post+
  # and records the resulting post number on the comment hash so that later
  # replies can reference it.
  def import_comment(topic_post, comment)
    # Comments without an author email are posted as the --post_as user.
    author = @post_as_user
    if comment[:author_email]
      author = create_user({ id: nil, email: comment[:author_email] }, nil)
    end

    attrs = {
      user_id: author.id,
      topic_id: topic_post.topic_id,
      raw: comment[:cooked],
      cooked: comment[:cooked],
      # NOTE(review): Date.parse keeps only the calendar date of the comment.
      created_at: Date.parse(comment[:created_at])
    }

    # Only link the reply when the parent was already imported (it then
    # carries a :discourse_number).
    parent = comment[:parent_id] && @parser.posts[comment[:parent_id]]
    if parent && parent[:discourse_number]
      attrs[:reply_to_post_number] = parent[:discourse_number]
    end

    created = create_post(attrs, comment[:id])
    comment[:discourse_number] = created.post_number
  end

  # Aborts with a message when +file+ is missing (nil) or does not exist.
  def verify_file(file)
    abort(" File ' #{file} ' not found ") if file.nil? || !File.exist?(file)
  end

  # Looks up the user to post as, case-insensitively on the username.
  # Aborts with a message when no username was given or no user matches.
  def get_post_as_user(username)
    user = User.find_by_username_lower(username.to_s.downcase)
    abort(" No user found named: ' #{username} ' ") if user.nil?
    user
  end
end
2013-12-12 15:35:55 -05:00
# SAX handler that collects threads and posts from a Disqus XML export.
#
# While parsing, +threads+ is filled with thread hashes keyed by thread id
# (keys :id, :posts, and :link / :title / :created_at when present) and
# +posts+ with post hashes keyed by post id (keys :id, and :parent_id,
# :author_email, :author_name, :author_anonymous, :created_at, :is_deleted,
# :cooked when present). Call #normalize once parsing has finished.
class DisqusSAX < Nokogiri::XML::SAX::Document
  attr_accessor :posts, :threads

  # @param strip [String, nil] text removed from every thread title
  # @param no_deleted [Boolean] when true, deleted posts are not attached to
  #   their thread
  def initialize(strip, no_deleted = false)
    @inside = {}
    @posts = {}
    @threads = {}
    @no_deleted = no_deleted
    @strip = strip
  end

  # Tracks which elements are currently open and starts/links posts and
  # threads. A 'thread' element seen while a post is open is a reference
  # from the post to its thread; otherwise it starts a new thread.
  def start_element(name, attrs = [])
    case name
    when 'post'
      @post = { id: dsq_id(attrs) }
    when 'thread'
      id = dsq_id(attrs)
      if @post
        # Skip this post if it's deleted and no_deleted is true
        return if @no_deleted && @post[:is_deleted].to_s == 'true'

        # Ignore references to a thread that was never defined instead of
        # crashing on nil.
        thread = @threads[id]
        thread[:posts] << @post if thread
      else
        @thread = { id: id, posts: [] }
      end
    when 'parent'
      @post[:parent_id] = dsq_id(attrs) if @post
    end
    @inside[name] = true
  end

  # Stores the finished post or thread and marks the element as closed.
  def end_element(name)
    case name
    when 'post'
      @posts[@post[:id]] = @post
      @post = nil
    when 'thread'
      # A closing 'thread' inside a post is only a reference, not a thread
      # definition, so there is nothing to store.
      if @post.nil?
        @threads[@thread[:id]] = @thread
        @thread = nil
      end
    end
    @inside[name] = false
  end

  # Routes text content to the field matching the currently open elements.
  def characters(str)
    record(@post, :author_email, str, 'author', 'email')
    record(@post, :author_name, str, 'author', 'name')
    record(@post, :author_anonymous, str, 'author', 'isAnonymous')
    record(@post, :created_at, str, 'createdAt')
    record(@post, :is_deleted, str, 'isDeleted')
    record(@thread, :link, str, 'link')
    record(@thread, :title, str, 'title')
    record(@thread, :created_at, str, 'createdAt')
  end

  # CDATA content of a post's 'message' element becomes its :cooked body.
  def cdata_block(str)
    record(@post, :cooked, str, 'message')
  end

  # Appends +str+ to target[sym] when every element named in +params+ is
  # currently open. Does nothing when +target+ is nil.
  def record(target, sym, str, *params)
    return if target.nil?
    return unless inside?(*params)

    # Start from an empty buffer: a non-empty seed would prefix every
    # captured value and break exact comparisons such as the 'true' check
    # on :is_deleted.
    target[sym] ||= ''.dup
    target[sym] << str
  end

  # @return [Boolean] true when all of the named elements are currently open
  def inside?(*params)
    params.all? { |element| @inside[element] }
  end

  # Cleans up the parsed threads: drops threads without posts, strips the
  # configured text from titles, and merges threads sharing a title into the
  # first one encountered.
  def normalize
    # Remove any threads that have no posts
    @threads.delete_if { |_id, t| t[:posts].empty? }

    # Normalize titles
    if @strip.present?
      @threads.each_value do |t|
        t[:title] = t[:title].gsub(@strip, '').strip if t[:title]
      end
    end

    # Merge any threads that have the same title
    first_with_title = {}
    @threads.delete_if do |_id, t|
      kept = first_with_title[t[:title]]
      if kept.nil?
        first_with_title[t[:title]] = t
        false
      else
        kept[:posts].concat(t[:posts])
        true
      end
    end
  end

  private

  # Extracts the 'dsq:id' attribute from a SAX attribute list of pairs.
  def dsq_id(attrs)
    Hash[attrs]['dsq:id']
  end
end
2015-07-01 09:16:52 +08:00
# Command-line entry point: collect the options, then run the importer.
options = { dry_run: false }

cli = OptionParser.new
cli.banner = 'Usage: RAILS_ENV=production ruby disqus.rb [OPTIONS]'

cli.on('-f', '--file=FILE_PATH', 'The disqus XML file to import') { |path| options[:file] = path }
cli.on('-d', '--dry_run', 'Just output what will be imported rather than doing it') { options[:dry_run] = true }
cli.on('-p', '--post_as=USERNAME', 'The Discourse username to post as') { |name| options[:post_as] = name }
cli.on('-D', '--no_deleted', 'Do not post deleted comments') { options[:no_deleted] = true }
cli.on('-s', '--strip=TEXT', 'Text to strip from titles') { |text| options[:strip] = text }

# parse! consumes the recognised switches from ARGV.
cli.parse!

ImportScripts::Disqus.new(options).perform