2013-12-12 15:35:55 -05:00
require 'nokogiri'
2015-07-01 09:16:52 +08:00
require 'optparse'
require File . expand_path ( File . dirname ( __FILE__ ) + " /base " )
# Imports a Disqus XML export into Discourse.
#
# The export is parsed up front (in #initialize) by DisqusSAX. #execute then
# creates one user per distinct author email, one embedded topic per Disqus
# thread (placed in IMPORT_CATEGORY) and one reply per remaining comment.
#
# `create_users`, `create_post` and `perform` are not defined in this file;
# they are presumably inherited from ImportScripts::Base.
class ImportScripts::Disqus < ImportScripts::Base
  # CHANGE THESE BEFORE RUNNING THE IMPORTER
  IMPORT_FILE = File.expand_path("~/import/site/export.xml")
  IMPORT_CATEGORY = "Front page"

  # Numeric HTML entities replaced in thread titles, with their plain-text
  # substitutes.
  TITLE_ENTITIES = {
    '&#8220;' => '"',
    '&#8221;' => '"',
    '&#8217;' => "'",
    '&#8212;' => '--',
    '&#8211;' => '-'
  }.freeze
  TITLE_ENTITY_PATTERN = Regexp.union(TITLE_ENTITIES.keys)

  # Validates the configuration, parses the whole export and normalizes it.
  # Aborts the process when IMPORT_FILE or IMPORT_CATEGORY cannot be found.
  def initialize
    abort("File '#{IMPORT_FILE}' not found") unless File.exist?(IMPORT_FILE)

    @category = Category.where(name: IMPORT_CATEGORY).first
    abort("Category #{IMPORT_CATEGORY} not found") if @category.blank?

    @parser = DisqusSAX.new
    doc = Nokogiri::XML::SAX::Parser.new(@parser)
    doc.parse_file(IMPORT_FILE)
    @parser.normalize

    super
  end

  # Entry point of the import: users first, so that topics and comments can be
  # attributed to them.
  def execute
    import_users
    import_topics_and_posts
  end

  # Creates one user per distinct author email found on non-spam, non-deleted
  # comments and on threads. When the same email appears several times, the
  # last occurrence provides the name and username.
  def import_users
    puts "", "importing users..."

    by_email = {}

    @parser.posts.each_value do |comment|
      next if comment[:is_spam] == 'true' || comment[:is_deleted] == 'true'
      remember_disqus_author(by_email, comment)
    end

    @parser.threads.each_value { |thread| remember_disqus_author(by_email, thread) }

    create_users(by_email.keys) do |email|
      author = by_email[email]
      {
        id: email,
        email: email,
        username: author[:username],
        name: author[:name],
        merge: true
      }
    end
  end

  # Creates an embedded topic for every thread and, when that topic has no
  # replies yet (posts_count <= 1), imports the thread's comments into it.
  def import_topics_and_posts
    puts "", "importing topics..."

    @parser.threads.each_value do |thread|
      comments = thread[:posts] || []
      title = clean_disqus_title(thread[:title])
      puts "Creating #{title}... (#{comments.size} posts)"

      topic_post = import_disqus_topic(thread, title)
      next unless topic_post.present? && topic_post.topic.posts_count <= 1

      comments.each { |comment| import_disqus_comment(topic_post, comment) }
    end
  end

  private

  # Records the author of +record+ (a parsed comment or thread hash) under its
  # email. Records without an email are skipped: the email is used both as
  # the import id and as the address of the created user.
  def remember_disqus_author(by_email, record)
    email = record[:author_email]
    return if email.blank?

    by_email[email] = { name: record[:author_name], username: record[:author_username] }
  end

  # Returns a copy of +raw+ with the entities in TITLE_ENTITIES replaced.
  # The parsed title is left untouched and a missing title stays nil.
  def clean_disqus_title(raw)
    raw && raw.gsub(TITLE_ENTITY_PATTERN, TITLE_ENTITIES)
  end

  # Looks up an existing user by lower-cased email or by username. A missing
  # email is treated as an empty string so that the lookup cannot raise.
  def find_disqus_author(email, username)
    User.where('email = ? OR username = ?', (email || '').downcase, username).first
  end

  # Imports the page behind the thread's link as an embedded topic and moves
  # it into the configured category. Returns the topic's first post, or nil
  # when the page could not be fetched or nothing was imported.
  def import_disqus_topic(thread, title)
    author = find_disqus_author(thread[:author_email], thread[:author_username])
    topic_post = TopicEmbed.import_remote(author, thread[:link], title: title)
    topic_post.topic.update_column(:category_id, @category.id) if topic_post.present?
    topic_post
  rescue OpenURI::HTTPError
    nil
  end

  # Creates one reply in the topic of +topic_post+ for a parsed comment.
  # Comments whose author cannot be found are skipped.
  def import_disqus_comment(topic_post, comment)
    author = find_disqus_author(comment[:author_email], comment[:author_username])
    return unless author.present?

    attrs = {
      user_id: author.id,
      topic_id: topic_post.topic_id,
      raw: comment[:cooked],
      cooked: comment[:cooked],
      # Keep the time of day: Date.parse would collapse every comment made on
      # the same day onto a single midnight timestamp.
      created_at: Time.zone.parse(comment[:created_at])
    }

    if comment[:parent_id]
      parent = @parser.posts[comment[:parent_id]]
      if parent && parent[:discourse_number]
        attrs[:reply_to_post_number] = parent[:discourse_number]
      end
    end

    # The result is kept in its own variable so that the topic's first post is
    # never overwritten, and it is only used when it actually looks like a
    # post (create_post may hand back something else on failure).
    reply = create_post(attrs, comment[:id])
    comment[:discourse_number] = reply.post_number if reply.respond_to?(:post_number)
  end

  # Returns the user with the given username (case-insensitive) or aborts the
  # process when there is none.
  def get_post_as_user(username)
    user = User.find_by_username_lower(username.downcase)
    abort("No user found named: '#{username}'") if user.nil?
    user
  end
end
2013-12-12 15:35:55 -05:00
# SAX handler that collects the threads and comments ("posts") of a Disqus
# XML export into plain hashes.
#
# After parsing:
# * #threads maps a thread's dsq:id to
#   { id:, posts: [...], link:, title:, created_at:, author_* }.
# * #posts maps a comment's dsq:id to
#   { id:, parent_id:, cooked:, created_at:, is_spam:, is_deleted:, author_* }.
#
# The comment hashes stored in a thread's :posts array are the same objects
# as the ones in #posts. Text fields are only present when the corresponding
# element occurred in the export.
class DisqusSAX < Nokogiri::XML::SAX::Document
  # Author fields recorded for both comments and threads: field name => path
  # of element names that must all be open while the text is read.
  AUTHOR_FIELDS = {
    author_email: %w[author email],
    author_name: %w[author name],
    author_username: %w[author username],
    author_anonymous: %w[author isAnonymous]
  }.freeze

  # Text fields recorded on the comment currently being parsed.
  POST_FIELDS = AUTHOR_FIELDS.merge(
    created_at: %w[createdAt],
    is_deleted: %w[isDeleted],
    is_spam: %w[isSpam]
  ).freeze

  # Text fields recorded on the thread currently being parsed.
  THREAD_FIELDS = AUTHOR_FIELDS.merge(
    link: %w[link],
    title: %w[title],
    created_at: %w[createdAt]
  ).freeze

  attr_accessor :posts, :threads, :users

  def initialize
    @inside = {}   # element name => whether that element is currently open
    @posts = {}
    @threads = {}
    @users = {}    # never filled by this class
  end

  # SAX callback for an opening tag.
  #
  # A <post> starts a new comment. A <thread> either starts a new thread (at
  # top level) or, inside a comment, attaches that comment to the referenced
  # thread. A <parent> inside a comment records the parent comment's id.
  def start_element(name, attrs = [])
    hashed = Hash[attrs]

    case name
    when 'post'
      @post = { id: hashed['dsq:id'] }
    when 'thread'
      id = hashed['dsq:id']
      if @post
        # A comment pointing at a thread that was never recorded cannot be
        # attached anywhere; skip it instead of failing on nil.
        thread = @threads[id]
        thread[:posts] << @post if thread
      else
        @thread = { id: id, posts: [] }
      end
    when 'parent'
      @post[:parent_id] = hashed['dsq:id'] if @post
    end

    @inside[name] = true
  end

  # SAX callback for a closing tag: stores the finished comment or thread.
  def end_element(name)
    case name
    when 'post'
      @posts[@post[:id]] = @post
      @post = nil
    when 'thread'
      # A <thread> closing inside a comment is only a reference; only a
      # top-level one finishes a thread definition.
      if @post.nil? && @thread
        @threads[@thread[:id]] = @thread
        @thread = nil
      end
    end

    @inside[name] = false
  end

  # SAX callback for text content: appends +str+ to every field of the
  # current comment and thread whose element path is currently open.
  def characters(str)
    POST_FIELDS.each { |field, path| record(@post, field, str, *path) }
    THREAD_FIELDS.each { |field, path| record(@thread, field, str, *path) }
  end

  # SAX callback for CDATA sections: the body of a comment's <message>.
  def cdata_block(str)
    record(@post, :cooked, str, 'message')
  end

  # Appends +str+ to target[sym] when +target+ exists and every element named
  # in +params+ is currently open. Text may arrive in several chunks, hence
  # the append rather than an assignment.
  def record(target, sym, str, *params)
    return if target.nil?
    return unless inside?(*params)

    (target[sym] ||= ''.dup) << str
  end

  # True when every element named in +params+ is currently open.
  def inside?(*params)
    params.all? { |element| @inside[element] }
  end

  # Cleans up the parsed data once the whole document has been read:
  # removes spam/deleted comments from every thread, drops threads without
  # comments and merges threads that share a title. Returns #threads.
  def normalize
    # Filter first, so that a thread left with nothing but spam or deleted
    # comments is also treated as empty below.
    @threads.each_value do |thread|
      thread[:posts].delete_if { |post| post[:is_spam] == 'true' || post[:is_deleted] == 'true' }
    end

    # Remove any threads that have no posts
    @threads.delete_if { |_id, thread| thread[:posts].empty? }

    # Merge any threads that have the same title: the first thread seen with
    # a title keeps its entry and absorbs the comments of the later ones.
    first_by_title = {}
    @threads.delete_if do |_id, thread|
      first = first_by_title[thread[:title]]
      if first.nil?
        first_by_title[thread[:title]] = thread
        false
      else
        first[:posts].concat(thread[:posts])
        true
      end
    end
  end
end
2016-08-17 15:10:25 -04:00
# Run the import when the script is loaded: build the importer (which parses
# the export in its constructor) and hand control to #perform, which is not
# defined in this file (presumably inherited from ImportScripts::Base).
importer = ImportScripts::Disqus.new
importer.perform