# getsatisfaction importer
#
# pre-req: you will get a bunch of CSV files, be sure to rename them all so
#
# - users.csv is the users table export (it may come from getsatisfaction as "Users-Table 1.csv")
# - replies.csv is the reply table export
# - topics.csv is the topics table export
#
# note, the importer will import all topics into a new category called
# 'Old Forum' and optionally close all the topics

require 'csv'
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'reverse_markdown' # gem 'reverse_markdown'

# Call it like this:
#   RAILS_ENV=production bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME
class ImportScripts::GetSatisfaction < ImportScripts::Base
  BATCH_SIZE = 1000

  # path: directory containing users.csv, topics.csv and replies.csv
  def initialize(path)
    @path = path
    super()
    @bbcode_to_md = true

    # GetSatisfaction topic id (Integer) -> original topic URL; filled by
    # import_posts and consumed by create_permalinks.
    @topic_slug = {}

    puts "loading post mappings..."
    @post_number_map = {}
    Post.pluck(:id, :post_number).each do |post_id, post_number|
      @post_number_map[post_id] = post_number
    end
  end

  # Keep the post_id -> post_number cache current as posts get created,
  # so replies imported later can resolve reply_to_post_number.
  def created_post(post)
    @post_number_map[post.id] = post.post_number
    super
  end

  def execute
    c = Category.find_by(name: 'Old Forum') ||
        Category.create!(name: 'Old Forum', user: Discourse.system_user)

    import_users
    import_posts(c)
    create_permalinks

    # uncomment if you want to close all the topics
    # Topic.where(category: c).update_all(closed: true)
  end

  # Wraps a parsed CSV header row so that each column name becomes a reader
  # method returning that column's value from the currently loaded data row.
  class RowResolver
    def load(row)
      @row = row
    end

    # Build an anonymous subclass per CSV file so column reader methods from
    # one file don't leak into another file with different headers.
    def self.create(cols)
      Class.new(RowResolver).new(cols)
    end

    def initialize(cols)
      cols.each_with_index do |col, idx|
        self.class.send(:define_method, col) do
          @row[idx]
        end
      end
    end
  end

  # Flush the accumulated user hashes through the base importer, then empty
  # the buffer so the caller can keep reusing the same array.
  def load_user_batch!(users, offset, total)
    if users.length > 0
      create_users(users, offset: offset, total: total) do |user|
        user
      end
      users.clear
    end
  end

  # Parse "#{@path}/#{name}.csv" (semicolon separated). Physical lines are
  # accumulated until CSV.parse accepts them, since quoted values may contain
  # embedded newlines; rows that never become valid CSV are reported and
  # skipped. The first valid row is treated as the header and used to build a
  # RowResolver; every following row is loaded into it and yielded.
  def csv_parse(name)
    filename = "#{@path}/#{name}.csv"
    first = true
    row = nil

    current_row = ""
    # NOTE(review): this counter is printed in the error report but is never
    # incremented anywhere in this file — it always reads 0. Looks like the
    # quote-balancing logic that fed it was lost; confirm against history.
    double_quote_count = 0

    # In case of Excel export file, I converted it to CSV and used:
    # CSV.open(filename, encoding:'iso-8859-1:utf-8').each do |raw|
    File.open(filename).each_line do |line|
      line.strip!

      current_row << "\n" unless current_row.empty?
      current_row << line

      raw =
        begin
          CSV.parse(current_row, col_sep: ";")
        rescue CSV::MalformedCSVError => e
          puts e.message
          puts "*" * 100
          puts "Bad row skipped, line is: #{line}"
          puts
          puts current_row
          puts
          puts "double quote count is : #{double_quote_count}"
          puts "*" * 100

          current_row = ""
          double_quote_count = 0

          next
        end[0]

      if first
        row = RowResolver.create(raw)
        current_row = ""
        double_quote_count = 0
        first = false
        next
      end

      row.load(raw)
      yield row

      current_row = ""
      double_quote_count = 0
    end
  end

  # Number of data rows (physical line count minus the header line).
  def total_rows(table)
    # In case of Excel export file, I converted it to CSV and used:
    # CSV.foreach("#{@path}/#{table}.csv", encoding:'iso-8859-1:utf-8').inject(0) {|c, line| c+1} - 1
    File.foreach("#{@path}/#{table}.csv").inject(0) { |c, line| c + 1 } - 1
  end

  # Import users.csv in batches of BATCH_SIZE, skipping suspended accounts.
  # Expects the export to provide user_id, email, real_name, nick, m_created
  # and suspended_at columns — verify against your CSV headers.
  def import_users
    puts "", "creating users"

    count = 0
    users = []

    total = total_rows("users")

    csv_parse("users") do |row|
      if row.suspended_at
        puts "skipping suspended user"
        p row
        next
      end

      id = row.user_id
      email = row.email

      # fake it: invent a unique address when the export has none, so the
      # record can still be imported
      if row.email.blank? || row.email !~ /@/
        email = SecureRandom.hex << "@domain.com"
      end

      name = row.real_name
      username = row.nick
      created_at = DateTime.parse(row.m_created)

      username = name if username == "NULL"
      username = email.split("@")[0] if username.blank?
      name = email.split("@")[0] if name.blank?

      users << {
        id: id,
        email: email,
        name: name,
        username: username,
        created_at: created_at,
        active: false
      }

      count += 1
      if count % BATCH_SIZE == 0
        load_user_batch! users, count - users.length, total
      end
    end

    load_user_batch! users, count, total
  end

  # Import categories.csv through the base importer. Not called by execute;
  # kept for exports that ship a category table.
  def import_categories
    rows = []
    csv_parse("categories") do |row|
      rows << { id: row.id, name: row.name, description: row.description }
    end

    create_categories(rows) do |row|
      row
    end
  end

  # Convert a GetSatisfaction HTML body to markdown. <code>/<pre> contents
  # are hoisted out behind random placeholders first so the whitespace and
  # HTML-entity rewriting below can't mangle them, then restored as fenced
  # code blocks before the markdown conversion.
  def normalize_raw!(raw)
    return "<missing>" if raw.nil?

    raw = raw.dup

    # hoist code
    hoisted = {}
    raw.gsub!(/(<pre>\s*)?<code>(.*?)<\/code>(\s*<\/pre>)?/mi) do
      code = $2
      hoist = SecureRandom.hex
      # tidy code, wow, this is impressively crazy
      code.gsub!(/  (\s*)/, "\n\\1")
      code.gsub!(/^\s*\n$/, "\n")
      code.gsub!(/\n+/m, "\n")
      code.strip!
      hoisted[hoist] = code
      hoist
    end

    # impressive seems to be using tripple space as a <p> unless hoisted
    # in this case double space works best ... so odd
    raw.gsub!("  ", "\n\n")

    hoisted.each do |hoist, code|
      raw.gsub!(hoist, "\n```\n" << code << "\n```\n")
    end

    raw = CGI.unescapeHTML(raw)
    raw = ReverseMarkdown.convert(raw)

    raw
  end

  # Push a batch of post hashes through the base importer. The first post
  # seen for a topic becomes the topic-starting post (gets title/category);
  # later ones become replies, resolving reply_to via @post_number_map.
  # Posts whose topic is missing or deleted are skipped.
  def import_post_batch!(posts, topics, offset, total)
    create_posts(posts, total: total, offset: offset) do |post|
      mapped = {}

      mapped[:id] = post[:id]
      mapped[:user_id] = user_id_from_imported_user_id(post[:user_id]) || -1
      mapped[:raw] = post[:body]
      mapped[:created_at] = post[:created_at]

      topic = topics[post[:topic_id]]

      unless topic
        p "MISSING TOPIC #{post[:topic_id]}"
        p post
        next
      end

      unless topic[:post_id]
        mapped[:title] = post[:title] || "Topic title missing"
        topic[:post_id] = post[:id]
        mapped[:category] = post[:category]
      else
        parent = topic_lookup_from_imported_post_id(topic[:post_id])
        next unless parent

        mapped[:topic_id] = parent[:topic_id]

        reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
        if reply_to_post_id
          reply_to_post_number = @post_number_map[reply_to_post_id]
          if reply_to_post_number && reply_to_post_number > 1
            mapped[:reply_to_post_number] = reply_to_post_number
          end
        end
      end

      next if topic[:deleted] || post[:deleted]

      mapped
    end

    posts.clear
  end

  # Read topics.csv into a topic map (also recording original URLs for
  # permalinks), then stream replies.csv and import everything in batches.
  def import_posts(category)
    puts "", "creating topics and posts"

    topic_map = {}

    csv_parse("topics") do |topic|
      @topic_slug[topic.id.to_i] = topic.url
      topic_map[topic.id] = {
        id: topic.id,
        topic_id: topic.id,
        title: topic.subject,
        deleted: topic.removed == "1",
        closed: true,
        body: normalize_raw!(topic.additional_detail || topic.subject || "<missing>"),
        created_at: DateTime.parse(topic.created_at),
        user_id: topic.UserId,
        category: category.name
      }
    end

    total = total_rows("replies")

    posts = []
    count = 0

    # a bit lazy: seed the queue with the topic-starting posts themselves
    topic_map.each do |_, topic|
      posts << topic if topic[:body]
    end

    csv_parse("replies") do |row|
      unless row.created_at
        puts "NO CREATION DATE FOR POST"
        p row
        next
      end

      row = {
        id: row.id,
        topic_id: row.topic_id,
        reply_id: row.parent_id,
        user_id: row.UserId,
        body: normalize_raw!(row.content),
        created_at: DateTime.parse(row.created_at)
      }
      posts << row
      count += 1

      if posts.length > 0 && posts.length % BATCH_SIZE == 0
        import_post_batch!(posts, topic_map, count - posts.length, total)
      end
    end

    import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0
  end

  # Create a Permalink from each imported topic's original GetSatisfaction
  # URL (path only) to the new Discourse topic.
  def create_permalinks
    puts '', 'Creating Permalinks...', ''

    Topic.listable_topics.find_each do |topic|
      tcf = topic.first_post.custom_fields
      if tcf && tcf["import_id"]
        slug = @topic_slug[tcf["import_id"].to_i]
        # topics that predate this import run have no recorded slug
        next unless slug

        # TODO: replace "http://community.example.com/" with the URL of your community
        slug = slug.gsub("http://community.example.com/", "")
        Permalink.create(url: slug, topic_id: topic.id)
      end
    end
  end
end
# Entry point: require a readable export directory as the sole argument,
# then run the importer (perform is provided by ImportScripts::Base).
unless ARGV[0] && Dir.exist?(ARGV[0])
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME", ""
  exit 1
end

ImportScripts::GetSatisfaction.new(ARGV[0]).perform