2014-10-31 15:19:27 +11:00
# bespoke importer for a customer, feel free to borrow ideas
#
#
require 'csv'
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/bespoke_1.rb
class ImportScripts :: Bespoke < ImportScripts :: Base
BATCH_SIZE = 1000
def initialize ( path )
@path = path
super ( )
@bbcode_to_md = true
puts " loading post mappings... "
@post_number_map = { }
Post . pluck ( :id , :post_number ) . each do | post_id , post_number |
@post_number_map [ post_id ] = post_number
end
end
def created_post ( post )
@post_number_map [ post . id ] = post . post_number
super
end
def execute
2014-11-20 14:53:30 +11:00
import_users
import_categories
2014-10-31 15:19:27 +11:00
import_posts
end
class RowResolver
def load ( row )
@row = row
end
def self . create ( cols )
Class . new ( RowResolver ) . new ( cols )
end
def initialize ( cols )
cols . each_with_index do | col , idx |
self . class . send ( :define_method , col ) do
@row [ idx ]
end
end
end
end
def load_user_batch! ( users , offset , total )
if users . length > 0
create_users ( users , offset : offset , total : total ) do | user |
user
end
users . clear
end
end
def csv_parse ( name )
filename = " #{ @path } / #{ name } .csv "
first = true
row = nil
current_row = " " ;
double_quote_count = 0
File . open ( filename ) . each_line do | line |
# escaping is mental here
line . gsub! ( / \\ (.{1}) / ) { | m | m [ - 1 ] == '"' ? '""' : m [ - 1 ] }
line . strip!
current_row << " \n " unless current_row . empty?
current_row << line
double_quote_count += line . scan ( '"' ) . count
if double_quote_count % 2 == 1
next
end
raw = begin
CSV . parse ( current_row )
rescue CSV :: MalformedCSVError = > e
puts e . message
puts " * " * 100
puts " Bad row skipped, line is: #{ line } "
puts
puts current_row
puts
puts " double quote count is : #{ double_quote_count } "
puts " * " * 100
current_row = " "
double_quote_count = 0
next
end [ 0 ]
if first
row = RowResolver . create ( raw )
current_row = " "
double_quote_count = 0
first = false
next
end
row . load ( raw )
yield row
current_row = " "
double_quote_count = 0
end
end
def total_rows ( table )
File . foreach ( " #{ @path } / #{ table } .csv " ) . inject ( 0 ) { | c , line | c + 1 } - 1
end
def import_users
puts " " , " creating users "
count = 0
users = [ ]
total = total_rows ( " users " )
csv_parse ( " users " ) do | row |
id = row . id
email = row . email
# fake it
if row . email . blank? || row . email !~ / @ /
email = SecureRandom . hex << " @domain.com "
end
name = row . display_name
username = row . key_custom
created_at = DateTime . parse ( row . dcreate )
username = name if username == " NULL "
2014-11-20 14:53:30 +11:00
username = email . split ( " @ " ) [ 0 ] if username . blank?
name = email . split ( " @ " ) [ 0 ] if name . blank?
2014-10-31 15:19:27 +11:00
users << {
id : id ,
email : email ,
name : name ,
username : username ,
created_at : created_at
}
count += 1
if count % BATCH_SIZE == 0
load_user_batch! users , count - users . length , total
end
end
load_user_batch! users , count , total
end
def import_categories
rows = [ ]
csv_parse ( " categories " ) do | row |
rows << { id : row . id , name : row . name , description : row . description }
end
create_categories ( rows ) do | row |
row
end
end
def normalize_raw! ( raw )
# purple and #1223f3
raw . gsub! ( / \ [color=[ # a-z0-9]+ \ ] /i , " " )
raw . gsub! ( / \ [ \/ color \ ] /i , " " )
2014-11-20 14:53:30 +11:00
raw . gsub! ( / \ [signature \ ].+ \ [ \/ signature \ ] /im , " " )
2014-10-31 15:19:27 +11:00
raw
end
def import_post_batch! ( posts , topics , offset , total )
create_posts ( posts , total : total , offset : offset ) do | post |
mapped = { }
mapped [ :id ] = post [ :id ]
mapped [ :user_id ] = user_id_from_imported_user_id ( post [ :user_id ] ) || - 1
mapped [ :raw ] = post [ :body ]
mapped [ :created_at ] = post [ :created_at ]
topic = topics [ post [ :topic_id ] ]
unless topic [ :post_id ]
mapped [ :category ] = category_from_imported_category_id ( topic [ :category_id ] ) . try ( :name )
mapped [ :title ] = post [ :title ]
topic [ :post_id ] = post [ :id ]
else
parent = topic_lookup_from_imported_post_id ( topic [ :post_id ] )
2014-11-01 18:25:03 +11:00
next unless parent
2014-10-31 15:19:27 +11:00
mapped [ :topic_id ] = parent [ :topic_id ]
reply_to_post_id = post_id_from_imported_post_id ( post [ :reply_id ] )
if reply_to_post_id
reply_to_post_number = @post_number_map [ reply_to_post_id ]
2014-11-01 18:25:03 +11:00
if reply_to_post_number && reply_to_post_number > 1
2014-10-31 15:19:27 +11:00
mapped [ :reply_to_post_number ] = reply_to_post_number
end
end
end
2014-11-01 18:25:03 +11:00
next if topic [ :deleted ] or post [ :deleted ]
2014-10-31 15:19:27 +11:00
mapped
end
posts . clear
end
def import_posts
puts " " , " creating topics and posts "
topic_map = { }
csv_parse ( " topics " ) do | topic |
topic_map [ topic . id ] = {
id : topic . id ,
category_id : topic . forum_category_id ,
deleted : topic . is_deleted . to_i == 1 ,
locked : topic . is_locked . to_i == 1 ,
pinned : topic . is_pinned . to_i == 1
}
end
total = total_rows ( " posts " )
posts = [ ]
count = 0
csv_parse ( " posts " ) do | row |
unless row . dcreate
puts " NO CREATION DATE FOR POST "
p row
next
end
row = {
id : row . id ,
topic_id : row . forum_topic_id ,
reply_id : row . reply_id ,
user_id : row . user_id ,
title : row . title ,
body : normalize_raw! ( row . body ) ,
deleted : row . is_deleted . to_i == 1 ,
created_at : DateTime . parse ( row . dcreate )
}
posts << row
count += 1
if posts . length > 0 && posts . length % BATCH_SIZE == 0
import_post_batch! ( posts , topic_map , count - posts . length , total )
end
end
import_post_batch! ( posts , topic_map , count - posts . length , total ) if posts . length > 0
exit
end
end
unless ARGV [ 0 ] && Dir . exist? ( ARGV [ 0 ] )
puts " " , " Usage: " , " " , " bundle exec ruby script/import_scripts/bespoke_1.rb DIRNAME " , " "
exit 1
end
ImportScripts :: Bespoke . new ( ARGV [ 0 ] ) . perform