mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-27 09:36:19 -05:00
a new bespoke importer, feel free to borrow ideas
This commit is contained in:
parent
1d4daca75d
commit
f6b20ada93
1 changed files with 282 additions and 0 deletions
282
script/import_scripts/bespoke_1.rb
Normal file
282
script/import_scripts/bespoke_1.rb
Normal file
|
@ -0,0 +1,282 @@
|
|||
# bespoke importer for a customer, feel free to borrow ideas
|
||||
#
|
||||
#
|
||||
require 'csv'
|
||||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||
|
||||
# Call it like this:
|
||||
# RAILS_ENV=production bundle exec ruby script/import_scripts/bespoke_1.rb DIRNAME
|
||||
class ImportScripts::Bespoke < ImportScripts::Base
|
||||
|
||||
BATCH_SIZE = 1000
|
||||
|
||||
# Sets up the importer.
#
# path - directory holding the CSV exports; csv_parse and total_rows read
#        "#{path}/<name>.csv" from it.
#
# Besides storing the path and calling the superclass constructor, this
# preloads a lookup of every existing post id => post_number so replies can be
# linked to the right post number while importing.
def initialize(path)
  @path = path
  super()
  # NOTE(review): presumably read by ImportScripts::Base to convert BBCode to
  # Markdown; confirm against base.rb.
  @bbcode_to_md = true

  puts "loading post mappings..."
  @post_number_map = Post.pluck(:id, :post_number).each_with_object({}) do |(post_id, number), map|
    map[post_id] = number
  end
end
|
||||
|
||||
# Records the post_number of a freshly created post in @post_number_map (the
# same map import_post_batch! consults to fill in reply_to_post_number), then
# defers to the superclass implementation with the same argument.
# NOTE(review): presumably invoked by ImportScripts::Base after each post is
# created; confirm against base.rb.
def created_post(post)
  @post_number_map.store(post.id, post.post_number)
  super
end
|
||||
|
||||
# Runs the import stages in a fixed order: users first, then categories, then
# topics and posts. Returns whatever the last stage returns.
def execute
  stages = [:import_users, :import_categories, :import_posts]
  stages.map { |stage| send(stage) }.last
end
|
||||
|
||||
# Gives named access to positional CSV rows.
#
# RowResolver.create(header_columns) returns an object with one reader method
# per column name; after #load(row) each reader returns the value found at that
# column's position in the loaded row.
class RowResolver
  # Builds a resolver for the given column names. A fresh anonymous subclass is
  # used per call so readers defined for one CSV file do not appear on
  # resolvers created for another.
  def self.create(cols)
    resolver_class = Class.new(RowResolver)
    resolver_class.new(cols)
  end

  # Defines a reader for every column name on this instance's (anonymous)
  # class. Each reader indexes into the most recently loaded row.
  def initialize(cols)
    klass = self.class
    cols.each.with_index do |column, position|
      klass.send(:define_method, column) { @row[position] }
    end
  end

  # Points the resolver at a new row (an array of field values).
  def load(row)
    @row = row
  end
end
|
||||
|
||||
# Hands the accumulated user hashes to create_users and then empties the
# array in place so the caller can keep reusing it.
#
# users  - array of user attribute hashes (mutated: cleared after import)
# offset - passed through to create_users as :offset
# total  - passed through to create_users as :total
#
# Does nothing (and returns nil) when the array is empty.
def load_user_batch!(users, offset, total)
  return if users.empty?

  # The hashes are already in their final shape, so the mapping block is the
  # identity.
  create_users(users, offset: offset, total: total) { |user| user }
  users.clear
end
|
||||
|
||||
# Streams "#{@path}/#{name}.csv" and yields one RowResolver per data record.
#
# The first successfully parsed record is treated as the header and is used to
# build the RowResolver; it is not yielded. The same resolver object is reused
# (reloaded) for every subsequent record, so callers must copy out the values
# they need before the block returns.
#
# The export is not well-formed CSV, so records are reassembled by hand:
# physical lines are accumulated until the running count of double quotes is
# even, which means no quoted field is still open. Lines are stripped before
# being joined with "\n".
#
# Records that CSV cannot parse are reported on stdout and skipped. Blank
# records are skipped silently.
def csv_parse(name)
  filename = "#{@path}/#{name}.csv"
  row = nil

  current_row = "".dup
  double_quote_count = 0

  # File.foreach closes the file when iteration ends or the block raises.
  File.foreach(filename) do |line|
    # escaping is mental here: a backslash-escaped quote becomes the CSV-style
    # doubled quote, any other backslash-escaped character loses its backslash
    line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
    line.strip!

    current_row << "\n" unless current_row.empty?
    current_row << line

    double_quote_count += line.count('"')

    # An odd count means a quoted field continues on the next physical line.
    next if double_quote_count.odd?

    raw = begin
      CSV.parse(current_row).first
    rescue CSV::MalformedCSVError => e
      puts e.message
      puts "*" * 100
      puts "Bad row skipped, line is: #{line}"
      puts
      puts current_row
      puts
      puts "double quote count is : #{double_quote_count}"
      puts "*" * 100
      nil
    end

    # Start a fresh record regardless of how parsing went.
    current_row = "".dup
    double_quote_count = 0

    # nil here means either a malformed record (already reported) or a blank
    # line, for which CSV.parse returns no rows at all.
    next if raw.nil?

    if row.nil?
      # Header record: build the resolver, nothing to yield yet.
      row = RowResolver.create(raw)
      next
    end

    row.load(raw)
    yield row
  end
end
|
||||
|
||||
# Returns the number of physical lines in "#{@path}/#{table}.csv" minus one
# for the header line. This counts lines, not logical CSV records, so a record
# with a multi-line quoted field is counted once per line. An empty file
# yields -1.
def total_rows(table)
  line_count = File.foreach("#{@path}/#{table}.csv").count
  line_count - 1
end
|
||||
|
||||
# Imports users from users.csv in batches of BATCH_SIZE.
#
# Columns read from each row: id, email, display_name, key_custom, dcreate.
# - A blank email, or one without an "@", is replaced with a random
#   "<hex>@domain.com" placeholder.
# - key_custom is used as the username unless it is the literal string "NULL",
#   in which case the display name is used instead.
# - dcreate is parsed with DateTime.parse and will raise if it is missing or
#   unparseable.
#
# Every batch is handed to load_user_batch! together with the number of users
# already handed over before it (the offset) and the total line-based row
# count.
def import_users
  puts "", "creating users"

  count = 0
  users = []

  total = total_rows("users")

  csv_parse("users") do |row|
    email = row.email

    # fake it
    email = SecureRandom.hex << "@domain.com" if email.blank? || email !~ /@/

    name = row.display_name
    username = row.key_custom
    username = name if username == "NULL"

    users << {
      id: row.id,
      email: email,
      name: name,
      username: username,
      created_at: DateTime.parse(row.dcreate)
    }

    count += 1
    load_user_batch!(users, count - users.length, total) if count % BATCH_SIZE == 0
  end

  # Flush the final partial batch. The offset is the number of users already
  # handed over, i.e. everything counted so far minus what is still pending,
  # the same formula the full batches use.
  load_user_batch!(users, count - users.length, total)
end
|
||||
|
||||
# Reads categories.csv (columns id, name, description) into plain hashes and
# passes the whole list to create_categories in a single call. The hashes are
# already in their final shape, so the mapping block is the identity.
def import_categories
  categories = []

  csv_parse("categories") do |category|
    categories << {
      id: category.id,
      name: category.name,
      description: category.description
    }
  end

  create_categories(categories) { |category| category }
end
|
||||
|
||||
# Strips markup the target forum should not see from a post body, in place.
#
# Removes [color=...] / [/color] tags (keeping the text between them) and
# drops [signature]...[/signature] blocks entirely, including signatures that
# span several lines. Each signature block is matched on its own, so text
# between two signatures is preserved.
#
# raw - the post body; mutated. May be nil (an empty CSV field), in which case
#       nil is returned untouched.
#
# Returns the same string object that was passed in.
def normalize_raw!(raw)
  return raw if raw.nil?

  # purple and #1223f3
  raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
  raw.gsub!(/\[\/color\]/i, "")
  # Non-greedy + multiline: stop at the first closing tag and let "." cross
  # line breaks.
  raw.gsub!(/\[signature\].*?\[\/signature\]/im, "")
  raw
end
|
||||
|
||||
# Feeds one batch of post hashes to create_posts and then empties the batch.
#
# posts  - array of post hashes built by import_posts (mutated: cleared)
# topics - hash of imported topic id => topic hash; the :post_id key of a
#          topic hash is set here to the imported id of the first post seen
#          for that topic
# offset - passed through to create_posts as :offset
# total  - passed through to create_posts as :total
#
# For each post the block returns either the attributes to create or nil to
# skip it. The first post seen for a topic becomes the topic itself (it carries
# the title and category name); later posts are attached to the topic that
# first post created and, when resolvable, get reply_to_post_number.
#
# Skipped (nil) posts:
# - posts whose topic id is not present in +topics+
# - replies whose topic's first post cannot be looked up
#   NOTE(review): this happens when the first post itself was skipped, e.g. it
#   was flagged deleted; confirm that dropping such replies is acceptable
# - posts flagged deleted, or belonging to a topic flagged deleted
def import_post_batch!(posts, topics, offset, total)
  create_posts(posts, total: total, offset: offset) do |post|
    topic = topics[post[:topic_id]]
    next nil if topic.nil?

    mapped = {
      id: post[:id],
      user_id: user_id_from_imported_user_id(post[:user_id]) || -1,
      raw: post[:body],
      created_at: post[:created_at]
    }

    if topic[:post_id]
      parent = topic_lookup_from_imported_post_id(topic[:post_id])
      next nil if parent.nil?

      mapped[:topic_id] = parent[:topic_id]

      reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
      reply_to_post_number = @post_number_map[reply_to_post_id] if reply_to_post_id
      mapped[:reply_to_post_number] = reply_to_post_number if reply_to_post_number
    else
      mapped[:category] = category_from_imported_category_id(topic[:category_id]).try(:name)
      mapped[:title] = post[:title]
      # Remember which imported post opened the topic, even if it ends up
      # skipped below.
      topic[:post_id] = post[:id]
    end

    # `next`, not `return`: a `return` inside this block would leave
    # import_post_batch! itself, abandoning the rest of the batch and skipping
    # posts.clear.
    next nil if topic[:deleted] || post[:deleted]

    mapped
  end

  posts.clear
end
|
||||
|
||||
# Imports topics and posts.
#
# topics.csv is read fully into a hash keyed by topic id (category id plus the
# deleted/locked/pinned flags, where a flag is set when the column's integer
# value is 1). posts.csv is then streamed, turned into plain hashes with the
# body cleaned by normalize_raw!, and handed to import_post_batch! every time
# BATCH_SIZE posts are pending, with a final call for any remainder.
#
# Posts without a dcreate value are reported on stdout and skipped; they are
# not counted towards the batch offset.
def import_posts
  puts "", "creating topics and posts"

  flag = ->(value) { value.to_i == 1 }

  topic_map = {}
  csv_parse("topics") do |topic|
    topic_id = topic.id
    topic_map[topic_id] = {
      id: topic_id,
      category_id: topic.forum_category_id,
      deleted: flag.call(topic.is_deleted),
      locked: flag.call(topic.is_locked),
      pinned: flag.call(topic.is_pinned)
    }
  end

  total = total_rows("posts")

  pending = []
  seen = 0

  csv_parse("posts") do |row|
    created = row.dcreate

    unless created
      puts "NO CREATION DATE FOR POST"
      p row
      next
    end

    pending << {
      id: row.id,
      topic_id: row.forum_topic_id,
      reply_id: row.reply_id,
      user_id: row.user_id,
      title: row.title,
      body: normalize_raw!(row.body),
      deleted: flag.call(row.is_deleted),
      created_at: DateTime.parse(created)
    }
    seen += 1

    # pending is never empty here, so a zero remainder means a full batch.
    if (pending.length % BATCH_SIZE).zero?
      import_post_batch!(pending, topic_map, seen - pending.length, total)
    end
  end

  import_post_batch!(pending, topic_map, seen - pending.length, total) unless pending.empty?

  # NOTE(review): this terminates the process as soon as posts are imported, so
  # nothing the caller would run after import_posts gets executed. Looks like a
  # leftover; confirm before removing.
  exit
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
# Script entry point: the first command-line argument must be an existing
# directory containing the CSV exports. Otherwise print usage and exit with
# status 1.
import_dir = ARGV[0]

if import_dir.nil? || !Dir.exist?(import_dir)
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/bespoke_1.rb DIRNAME", ""
  exit 1
end

ImportScripts::Bespoke.new(import_dir).perform
|
Loading…
Reference in a new issue