Import scripts for Drupal and Kunena/Joomla. A new base class for import scripts.

This commit is contained in:
Neil Lalonde 2014-05-30 15:09:58 -04:00
parent bf749f127c
commit daceea73a8
4 changed files with 695 additions and 178 deletions

View file

@ -0,0 +1,253 @@
module ImportScripts; end
class ImportScripts::Base
# Loads the Rails environment and builds the in-memory lookup tables used
# by the import helpers:
#
#   @existing_users - import_id custom field value => Discourse user id
#   @categories     - import_id custom field value => Category record
#   @posts          - import_id custom field value => Discourse post id
#   @topic_lookup   - Discourse post id => {topic_id:, post_number:}
#
# @failed_users collects the attribute hashes of users that could not be
# created.
def initialize
  require File.expand_path(File.dirname(__FILE__) + "/../../config/environment")

  @failed_users = []

  user_rows = UserCustomField.where(name: 'import_id').pluck(:user_id, :value)
  @existing_users = user_rows.each_with_object({}) do |(user_id, import_id), lookup|
    lookup[import_id] = user_id
  end

  category_rows = CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value)
  @categories = category_rows.each_with_object({}) do |(category_id, import_id), lookup|
    lookup[import_id] = Category.find(category_id.to_i)
  end

  post_rows = PostCustomField.where(name: 'import_id').pluck(:post_id, :value)
  @posts = post_rows.each_with_object({}) do |(post_id, import_id), lookup|
    lookup[import_id] = post_id
  end

  post_positions = Post.pluck(:id, :topic_id, :post_number)
  @topic_lookup = post_positions.each_with_object({}) do |(post_id, topic_id, post_number), lookup|
    lookup[post_id] = {topic_id: topic_id, post_number: post_number}
  end
end
# Entry point of an import run. Sets the Rails log level to 3, clears the
# email_domains_blacklist site setting, disables the rate limiter, then
# calls #execute followed by #update_bumped_at. The rate limiter is switched
# back on in the ensure clause, whether or not the import raised.
def perform
  begin
    Rails.logger.level = 3 # :error, so that we don't create log files that are many GB
    SiteSetting.email_domains_blacklist = ''
    RateLimiter.disable

    execute
    update_bumped_at
  ensure
    RateLimiter.enable
  end
end
# Hook for subclasses: the importer's actual work goes here. An
# implementation is expected to call create_users, create_categories and
# create_posts. The base version only raises NotImplementedError.
def execute
  raise(NotImplementedError)
end
# Maps the id of a post in the source system to the id of the Discourse
# post stored for it in @posts. The id is looked up as given first and then
# in its String form. Returns nil when neither key is known.
def post_id_from_imported_post_id(import_id)
  found = @posts[import_id]
  return found if found

  @posts[import_id.to_s]
end
# Returns the @topic_lookup entry (a hash with :topic_id and :post_number)
# for the Discourse post that corresponds to the given source post id, or
# nil when that source post is not known.
def topic_lookup_from_imported_post_id(import_id)
  post_id = post_id_from_imported_post_id(import_id)
  return nil unless post_id

  @topic_lookup[post_id]
end
# Maps the id of a user in the source system to the Discourse user id held
# in @existing_users. The id is looked up as given first and then in its
# String form. Returns nil when neither key is known.
def user_id_from_imported_user_id(import_id)
  found = @existing_users[import_id]
  return found if found

  @existing_users[import_id.to_s]
end
# Returns the value stored in @categories for the given source category id.
# The table holds Category records, not bare ids. The id is looked up as
# given first and then in its String form. Returns nil when neither key is
# known.
def category_from_imported_category_id(import_id)
  found = @categories[import_id]
  return found if found

  @categories[import_id.to_s]
end
# Creates an admin account and returns the new User.
#
# opts - :email and :username override the built-in defaults.
#
# The account gets a random password, admin rights, the :regular trust
# level, and all of its email tokens marked as confirmed. Raises if the
# user cannot be saved (save! is used).
def create_admin(opts={})
  User.new.tap do |admin|
    admin.email = opts[:email] || "sam.saffron@gmail.com"
    admin.username = opts[:username] || "sam"
    admin.password = SecureRandom.uuid
    admin.save!

    admin.grant_admin!
    admin.change_trust_level!(:regular)
    admin.email_tokens.update_all(confirmed: true)
  end
end
# Iterate through a list of user records to be imported.
# Takes a collection, and yields to the block for each element.
# Block should return a hash with the attributes for the User model.
# Required fields are :id and :email, where :id is the id of the
# user in the original datasource. The given id will not be used to
# create the Discourse user record.
#
# Users whose source id is already known are skipped. Users with a blank
# email, or that fail validation, are appended to @failed_users. Every user
# that is created is recorded in @existing_users under the String form of
# its source id, so later lookups by source id resolve it.
def create_users(results)
  puts "creating users"

  users_created = 0
  users_skipped = 0

  results.each do |result|
    u = yield(result)

    # Read the source id before handing the hash to create_user, so the
    # bookkeeping and messages below never depend on whether that call
    # modifies the hash it is given.
    import_id = u[:id]

    if user_id_from_imported_user_id(import_id)
      users_skipped += 1
    elsif u[:email].present?
      new_user = create_user(u, import_id)
      if new_user.valid?
        @existing_users[import_id.to_s] = new_user.id
        users_created += 1
      else
        @failed_users << u
        puts "Failed to create user id #{import_id} #{new_user.email}: #{new_user.errors.full_messages}"
      end
    else
      @failed_users << u
      puts "Skipping user id #{import_id} because email is blank"
    end

    print_status users_created + users_skipped + @failed_users.length, results.size
  end

  puts ''
  puts "created: #{users_created} users"
  puts " failed: #{@failed_users.size}" if @failed_users.size > 0
end
# Creates (or finds) the Discourse user for one source user.
#
# opts      - attribute hash for User.new; must contain :email. :id is
#             ignored. The caller's hash is not modified.
# import_id - id of the user in the source system; stored in the
#             "import_id" custom field.
#
# If a user with the same email and username already carries this
# import_id, that user is returned unchanged. Otherwise a new user is built
# with a suggested name and username; the username supplied by the source
# (when present) is kept in the "import_username" custom field. If saving
# fails and a user with the same email exists, that user is tagged with the
# import_id and returned instead.
#
# Returns a User. If there was an error creating the user and no fallback
# user was found, the returned record's errors hold the messages.
def create_user(opts, import_id)
  # Work on a copy so the caller's hash keeps its :id and original values.
  opts = opts.dup
  opts.delete(:id)

  # Remember the source username before it is replaced by a suggestion.
  original_username = opts[:username]

  existing = User.where(email: opts[:email].downcase, username: original_username).first
  return existing if existing && existing.custom_fields["import_id"].to_i == import_id.to_i

  opts[:name] = User.suggest_name(opts[:name] || opts[:email])
  opts[:username] = UserNameSuggester.suggest(original_username.presence || opts[:name] || opts[:email])
  opts[:email] = opts[:email].downcase
  opts[:trust_level] = TrustLevel.levels[:basic] unless opts[:trust_level]

  u = User.new(opts)
  u.custom_fields["import_id"] = import_id
  u.custom_fields["import_username"] = original_username if original_username.present?

  begin
    u.save!
  rescue
    # try based on email
    existing = User.find_by(email: opts[:email])
    if existing
      existing.custom_fields["import_id"] = import_id
      existing.save!
      u = existing
    end
  end

  u # If there was an error creating the user, u.errors has the messages
end
# Looks up the first UserCustomField named "import_id" whose value is the
# String form of the given id and returns its user, or nil when no such
# custom field exists.
def find_user_by_import_id(import_id)
  custom_field = UserCustomField.where(name: 'import_id', value: import_id.to_s).first
  custom_field.try(:user)
end
# Iterates through a collection to create categories.
# The block should return a hash with attributes for the new category.
# Required fields are :id and :name, where :id is the id of the
# category in the original datasource. The given id will not be used to
# create the Discourse category record.
# Optional attributes are position, description, and parent_category_id.
#
# Each resulting category is remembered in @categories under the String
# form of its source id. The entries loaded from custom field values are
# Strings as well, so category_from_imported_category_id finds the category
# whether it is asked with an Integer or a String id.
def create_categories(results)
  puts "creating categories"

  results.each do |c|
    params = yield(c)
    puts " #{params[:name]}"

    new_category = create_category(params, params[:id])
    @categories[params[:id].to_s] = new_category
  end
end
# Returns the category already known for import_id, or creates one.
#
# opts      - hash read for :name, :position, :description and
#             :parent_category_id; other keys are ignored.
# import_id - id of the category in the source system, or nil. When given,
#             it is stored in the "import_id" custom field.
#
# New categories are created with user_id -1. Raises if the category cannot
# be saved (save! is used).
def create_category(opts, import_id)
  already_imported = category_from_imported_category_id(import_id)
  return already_imported if already_imported

  attributes = {
    name: opts[:name],
    user_id: -1,
    position: opts[:position],
    description: opts[:description],
    parent_category_id: opts[:parent_category_id]
  }

  Category.new(attributes).tap do |category|
    category.custom_fields["import_id"] = import_id if import_id
    category.save!
  end
end
# Imports a collection of posts; both topics and replies are handled.
#
# The block receives each element and returns the attribute hash that is
# passed on to create_post, or nil to skip the element. The hash must
# carry :id (the source post id); it is removed before the post is created.
# Topics should give :title and :category, replies should give :topic_id
# (see topic_lookup_from_imported_post_id).
#
# opts - :total  overrides results.size in the progress output
#        :offset is added to the running count in the progress output
#
# Posts whose source id is already known, and posts whose creation raises,
# are counted as skipped. Returns [created, skipped].
def create_posts(results, opts={})
  counts = {created: 0, skipped: 0}
  total = opts[:total] || results.size

  results.each do |row|
    attrs = yield(row)

    # block returns nil to skip a post
    if attrs.nil?
      counts[:skipped] += 1
      next
    end

    import_id = attrs.delete(:id).to_s

    if post_id_from_imported_post_id(import_id)
      counts[:skipped] += 1 # already imported this post
    else
      begin
        post = create_post(attrs)
        @posts[import_id] = post.id
        @topic_lookup[post.id] = {post_number: post.post_number, topic_id: post.topic_id}
        counts[:created] += 1
      rescue => error
        counts[:skipped] += 1
        puts "Error creating post #{import_id}. Skipping."
        puts error.message
      end
    end

    print_status counts[:skipped] + counts[:created] + (opts[:offset] || 0), total
  end

  [counts[:created], counts[:skipped]]
end
# Creates one post through PostCreator on behalf of the user named by
# opts[:user_id]. The given options are passed along with
# skip_validations: true added; the caller's hash is not modified.
# Returns whatever PostCreator.create returns.
def create_post(opts)
  author = User.find(opts[:user_id])
  PostCreator.create(author, opts.merge(skip_validations: true))
end
# Sets bumped_at on every topic to the newest created_at among the posts
# of that topic, using a single SQL statement.
def update_bumped_at
  sql = "update topics t set bumped_at = (select max(created_at) from posts where topic_id = t.id)"
  Post.exec_sql(sql)
end
# Prints an in-place progress line ("\r" returns to the start of the line)
# of the form "<current> / <max> (<percent>%)".
#
# current - number of items handled so far
# max     - total number of items; when it is zero the percentage is shown
#           as 0.0 rather than the NaN/Infinity the division would produce
def print_status(current, max)
  percent = max.to_f.zero? ? 0.0 : ((current.to_f / max.to_f) * 100).round(1)
  print "\r%9d / %d (%5.1f%%) " % [current, max, percent]
end
# Yields successive offsets 0, batch_size, 2 * batch_size, ... without end.
# The loop never stops on its own; the block is expected to `break` once
# there is nothing left to process.
def batches(batch_size)
  current_offset = 0

  loop do
    yield current_offset
    current_offset += batch_size
  end
end
end

View file

@ -2,192 +2,106 @@
# `createdb bbpress` # `createdb bbpress`
# `bundle exec rake db:migrate` # `bundle exec rake db:migrate`
BB_PRESS_DB = "import" require File.expand_path(File.dirname(__FILE__) + "/base.rb")
BB_PRESS_DB = "bbpress"
require 'mysql2' require 'mysql2'
@client = Mysql2::Client.new( class ImportScripts::Bbpress < ImportScripts::Base
host: "localhost",
username: "root",
password: "password",
:database => BB_PRESS_DB
)
def initialize
super
require File.expand_path(File.dirname(__FILE__) + "/../../config/environment") @client = Mysql2::Client.new(
SiteSetting.email_domains_blacklist = '' host: "localhost",
RateLimiter.disable username: "root",
#password: "password",
def create_admin database: BB_PRESS_DB
User.new.tap { |admin| )
admin.email = "sam.saffron@gmail.com"
admin.username = "sam"
admin.password = SecureRandom.uuid
admin.save
admin.grant_admin!
admin.change_trust_level!(:regular)
admin.email_tokens.update_all(confirmed: true)
}
end
def create_user(opts, import_id)
opts[:name] = User.suggest_name(opts[:name] || opts[:email])
opts[:username] = UserNameSuggester.suggest(opts[:username] || opts[:name] || opts[:email])
opts[:email] = opts[:email].downcase
u = User.new(opts)
u.custom_fields["import_id"] = import_id
u.save!
u
rescue
# try based on email
u = User.find_by(email: opts[:email].downcase)
u.custom_fields["import_id"] = import_id
u.save!
u
end
def create_post(opts)
user = User.find(opts[:user_id])
opts = opts.merge(skip_validations: true)
PostCreator.create(user, opts)
end
results = @client.query("
select ID,
user_login username,
display_name name,
user_url website,
user_email email,
user_registered created_at
from wp_users where spam = 0 and deleted = 0").to_a
users = {}
UserCustomField.where(name: 'import_id')
.pluck(:user_id, :value)
.each do |user_id, import_id|
users[import_id.to_i] = user_id
end
skipped = 0
results.delete_if do |u|
skipped+= 1 if users[u["ID"]]
end
puts "Importing #{results.length} users (skipped #{skipped})"
i = 0
results.each do |u|
putc "." if ((i+=1)%10) == 0
id = u.delete("ID")
users[id] = create_user(ActiveSupport::HashWithIndifferentAccess.new(u), id).id
end
results = @client.query("
select ID, post_name from wp_posts where post_type = 'forum'
").to_a
categories={}
CategoryCustomField.where(name: 'import_id')
.pluck(:category_id, :value)
.each do |category_id, import_id|
categories[import_id.to_i] = category_id
end
skipped = 0
results.delete_if do |u|
skipped+= 1 if categories[u["ID"]]
end
puts
puts "Importing #{results.length} categories (skipped #{skipped})"
results.each do |c|
c["post_name"] = "unknown" if c["post_name"].blank?
category = Category.new(name: c["post_name"], user_id: -1)
category.custom_fields["import_id"] = c["ID"]
category.save!
categories[c["ID"]] = category.id
end
results = @client.query("
select ID,
post_author,
post_date,
post_content,
post_title,
post_type,
post_parent
from wp_posts
where post_status <> 'spam'
and post_type in ('topic', 'reply')
order by ID
").to_a
posts={}
PostCustomField.where(name: 'import_id')
.pluck(:post_id, :value)
.each do |post_id, import_id|
posts[import_id.to_i] = post_id
end
skipped = 0
results.delete_if do |u|
skipped+= 1 if posts[u["ID"]]
end
puts "Importing #{results.length} posts (skipped #{skipped})"
topic_lookup = {}
Post.pluck(:id, :topic_id, :post_number).each do |p,t,n|
topic_lookup[p] = {topic_id: t, post_number: n}
end
i = 0
results.each do |post|
putc "." if ((i+=1)%10) == 0
mapped = {}
mapped[:user_id] = users[post["post_author"]]
mapped[:raw] = post["post_content"]
mapped[:created_at] = post["post_date"]
if post["post_type"] == "topic"
mapped[:category] = categories[post["post_parent"]]
mapped[:title] = CGI.unescapeHTML post["post_title"]
else
parent_id = posts[post["post_parent"]]
parent = topic_lookup[parent_id]
unless parent
puts; puts "Skipping #{post["ID"]}: #{post["post_content"][0..40]}"
next
end
mapped[:topic_id] = parent[:topic_id]
mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
end end
mapped[:custom_fields] = {import_id: post["ID"]} def execute
users_results = @client.query("
select id,
user_login username,
display_name name,
user_url website,
user_email email,
user_registered created_at
from wp_users
where spam = 0
and deleted = 0 limit 50", cache_rows: false)
d_post = create_post(mapped) create_users(users_results) do |u|
posts[post["ID"]] = d_post.id ActiveSupport::HashWithIndifferentAccess.new(u)
topic_lookup[d_post.id] = {post_number: d_post.post_number, topic_id: d_post.topic_id} end
create_categories(@client.query("select id, post_name from wp_posts where post_type = 'forum' and post_name != ''")) do |c|
{id: c['id'], name: c['post_name']}
end
import_posts
end
# Imports bbPress topics and replies from wp_posts, 1000 rows per query.
# Rows with post_type "topic" become new topics; every other selected row is
# treated as a reply and attached to the topic of its post_parent. Replies
# whose parent cannot be resolved are reported and skipped.
def import_posts
puts '', "creating topics and posts"
# Number of rows to import; handed to create_posts as :total.
total_count = @client.query("
select count(*) count
from wp_posts
where post_status <> 'spam'
and post_type in ('topic', 'reply')").first['count']
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
select id,
post_author,
post_date,
post_content,
post_title,
post_type,
post_parent
from wp_posts
where post_status <> 'spam'
and post_type in ('topic', 'reply')
order by id
limit #{batch_size}
offset #{offset}", cache_rows: false)
# batches never ends by itself; an empty page ends the import.
break if results.size < 1
create_posts(results, total: total_count, offset: offset) do |post|
skip = false
mapped = {}
mapped[:id] = post["id"]
# Author: in-memory lookup first, then the custom field table, else user id -1.
mapped[:user_id] = user_id_from_imported_user_id(post["post_author"]) || find_user_by_import_id(post["post_author"]).try(:id) || -1
mapped[:raw] = post["post_content"]
mapped[:created_at] = post["post_date"]
mapped[:custom_fields] = {import_id: post["id"]}
if post["post_type"] == "topic"
# For topics post_parent is looked up as a source category id.
mapped[:category] = category_from_imported_category_id(post["post_parent"]).try(:name)
mapped[:title] = CGI.unescapeHTML post["post_title"]
else
# For replies post_parent is looked up as a source post id.
parent = topic_lookup_from_imported_post_id(post["post_parent"])
if parent
mapped[:topic_id] = parent[:topic_id]
mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
else
puts "Skipping #{post["id"]}: #{post["post_content"][0..40]}"
skip = true
end
end
# Returning nil tells create_posts to skip this row.
skip ? nil : mapped
end
end
end
end end
Post.exec_sql("update topics t set bumped_at = (select max(created_at) from posts where topic_id = t.id)") ImportScripts::Bbpress.new.perform

View file

@ -0,0 +1,185 @@
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require "mysql2"
class ImportScripts::Drupal < ImportScripts::Base
DRUPAL_DB = "newsite3"
# Runs the base class setup, then opens the MySQL connection to the Drupal
# database named by DRUPAL_DB as root on localhost.
def initialize
  super

  connection_settings = {
    host: "localhost",
    username: "root",
    #password: "password",
    database: DRUPAL_DB
  }
  @client = Mysql2::Client.new(connection_settings)
end
# Runs the Drupal import: users, categories (taxonomy terms with vid = 1),
# blog topics, forum topics and replies, in that order, and finally tries
# to create an admin account. A failure to create the admin is reported and
# does not abort the run.
def execute
  user_rows = @client.query("SELECT uid id, name, mail email, created FROM users;")
  create_users(user_rows) do |row|
    {id: row['id'], username: row['name'], email: row['email'], created_at: Time.zone.at(row['created'])}
  end

  # Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
  category_rows = @client.query("SELECT tid, name, description FROM taxonomy_term_data WHERE vid = 1;")
  create_categories(category_rows) do |c|
    {id: c['tid'], name: c['name'].try(:strip), description: c['description']}
  end

  create_blog_topics
  create_forum_topics
  create_replies

  begin
    create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
  rescue => e
    puts '', "Failed to create admin user"
    puts e.message
  end

  puts '', 'Done'
end
# Imports published (status = 1) Drupal nodes of type 'blog' as topics in a
# 'Blog' category, creating that category first when no category with that
# name exists.
def create_blog_topics
puts '', "creating blog topics"
# import_id is nil here, so the category gets no import_id custom field.
create_category({
name: 'Blog',
user_id: -1,
description: "Articles from the blog"
}, nil) unless Category.find_by_name('Blog')
results = @client.query("
SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
f.body_value body
FROM node n,
field_data_body f
WHERE n.type = 'blog'
AND n.nid = f.entity_id
AND n.status = 1
", cache_rows: false)
create_posts(results) do |row|
{
# Node ids are prefixed with "nid:" when used as import ids.
id: "nid:#{row['nid']}",
# Falls back to user id -1 when the author was not imported.
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: 'Blog',
raw: row['body'],
created_at: Time.zone.at(row['created']),
# Sticky nodes are pinned as of their creation time.
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
title: row['title'].try(:strip),
custom_fields: {import_id: "nid:#{row['nid']}"}
}
end
end
# Imports published (status = 1) Drupal nodes of type 'forum' as topics,
# 1000 rows per query. Each topic is placed in the category imported for
# the node's taxonomy term (tid); sticky nodes are pinned as of their
# creation time. Node ids are prefixed with "nid:" when used as import ids.
def create_forum_topics
  puts '', "creating forum topics"

  total_count = @client.query("
    SELECT COUNT(*) count
      FROM forum_index fi, node n
     WHERE n.type = 'forum'
       AND fi.nid = n.nid
       AND n.status = 1;").first['count']

  batch_size = 1000

  batches(batch_size) do |offset|
    # ORDER BY keeps the pages stable: without it the row order of a
    # LIMIT/OFFSET query is unspecified, so consecutive batches could
    # repeat or miss rows.
    results = @client.query("
      SELECT fi.nid nid,
             fi.title title,
             fi.tid tid,
             n.uid uid,
             fi.created created,
             fi.sticky sticky,
             f.body_value body
        FROM forum_index fi,
             node n,
             field_data_body f
       WHERE n.type = 'forum'
         AND fi.nid = n.nid
         AND n.nid = f.entity_id
         AND n.status = 1
       ORDER BY fi.nid
       LIMIT #{batch_size}
      OFFSET #{offset};
    ", cache_rows: false)

    # batches never ends by itself; an empty page ends the import.
    break if results.size < 1

    create_posts(results, total: total_count, offset: offset) do |row|
      {
        id: "nid:#{row['nid']}",
        # Falls back to user id -1 when the author was not imported.
        user_id: user_id_from_imported_user_id(row['uid']) || -1,
        category: category_from_imported_category_id(row['tid']).try(:name),
        raw: row['body'],
        created_at: Time.zone.at(row['created']),
        pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
        title: row['title'].try(:strip),
        custom_fields: {import_id: "nid:#{row['nid']}"}
      }
    end
  end
end
# Imports published (status = 1) Drupal comments on published blog and
# forum nodes as replies, 1000 rows per query. A comment is attached to the
# topic imported for its node ("nid:<nid>"); comments whose topic is
# unknown are reported and skipped. Comment ids are prefixed with "cid:"
# when used as import ids.
def create_replies
  puts '', "creating replies in topics"

  total_count = @client.query("
    SELECT COUNT(*) count
      FROM comment c,
           node n
     WHERE n.nid = c.nid
       AND c.status = 1
       AND n.type IN ('blog', 'forum')
       AND n.status = 1;").first['count']

  batch_size = 1000

  batches(batch_size) do |offset|
    # ORDER BY keeps the pages stable: without it the row order of a
    # LIMIT/OFFSET query is unspecified, so consecutive batches could
    # repeat or miss rows.
    results = @client.query("
      SELECT c.cid, c.pid, c.nid, c.uid, c.created,
             f.comment_body_value body
        FROM comment c,
             field_data_comment_body f,
             node n
       WHERE c.cid = f.entity_id
         AND n.nid = c.nid
         AND c.status = 1
         AND n.type IN ('blog', 'forum')
         AND n.status = 1
       ORDER BY c.cid
       LIMIT #{batch_size}
      OFFSET #{offset};
    ", cache_rows: false)

    # batches never ends by itself; an empty page ends the import.
    break if results.size < 1

    create_posts(results, total: total_count, offset: offset) do |row|
      topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
      topic_id = topic_mapping && topic_mapping[:topic_id]

      if topic_id
        h = {
          id: "cid:#{row['cid']}",
          topic_id: topic_id,
          # Falls back to user id -1 when the author was not imported.
          user_id: user_id_from_imported_user_id(row['uid']) || -1,
          raw: row['body'],
          created_at: Time.zone.at(row['created']),
          custom_fields: {import_id: "cid:#{row['cid']}"}
        }

        # pid names the parent comment; link the reply to it when that
        # comment has been imported and is not the first post of the topic.
        if row['pid']
          parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
          h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > 1
        end

        h
      else
        puts "No topic found for comment #{row['cid']}"
        nil # tells create_posts to skip this row
      end
    end
  end
end
end
ImportScripts::Drupal.new.perform

View file

@ -0,0 +1,165 @@
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require "mysql2"
require "csv"
# TODO
#
# It would be better to have a mysql dump of the joomla users too.
# But I got a csv file and had an awful time trying to use the LOAD DATA command to put it into a table.
# So, this script reads Joomla users from a csv file for now.
class ImportScripts::Kunena < ImportScripts::Base
KUNENA_DB = "kunena"
JOOMLA_USERS = "j-users.csv"
# Runs the base class setup, takes the path of the Joomla users csv file
# from the first command line argument (raising ArgumentError when it is
# missing), and opens the MySQL connection to the database named by
# KUNENA_DB as root on localhost.
def initialize
  super

  @joomla_users_file = ARGV[0]
  if @joomla_users_file.blank?
    raise ArgumentError, 'Joomla users file argument missing. Provide full path to joomla users csv file.'
  end

  @users = {}

  connection_settings = {
    host: "localhost",
    username: "root",
    #password: "password",
    database: KUNENA_DB
  }
  @client = Mysql2::Client.new(connection_settings)
end
# Runs the Kunena import: checks that the users file exists, merges the
# Joomla and Kunena user data, then creates users, categories and posts,
# and finally tries to create an admin account. A failure to create the
# admin is reported and does not abort the run.
def execute
  check_files_exist
  parse_users

  create_users(@users) do |id, user|
    suspended = user[:suspended]
    {
      id: id,
      email: user[:email],
      username: user[:username],
      created_at: user[:created_at],
      bio_raw: user[:bio],
      moderator: !!user[:moderator],
      suspended_at: suspended ? Time.zone.now : nil,
      suspended_till: suspended ? 100.years.from_now : nil
    }
  end

  category_rows = @client.query("SELECT id, parent, name, description, ordering FROM jos_kunena_categories ORDER BY parent, id;")
  create_categories(category_rows) do |c|
    attrs = {id: c['id'], name: c['name'], description: c['description'], position: c['ordering'].to_i}

    # A positive parent id refers to another Kunena category; nest under it
    # when that category is already known.
    parent = c['parent'].to_i > 0 ? category_from_imported_category_id(c['parent']) : nil
    attrs[:parent_category_id] = parent.id if parent

    attrs
  end

  import_posts

  begin
    create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
  rescue => e
    puts '', "Failed to create admin user"
    puts e.message
  end

  puts '', 'Done'
end
# Raises ArgumentError when the Joomla users file given on the command line
# does not exist; returns nil otherwise.
def check_files_exist
  return if File.exist?(@joomla_users_file)

  raise ArgumentError.new("File does not exist: #{@joomla_users_file}")
end
# Reads the file at path f and returns its contents after three textual
# substitutions, applied in this order:
#   1. a double quote followed by a space and a newline loses the space
#   2. a backslash followed by a double quote becomes ";;"
#   3. every remaining backslash becomes a newline
def read_csv(f)
  File.read(f)
    .gsub(/\" \n/, "\"\n")
    .gsub(/\\\"/, ";;")
    .gsub(/\\/, "\n")
end
# Fills @users (keyed by the Integer Joomla user id) by merging two sources:
# the Joomla users csv file and the jos_kunena_users table.
#
# csv columns are read by position: 0 = id, 1 = name, 2 = email,
# 3 = creation time. Rows without a positive id, a name and an email are
# ignored. The username is derived from the name: spaces become
# underscores, every character outside A-Z, a-z, 0-9 and "_" is dropped,
# and the result is cut to the maximum username length; a result shorter
# than the minimum length is repeated.
#
# Kunena rows add :bio, :moderator and :suspended to users already present
# in @users; rows for other users are ignored.
def parse_users
  # Need to merge data from joomla with kunena
  puts "parsing joomla user data from #{@joomla_users_file}"

  CSV.foreach(@joomla_users_file) do |u|
    joomla_id = u[0].to_i
    next unless joomla_id > 0 && u[1].present? && u[2].present?

    username = u[1].gsub(' ', '_').gsub(/[^A-Za-z0-9_]/, '')[0,User.username_length.end]
    min_length = User.username_length.first
    username = username * min_length if username.length < min_length

    @users[joomla_id] = {id: joomla_id, username: username, email: u[2], created_at: Time.zone.parse(u[3])}
  end

  puts "parsing kunena user data from mysql"

  kunena_rows = @client.query("SELECT userid, signature, moderator, banned FROM jos_kunena_users;", cache_rows: false)
  kunena_rows.each do |row|
    kunena_id = row['userid'].to_i
    next unless kunena_id > 0

    user = @users[kunena_id]
    next unless user

    user[:bio] = row['signature']
    user[:moderator] = (row['moderator'].to_i == 1)
    user[:suspended] = row['banned'].present?
  end
end
# Imports Kunena messages as topics and replies, 1000 rows per query,
# ordered by message id. A message whose id equals its thread id starts a
# topic; every other message is a reply attached to the topic of its parent
# message. Replies whose parent cannot be resolved are reported and skipped.
def import_posts
  puts '', "creating topics and posts"

  # Number of messages; handed to create_posts as :total.
  total_count = @client.query("SELECT COUNT(*) count FROM jos_kunena_messages m;").first['count']

  batch_size = 1000

  batches(batch_size) do |offset|
    results = @client.query("
      SELECT m.id id,
             m.thread thread,
             m.parent parent,
             m.catid catid,
             m.userid userid,
             m.subject subject,
             m.time time,
             t.message message
        FROM jos_kunena_messages m,
             jos_kunena_messages_text t
       WHERE m.id = t.mesid
       ORDER BY m.id
       LIMIT #{batch_size}
      OFFSET #{offset};
    ", cache_rows: false)

    # batches never ends by itself; an empty page ends the import.
    break if results.size < 1

    create_posts(results, total: total_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = m['id']
      # Author: in-memory lookup first, then the custom field table, else user id -1.
      mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || find_user_by_import_id(m['userid']).try(:id) || -1
      mapped[:raw] = m["message"]
      mapped[:created_at] = Time.zone.at(m['time'])
      mapped[:custom_fields] = {import_id: m['id']}

      if m['id'] == m['thread']
        mapped[:category] = category_from_imported_category_id(m['catid']).try(:name)
        mapped[:title] = m['subject']
      else
        parent = topic_lookup_from_imported_post_id(m['parent'])
        if parent
          mapped[:topic_id] = parent[:topic_id]
          mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
        else
          # to_s guards against a NULL subject: this block runs outside the
          # rescue in create_posts, so an exception here would abort the run.
          puts "Parent post #{m['parent']} doesn't exist. Skipping #{m["id"]}: #{m["subject"].to_s[0..40]}"
          skip = true
        end
      end

      # Returning nil tells create_posts to skip this row.
      skip ? nil : mapped
    end
  end
end
end
ImportScripts::Kunena.new.perform