# ============================================================================
# script/import_scripts/base.rb
# ============================================================================

module ImportScripts; end

# Shared scaffolding for the forum importers in this directory.
#
# A subclass implements #execute and, from there, calls #create_users,
# #create_categories and #create_posts. Every record created here is tagged
# with an "import_id" custom field, which is what makes a re-run skip the
# records that were already imported.
class ImportScripts::Base

  # Loads the Rails environment and builds the in-memory lookup tables from
  # the "import_id" custom fields that are already stored:
  #   @existing_users  import id (String) => User id
  #   @categories      import id (String) => Category record
  #   @posts           import id (String) => Post id
  #   @topic_lookup    Post id            => {topic_id:, post_number:}
  def initialize
    require File.expand_path(File.dirname(__FILE__) + "/../../config/environment")

    @existing_users = {}
    @failed_users = []
    @categories = {}
    @posts = {}
    @topic_lookup = {}

    UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id|
      @existing_users[import_id] = user_id
    end

    CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id|
      @categories[import_id] = Category.find(category_id.to_i)
    end

    PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id|
      @posts[import_id] = post_id
    end

    Post.pluck(:id, :topic_id, :post_number).each do |post_id, topic_id, post_number|
      @topic_lookup[post_id] = {topic_id: topic_id, post_number: post_number}
    end
  end

  # Entry point: runs #execute with rate limiting disabled, then refreshes
  # bumped_at on the topics. The rate limiter is re-enabled even when the
  # import raises.
  def perform
    Rails.logger.level = 3 # :error, so that we don't create log files that are many GB
    SiteSetting.email_domains_blacklist = ''
    RateLimiter.disable

    execute

    update_bumped_at

  ensure
    RateLimiter.enable
  end

  # Implementation will do most of its work in its execute method.
  # It will need to call create_users, create_categories, and create_posts.
  def execute
    raise NotImplementedError
  end

  # Get the Discourse Post id based on the id of the source record.
  # The id is looked up as given and then as a String.
  def post_id_from_imported_post_id(import_id)
    @posts[import_id] || @posts[import_id.to_s]
  end

  # Get the Discourse topic info (a hash with :topic_id and :post_number)
  # based on the id of the source record. Returns nil for an unknown id.
  def topic_lookup_from_imported_post_id(import_id)
    post_id = post_id_from_imported_post_id(import_id)
    post_id ? @topic_lookup[post_id] : nil
  end

  # Get the Discourse User id based on the id of the source user.
  def user_id_from_imported_user_id(import_id)
    @existing_users[import_id] || @existing_users[import_id.to_s]
  end

  # Get the Discourse Category record (not its id) based on the id of the
  # source category.
  def category_from_imported_category_id(import_id)
    @categories[import_id] || @categories[import_id.to_s]
  end

  # Creates an admin user with a random password and a confirmed email.
  # opts may carry :email and :username; otherwise the defaults below are used.
  # Raises if the user cannot be saved.
  def create_admin(opts = {})
    admin = User.new
    admin.email = opts[:email] || "sam.saffron@gmail.com"
    admin.username = opts[:username] || "sam"
    admin.password = SecureRandom.uuid
    admin.save!
    admin.grant_admin!
    admin.change_trust_level!(:regular)
    admin.email_tokens.update_all(confirmed: true)
    admin
  end

  # Iterate through a list of user records to be imported.
  # Takes a collection, and yields to the block for each element.
  # Block should return a hash with the attributes for the User model.
  # Required fields are :id and :email, where :id is the id of the
  # user in the original datasource. The given id will not be used to
  # create the Discourse user record.
  def create_users(results)
    puts "creating users"
    users_created = 0
    users_skipped = 0

    results.each do |result|
      u = yield(result)
      # Read the source id once, up front, so the lookup-table key and the
      # log messages below never depend on what happens to the hash later.
      import_id = u[:id]

      if user_id_from_imported_user_id(import_id)
        users_skipped += 1
      elsif u[:email].present?
        new_user = create_user(u, import_id)

        if new_user.valid?
          @existing_users[import_id.to_s] = new_user.id
          users_created += 1
        else
          @failed_users << u
          puts "Failed to create user id #{import_id} #{new_user.email}: #{new_user.errors.full_messages}"
        end
      else
        @failed_users << u
        puts "Skipping user id #{import_id} because email is blank"
      end

      print_status users_created + users_skipped + @failed_users.length, results.size
    end

    puts ''
    puts "created: #{users_created} users"
    puts " failed: #{@failed_users.size}" if @failed_users.size > 0
  end

  # Creates (or re-uses) the Discourse user for one source record.
  # opts holds User attributes (:email is required); import_id is the id of
  # the user in the source system. Returns the User; when saving failed and no
  # user with the same email exists, the returned record carries the errors.
  def create_user(opts, import_id)
    # Work on a copy: the caller still needs its own hash (including :id).
    opts = opts.dup
    opts.delete(:id)

    existing = User.where(email: opts[:email].downcase, username: opts[:username]).first
    return existing if existing && existing.custom_fields["import_id"].to_i == import_id.to_i

    opts[:name] = User.suggest_name(opts[:name] || opts[:email])
    opts[:username] = UserNameSuggester.suggest((opts[:username].present? ? opts[:username] : nil) || opts[:name] || opts[:email])
    opts[:email] = opts[:email].downcase
    opts[:trust_level] = TrustLevel.levels[:basic] unless opts[:trust_level]

    u = User.new(opts)
    u.custom_fields["import_id"] = import_id
    u.custom_fields["import_username"] = opts[:username] if opts[:username].present?

    begin
      u.save!
    rescue
      # Deliberate best effort: fall back to an existing account with the same
      # email and tag it with the import id.
      existing = User.find_by(email: opts[:email].downcase)
      if existing
        existing.custom_fields["import_id"] = import_id
        existing.save!
        u = existing
      end
    end

    u # If there was an error creating the user, u.errors has the messages
  end

  # Database lookup of a user by its source id; returns the User or nil.
  def find_user_by_import_id(import_id)
    UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
  end

  # Iterates through a collection to create categories.
  # The block should return a hash with attributes for the new category.
  # Required fields are :id and :name, where :id is the id of the
  # category in the original datasource. The given id will not be used to
  # create the Discourse category record.
  # Optional attributes are position, description, and parent_category_id.
  def create_categories(results)
    puts "creating categories"

    results.each do |c|
      params = yield(c)
      puts " #{params[:name]}"
      new_category = create_category(params, params[:id])
      @categories[params[:id]] = new_category
    end
  end

  # Returns the already-imported Category for import_id, or creates a new one
  # owned by user -1. import_id may be nil, in which case the category is
  # always created and is not tagged.
  def create_category(opts, import_id)
    existing = category_from_imported_category_id(import_id)
    return existing if existing

    new_category = Category.new(
      name: opts[:name],
      user_id: -1,
      position: opts[:position],
      description: opts[:description],
      parent_category_id: opts[:parent_category_id]
    )
    new_category.custom_fields["import_id"] = import_id if import_id
    new_category.save!
    new_category
  end

  # Iterates through a collection of posts to be imported.
  # It can create topics and replies.
  # Attributes will be passed to the PostCreator.
  # Topics should give attributes title and category.
  # Replies should provide topic_id. Use topic_lookup_from_imported_post_id to find the topic.
  #
  # The block returns the attribute hash (with :id holding the source id), or
  # nil to skip the record. opts[:total] and opts[:offset] only affect the
  # progress display. Returns [created, skipped].
  def create_posts(results, opts = {})
    skipped = 0
    created = 0
    total = opts[:total] || results.size

    results.each do |r|
      params = yield(r)

      if params.nil?
        skipped += 1
        next # block returns nil to skip a post
      end

      import_id = params.delete(:id).to_s

      if post_id_from_imported_post_id(import_id)
        skipped += 1 # already imported this post
      else
        begin
          new_post = create_post(params)
          @posts[import_id] = new_post.id
          @topic_lookup[new_post.id] = {post_number: new_post.post_number, topic_id: new_post.topic_id}

          created += 1
        rescue => e
          skipped += 1
          puts "Error creating post #{import_id}. Skipping."
          puts e.message
        end
      end

      print_status skipped + created + (opts[:offset] || 0), total
    end

    [created, skipped]
  end

  # Creates one post through PostCreator with validations skipped.
  # opts must include :user_id.
  def create_post(opts)
    user = User.find(opts[:user_id])
    opts = opts.merge(skip_validations: true)

    PostCreator.create(user, opts)
  end

  # Sets every topic's bumped_at to the created_at of its newest post.
  def update_bumped_at
    Post.exec_sql("update topics t set bumped_at = (select max(created_at) from posts where topic_id = t.id)")
  end

  # Rewrites the current terminal line with "current / max (percent%)".
  def print_status(current, max)
    print "\r%9d / %d (%5.1f%%) " % [current, max, ((current.to_f / max.to_f) * 100).round(1)]
  end

  # Yields 0, batch_size, 2 * batch_size, ... forever.
  # The block must `break` to stop the loop (e.g. when a query returns no rows).
  def batches(batch_size)
    offset = 0
    loop do
      yield offset
      offset += batch_size
    end
  end
end

# ============================================================================
# script/import_scripts/bbpress.rb
# ============================================================================

# `createdb bbpress`
# `bundle exec rake db:migrate`

require File.expand_path(File.dirname(__FILE__) + "/base.rb")

BB_PRESS_DB = "bbpress"

require 'mysql2'

# Imports users, forums (as categories), topics and replies from a bbPress
# (WordPress) MySQL database.
class ImportScripts::Bbpress < ImportScripts::Base

  # Connects to the local MySQL database named by BB_PRESS_DB.
  def initialize
    super

    @client = Mysql2::Client.new(
      host: "localhost",
      username: "root",
      #password: "password",
      database: BB_PRESS_DB
    )
  end

  # Imports users, then forums as categories, then topics and replies.
  def execute
    # All non-spam, non-deleted users are imported; the query is not capped,
    # because posts are imported in full and need their authors.
    users_results = @client.query("
      select id,
             user_login username,
             display_name name,
             user_url website,
             user_email email,
             user_registered created_at
        from wp_users
       where spam = 0
         and deleted = 0", cache_rows: false)

    create_users(users_results) do |u|
      ActiveSupport::HashWithIndifferentAccess.new(u)
    end

    create_categories(@client.query("select id, post_name from wp_posts where post_type = 'forum' and post_name != ''")) do |c|
      {id: c['id'], name: c['post_name']}
    end

    import_posts
  end

  # Imports topics and replies in id order, 1000 rows per query.
  # A reply whose parent post has not been imported is skipped.
  def import_posts
    puts '', "creating topics and posts"

    total_count = @client.query("
      select count(*) count
        from wp_posts
       where post_status <> 'spam'
         and post_type in ('topic', 'reply')").first['count']

    batch_size = 1000

    batches(batch_size) do |offset|
      results = @client.query("
        select id,
               post_author,
               post_date,
               post_content,
               post_title,
               post_type,
               post_parent
          from wp_posts
         where post_status <> 'spam'
           and post_type in ('topic', 'reply')
         order by id
         limit #{batch_size}
        offset #{offset}", cache_rows: false)

      break if results.size < 1

      create_posts(results, total: total_count, offset: offset) do |post|
        skip = false
        mapped = {}

        mapped[:id] = post["id"]
        # In-memory table first, then the database, then the system user (-1).
        mapped[:user_id] = user_id_from_imported_user_id(post["post_author"]) || find_user_by_import_id(post["post_author"]).try(:id) || -1
        mapped[:raw] = post["post_content"]
        mapped[:created_at] = post["post_date"]
        mapped[:custom_fields] = {import_id: post["id"]}

        if post["post_type"] == "topic"
          mapped[:category] = category_from_imported_category_id(post["post_parent"]).try(:name)
          mapped[:title] = CGI.unescapeHTML post["post_title"]
        else
          parent = topic_lookup_from_imported_post_id(post["post_parent"])
          if parent
            mapped[:topic_id] = parent[:topic_id]
            mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
          else
            # to_s: the content may be NULL and must not break the message.
            puts "Skipping #{post["id"]}: #{post["post_content"].to_s[0..40]}"
            skip = true
          end
        end

        skip ? nil : mapped
      end
    end
  end
end

ImportScripts::Bbpress.new.perform

# ============================================================================
# script/import_scripts/drupal.rb
# ============================================================================

require File.expand_path(File.dirname(__FILE__) + "/base.rb")

require "mysql2"

# Imports users, taxonomy terms (as categories), blog and forum nodes (as
# topics) and comments (as replies) from a Drupal MySQL database.
class ImportScripts::Drupal < ImportScripts::Base

  DRUPAL_DB = "newsite3"

  # Connects to the local MySQL database named by DRUPAL_DB.
  def initialize
    super

    @client = Mysql2::Client.new(
      host: "localhost",
      username: "root",
      #password: "password",
      database: DRUPAL_DB
    )
  end

  # Imports users, categories, blog topics, forum topics and replies, then
  # tries to create an admin account. A failure to create the admin is only
  # reported, it does not abort the import.
  def execute
    create_users(@client.query("SELECT uid id, name, mail email, created FROM users;")) do |row|
      {id: row['id'], username: row['name'], email: row['email'], created_at: Time.zone.at(row['created'])}
    end

    # Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
    create_categories(@client.query("SELECT tid, name, description FROM taxonomy_term_data WHERE vid = 1;")) do |c|
      {id: c['tid'], name: c['name'].try(:strip), description: c['description']}
    end

    create_blog_topics
    create_forum_topics

    create_replies

    begin
      create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
    rescue => e
      puts '', "Failed to create admin user"
      puts e.message
    end

    puts '', 'Done'
  end

  # Imports published blog nodes as topics in a "Blog" category, creating
  # that category first when it does not exist yet.
  def create_blog_topics
    puts '', "creating blog topics"

    create_category({
      name: 'Blog',
      user_id: -1,
      description: "Articles from the blog"
    }, nil) unless Category.find_by_name('Blog')

    results = @client.query("
      SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
             f.body_value body
        FROM node n,
             field_data_body f
       WHERE n.type = 'blog'
         AND n.nid = f.entity_id
         AND n.status = 1
    ", cache_rows: false)

    create_posts(results) do |row|
      {
        id: "nid:#{row['nid']}",
        user_id: user_id_from_imported_user_id(row['uid']) || -1,
        category: 'Blog',
        raw: row['body'],
        created_at: Time.zone.at(row['created']),
        pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
        title: row['title'].try(:strip),
        custom_fields: {import_id: "nid:#{row['nid']}"}
      }
    end
  end

  # Imports published forum nodes as topics, 1000 rows per query.
  def create_forum_topics
    puts '', "creating forum topics"

    total_count = @client.query("
      SELECT COUNT(*) count
        FROM forum_index fi, node n
       WHERE n.type = 'forum'
         AND fi.nid = n.nid
         AND n.status = 1;").first['count']

    batch_size = 1000

    batches(batch_size) do |offset|
      # ORDER BY keeps the LIMIT/OFFSET pages stable: without it the server
      # may return rows in any order, so pages could overlap or miss rows.
      results = @client.query("
        SELECT fi.nid nid,
               fi.title title,
               fi.tid tid,
               n.uid uid,
               fi.created created,
               fi.sticky sticky,
               f.body_value body
          FROM forum_index fi,
               node n,
               field_data_body f
         WHERE n.type = 'forum'
           AND fi.nid = n.nid
           AND n.nid = f.entity_id
           AND n.status = 1
         ORDER BY n.nid
         LIMIT #{batch_size}
        OFFSET #{offset};
      ", cache_rows: false)

      break if results.size < 1

      create_posts(results, total: total_count, offset: offset) do |row|
        {
          id: "nid:#{row['nid']}",
          user_id: user_id_from_imported_user_id(row['uid']) || -1,
          category: category_from_imported_category_id(row['tid']).try(:name),
          raw: row['body'],
          created_at: Time.zone.at(row['created']),
          pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
          title: row['title'].try(:strip),
          custom_fields: {import_id: "nid:#{row['nid']}"}
        }
      end
    end
  end

  # Imports published comments on blog and forum nodes as replies,
  # 1000 rows per query. Comments whose node was not imported are skipped.
  def create_replies
    puts '', "creating replies in topics"

    total_count = @client.query("
      SELECT COUNT(*) count
        FROM comment c,
             node n
       WHERE n.nid = c.nid
         AND c.status = 1
         AND n.type IN ('blog', 'forum')
         AND n.status = 1;").first['count']

    batch_size = 1000

    batches(batch_size) do |offset|
      # ORDER BY c.cid keeps the pages stable and imports a parent comment
      # before the comments that reply to it.
      results = @client.query("
        SELECT c.cid, c.pid, c.nid, c.uid, c.created,
               f.comment_body_value body
          FROM comment c,
               field_data_comment_body f,
               node n
         WHERE c.cid = f.entity_id
           AND n.nid = c.nid
           AND c.status = 1
           AND n.type IN ('blog', 'forum')
           AND n.status = 1
         ORDER BY c.cid
         LIMIT #{batch_size}
        OFFSET #{offset};
      ", cache_rows: false)

      break if results.size < 1

      create_posts(results, total: total_count, offset: offset) do |row|
        topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
        topic_id = topic_mapping && topic_mapping[:topic_id]

        if topic_id
          h = {
            id: "cid:#{row['cid']}",
            topic_id: topic_id,
            user_id: user_id_from_imported_user_id(row['uid']) || -1,
            raw: row['body'],
            created_at: Time.zone.at(row['created']),
            custom_fields: {import_id: "cid:#{row['cid']}"}
          }
          # 0 is truthy in Ruby, so test the parent id numerically.
          if row['pid'].to_i > 0
            parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
            h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > 1
          end
          h
        else
          puts "No topic found for comment #{row['cid']}"
          nil
        end
      end
    end
  end

end

ImportScripts::Drupal.new.perform

# ============================================================================
# script/import_scripts/kunena.rb
# ============================================================================

require File.expand_path(File.dirname(__FILE__) + "/base.rb")

require "mysql2"
require "csv"

# TODO
#
# It would be better to have a mysql dump of the joomla users too.
# But I got a csv file and had an awful time trying to use the LOAD DATA command to put it into a table.
# So, this script reads Joomla users from a csv file for now.

# Imports users (from a Joomla csv export merged with Kunena's user table),
# categories and messages from a Kunena MySQL database.
# Usage: pass the full path of the Joomla users csv file as first argument.
class ImportScripts::Kunena < ImportScripts::Base

  KUNENA_DB = "kunena"
  JOOMLA_USERS = "j-users.csv" # not referenced within this file

  # Validates the csv argument, then boots the environment and connects to
  # the local MySQL database named by KUNENA_DB.
  # Raises ArgumentError when the csv path argument is missing or blank.
  def initialize
    # Checked before `super` so that a missing argument fails immediately,
    # without first loading the environment and the lookup tables.
    @joomla_users_file = ARGV[0]
    if @joomla_users_file.nil? || @joomla_users_file.strip.empty?
      raise ArgumentError, 'Joomla users file argument missing. Provide full path to joomla users csv file.'
    end

    super

    @users = {}

    @client = Mysql2::Client.new(
      host: "localhost",
      username: "root",
      #password: "password",
      database: KUNENA_DB
    )
  end

  # Imports users, categories and posts, then tries to create an admin
  # account. A failure to create the admin is only reported.
  def execute
    check_files_exist

    parse_users

    create_users(@users) do |id, user|
      { id: id,
        email: user[:email],
        username: user[:username],
        created_at: user[:created_at],
        bio_raw: user[:bio],
        moderator: user[:moderator] ? true : false,
        suspended_at: user[:suspended] ? Time.zone.now : nil,
        suspended_till: user[:suspended] ? 100.years.from_now : nil }
    end

    # Ordered by parent so that a parent category exists before its children.
    create_categories(@client.query("SELECT id, parent, name, description, ordering FROM jos_kunena_categories ORDER BY parent, id;")) do |c|
      h = {id: c['id'], name: c['name'], description: c['description'], position: c['ordering'].to_i}
      if c['parent'].to_i > 0
        parent = category_from_imported_category_id(c['parent'])
        h[:parent_category_id] = parent.id if parent
      end
      h
    end

    import_posts

    begin
      create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
    rescue => e
      puts '', "Failed to create admin user"
      puts e.message
    end

    puts '', 'Done'
  end

  # Raises ArgumentError when the csv file given on the command line is missing.
  def check_files_exist
    raise ArgumentError, "File does not exist: #{@joomla_users_file}" unless File.exist?(@joomla_users_file)
  end

  # Reads file f and returns its text with three substitutions applied:
  # `" \n` becomes `"\n`, `\"` becomes `;;`, and any remaining backslash
  # becomes a newline. Not called from within this file.
  def read_csv(f)
    data = File.read(f)
    data.gsub!(/\" \n/,"\"\n")
    data.gsub!(/\\\"/,";;")
    data.gsub!(/\\/,"\n")
    data
  end

  # Fills @users (keyed by integer source id) from the Joomla csv file, then
  # adds signature, moderator and banned information from jos_kunena_users.
  def parse_users
    # Need to merge data from joomla with kunena

    puts "parsing joomla user data from #{@joomla_users_file}"
    CSV.foreach(@joomla_users_file) do |u|
      # Columns read here: 0 = id, 1 = username, 2 = email, 3 = creation time.
      next unless u[0].to_i > 0 && u[1].present? && u[2].present?
      username = u[1].gsub(' ', '_').gsub(/[^A-Za-z0-9_]/, '')[0, User.username_length.end]
      if username.length < User.username_length.first
        # Too short: repeat the name until it reaches the minimum length.
        username = username * User.username_length.first
      end
      @users[u[0].to_i] = {id: u[0].to_i, username: username, email: u[2], created_at: Time.zone.parse(u[3])}
    end

    puts "parsing kunena user data from mysql"
    results = @client.query("SELECT userid, signature, moderator, banned FROM jos_kunena_users;", cache_rows: false)
    results.each do |u|
      next unless u['userid'].to_i > 0
      user = @users[u['userid'].to_i]
      if user
        user[:bio] = u['signature']
        user[:moderator] = (u['moderator'].to_i == 1)
        user[:suspended] = u['banned'].present?
      end
    end
  end

  # Imports messages in id order, 1000 rows per query. A message whose id
  # equals its thread id starts a topic; any other message is a reply and is
  # skipped when its parent has not been imported.
  def import_posts
    puts '', "creating topics and posts"

    total_count = @client.query("SELECT COUNT(*) count FROM jos_kunena_messages m;").first['count']

    batch_size = 1000

    batches(batch_size) do |offset|
      results = @client.query("
        SELECT m.id id,
               m.thread thread,
               m.parent parent,
               m.catid catid,
               m.userid userid,
               m.subject subject,
               m.time time,
               t.message message
          FROM jos_kunena_messages m,
               jos_kunena_messages_text t
         WHERE m.id = t.mesid
         ORDER BY m.id
         LIMIT #{batch_size}
        OFFSET #{offset};
      ", cache_rows: false)

      break if results.size < 1

      create_posts(results, total: total_count, offset: offset) do |m|
        skip = false
        mapped = {}

        mapped[:id] = m['id']
        # In-memory table first, then the database, then the system user (-1).
        mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || find_user_by_import_id(m['userid']).try(:id) || -1
        mapped[:raw] = m["message"]
        mapped[:created_at] = Time.zone.at(m['time'])
        mapped[:custom_fields] = {import_id: m['id']}

        if m['id'] == m['thread']
          mapped[:category] = category_from_imported_category_id(m['catid']).try(:name)
          mapped[:title] = m['subject']
        else
          parent = topic_lookup_from_imported_post_id(m['parent'])
          if parent
            mapped[:topic_id] = parent[:topic_id]
            mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
          else
            # to_s: the subject may be NULL and must not break the message.
            puts "Parent post #{m['parent']} doesn't exist. Skipping #{m["id"]}: #{m["subject"].to_s[0..40]}"
            skip = true
          end
        end

        skip ? nil : mapped
      end
    end
  end
end

ImportScripts::Kunena.new.perform