2014-07-02 16:50:44 -04:00
# Optional BBCode -> Markdown conversion.
#
# When 'bbcode-to-md' is passed on the command line, load the
# ruby-bbcode-to-md gem so that (most) bbcode is replaced with markdown
# before posts are created. This will dramatically clean up the final
# posts in Discourse.
#
# To install the gem, in a temp dir:
#
#   git clone https://github.com/nlalonde/ruby-bbcode-to-md.git
#   cd ruby-bbcode-to-md
#   gem build ruby-bbcode-to-md.gemspec
#   gem install ruby-bbcode-to-md-0.0.13.gem
require 'ruby-bbcode-to-md' if ARGV.include?('bbcode-to-md')
2014-05-30 15:09:58 -04:00
module ImportScripts ; end
class ImportScripts :: Base
# Loads the application environment and builds the in-memory lookup tables
# used to recognise records that were already imported.
#
# Tables built here:
#   @existing_groups   import_id => group id
#   @existing_users    import_id => user id
#   @categories_lookup import_id => Category record
#   @existing_posts    import_id => post id
#   @topic_lookup      post id   => { topic_id:, post_number:, url: }
#
# The import_id keys are the `value` column of the custom-field rows named
# 'import_id' (presumably strings -- the lookup helpers also try `to_s`).
def initialize
  require File.expand_path(File.dirname(__FILE__) + " /../../config/environment ")
  preload_i18n

  @bbcode_to_md = true if ARGV.include?('bbcode-to-md')

  @existing_groups = {}
  @failed_groups = []
  @existing_users = {}
  @failed_users = []
  @categories_lookup = {}
  @existing_posts = {}
  @topic_lookup = {}
  @old_site_settings = {}

  puts " loading existing groups... "
  group_rows = GroupCustomField.where(name: 'import_id').pluck(:group_id, :value)
  group_rows.each { |group_id, source_id| @existing_groups[source_id] = group_id }

  puts " loading existing users... "
  user_rows = UserCustomField.where(name: 'import_id').pluck(:user_id, :value)
  user_rows.each { |user_id, source_id| @existing_users[source_id] = user_id }

  puts " loading existing categories... "
  category_rows = CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value)
  category_rows.each do |category_id, source_id|
    # One lookup per row; raises if the referenced category no longer exists.
    @categories_lookup[source_id] = Category.find(category_id.to_i)
  end

  puts " loading existing posts... "
  post_rows = PostCustomField.where(name: 'import_id').pluck(:post_id, :value)
  post_rows.each { |post_id, source_id| @existing_posts[source_id] = post_id }

  puts " loading existing topics... "
  topic_rows = Post.joins(:topic).pluck(" posts.id, posts.topic_id, posts.post_number, topics.slug ")
  topic_rows.each do |post_id, topic_id, post_number, slug|
    @topic_lookup[post_id] = {
      topic_id: topic_id,
      post_number: post_number,
      url: Post.url(slug, topic_id, post_number),
    }
  end
end
2014-08-13 16:17:16 -04:00
# Calls I18n.t and ActiveSupport::Inflector.transliterate once, up front.
# NOTE(review): presumably this warms lazily-loaded translation data before
# the import starts -- confirm.
def preload_i18n
  probe = " test "
  I18n.t(probe)
  ActiveSupport::Inflector.transliterate(probe)
end
2014-05-30 15:09:58 -04:00
# Runs the whole import: relaxes site settings, calls #execute, then runs
# the post-import fix-up steps. The original site settings are restored
# even when one of the steps raises.
def perform
  begin
    Rails.logger.level = 3 # :error, so that we don't create log files that are many GB

    change_site_settings
    execute

    puts " "

    # Post-import bookkeeping, in this order.
    update_bumped_at
    update_last_posted_at
    update_feature_topic_users
    update_category_featured_topics
    update_topic_count_replies
    reset_topic_counters

    puts " ", " Done "
  ensure
    reset_site_settings
  end
end
# Overrides a fixed set of site settings for the duration of the import and
# disables the rate limiter. The previous value of every overridden setting
# is remembered in @old_site_settings so #reset_site_settings can restore it.
def change_site_settings
  import_overrides = {
    email_domains_blacklist: '',
    min_topic_title_length: 1,
    min_post_length: 1,
    min_private_message_post_length: 1,
    min_private_message_title_length: 1,
    allow_duplicate_topic_titles: true,
    default_digest_email_frequency: '',
    disable_emails: true
  }

  import_overrides.each_pair do |setting, import_value|
    # Read the current value before overwriting it.
    @old_site_settings[setting] = SiteSetting.send(setting)
    SiteSetting.set(setting, import_value)
  end

  RateLimiter.disable
end
# Restores every site setting recorded in @old_site_settings and turns the
# rate limiter back on.
def reset_site_settings
  @old_site_settings.each_pair { |setting, previous| SiteSetting.set(setting, previous) }

  RateLimiter.enable
end
# Hook for concrete importers: the implementation does most of its work
# here and is expected to call create_users, create_categories, and
# create_posts. The base class only raises.
#
# Raises NotImplementedError unless overridden.
def execute
  raise NotImplementedError
end
# Get the Discourse Post id based on the id of the source record.
# The id is tried as given first, then in its string form.
# Returns nil when the post is not in @existing_posts.
def post_id_from_imported_post_id(import_id)
  direct_hit = @existing_posts[import_id]
  direct_hit || @existing_posts[import_id.to_s]
end
# Get the Discourse topic info (a hash) based on the id of the source record.
# Returns nil when the source post has not been imported.
def topic_lookup_from_imported_post_id(import_id)
  post_id = post_id_from_imported_post_id(import_id)
  return nil unless post_id

  @topic_lookup[post_id]
end
2014-07-16 13:59:30 -04:00
# Get the Discourse Group id based on the id of the source group.
# Checks the in-memory table first (raw id, then its string form) and only
# falls back to a database lookup when neither is present.
def group_id_from_imported_group_id(import_id)
  cached = @existing_groups[import_id] || @existing_groups[import_id.to_s]
  return cached if cached

  find_group_by_import_id(import_id).try(:id)
end
# Finds the group whose 'import_id' custom field equals import_id (compared
# as a string). Returns nil when there is no such custom field row.
def find_group_by_import_id(import_id)
  field = GroupCustomField.where(name: 'import_id', value: import_id.to_s).first
  field.try(:group)
end
2014-05-30 15:09:58 -04:00
# Get the Discourse User id based on the id of the source user.
# Checks the in-memory table first (raw id, then its string form) and only
# falls back to a database lookup when neither is present.
def user_id_from_imported_user_id(import_id)
  cached = @existing_users[import_id] || @existing_users[import_id.to_s]
  return cached if cached

  find_user_by_import_id(import_id).try(:id)
end
# Finds the user whose 'import_id' custom field equals import_id (compared
# as a string). Returns nil when there is no such custom field row.
def find_user_by_import_id(import_id)
  field = UserCustomField.where(name: 'import_id', value: import_id.to_s).first
  field.try(:user)
end
# Get the Discourse Category based on the id of the source category.
# The id is tried as given first, then in its string form.
# Returns whatever is stored in @categories_lookup, or nil when absent.
def category_from_imported_category_id(import_id)
  direct_hit = @categories_lookup[import_id]
  direct_hit || @categories_lookup[import_id.to_s]
end
# Creates an admin user with a random password, trust level 4, and all of
# its email tokens marked confirmed.
#
# opts - optional :email and :username; hard-coded defaults are used when
#        they are missing.
#
# Returns the saved User. Raises if the user cannot be saved.
def create_admin(opts = {})
  email = opts[:email] || " sam.saffron@gmail.com "
  username = opts[:username] || " sam "

  admin = User.new
  admin.email = email
  admin.username = username
  admin.password = SecureRandom.uuid
  admin.save!

  admin.grant_admin!
  admin.change_trust_level!(TrustLevel[4])
  admin.email_tokens.update_all(confirmed: true)

  admin
end
2014-07-16 13:59:30 -04:00
# Iterate through a list of groups to be imported.
# Takes a collection and yields to the block for each element.
# Block should return a hash with the attributes for each element.
# Required fields are :id and :name, where :id is the id of the
# group in the original datasource. The given id will not be used
# to create the Discourse group record.
#
# opts - :total  overrides results.size in the progress display
#        :offset is added to the progress counter
#
# Returns [number_created, number_skipped].
def create_groups(results, opts = {})
  created = 0
  skipped = 0
  total = opts[:total] || results.size

  results.each do |result|
    g = yield(result)

    if group_id_from_imported_group_id(g[:id])
      # Already imported.
      skipped += 1
    elsif (new_group = create_group(g, g[:id])).valid?
      @existing_groups[g[:id].to_s] = new_group.id
      created += 1
    else
      @failed_groups << g
      puts " Failed to create group id #{ g [ :id ] } #{ new_group . name } : #{ new_group . errors . full_messages } "
    end

    print_status created + skipped + @failed_groups.length + (opts[:offset] || 0), total
  end

  [created, skipped]
end
# Creates (or re-uses) the group for one imported record.
#
# opts      - attribute hash from the importer; it is copied, so the
#             caller's hash is left untouched. :id is dropped and :name is
#             replaced by a UserNameSuggester suggestion.
# import_id - id of the group in the source system.
#
# When a group with the suggested name already exists and carries the same
# import id, it is returned as is. Otherwise the existing (or new) group is
# tagged with the import id and original name and saved.
#
# Returns the Group; check valid?/errors to see whether the save worked.
def create_group(opts, import_id)
  attrs = opts.dup
  attrs.delete(:id)

  import_name = attrs[:name]
  attrs[:name] = UserNameSuggester.suggest(import_name)

  existing = Group.where(name: attrs[:name]).first
  if existing && existing.custom_fields[" import_id "].to_i == import_id.to_i
    return existing
  end

  group = existing || Group.new(attrs)
  group.custom_fields[" import_id "] = import_id
  group.custom_fields[" import_name "] = import_name
  group.save
  group
end
2014-05-30 15:09:58 -04:00
# Iterate through a list of user records to be imported.
# Takes a collection, and yields to the block for each element.
# Block should return a hash with the attributes for the User model.
# Required fields are :id and :email, where :id is the id of the
# user in the original datasource. The given id will not be used to
# create the Discourse user record.
#
# The block may return nil to skip a record. Records whose import id is
# already known are skipped; records with a blank email, or that fail to
# save, are collected in @failed_users.
#
# opts - :total  overrides results.size in the progress display
#        :offset is added to the progress counter
#
# Returns [number_created, number_skipped].
def create_users(results, opts = {})
  users_created = 0
  users_skipped = 0
  total = opts[:total] || results.size

  results.each do |result|
    u = yield(result)

    if u.nil?
      # block returns nil to skip a user
      users_skipped += 1
    else
      import_id = u[:id]

      if user_id_from_imported_user_id(import_id)
        users_skipped += 1
      elsif u[:email].present?
        new_user = create_user(u, import_id)

        if new_user.valid?
          @existing_users[import_id.to_s] = new_user.id
          users_created += 1
        else
          @failed_users << u
          puts " Failed to create user id: #{ import_id } , username: #{ new_user . username } , email: #{ new_user . email } : #{ new_user . errors . full_messages } "
        end
      else
        @failed_users << u
        puts " Skipping user id #{ import_id } because email is blank "
      end
    end

    print_status users_created + users_skipped + @failed_users.length + (opts[:offset] || 0), total
  end

  [users_created, users_skipped]
end
# Creates (or re-uses) the Discourse user for one imported record.
#
# opts      - attribute hash for User.new. These keys are consumed here and
#             not passed to the model: :id, :merge, :post_create_action,
#             :bio_raw, :website, :avatar_url. The hash is copied first, so
#             the caller's hash is not modified.
# import_id - id of the user in the source system.
#
# An existing user with the same email and username is returned untouched
# when :merge is set or when it already carries this import id. If saving a
# new user raises, the method falls back to an existing user with the same
# email and tags that user with the import id.
#
# Returns the User. If there was an error creating the user, the returned
# record is unsaved and its errors hold the messages.
def create_user(opts, import_id)
  # Work on a copy: the caller keeps using its hash (e.g. for failure
  # reporting), so keys must not disappear from it.
  opts = opts.dup

  opts.delete(:id)
  merge = opts.delete(:merge)
  post_create_action = opts.delete(:post_create_action)

  existing = User.where(email: opts[:email].downcase, username: opts[:username]).first
  return existing if existing && (merge || existing.custom_fields[" import_id "].to_i == import_id.to_i)

  bio_raw = opts.delete(:bio_raw)
  website = opts.delete(:website)
  avatar_url = opts.delete(:avatar_url)

  opts[:name] = User.suggest_name(opts[:email]) unless opts[:name]

  # Replace usernames that are missing, outside the allowed length range,
  # matched by either character check, or already taken.
  if opts[:username].blank? ||
     opts[:username].length < User.username_length.begin ||
     opts[:username].length > User.username_length.end ||
     opts[:username] =~ / [^A-Za-z0-9_] / ||
     opts[:username][0] =~ / [^A-Za-z0-9] / ||
     !User.username_available?(opts[:username])
    opts[:username] = UserNameSuggester.suggest(opts[:username] || opts[:name] || opts[:email])
  end

  opts[:email] = opts[:email].downcase
  opts[:trust_level] = TrustLevel[1] unless opts[:trust_level]
  opts[:active] = true
  opts[:import_mode] = true

  u = User.new(opts)
  u.custom_fields[" import_id "] = import_id
  u.custom_fields[" import_username "] = opts[:username] if opts[:username].present?
  u.custom_fields[" import_avatar_url "] = avatar_url if avatar_url.present?

  begin
    User.transaction do
      u.save!
      if bio_raw.present? || website.present?
        u.user_profile.bio_raw = bio_raw if bio_raw.present?
        u.user_profile.website = website if website.present?
        u.user_profile.save!
      end
    end
  rescue
    # try based on email
    existing = User.find_by(email: opts[:email].downcase)
    if existing
      existing.custom_fields[" import_id "] = import_id
      existing.save!
      u = existing
    end
  end

  post_create_action.try(:call, u) if u.persisted?

  u # If there was an error creating the user, u.errors has the messages
end
# Iterates through a collection to create categories.
# The block should return a hash with attributes for the new category.
# Required fields are :id and :name, where :id is the id of the
# category in the original datasource. The given id will not be used to
# create the Discourse category record.
# Optional attributes are position, description, and parent_category_id.
#
# Each resulting category is stored in @categories_lookup under the raw :id.
def create_categories(results)
  results.each do |c|
    params = yield(c)

    # Basic massaging on the category name: default for blank names, trim
    # whitespace, cap at 50 characters. A new string is built rather than
    # calling strip! so the caller's string is never modified in place
    # (which would also raise on a frozen string).
    raw_name = params[:name].blank? ? " Blank " : params[:name]
    params[:name] = raw_name.strip[0..49]

    puts " \t #{ params [ :name ] } "

    # make sure categories don't go more than 2 levels deep
    if params[:parent_category_id]
      top = Category.find_by_id(params[:parent_category_id])
      top = top.parent_category while top && !top.parent_category.nil?
      params[:parent_category_id] = top.id if top
    end

    new_category = create_category(params, params[:id])
    @categories_lookup[params[:id]] = new_category
  end
end
# Creates the category for one imported record unless it already exists.
#
# opts      - :name (required), optional :user_id or :user, :position,
#             :description, :parent_category_id and :post_create_action
#             (called with the new category after it is saved).
# import_id - id of the category in the source system; stored as a custom
#             field when present.
#
# A category already known under this import id, or one with the same name
# ignoring case, is returned unchanged.
#
# Returns the Category. Raises if the new category cannot be saved.
def create_category(opts, import_id)
  already_there = category_from_imported_category_id(import_id) ||
                  Category.where(" LOWER(name) = ? ", opts[:name].downcase).first
  return already_there if already_there

  callback = opts.delete(:post_create_action)

  attributes = {
    name: opts[:name],
    # Fall back to user id -1 when no owner is given.
    user_id: opts[:user_id] || opts[:user].try(:id) || -1,
    position: opts[:position],
    description: opts[:description],
    parent_category_id: opts[:parent_category_id]
  }

  category = Category.new(attributes)
  category.custom_fields[" import_id "] = import_id if import_id
  category.save!

  callback.try(:call, category)

  category
end
2014-10-31 00:16:08 -04:00
# Hook called by create_posts with every newly created post.
# Does nothing here and returns nil; override if needed.
def created_post(post); end
2014-05-30 15:09:58 -04:00
# Iterates through a collection of posts to be imported.
# It can create topics and replies.
# Attributes will be passed to the PostCreator.
# Topics should give attributes title and category.
# Replies should provide topic_id. Use topic_lookup_from_imported_post_id to find the topic.
#
# The block may return nil to skip a record. Posts that were already
# imported, that fail to be created, or that raise are counted as skipped.
#
# opts - :total  overrides results.size in the progress display
#        :offset is added to the progress counter
#
# Returns [number_created, number_skipped].
def create_posts(results, opts = {})
  skipped = 0
  created = 0
  total = opts[:total] || results.size

  results.each do |record|
    params = yield(record)
    import_id = params.delete(:id).to_s unless params.nil?

    if params.nil?
      # block returns nil to skip a post
      skipped += 1
    elsif post_id_from_imported_post_id(import_id)
      skipped += 1 # already imported this post
    else
      begin
        new_post = create_post(params, import_id)

        if new_post.is_a?(Post)
          @existing_posts[import_id] = new_post.id
          @topic_lookup[new_post.id] = {
            post_number: new_post.post_number,
            topic_id: new_post.topic_id,
            url: new_post.url,
          }

          created_post(new_post)
          created += 1
        else
          # create_post handed back something other than a Post.
          skipped += 1
          puts " Error creating post #{ import_id } . Skipping. "
          puts new_post.inspect
        end
      rescue Discourse::InvalidAccess => e
        skipped += 1
        puts " InvalidAccess creating post #{ import_id } . Topic is closed? #{ e . message } "
      rescue => e
        skipped += 1
        puts " Exception while creating post #{ import_id } . Skipping. "
        puts e.message
        puts e.backtrace.join(" \n ")
      end
    end

    print_status skipped + created + (opts[:offset] || 0), total
  end

  [created, skipped]
end
2014-06-25 19:11:52 -04:00
# Creates one post through PostCreator.
#
# opts      - PostCreator attributes; :user_id is required.
#             :post_create_action, if given, is removed from the hash and
#             called with the created post.
# import_id - id of the post in the source system; stored in the post's
#             'import_id' custom field.
#
# Validations are skipped and import mode is switched on. When bbcode
# conversion is enabled the raw text is converted first; if the conversion
# raises, the original raw text is kept.
#
# Returns the created post, or the PostCreator's error messages when no
# post was created.
def create_post(opts, import_id)
  author = User.find(opts[:user_id])
  callback = opts.delete(:post_create_action)

  opts = opts.merge(skip_validations: true)
  opts[:import_mode] = true
  opts[:custom_fields] ||= {}
  opts[:custom_fields]['import_id'] = import_id

  if @bbcode_to_md
    opts[:raw] =
      begin
        opts[:raw].bbcode_to_md(false)
      rescue StandardError
        opts[:raw]
      end
  end

  creator = PostCreator.new(author, opts)
  post = creator.create
  callback.try(:call, post) if post

  post || creator.errors.full_messages
end
2014-07-16 13:59:30 -04:00
# Creates an upload.
# Expects path to be the full path and filename of the source file.
#
# user_id         - id passed through to Upload.create_for.
# path            - file to copy from.
# source_filename - filename passed through to Upload.create_for.
#
# The source is copied into a temporary file, which is handed to
# Upload.create_for together with its size. The temporary file is removed
# afterwards, whether or not the upload succeeds.
#
# Returns whatever Upload.create_for returns. Raises if the source file
# cannot be opened.
def create_upload(user_id, path, source_filename)
  tmp = Tempfile.new('discourse-upload')
  tmp.binmode

  # Block form closes the source handle even when the copy raises.
  File.open(path, 'rb') { |src| FileUtils.copy_stream(src, tmp) }

  tmp.rewind
  Upload.create_for(user_id, tmp, source_filename, tmp.size)
ensure
  # tmp is nil when Tempfile.new itself failed.
  tmp.close! if tmp
end
2014-06-04 10:37:43 -04:00
# Closes every open topic whose last post is older than a number of days.
#
# opts - :days  age threshold in days (default 30).
#
# Each topic is closed on behalf of Discourse.system_user and progress is
# printed after every topic.
def close_inactive_topics(opts = {})
  # Must be set before the message below interpolates it.
  num_days = opts[:days] || 30

  puts " ", " Closing topics that have been inactive for more than #{ num_days } days. "

  query = Topic.where('last_posted_at < ?', num_days.days.ago).where(closed: false)
  total_count = query.count
  closed_count = 0

  query.find_each do |topic|
    topic.update_status('closed', true, Discourse.system_user)
    closed_count += 1
    print_status(closed_count, total_count)
  end
end
2014-06-04 18:21:45 -04:00
# Sets bumped_at on every topic to the newest created_at among its posts,
# ignoring moderator-action posts. Topics with no such post keep their
# current bumped_at.
def update_bumped_at
  puts " ", " updating bumped_at on topics "

  sql = " update topics t set bumped_at = COALESCE((select max(created_at) from posts where topic_id = t.id and post_type != #{ Post . types [ :moderator_action ] } ), bumped_at) "
  Post.exec_sql(sql)
end
2014-09-08 13:36:55 -04:00
# Sets last_posted_at on every user to the created_at of their newest post.
#
# Only rows whose stored value differs are touched. The comparison uses
# IS DISTINCT FROM so that users whose last_posted_at is still NULL are
# updated too (a plain inequality is never true against NULL).
def update_last_posted_at
  puts " ", " updating last posted at on users "

  sql = <<-SQL
WITH lpa AS (
SELECT user_id , MAX ( posts . created_at ) AS last_posted_at
FROM posts
GROUP BY user_id
)
UPDATE users
SET last_posted_at = lpa . last_posted_at
FROM users u1
JOIN lpa ON lpa . user_id = u1 . id
WHERE u1 . id = users . id
AND users . last_posted_at IS DISTINCT FROM lpa . last_posted_at
  SQL

  User.exec_sql(sql)
end
2014-06-05 15:30:29 -04:00
# Calls feature_topic_users on every topic, printing progress as it goes.
def update_feature_topic_users
  puts " ", " updating featured topic users "

  total = Topic.count
  done = 0

  Topic.find_each do |topic|
    topic.feature_topic_users
    print_status(done += 1, total)
  end
end
2014-09-04 13:08:57 -04:00
# Calls Topic.reset_highest for every topic, printing progress as it goes.
def reset_topic_counters
  puts " ", " reseting topic counters "

  total = Topic.count
  done = 0

  Topic.find_each do |topic|
    Topic.reset_highest(topic.id)
    print_status(done += 1, total)
  end
end
2014-07-03 14:43:24 -04:00
# Calls CategoryFeaturedTopic.feature_topics_for on every category,
# printing progress as it goes.
def update_category_featured_topics
  puts " ", " updating featured topics in categories "

  total = Category.count
  done = 0

  Category.find_each do |category|
    CategoryFeaturedTopic.feature_topics_for(category)
    print_status(done += 1, total)
  end
end
# Recomputes and saves the topic reply count stat for every user in the
# User.real scope, printing progress as it goes.
def update_topic_count_replies
  puts " ", " updating user topic reply counts "

  total = User.real.count
  done = 0

  User.real.find_each do |user|
    stat = user.user_stat
    stat.update_topic_reply_count
    stat.save!
    print_status(done += 1, total)
  end
end
2014-05-30 15:09:58 -04:00
# Prints a single-line progress indicator ("current / max (percent%)").
# The text contains a carriage return and no newline.
#
# current - number of items processed so far.
# max     - total number of items; when it is zero the percentage is
#           reported as 0.0 instead of NaN/Infinity.
def print_status(current, max)
  total = max.to_f
  percent = total.zero? ? 0.0 : ((current.to_f / total) * 100).round(1)

  print " \r %9d / %d (%5.1f%%) " % [current, max, percent]
end
# Yields an ever-growing offset (0, batch_size, 2 * batch_size, ...) to the
# block. The loop never ends on its own: the block has to `break` (or
# raise) once there is nothing left to process.
def batches(batch_size)
  cursor = 0
  loop { yield cursor; cursor += batch_size }
end
end