2014-09-29 20:37:35 +02:00
require " mysql2 "
2015-03-14 22:31:08 -07:00
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
2014-09-29 20:37:35 +02:00
# Call it like this:
2015-09-22 05:26:57 -07:00
# RAILS_ENV=production ruby script/import_scripts/mybb.rb
2014-09-29 20:37:35 +02:00
class ImportScripts :: MyBB < ImportScripts :: Base
# Name of the MySQL database holding the MyBB data to import.
MYBB_DB = "mybb_db".freeze
# Prefix of the MyBB tables; it is interpolated directly in front of table
# names (e.g. "#{TABLE_PREFIX}users"), so it must not contain whitespace.
TABLE_PREFIX = "mybb_".freeze
# Page size used for the LIMIT clause of the paginated import queries.
BATCH_SIZE = 1000
# Sets up the importer and opens the MySQL connection used by #mysql_query.
#
# Runs the parent initializer first, then connects to the MYBB_DB database
# on localhost as "root". No password is sent; the commented-out line shows
# where to add one if the source database requires it.
def initialize
  super

  @client = Mysql2::Client.new(
    host: "localhost",
    username: "root",
    #password: "",
    database: MYBB_DB
  )
end
# Runs the import phases one after another, in this fixed order: users,
# categories, posts, private messages, user suspensions.
#
# Returns whatever the last phase (suspend_users) returns. An exception in
# any phase aborts the remaining ones.
def execute
  phases = [
    :import_users,
    :import_categories,
    :import_posts,
    :import_private_messages,
    :suspend_users
  ]

  # Folding (instead of #each) keeps the final phase's result as our own.
  phases.reduce(nil) { |_previous, phase| send(phase) }
end
# Imports all MyBB users whose usergroup title is not 'Banned'.
#
# The total is counted once up front and passed to create_users together
# with the current offset. Users are then read in pages of BATCH_SIZE rows
# ordered by uid. The loop stops at the first empty page; a page is skipped
# when all_records_exist? returns true for its ids.
#
# Members of the 'Super Moderators' group are flagged as moderators and
# members of 'Administrators' as admins.
def import_users
  puts '', "creating users"

  total_count = mysql_query("SELECT count(*) count
                               FROM #{TABLE_PREFIX}users u
                               JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
                              WHERE g.title != 'Banned';").first['count']

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(
      "SELECT uid id, email email, username, regdate, g.title `group`
         FROM #{TABLE_PREFIX}users u
         JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
        WHERE g.title != 'Banned'
        ORDER BY u.uid ASC
        LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if results.size < 1

    next if all_records_exist?(:users, results.map { |u| u["id"].to_i })

    create_users(results, total: total_count, offset: offset) do |user|
      {
        id: user['id'],
        email: user['email'],
        username: user['username'],
        # regdate is handed to Time.zone.at as-is.
        created_at: Time.zone.at(user['regdate']),
        moderator: user['group'] == 'Super Moderators',
        admin: user['group'] == 'Administrators'
      }
    end
  end
end
# Imports every MyBB forum as a category.
#
# Forums are read ordered by parent id and then forum id. The name is cut
# to 50 characters in SQL, and name and description are HTML-unescaped.
# A forum with a positive parent id gets :parent_category_id resolved via
# category_id_from_imported_category_id.
def import_categories
  results = mysql_query("
    SELECT fid id, pid parent_id, left(name, 50) name, description
    FROM #{TABLE_PREFIX}forums
    ORDER BY pid ASC, fid ASC
  ")

  create_categories(results) do |row|
    category = {
      id: row['id'],
      name: CGI.unescapeHTML(row['name']),
      description: CGI.unescapeHTML(row['description'])
    }

    if row['parent_id'].to_i > 0
      category[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
    end

    category
  end
end
# Imports MyBB posts, creating a topic for each thread's first post and a
# reply for every other post.
#
# Posts are joined with their thread and read in pages of BATCH_SIZE rows.
# The loop stops at the first empty page; a page is skipped when
# all_records_exist? returns true for its ids.
#
# For each row the block given to create_posts returns either a hash
# describing the post or nil to skip it. A row is skipped when it is a
# reply whose thread's first post cannot be resolved through
# topic_lookup_from_imported_post_id.
def import_posts
  puts "", "creating topics and posts"

  total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}posts").first["count"]

  batches(BATCH_SIZE) do |offset|
    # p.pid is a tie-breaker: dateline alone is not unique, and paging with
    # LIMIT/OFFSET over a non-unique sort key may repeat or skip rows
    # between pages.
    results = mysql_query("
      SELECT p.pid id,
             p.tid topic_id,
             t.fid category_id,
             t.subject title,
             t.firstpost first_post_id,
             p.uid user_id,
             p.message raw,
             p.dateline post_time
      FROM #{TABLE_PREFIX}posts p,
           #{TABLE_PREFIX}threads t
      WHERE p.tid = t.tid
      ORDER BY p.dateline, p.pid
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")

    break if results.size < 1

    next if all_records_exist?(:posts, results.map { |m| m['id'].to_i })

    create_posts(results, total: total_count, offset: offset) do |m|
      # If you have imported a phpbb forum to mybb previously there might
      # be a problem with #{TABLE_PREFIX}threads.firstpost. If these ids are wrong
      # the thread cannot be imported to discourse as the topic post is
      # missing. This query retrieves the first_post_id manually. As it
      # will decrease the performance it is commented out by default.
      # m['first_post_id'] = mysql_query("
      #   SELECT   p.pid id
      #   FROM     #{TABLE_PREFIX}posts p,
      #            #{TABLE_PREFIX}threads t
      #   WHERE    p.tid = #{m['topic_id']} AND t.tid = #{m['topic_id']}
      #   ORDER BY p.dateline
      #   LIMIT    1
      # ").first['id']

      mapped = {
        id: m['id'],
        # Posts whose author was not imported fall back to user id -1.
        user_id: user_id_from_imported_user_id(m['user_id']) || -1,
        raw: process_mybb_post(m['raw'], m['id']),
        created_at: Time.zone.at(m['post_time'])
      }

      if m['id'] == m['first_post_id']
        # First post of its thread: it becomes the topic itself.
        mapped[:category] = category_id_from_imported_category_id(m['category_id'])
        mapped[:title] = CGI.unescapeHTML(m['title'])
      else
        parent = topic_lookup_from_imported_post_id(m['first_post_id'])
        unless parent
          puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}"
          next # a nil block result skips this row
        end
        mapped[:topic_id] = parent[:topic_id]
      end

      mapped
    end
  end
end
# Placeholder phase: private messages are not imported. Prints an empty
# separator line followed by a notice saying so.
def import_private_messages
  puts '', "private messages are not implemented"
end
# Placeholder phase: banned users are not carried over as suspensions.
# Prints an empty separator line followed by a notice saying so.
def suspend_users
  puts '', "banned users are not implemented"
end
2015-09-22 22:31:42 -07:00
# Discourse usernames don't allow spaces
2015-09-22 05:26:57 -07:00
# Replaces every run of whitespace in +username+ with a single underscore.
#
# username - the name taken from a MyBB quote tag; it is not modified.
# post_id  - id of the post being processed, used only in the warning.
#
# Returns the converted name as a new String.
#
# When more than five whitespace runs are replaced, a warning is printed:
# a MyBB bug can place post text in the quote line
# (http://community.mybb.com/thread-180526.html), which shows up as an
# unusually long "username".
def convert_username(username, post_id)
  replaced_runs = 0

  # Non-mutating gsub: the caller's string (possibly frozen) stays intact.
  converted = username.gsub(/\s+/) do
    replaced_runs += 1
    '_'
  end

  puts "Warning: probably incorrect quote in post #{post_id}" if replaced_runs > 5

  converted
end
2015-09-22 22:31:42 -07:00
# Take an original post id and return the migrated topic id and post number for it
2015-09-22 05:26:57 -07:00
# Builds the "post:<number>, topic:<id>" part of a quote tag for a quoted
# MyBB post.
#
# quoted_post_id - original id of the post being quoted (converted with to_i).
# post_id        - original id of the quoting post, used only in messages.
#
# Returns the reference String, or "" (after printing a notice) when the
# quoted post has no imported counterpart or Post.find raises for it.
def post_id_to_post_num_and_topic(quoted_post_id, post_id)
  quoted_post_id_from_imported = post_id_from_imported_post_id(quoted_post_id.to_i)

  unless quoted_post_id_from_imported
    puts "Original post #{post_id} quotes nonexistent post #{quoted_post_id}"
    return ""
  end

  begin
    post = Post.find(quoted_post_id_from_imported)
    "post:#{post.post_number}, topic:#{post.topic_id}"
  rescue
    # Best effort: a failed lookup must not abort the import of this post.
    puts "Could not find migrated post #{quoted_post_id_from_imported} quoted by original post #{post_id} as #{quoted_post_id}"
    ""
  end
end
2014-09-29 20:37:35 +02:00
# Converts the raw body of a MyBB post into text suitable for import.
#
# raw       - the post body as stored by MyBB; it is not modified.
# import_id - original id of the post, passed on for warning messages.
#
# Returns a new String with quote headers, smilies, links, hashed bbcode
# tags, video tags and HTML entities rewritten as described inline.
def process_mybb_post(raw, import_id)
  s = raw.dup

  # convert the quote line
  s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) do
    # Read both captures before calling any helper.
    quoted_user = Regexp.last_match(1)
    quoted_post_id = Regexp.last_match(2)
    "[quote=\"#{convert_username(quoted_user, import_id)}, " +
      post_id_to_post_num_and_topic(quoted_post_id, import_id) + '"]'
  end

  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')

  # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
  s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')

  # Many phpbb bbcode tags have a hash attached to them. Examples:
  #   [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky]
  #   [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
  s.gsub!(/:(?:\w{8})\]/, ']')

  # Remove mybb video tags.
  s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')

  s = CGI.unescapeHTML(s)

  # phpBB shortens link text like this, which breaks our markdown processing:
  #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
  #
  # Work around it for now:
  s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')

  s
end
# Runs +sql+ on the MyBB connection opened in #initialize and returns the
# client's result.
#
# cache_rows: false is passed through to the client; per the mysql2
# documentation this turns off caching of rows in the result.
def mysql_query(sql)
  @client.query(sql, cache_rows: false)
end
end
ImportScripts :: MyBB . new . perform
2015-09-22 05:26:57 -07:00