mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-23 15:48:43 -05:00
956 lines
36 KiB
Ruby
956 lines
36 KiB
Ruby
# encoding: utf-8
|
||
#
|
||
# Author: Erick Guan <fantasticfears@gmail.com>
|
||
#
|
||
# This script imports the data from the latest Discuz! X
|
||
# Should work with Discuz! X3.x
|
||
# This script is tested only on Simplified Chinese Discuz! X instances
|
||
# If you want to import data other than Simplified Chinese, email me.
|
||
|
||
require 'php_serialize'
|
||
require 'miro'
|
||
require 'mysql2'
|
||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||
|
||
class ImportScripts::DiscuzX < ImportScripts::Base
|
||
|
||
# Name of the MySQL database that holds the Discuz! X data.
DISCUZX_DB = "ultrax".freeze
# Prefix prepended to every Discuz! X table name (see table_name).
DB_TABLE_PREFIX = 'pre_'.freeze
# Number of rows fetched per paged query.
BATCH_SIZE = 1000
ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums".freeze # without http(s)://
NEW_SITE_PREFIX = "http://discourse.example.com".freeze # with http:// or https://

# Set DISCUZX_BASE_DIR to the base directory of your discuz installation.
DISCUZX_BASE_DIR = '/var/www/discuz/upload'.freeze
# Avatar and attachment directories, relative to DISCUZX_BASE_DIR.
AVATAR_DIR = '/uc_server/data/avatar'.freeze
ATTACHMENT_DIR = '/data/attachment/forum'.freeze
# File extensions written to SiteSetting.authorized_extensions before attachments are imported.
AUTHORIZED_EXTENSIONS = %w[jpg jpeg png gif zip rar pdf].freeze
|
||
|
||
# Opens the MySQL connection to the Discuz! X database and builds the list of
# patterns that recognise links pointing back into the old forum.
#
# Each pattern exposes its interesting parts through named captures
# (tid, pid, fid, action); process_discuzx_post reads those captures and
# hands them to replace_internal_link.
def initialize
  super

  @client = Mysql2::Client.new(
    host: "localhost",
    username: "root",
    #password: "password",
    database: DISCUZX_DB
  )
  # Private-message thread key ("pm_<plid>") => import id of the thread's first message.
  @first_post_id_by_topic_id = {}

  site = "https?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}"
  # Query-string separator: a bare `&` or its HTML-escaped form `&amp;`.
  amp = '&(?:amp;)?'
  # Remainder of the URL: everything up to whitespace or a BBCode bracket.
  rest = '[^\[\]\s]*'

  @internal_url_regexps = [
    %r{#{site}/forum\.php\?mod=viewthread#{amp}tid=(?<tid>\d+)#{rest}(?:pid=?(?<pid>\d+))?#{rest}},
    %r{#{site}/viewthread\.php\?tid=(?<tid>\d+)#{rest}(?:pid=?(?<pid>\d+))?#{rest}},
    %r{#{site}/forum\.php\?mod=redirect#{amp}goto=findpost#{amp}pid=(?<pid>\d+)#{amp}ptid=(?<tid>\d+)#{rest}},
    %r{#{site}/redirect\.php\?goto=findpost#{amp}pid=(?<pid>\d+)#{amp}ptid=(?<tid>\d+)#{rest}},
    %r{#{site}/forumdisplay\.php\?fid=(?<fid>\d+)#{rest}},
    %r{#{site}/forum\.php\?mod=forumdisplay#{amp}fid=(?<fid>\d+)#{rest}},
    %r{#{site}/(?<action>index)\.php#{rest}},
    %r{#{site}/(?<action>stats)\.php#{rest}},
    # The capture is named `action` so that 'stat' / 'ranklist' reach the
    # `case action` branch of replace_internal_link.
    %r{#{site}/misc\.php\?mod=(?<action>stat|ranklist)#{rest}}
  ]
end
|
||
|
||
# Runs the import stages one after another, in the order listed.
def execute
  stages = %i[
    get_knowledge_about_duplicated_email
    import_users
    import_categories
    import_posts
    import_private_messages
    import_attachments
  ]
  stages.each { |stage| send(stage) }
end
|
||
|
||
# add the prefix to the table name
|
||
# Returns +name+ with DB_TABLE_PREFIX prepended.
#
# name - table name without prefix (String or Symbol). When omitted, the bare
#        prefix is returned instead of raising on `String + nil`.
def table_name(name = nil)
  "#{DB_TABLE_PREFIX}#{name}"
end
|
||
|
||
# find which group members can be granted as admin
|
||
# Reads every Discuz user group and records which group ids map to
# Discourse admins (@admin_group_id) and moderators (@moderator_group_id),
# based on the group's `radminid` column.
def get_knowledge_about_group
  rows = mysql_query(
    "SELECT groupid group_id, radminid role_id
       FROM #{table_name 'common_usergroup'};")

  @moderator_group_id = []
  @admin_group_id = []
  #@banned_group_id = [4,5] # Banned users and their posts are not imported; empty this array if you want to import them.

  rows.each do |group|
    role = group['role_id']
    if role == 1 # administrator
      @admin_group_id << group['group_id']
    elsif role == 2 || role == 3 # super moderator, moderator. Drop 3 if ordinary moderators should not become Discourse moderators.
      @moderator_group_id << group['group_id']
    end
  end
end
|
||
|
||
# Fills @category_slug (forum import id => slug) from the PHP-serialized
# `forumkeys` row of the settings table. Leaves the hash empty when the row
# is missing or its value is blank; blank slugs are ignored.
def get_knowledge_about_category_slug
  @category_slug = {}
  rows = mysql_query("SELECT svalue value
                        FROM #{table_name 'common_setting'}
                       WHERE skey = 'forumkeys'")

  return if rows.size < 1

  serialized = rows.first['value']
  return if serialized.blank?

  PHP.unserialize(serialized).each do |category_import_id, slug|
    @category_slug[category_import_id] = slug unless slug.blank?
  end
end
|
||
|
||
# Finds members that share an email address with another member.
#
# For every such member, @duplicated_email maps its uid to the uid chosen by
# the grouped sub-query for that email. If that chosen uid has already been
# imported, the duplicate uid is pointed at the same Discourse user in the
# lookup's user table.
def get_knowledge_about_duplicated_email
  @duplicated_email = {}
  # Use the configured prefix rather than a hard-coded "pre_" table name.
  member_table = table_name 'common_member'
  results = mysql_query(
    "select a.uid uid, b.uid import_id from #{member_table} a
     join (select uid, email from #{member_table} group by email having count(email) > 1 order by uid asc) b USING(email)
     where a.uid != b.uid")

  # NOTE(review): reaches into the lookup object's @users hash directly;
  # presumably import id => Discourse user id - confirm against base.rb.
  users = @lookup.instance_variable_get :@users

  results.each do |row|
    @duplicated_email[row['uid']] = row['import_id']
    user_id = users[row['import_id']]
    users[row['uid']] = user_id if user_id
  end
end
|
||
|
||
# Imports Discuz members as Discourse users, BATCH_SIZE rows at a time.
#
# For each member the block passed to create_users builds the attribute hash;
# its :post_create_action then imports the avatar and the profile / card
# backgrounds found in the member's space CSS, and adjusts the digest and
# name settings of the freshly created user.
def import_users
  puts '', "creating users"

  get_knowledge_about_group

  sensitive_user_table = table_name 'ucenter_members'
  user_table = table_name 'common_member'
  profile_table = table_name 'common_member_profile'
  status_table = table_name 'common_member_status'
  forum_table = table_name 'common_member_field_forum'
  home_table = table_name 'common_member_field_home'
  total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first['count']

  batches(BATCH_SIZE) do |offset|
    # birthprovince is read from p.birthprovince (it used to alias
    # p.resideprovince, which duplicated the residence province).
    results = mysql_query(
      "SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
              su.regdate regdate, su.password password_hash, su.salt salt,
              s.regip regip, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, s.lastsendmail last_emailed_at,
              u.emailstatus email_confirmed, u.avatarstatus avatar_exists,
              p.site website, p.address address, p.bio bio, p.realname realname, p.qq qq,
              p.resideprovince resideprovince, p.residecity residecity, p.residedist residedist, p.residecommunity residecommunity,
              p.birthprovince birthprovince, p.birthcity birthcity, p.birthdist birthdist, p.birthcommunity birthcommunity,
              h.spacecss spacecss, h.spacenote spacenote,
              f.customstatus customstatus, f.sightml sightml
         FROM #{user_table} u
         LEFT JOIN #{sensitive_user_table} su USING(uid)
         LEFT JOIN #{profile_table} p USING(uid)
         LEFT JOIN #{status_table} s USING(uid)
         LEFT JOIN #{forum_table} f USING(uid)
         LEFT JOIN #{home_table} h USING(uid)
        ORDER BY u.uid ASC
        LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if results.size < 1

    # TODO: breaks the script reported by some users
    # next if all_records_exist? :users, users.map {|u| u["id"].to_i}

    create_users(results, total: total_count, offset: offset) do |user|
      # Website: the profile site when it looks like a host name, otherwise a
      # Qzone link built from an all-digit QQ number greater than 10000.
      qq = user['qq'].to_s.strip
      website =
        if user['website'] && user['website'].include?('.')
          user['website'].strip
        elsif qq =~ /\A\d+\z/ && qq.to_i > 10000
          "http://user.qzone.qq.com/#{qq}"
        end

      # Location: explicit address first, then residence, then birthplace.
      place_keys =
        if !user['resideprovince'].blank?
          %w[resideprovince residecity residedist residecommunity]
        else
          %w[birthprovince birthcity birthdist birthcommunity]
        end
      place = place_keys.map { |key| user[key] }.reject { |part| part.blank? }.join(' ')

      { id: user['id'],
        email: user['email'],
        username: user['username'],
        name: first_exists(user['realname'], user['customstatus'], user['username']),
        import_pass: user['password_hash'],
        active: true,
        salt: user['salt'],
        # TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626
        created_at: user['regdate'] ? Time.zone.at(user['regdate']) : nil,
        registration_ip_address: user['regip'],
        ip_address: user['last_visit_ip'],
        last_seen_at: user['last_visit_time'],
        last_emailed_at: user['last_emailed_at'],
        last_posted_at: user['last_posted_at'],
        moderator: @moderator_group_id.include?(user['group_id']),
        admin: @admin_group_id.include?(user['group_id']),
        website: website,
        bio_raw: first_exists((user['bio'] && CGI.unescapeHTML(user['bio'])), user['sightml'], user['spacenote']).strip[0,3000],
        location: first_exists(user['address'], place),
        post_create_action: lambda do |newmember|
          if user['avatar_exists'] == 1 && newmember.uploaded_avatar_id.blank?
            path, filename = discuzx_avatar_fullpath(user['id'])
            if path
              begin
                upload = create_upload(newmember.id, path, filename)
                if !upload.nil? && upload.persisted?
                  newmember.import_mode = false
                  newmember.create_user_avatar
                  newmember.import_mode = true
                  newmember.user_avatar.update(custom_upload_id: upload.id)
                  newmember.update(uploaded_avatar_id: upload.id)
                else
                  puts "Error: Upload did not persist!"
                end
              rescue SystemCallError => err
                puts "Could not import avatar: #{err.message}"
              end
            end
          end

          if !user['spacecss'].blank? && newmember.user_profile.profile_background.blank?
            # profile background: pull the url(...) out of the CSS rule for a
            # selector and keep the part after the old site prefix
            background_for = lambda do |selector|
              matched = user['spacecss'].match(/#{selector}\s*\{[^}]*url\('?(.+?)'?\)/i)
              matched && matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
            end
            body_background = background_for.call('body')
            header_background = background_for.call('#hd')
            blocktitle_background = background_for.call('\.blocktitle')
            content_background = background_for.call('#ct')

            if body_background || header_background || blocktitle_background || content_background
              profile_background = first_exists(header_background, body_background, content_background, blocktitle_background)
              card_background = first_exists(content_background, body_background, header_background, blocktitle_background)

              upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, profile_background), File.basename(profile_background))
              if upload
                newmember.user_profile.upload_profile_background upload
              else
                puts "WARNING: #{user['username']} (UID: #{user['id']}) profile_background file did not persist!"
              end

              upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, card_background), File.basename(card_background))
              if upload
                newmember.user_profile.upload_card_background upload
              else
                puts "WARNING: #{user['username']} (UID: #{user['id']}) card_background file did not persist!"
              end
            end
          end

          # we don't send email to the unconfirmed user
          newmember.update(email_digests: user['email_confirmed'] == 1) if newmember.email_digests
          newmember.update(name: '') if !newmember.name.blank? && newmember.name == newmember.username
        end
      }
    end
  end
end
|
||
|
||
# Imports Discuz forums as Discourse categories.
#
# Rows of type 'group' and rows with status 2 are skipped. After a category is
# created, its :post_create_action applies the stored slug, writes the forum
# rules into the category's first post and imports the forum icon as logo.
def import_categories
  puts '', "creating categories"

  get_knowledge_about_category_slug

  forums_table = table_name 'forum_forum'
  forums_data_table = table_name 'forum_forumfield'

  results = mysql_query("
    SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position,
           d.description description, d.rules rules, d.icon, d.extra extra
      FROM #{forums_table} f
      LEFT JOIN #{forums_data_table} d USING(fid)
     ORDER BY parent_id ASC, id ASC
  ")

  # Imported positions are offset by the highest existing position; this is
  # computed once, before any category is created.
  max_position = Category.all.max_by(&:position).position
  create_categories(results) do |row|
    next if row['type'] == 'group' || row['status'] == 2 # || row['status'].to_i == 3 # uncomment to skip importing groups

    extra = PHP.unserialize(row['extra']) unless row['extra'].blank?
    color = extra["namecolor"][1,6] if extra && !extra["namecolor"].blank?

    h = {
      id: row['id'],
      name: row['name'],
      description: row['description'],
      position: row['position'].to_i + max_position,
      color: color,
      suppress_from_homepage: (row['status'] == 0 || row['status'] == 3),
      post_create_action: lambda do |category|
        if slug = @category_slug[row['id']]
          category.update(slug: slug)
        end

        # rules and icon come from a LEFT JOIN and may be NULL
        raw = process_discuzx_post(row['rules'].to_s, nil)
        if @bbcode_to_md
          raw = raw.bbcode_to_md(false) rescue raw
        end
        category.topic.posts.first.update_attribute(:raw, raw)

        unless row['icon'].blank?
          upload = create_upload(Discourse::SYSTEM_USER_ID, File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, '../common', row['icon']), File.basename(row['icon']))
          if upload
            category.logo_url = upload.url
            # FIXME: I don't know how to get '/shared' by script. May change to Rails.root
            category.color = Miro::DominantColors.new(File.join('/shared', category.logo_url)).to_hex.first[1,6] if !color
            category.save!
          end
        end
        category
      end
    }
    if row['parent_id'].to_i > 0
      h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
    end
    h
  end
end
|
||
|
||
# Imports forum posts, BATCH_SIZE rows at a time.
#
# The first post of a thread becomes a Discourse topic (a poll is prepended
# for threads with special == 1); every other post becomes a reply in the
# topic of its thread's first post. Posts whose `invisible` value has bit 0
# set, or whose converted body is blank, are deleted right after creation;
# posts with bit 1 set are flagged by the system user.
def import_posts
  puts "", "creating topics and posts"

  users_table = table_name 'common_member'
  posts_table = table_name 'forum_post'
  topics_table = table_name 'forum_thread'

  total_count = mysql_query("SELECT count(*) count FROM #{posts_table}").first['count']

  batches(BATCH_SIZE) do |offset|
    # NOTE(review): the `t.tid < 10000` filter limits the import to low thread
    # ids while total_count counts every post - confirm it is intentional.
    results = mysql_query("
      SELECT p.pid id,
             p.tid topic_id,
             t.fid category_id,
             t.subject title,
             p.authorid user_id,
             p.message raw,
             p.dateline post_time,
             p2.pid first_id,
             p.invisible status,
             t.special special
        FROM #{posts_table} p
        JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
        JOIN #{topics_table} t ON t.tid = p.tid
       where t.tid < 10000
       ORDER BY id ASC, topic_id ASC
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")
    # u.status != -1 AND u.groupid != 4 AND u.groupid != 5: the user is not locked, banned from visiting or muted.
    # On real Discuz forums banned users are usually spam bots or expelled users, which do not need importing.
    break if results.size < 1

    next if all_records_exist? :posts, results.map {|p| p["id"].to_i}

    create_posts(results, total: total_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = m['id']
      mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
      mapped[:raw] = process_discuzx_post(m['raw'], m['id'])
      mapped[:created_at] = Time.zone.at(m['post_time'])

      if m['id'] == m['first_id']
        mapped[:category] = category_id_from_imported_category_id(m['category_id'])
        mapped[:title] = CGI.unescapeHTML(m['title'])

        if m['special'] == 1
          # Separate locals keep the batch's `results` untouched.
          poll_rows = mysql_query("
            SELECT multiple, maxchoices
              FROM #{table_name 'forum_poll'}
             WHERE tid = #{m['topic_id']}")
          poll = poll_rows.first || {}
          option_rows = mysql_query("
            SELECT polloption
              FROM #{table_name 'forum_polloption'}
             WHERE tid = #{m['topic_id']}
             ORDER BY displayorder")
          if option_rows.empty?
            puts "WARNING: can't find poll options for topic #{m['topic_id']}, skip poll"
          else
            # `multiple` arrives as an integer flag and 0 is truthy in Ruby,
            # so compare explicitly; `maxchoices` is nil when no poll row exists.
            poll_type = [1, true, '1'].include?(poll['multiple']) ? ' type=multiple' : ''
            max_choices = poll['maxchoices'].to_i
            poll_max = max_choices > 0 ? " max=#{max_choices}" : ''
            options = option_rows.map { |option| "- #{option['polloption']}" }.join("\n")
            mapped[:raw].prepend "[poll#{poll_type}#{poll_max}]\n#{options}\n[/poll]\n"
          end
        end
      else
        parent = topic_lookup_from_imported_post_id(m['first_id'])

        if parent
          mapped[:topic_id] = parent[:topic_id]
          reply_post_import_id = find_post_id_by_quote_number(m['raw'])
          if reply_post_import_id
            post_id = post_id_from_imported_post_id(reply_post_import_id.to_i)
            if (post = Post.find_by(id: post_id))
              if post.topic_id == mapped[:topic_id]
                mapped[:reply_to_post_number] = post.post_number
              else
                puts "post #{m['id']} reply to another topic, skip reply"
              end
            else
              puts "post #{m['id']} reply to not exists post #{reply_post_import_id}, skip reply"
            end
          end
        else
          puts "Parent topic #{m['topic_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}"
          skip = true
        end
      end

      if (m['status'] & 1) == 1 || mapped[:raw].blank?
        mapped[:post_create_action] = lambda do |post|
          PostDestroyer.new(Discourse.system_user, post).perform_delete
        end
      elsif (m['status'] & 2) >> 1 == 1 # waiting for approve
        mapped[:post_create_action] = lambda do |post|
          PostAction.act(Discourse.system_user, post, 6, {take_action: false})
        end
      end
      skip ? nil : mapped
    end
  end
end
|
||
|
||
# Imports thread favourites (idtype = 'tid') as bookmarks on the first post
# of the favourited thread, BATCH_SIZE rows at a time.
def import_bookmarks
  puts '', 'creating bookmarks'
  favorites_table = table_name 'home_favorite'
  posts_table = table_name 'forum_post'

  total_count = mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first['count']
  batches(BATCH_SIZE) do |offset|
    # LIMIT/OFFSET paging needs a stable order; without ORDER BY successive
    # pages may repeat or skip rows.
    results = mysql_query("
      SELECT p.pid post_id, f.uid user_id
        FROM #{favorites_table} f
        JOIN #{posts_table} p ON f.id = p.tid
       WHERE f.idtype = 'tid' AND p.first
       ORDER BY f.uid ASC, p.pid ASC
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};")

    break if results.size < 1

    # next if all_records_exist?

    create_bookmarks(results, total: total_count, offset: offset) do |row|
      {
        user_id: row['user_id'],
        post_id: row['post_id']
      }
    end
  end
end
|
||
|
||
|
||
# Imports UCenter private messages, BATCH_SIZE rows at a time.
#
# The first message of a thread (see is_first_pm) becomes a private-message
# topic addressed to the other thread members; later messages become replies
# in that topic. Messages whose thread row is missing, that have no other
# recipient, or whose first message was not imported are skipped.
def import_private_messages
  puts '', 'creating private messages'

  pm_indexes = table_name 'ucenter_pm_indexes'
  pm_messages = table_name 'ucenter_pm_messages'
  total_count = mysql_query("SELECT count(*) count FROM #{pm_indexes}").first['count']

  # Messages are spread over numbered tables; read them as one UNION.
  # NOTE(review): only the tables suffixed _1 to _9 are read - confirm
  # whether the source database also has a _0 table.
  message_union = (1..9).map do |shard|
    "SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
       FROM #{pm_messages}_#{shard}"
  end.join("\n UNION ")

  batches(BATCH_SIZE) do |offset|
    results = mysql_query("
      #{message_union}
      ORDER BY thread_id ASC, id ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};")

    break if results.size < 1

    # next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}

    create_posts(results, total: total_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = "pm:#{m['id']}"
      mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
      mapped[:raw] = process_discuzx_post(m['message'], m['id'])
      mapped[:created_at] = Time.zone.at(m['created_at'])
      thread_id = "pm_#{m['thread_id']}"

      if is_first_pm(m['id'], m['thread_id'])
        # find the title from list table
        pm_thread = mysql_query("
          SELECT plid thread_id, subject
            FROM #{table_name 'ucenter_pm_lists'}
           WHERE plid = #{m['thread_id']};").first

        if pm_thread.nil?
          # No list row means no subject to use as title; skip instead of
          # failing on nil.
          skip = true
          puts "Skipping pm:#{m['id']} due to missing thread #{m['thread_id']}"
        else
          mapped[:title] = pm_thread['subject']
          mapped[:archetype] = Archetype.private_message

          # Find the users who are part of this private message.
          import_user_ids = mysql_query("
            SELECT plid thread_id, uid user_id
              FROM #{table_name 'ucenter_pm_members'}
             WHERE plid = #{m['thread_id']};
          ").map {|r| r['user_id']}.uniq

          # Everyone in the thread except the author becomes a recipient.
          mapped[:target_usernames] = import_user_ids.map! do |import_user_id|
            import_user_id.to_s == m['user_id'].to_s ? nil : User.find_by(id: user_id_from_imported_user_id(import_user_id)).try(:username)
          end.compact

          if mapped[:target_usernames].empty? # pm with yourself?
            skip = true
            puts "Skipping pm:#{m['id']} due to no target"
          else
            @first_post_id_by_topic_id[thread_id] = mapped[:id]
          end
        end
      else
        parent = topic_lookup_from_imported_post_id(@first_post_id_by_topic_id[thread_id])
        if parent
          mapped[:topic_id] = parent[:topic_id]
        else
          puts "Parent post pm thread:#{thread_id} doesn't exist. Skipping #{m["id"]}: #{m["message"][0..40]}"
          skip = true
        end
      end

      skip ? nil : mapped
    end
  end
end
|
||
|
||
# search for first pm id for the series of pm
|
||
# Tells whether +pm_id+ is the lowest message id indexed for +thread_id+.
#
# pm_id     - message id (compared as a string, so Integer or String both work)
# thread_id - thread (plid) whose index rows are inspected
#
# Returns true or false; false when the thread has no index rows at all.
def is_first_pm(pm_id, thread_id)
  # Only the lowest id is needed, so let MySQL return a single row.
  result = mysql_query("
    SELECT pmid id
      FROM #{table_name 'ucenter_pm_indexes'}
     WHERE plid = #{thread_id}
     ORDER BY id
     LIMIT 1")
  first_row = result.first
  !first_row.nil? && first_row['id'].to_s == pm_id.to_s
end
|
||
|
||
# Replaces every [img]...[/img] tag in +raw+ with the HTML of the uploaded
# inline image; a tag whose upload fails is removed.
#
# Always returns a new String (a copy of +raw+ when nothing matched), never
# nil, so the result can safely be assigned back to the post body.
def process_and_upload_inline_images(raw)
  inline_image_regex = /\[img\]([\s\S]*?)\[\/img\]/

  raw.gsub(inline_image_regex) do
    upload, filename = upload_inline_image($1)
    upload ? html_for_upload(upload, filename) : nil
  end
end
|
||
|
||
# Converts the body of a Discuz post into text Discourse can cook.
#
# raw       - original post body (BBCode); nil is treated as an empty body
# import_id - id of the post being converted, used only in log messages
#
# Steps, in order: strip the "reply to" headers, turn quotes of known posts
# into Discourse quotes and the rest into `>` blocks, rewrite or drop
# unsupported BBCode tags, mark images hosted on the old site as
# [x-attach] for import_attachments, and rewrite links into the old forum.
#
# Returns the converted, stripped String.
def process_discuzx_post(raw, import_id)
  # raw = process_and_upload_inline_images(raw)
  s = raw.to_s.dup
  # Old site prefix with its dots escaped, for use inside the patterns below.
  site = ORIGINAL_SITE_PREFIX.gsub('.', '\.')

  # Strip the quote
  # [quote] quotation includes the topic which is the same as reply to in Discourse
  # We get the pid to find the post number the post reply to. So it can be stripped
  s = s.gsub(/\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[\/url\]\[\/b\]/i, '').strip
  s = s.gsub(/\[b\]回复 \[url=https?:\/\/#{site}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\].*?\[\/b\]/i, '').strip

  # Each quote is matched on its own (lazy capture). A quote whose source post
  # cannot be resolved is returned unchanged so the generic quote conversion
  # below still handles it.
  s.gsub!(/\[quote\](.*?)\[\/quote\]/im) do |matched|
    content = $1
    post_import_id = find_post_id_by_quote_number(content)
    next matched unless post_import_id

    post_id = post_id_from_imported_post_id(post_import_id.to_i)
    if (post = Post.find_by(id: post_id))
      "[quote=\"#{post.user.username}\", post: #{post.post_number}, topic: #{post.topic_id}]\n#{content}\n[/quote]"
    else
      puts "post #{import_id} quote to not exists post #{post_import_id}, skip reply"
      matched
    end
  end

  s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')
  s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=https?:\/\/#{site}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')

  # convert quote
  s.gsub!(/\[quote\](.*?)\[\/quote\]/m) { "\n" + ($1.strip).gsub(/^/, '> ') + "\n" }

  # truncate line space, preventing line starting with many blanks to be parsed as code blocks
  s.gsub!(/^ {4,}/, ' ')

  # TODO: Much better to use bbcode-to-md gem
  # Convert image bbcode with width and height
  s.gsub!(/\[img[^\]]*\]https?:\/\/#{site}\/(.*)\[\/img\]/i, '[x-attach]\1[/x-attach]') # dont convert attachment
  s.gsub!(/<img[^>]*src="https?:\/\/#{site}\/(.*)".*?>/i, '[x-attach]\1[/x-attach]') # dont convert attachment
  s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/blog\/(.*)\[\/img\]/i, '[x-attach]../blog/\1[/x-attach]') # site-specific customization
  s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/ucenter\/avatar.php\?uid=(\d+)[^\]]*\[\/img\]/i) { "[x-attach]#{discuzx_avatar_fullpath($1,false)[0]}[/x-attach]" } # site-specific customization
  s.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')
  s.gsub!(/\[img\]([^\]]*)\[\/img\]/i, '<img src="\1">')

  s.gsub!(/\[qq\]([^\]]*)\[\/qq\]/i, '<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>')

  s.gsub!(/\[email\]([^\]]*)\[\/email\]/i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
  s.gsub!(/\[s\]([^\]]*)\[\/s\]/i, '<s>\1</s>')
  s.gsub!(/\[sup\]([^\]]*)\[\/sup\]/i, '<sup>\1</sup>')
  s.gsub!(/\[sub\]([^\]]*)\[\/sub\]/i, '<sub>\1</sub>')
  s.gsub!(/\[hr\]/i, "\n---\n")

  # remove the media tag
  s.gsub!(/\[\/?media[^\]]*\]/i, "\n")
  s.gsub!(/\[\/?flash[^\]]*\]/i, "\n")
  s.gsub!(/\[\/?audio[^\]]*\]/i, "\n")
  s.gsub!(/\[\/?video[^\]]*\]/i, "\n")

  # Remove the font, p and backcolor tag
  # Discourse doesn't support the font tag
  s.gsub!(/\[font=[^\]]*?\]/i, '')
  s.gsub!(/\[\/font\]/i, '')
  s.gsub!(/\[p=[^\]]*?\]/i, '')
  s.gsub!(/\[\/p\]/i, '')
  s.gsub!(/\[backcolor=[^\]]*?\]/i, '')
  s.gsub!(/\[\/backcolor\]/i, '')

  # Remove the size tag
  # I really have no idea what is this
  s.gsub!(/\[size=[^\]]*?\]/i, '')
  s.gsub!(/\[\/size\]/i, '')

  # Remove the color tag
  s.gsub!(/\[color=[^\]]*?\]/i, '')
  s.gsub!(/\[\/color\]/i, '')

  # Remove the hide tag
  s.gsub!(/\[\/?hide\]/i, '')
  s.gsub!(/\[\/?free[^\]]*\]/i, "\n")

  # Remove the align tag
  # still don't know what it is
  s.gsub!(/\[align=[^\]]*?\]/i, "\n")
  s.gsub!(/\[\/align\]/i, "\n")
  s.gsub!(/\[float=[^\]]*?\]/i, "\n")
  s.gsub!(/\[\/float\]/i, "\n")

  # Convert code
  s.gsub!(/\[\/?code\]/i, "\n```\n")

  # The edit notice should be removed
  # example: 本帖最后由 Helloworld 于 2015-1-28 22:05 编辑
  s.gsub!(/\[i=s\] 本帖最后由[\s\S]*?编辑 \[\/i\]/, '')

  # Convert the custom smileys to emojis
  # `{:cry:}` to `:cry`
  s.gsub!(/\{(\:\S*?\:)\}/, '\1')

  # Replace internal forum links that aren't in the <!-- l --> format
  # convert list tags to ul and list=1 tags to ol
  # (basically, we're only missing list=a here...)
  s.gsub!(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]')
  s.gsub!(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]')
  # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
  s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')

  # Discuz can create PM out of a post, which will generates like
  # [url=http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11][b]关于您在“主题名称”的帖子[/b][/url]
  s.gsub!(pm_url_regexp) do |discuzx_link|
    # The only capture is the thread id; the other link parts are unknown here.
    replace_internal_link(discuzx_link, $1.to_i, nil, nil, nil)
  end

  # [url][b]text[/b][/url] to **[url]text[/url]**
  s.gsub!(/(\[url=[^\[\]]*?\])\[b\](\S*)\[\/b\](\[\/url\])/, '**\1\2\3**')

  @internal_url_regexps.each do |internal_url_regexp|
    s.gsub!(internal_url_regexp) do |discuzx_link|
      match = $~
      # nil when the pattern has no such named group or the group did not take part
      capture = lambda { |name| match.names.include?(name) ? match[name] : nil }
      tid = capture.call('tid')
      pid = capture.call('pid')
      fid = capture.call('fid')
      # Some patterns may carry the page name in a group called `mod`.
      action = capture.call('action') || capture.call('mod')
      replace_internal_link(discuzx_link, tid && tid.to_i, pid && pid.to_i, fid && fid.to_i, action)
    end
  end

  # @someone without the url
  s.gsub!(/@\[url=[^\[\]]*?\](\S*)\[\/url\]/i, '@\1')

  s.scan(/http(?:s)?:\/\/#{site}\/[^\[\]\s]*/) {|link|puts "WARNING: post #{import_id} can't replace internal url #{link}"}

  s.strip
end
|
||
|
||
# Maps a link into the old forum onto its Discourse equivalent.
#
# discuzx_link       - the original URL; returned unchanged when nothing matches
# import_topic_id    - Discuz thread id found in the link, or nil
# import_post_id     - Discuz post id found in the link, or nil
# import_category_id - Discuz forum id found in the link, or nil
# action             - page name found in the link ('index', 'stat', 'stats',
#                      'ranklist'), or nil
#
# The post is tried first, then the thread, then the forum, then the action.
# The trailing parameters default to nil so callers may pass only what they know.
def replace_internal_link(discuzx_link, import_topic_id = nil, import_post_id = nil, import_category_id = nil, action = nil)
  if import_post_id
    post_id = post_id_from_imported_post_id import_post_id
    # find_by returns nil for a missing record, so an unknown post falls
    # through to the next lookup.
    post = post_id && Post.find_by(id: post_id)
    return post.full_url if post
  end

  if import_topic_id
    results = mysql_query("SELECT pid
                             FROM #{table_name 'forum_post'}
                            WHERE tid = #{import_topic_id} AND first
                            LIMIT 1")

    return discuzx_link unless results.size > 0

    linked_post_id = results.first['pid']
    lookup = topic_lookup_from_imported_post_id(linked_post_id)
    return lookup ? "#{NEW_SITE_PREFIX}#{lookup[:url]}" : discuzx_link
  end

  if import_category_id
    category_id = category_id_from_imported_category_id import_category_id
    category = category_id && Category.find_by(id: category_id)
    return category.url if category
  end

  case action
  when 'index'
    "#{NEW_SITE_PREFIX}/"
  when 'stat', 'stats', 'ranklist'
    "#{NEW_SITE_PREFIX}/users"
  else
    discuzx_link
  end
end
|
||
|
||
# Pattern for the "find post" redirect links of the old site; its single
# capture is the thread id (ptid). Built on first use and then reused.
def pm_url_regexp
  return @pm_url_regexp if @pm_url_regexp

  escaped_site = ORIGINAL_SITE_PREFIX.gsub('.', '\.')
  @pm_url_regexp = Regexp.new("http(?:s)?://#{escaped_site}/forum\\.php\\?mod=redirect&goto=findpost&pid=\\d+&ptid=(\\d+)")
end
|
||
|
||
# This step is done separately because it can take multiple attempts to get right (because of
|
||
# missing files, wrong paths, authorized extensions, etc.).
|
||
# Walks over every imported post and replaces attachment markers with uploads.
#
# Three sources are handled per post: inline [attach]id[/attach] tags,
# [x-attach]path[/x-attach] markers left by process_discuzx_post, and
# attachment rows of the post that were not referenced inline (appended to
# the end of the body). A post is revised only when its body changed.
def import_attachments
  setting = AUTHORIZED_EXTENSIONS.join('|')
  SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions

  attachment_regex = /\[attach\](\d+)\[\/attach\]/
  # Lazy capture: several markers on one line must be handled one by one.
  attachment_link_regex = /\[x-attach\](.+?)\[\/x-attach\]/

  current_count = 0
  total_count = mysql_query("SELECT count(*) count FROM #{table_name 'forum_post'};").first['count']

  success_count = 0
  fail_count = 0

  puts '', "Importing attachments...", ''

  Post.find_each do |post|
    # Only posts whose import id is a plain integer are handled; other
    # import ids (such as the "pm:<id>" ones) are skipped.
    next unless post.custom_fields['import_id'] == post.custom_fields['import_id'].to_i.to_s

    user = post.user

    current_count += 1
    print_status current_count, total_count

    new_raw = post.raw.dup

    inline_attachments = []

    # A failed lookup counts as a failure and removes the tag from the body.
    new_raw.gsub!(attachment_regex) do |s|
      attachment_id = $1.to_i
      inline_attachments.push attachment_id

      upload, filename = find_upload(user, post, attachment_id)
      unless upload
        fail_count += 1
        next
      end

      html_for_upload(upload, filename)
    end

    new_raw.gsub!(attachment_link_regex) do |s|
      attachment_file = $1

      filename = File.basename(attachment_file)
      upload = create_upload(user.id, File.join(DISCUZX_BASE_DIR, attachment_file), filename)
      unless upload
        fail_count += 1
        next
      end

      html_for_upload(upload, filename)
    end

    sql = "SELECT aid
             FROM #{table_name 'forum_attachment'}
            WHERE pid = #{post.custom_fields['import_id']}"
    if !inline_attachments.empty?
      sql << " AND aid NOT IN (#{inline_attachments.join(',')})"
    end

    results = mysql_query(sql)

    results.each do |attachment|
      attachment_id = attachment['aid']
      upload, filename = find_upload(user, post, attachment_id)
      unless upload
        fail_count += 1
        next
      end
      html = html_for_upload(upload, filename)
      unless new_raw.include? html
        new_raw << "\n"
        new_raw << html
      end
    end

    if new_raw != post.raw
      PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: '从 Discuz 中导入附件' })
    end

    success_count += 1
  end

  puts '', ''
  puts "succeeded: #{success_count}"
  puts " failed: #{fail_count}" if fail_count > 0
  puts ''
end
|
||
|
||
# Create the full path to the discuz avatar specified from user id
|
||
# Builds the path of a user's big avatar file.
#
# The user id is left-padded with zeros to nine digits; digits 1-3, 4-5 and
# 6-7 name the nested directories and the last two digits start the file name.
#
# user_id  - Discuz user id
# absolute - when true the path starts at DISCUZX_BASE_DIR, otherwise at AVATAR_DIR
#
# Returns a two-element result: the path and the bare file name.
def discuzx_avatar_fullpath(user_id, absolute=true)
  digits = user_id.to_s.rjust(9, '0')
  file_name = "#{digits[-2..-1]}_avatar_big.jpg"

  segments = [AVATAR_DIR, digits[0..2], digits[3..4], digits[5..6], file_name]
  segments.unshift(DISCUZX_BASE_DIR) if absolute

  return File.join(*segments), file_name
end
|
||
|
||
# post id is in the quote block
|
||
# Extracts the id of the quoted post from a post body.
#
# The patterns are tried in order and the first one that matches wins.
# Returns the captured pid as a String, or nil when none matches.
def find_post_id_by_quote_number(raw)
  pid_patterns = [
    /\[url=forum.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/, #standard
    /\[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=(\d+)&ptid=\d+\]/, # old discuz 7 format
    /\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/ # quote
  ]

  pid_patterns.each do |pattern|
    return $~[1] if pattern === raw
  end
  nil
end
|
||
|
||
# for some reason, discuz inlined some png file
|
||
# the corresponding image stored is broken in a way
|
||
# Decodes an inline base64 PNG and stores it as an upload owned by the
# system user.
#
# data - the text between [img] tags; everything after the first
#        'data:image/png;base64,'.length characters is treated as base64
#
# Returns the upload and its generated file name, or nil when +data+ is nil,
# too short to contain a payload, or the upload is missing or invalid.
def upload_inline_image(data)
  return unless data

  puts 'Creating inline image'

  encoded_photo = data['data:image/png;base64,'.length .. -1]
  unless encoded_photo
    puts 'Error parsed inline photo', data[0..20]
    return
  end
  raw_file = Base64.decode64(encoded_photo)

  real_filename = "#{SecureRandom.hex}.png"
  tempfile = Tempfile.new(['inline', '.png'])
  begin
    tempfile.binmode
    tempfile.write(raw_file)
    tempfile.rewind

    upload = create_upload(Discourse::SYSTEM_USER_ID, tempfile, real_filename)
  ensure
    # best-effort cleanup of the temporary file
    tempfile.close rescue nil
    tempfile.unlink rescue nil
  end

  rejected = upload.nil? || !upload.valid?
  if rejected
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  return upload, real_filename
end
|
||
|
||
# find the uploaded file and real name from the db
|
||
# Looks up a Discuz attachment and uploads its file for +user+.
#
# user      - owner of the new upload (its id is passed to create_upload)
# post      - imported post; its 'import_id' custom field is the Discuz pid
# upload_id - Discuz attachment id (aid)
#
# The index table yields the number of the table that holds the attachment's
# details; that table yields the stored path and the original file name.
#
# Returns the upload and the file name, or nil when a record or the file is
# missing, the upload is invalid, or MySQL reports an error.
def find_upload(user, post, upload_id)
  attachment_table = table_name 'forum_attachment'
  # search for table id
  sql = "SELECT a.pid post_id,
                a.aid upload_id,
                a.tableid table_id
           FROM #{attachment_table} a
          WHERE a.pid = #{post.custom_fields['import_id']}
            AND a.aid = #{upload_id};"
  results = mysql_query(sql)

  unless (meta_data = results.first)
    puts "Couldn't find forum_attachment record meta data for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
    return nil
  end

  # search for uploaded file meta data
  sql = "SELECT a.pid post_id,
                a.aid upload_id,
                a.tid topic_id,
                a.uid user_id,
                a.dateline uploaded_time,
                a.filename real_filename,
                a.attachment attachment_path,
                a.remote is_remote,
                a.description description,
                a.isimage is_image,
                a.thumb is_thumb
           FROM #{attachment_table}_#{meta_data['table_id']} a
          WHERE a.aid = #{upload_id};"
  results = mysql_query(sql)

  unless (row = results.first)
    puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
    return nil
  end

  filename = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row['attachment_path'])
  # File.exist? is the supported spelling; File.exists? no longer exists in
  # current Ruby versions.
  unless File.exist?(filename)
    puts "Attachment file doesn't exist: #{filename}"
    return nil
  end
  real_filename = row['real_filename']
  # a name that starts with a dot gets a random hex prefix
  real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
  upload = create_upload(user.id, filename, real_filename)

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  return upload, real_filename
rescue Mysql2::Error => e
  puts "SQL Error"
  puts e.message
  puts sql
  return nil
end
|
||
|
||
# Returns the first argument that is not blank, or an empty string when
# every argument is blank (or none is given).
def first_exists(*items)
  items.each do |candidate|
    return candidate unless candidate.blank?
  end
  ''
end
|
||
|
||
# Runs +sql+ on the importer's MySQL connection with row caching turned off
# and returns whatever the client returns.
def mysql_query(sql)
  connection = @client
  connection.query(sql, cache_rows: false)
end
|
||
end
|
||
|
||
# Script entry point: build the importer and start it.
ImportScripts::DiscuzX.new.perform
|