FEATURE: Lots of improvements to the phpBB3 importer

- Extensive refactoring of the existing importer
- Configuration of import with settings.yml instead of editing code
- Supports importing from phpBB 3.0.x and 3.1.x
- Imports all attachments (not just the ones embedded with [attachment])
  from posts and private messages
- Imports all existing attachments without the need to configure allowed
  file extensions or file sizes
- Imports polls
- Imports bookmarks
- Imports sticky topics and (global) announcements as pinned topics
- Imports categories in the original order and sets the content of the
  category description topic
- Sets the creation date of category description topics to the creation
  date of the first topic in each category
- Imports additional user attributes: last seen date, registration
  IP address, website, date of birth, location
- Optionally sets the user's name to their username
- Users that didn't activate their account in phpBB3 are imported as
  inactive users
- All imported, active users are automatically approved
- Users that were deactivated in phpBB3 get suspended for 200 years
  during the import
- Anonymous users can be imported as suspended users instead of the
  system user
- Forums of type "link" are not imported as categories anymore
- Internal links to posts get rewritten during the import (previously
  only links to topics got rewritten)
- Ordered lists with BBCode [list=a] (which are unsupported in
  Discourse) get imported as if they were [list=1]
- Importing of avatars, attachments, private messages, polls and
  bookmarks can be disabled via configuration file
- Optional fixing of private messages for forums that have been upgraded
  from phpBB2 prevents the import of duplicate messages and tries to
  group related messages into topics
- Table prefix (default: phpbb) is configurable
- Most of phpBB's default smilies are mapped to Emojis and all other
  smilies get uploaded and embedded as images. Smiley mappings can be
  added or overridden in the settings.yml file.
This commit is contained in:
Gerhard Schlager 2015-07-05 23:17:03 +02:00
parent 2dd01c61b0
commit 1cb45861c5
20 changed files with 1696 additions and 484 deletions

View file

@ -1,486 +1,29 @@
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::PhpBB3 < ImportScripts::Base
PHPBB_DB = "phpbb"
BATCH_SIZE = 1000
ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s)://
NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https://
# Set PHPBB_BASE_DIR to the base directory of your phpBB installation.
# When importing, you should place the subdirectories "files" (containing all
# attachments) and "images" (containing avatars) in PHPBB_BASE_DIR.
# If nil, [attachment] tags and avatars won't be processed.
# Edit AUTHORIZED_EXTENSIONS as needed.
# If you used ATTACHMENTS_BASE_DIR before, e.g. ATTACHMENTS_BASE_DIR = '/var/www/phpbb/files/'
# would become PHPBB_BASE_DIR = '/var/www/phpbb'
# now.
PHPBB_BASE_DIR = '/var/www/phpbb'
AUTHORIZED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'zip', 'rar', 'pdf']
# Avatar types to import:
# 1 = uploaded avatars (you should probably leave this here)
# 2 = hotlinked avatars - WARNING: this will considerably slow down your import
# if there are many hotlinked avatars and some of them are unavailable!
# 3 = gallery avatars (the predefined avatars phpBB offers. They will be converted to uploaded avatars)
IMPORT_AVATARS = [1, 3]
# Runs the base-class initializer, opens the MySQL connection to the phpBB
# database (PHPBB_DB on localhost, user "root") and then reads the avatar
# related settings from the phpBB configuration table.
def initialize
  super

  connection_options = {
    host: "localhost",
    username: "root",
    #password: "password",
    database: PHPBB_DB
  }
  @client = Mysql2::Client.new(connection_options)

  phpbb_read_config
end
# Runs all import steps in their required order. Attachments are only
# processed when PHPBB_BASE_DIR is configured (i.e. not nil).
def execute
  import_users
  import_categories
  import_posts
  import_private_messages

  attachments_enabled = !PHPBB_BASE_DIR.nil?
  import_attachments if attachments_enabled

  suspend_users
end
# Imports the phpBB users in batches of BATCH_SIZE. Members of the BOTS group
# and accounts with user_type 1 are left out. Users whose default group is
# GLOBAL_MODERATORS become moderators, those in ADMINISTRATORS become admins.
# The avatar of each user is imported right after the user has been created.
def import_users
  puts '', "creating users"

  total_count = mysql_query("SELECT count(*) count
                             FROM phpbb_users u
                             JOIN phpbb_groups g ON g.group_id = u.group_id
                             WHERE g.group_name != 'BOTS'
                             AND u.user_type != 1;").first['count']

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(
      "SELECT user_id id, user_email email, username, user_regdate, group_name, user_avatar_type, user_avatar
       FROM phpbb_users u
       JOIN phpbb_groups g ON g.group_id = u.group_id
       WHERE g.group_name != 'BOTS'
       AND u.user_type != 1
       ORDER BY u.user_id ASC
       LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if results.size < 1

    create_users(results, total: total_count, offset: offset) do |user|
      group_name = user['group_name']

      {
        id: user['id'],
        email: user['email'],
        username: user['username'],
        created_at: Time.zone.at(user['user_regdate']),
        moderator: group_name == 'GLOBAL_MODERATORS',
        admin: group_name == 'ADMINISTRATORS',
        post_create_action: proc { |newmember| phpbb_import_user_avatar(user, newmember) }
      }
    end
  end
end

# Uploads the phpBB avatar described by the +user+ row and assigns it to the
# freshly created +newmember+. Nothing happens when PHPBB_BASE_DIR is nil, the
# avatar type is not listed in IMPORT_AVATARS, the member already has an
# uploaded avatar or no avatar path could be determined.
def phpbb_import_user_avatar(user, newmember)
  return if PHPBB_BASE_DIR.nil?
  return unless IMPORT_AVATARS.include?(user['user_avatar_type'])
  return unless newmember.uploaded_avatar_id.blank?

  path = phpbb_avatar_fullpath(user['user_avatar_type'], user['user_avatar'])
  return unless path

  begin
    upload = create_upload(newmember.id, path, user['user_avatar'])

    if upload.persisted?
      # import mode is switched off only while the user avatar record is created
      newmember.import_mode = false
      newmember.create_user_avatar
      newmember.import_mode = true
      newmember.user_avatar.update(custom_upload_id: upload.id)
      newmember.update(uploaded_avatar_id: upload.id)
    else
      puts "Error: Upload did not persist!"
    end
  rescue SystemCallError => err
    puts "Could not import avatar: #{err.message}"
  end
end
# Creates one category per phpBB forum. Forum names (cut to 50 characters by
# the query) and descriptions are HTML-unescaped. Forums with a parent_id
# greater than zero are attached to the category imported for that parent.
def import_categories
  forums = mysql_query("
    SELECT forum_id id, parent_id, left(forum_name, 50) name, forum_desc description
    FROM phpbb_forums
    ORDER BY parent_id ASC, forum_id ASC
  ")

  create_categories(forums) do |forum|
    category = {
      id: forum['id'],
      name: CGI.unescapeHTML(forum['name']),
      description: CGI.unescapeHTML(forum['description'])
    }

    has_parent = forum['parent_id'].to_i > 0
    category[:parent_category_id] = category_id_from_imported_category_id(forum['parent_id']) if has_parent

    category
  end
end
# Imports all phpBB posts in batches of BATCH_SIZE, ordered by post id. The
# first post of a topic creates the topic (with category and title), every
# other post is appended to the topic of its already imported first post.
# Posts whose first post cannot be found are reported and skipped.
def import_posts
  puts "", "creating topics and posts"

  total_count = mysql_query("SELECT count(*) count from phpbb_posts").first["count"]

  batches(BATCH_SIZE) do |offset|
    results = mysql_query("
      SELECT p.post_id id,
             p.topic_id topic_id,
             t.forum_id category_id,
             t.topic_title title,
             t.topic_first_post_id first_post_id,
             p.poster_id user_id,
             p.post_text raw,
             p.post_time post_time
      FROM phpbb_posts p,
           phpbb_topics t
      WHERE p.topic_id = t.topic_id
      ORDER BY id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")

    break if results.size < 1

    create_posts(results, total: total_count, offset: offset) do |m|
      mapped = {
        id: m['id'],
        # posts of unknown users are assigned to user id -1
        user_id: user_id_from_imported_user_id(m['user_id']) || -1,
        raw: process_phpbb_post(m['raw'], m['id']),
        created_at: Time.zone.at(m['post_time'])
      }

      if m['id'] == m['first_post_id']
        mapped[:category] = category_id_from_imported_category_id(m['category_id'])
        mapped[:title] = CGI.unescapeHTML(m['title'])
        mapped
      elsif (parent = topic_lookup_from_imported_post_id(m['first_post_id']))
        mapped[:topic_id] = parent[:topic_id]
        mapped
      else
        puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
        nil
      end
    end
  end
end
# Imports phpBB private messages as Discourse private message topics, in
# batches of BATCH_SIZE. Messages with root_level 0 start a new private
# topic; all other messages are appended to the topic of their root message.
# Import ids of private messages are prefixed with "pm:".
#
# A root message without any recipient other than its author is skipped, as
# is a reply whose root message has not been imported.
def import_private_messages
  puts "", "creating private messages"

  total_count = mysql_query("SELECT count(*) count from phpbb_privmsgs").first["count"]

  batches(BATCH_SIZE) do |offset|
    results = mysql_query("
      SELECT msg_id id,
             root_level,
             author_id user_id,
             message_time,
             message_subject,
             message_text
      FROM phpbb_privmsgs
      ORDER BY root_level ASC, msg_id ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")

    break if results.size < 1

    create_posts(results, total: total_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = "pm:#{m['id']}"
      mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
      mapped[:raw] = process_phpbb_post(m['message_text'], m['id'])
      mapped[:created_at] = Time.zone.at(m['message_time'])

      if m['root_level'] == 0
        mapped[:title] = CGI.unescapeHTML(m['message_subject'])
        mapped[:archetype] = Archetype.private_message

        # Find the users who are part of this private message.
        # Found from the to_address of phpbb_privmsgs, by looking at
        # all the rows with the same root_level.
        # to_address looks like this: "u_91:u_1234:u_200"
        # The "u_" prefix is discarded and the rest is a user_id.
        #
        # flat_map/map are used instead of flatten!/map! because flatten!
        # returns nil when there was nothing to flatten, which would make the
        # chained call fail.
        import_user_ids = mysql_query("
          SELECT to_address
          FROM phpbb_privmsgs
          WHERE msg_id = #{m['id']}
          OR root_level = #{m['id']}")
          .flat_map { |r| r['to_address'].split(':') }
          .map { |u| u[2..-1] }

        author_id = m['user_id'].to_s
        mapped[:target_usernames] = import_user_ids
          .reject { |import_user_id| import_user_id.to_s == author_id }
          .map { |import_user_id| User.find_by_id(user_id_from_imported_user_id(import_user_id)).try(:username) }
          .compact
          .uniq

        skip = true if mapped[:target_usernames].empty? # pm with yourself?
      else
        parent = topic_lookup_from_imported_post_id("pm:#{m['root_level']}")
        if parent
          mapped[:topic_id] = parent[:topic_id]
        else
          puts "Parent post pm:#{m['root_level']} doesn't exist. Skipping #{m["id"]}: #{m["message_subject"][0..40]}"
          skip = true
        end
      end

      skip ? nil : mapped
    end
  end
end
# Suspends every imported user that has a ban in phpbb_banlist which is
# either permanent (ban_end = 0) or ends in the future. Permanent bans are
# turned into suspensions of 200 years. Each suspension is logged as a staff
# action of the system user.
def suspend_users
  puts '', "updating banned users"

  active_bans = "ban_userid > 0 AND (ban_end = 0 OR ban_end > #{Time.zone.now.to_i})"
  suspended_count = 0
  failure_count = 0

  total = mysql_query("SELECT count(*) count FROM phpbb_banlist WHERE #{active_bans}").first['count']
  system_user = Discourse.system_user

  bans = mysql_query("SELECT ban_userid, ban_start, ban_end, ban_give_reason FROM phpbb_banlist WHERE #{active_bans}")
  bans.each do |ban|
    user = find_user_by_import_id(ban['ban_userid'])

    if !user
      puts "Not found: #{ban['ban_userid']}"
      failure_count += 1
    else
      user.suspended_at = Time.zone.at(ban['ban_start'])
      user.suspended_till =
        if ban['ban_end'] > 0
          Time.zone.at(ban['ban_end'])
        else
          200.years.from_now
        end

      if user.save
        StaffActionLogger.new(system_user).log_user_suspend(user, ban['ban_give_reason'])
        suspended_count += 1
      else
        puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
        failure_count += 1
      end
    end

    print_status suspended_count + failure_count, total
  end
end
# Converts the raw text of a phpBB post into text Discourse can work with.
# The argument itself is not modified. The order of the steps matters: the
# HTML comment markers are handled before the text is HTML-unescaped.
#
# raw       - the post text as stored by phpBB
# import_id - the phpBB id of the post, passed on to replace_internal_link
#
# Returns the converted text.
def process_phpbb_post(raw, import_id)
  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  text = raw.gsub(/<!-- s(\S+) --><img (?:[^>]+) \/><!-- s(?:\S+) -->/, '\1')

  # Internal forum links of this form: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?f=26&amp;t=3412">viewtopic.php?f=26&amp;t=3412</a><!-- l -->
  text = text.gsub(/<!-- l --><a(?:.+)href="(?:\S+)"(?:.*)>viewtopic(?:.*)t=(\d+)<\/a><!-- l -->/) do |phpbb_link|
    replace_internal_link(phpbb_link, Regexp.last_match(1), import_id)
  end

  # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
  text = text.gsub(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')

  # Many phpbb bbcode tags have a hash attached to them. Examples:
  # [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
  # [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
  text = text.gsub(/:(?:\w{8})\]/, ']')

  text = CGI.unescapeHTML(text)

  # phpBB shortens link text like this, which breaks our markdown processing:
  # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
  #
  # Work around it for now:
  text = text.gsub(/\[http(s)?:\/\/(www\.)?/, '[')

  # Replace internal forum links that aren't in the <!-- l --> format
  text = text.gsub(internal_url_regexp) do |phpbb_link|
    replace_internal_link(phpbb_link, Regexp.last_match(1), import_id)
  end

  # convert list tags to ul and list=1 tags to ol
  # (basically, we're only missing list=a here...)
  text = text.gsub(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]')
  text = text.gsub(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]')

  # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
  text.gsub(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')
end
# Builds the Discourse URL for a link that pointed to a phpBB topic.
#
# phpbb_link          - the original link text, returned unchanged when the
#                       topic cannot be resolved
# import_topic_id     - the phpBB topic id the link points to
# from_import_post_id - the phpBB id of the post containing the link (unused)
#
# Returns "NEW_SITE_PREFIX/t/<slug>/<id>" of the imported topic or phpbb_link.
def replace_internal_link(phpbb_link, import_topic_id, from_import_post_id)
  rows = mysql_query("select topic_first_post_id from phpbb_topics where topic_id = #{import_topic_id}")
  return phpbb_link if rows.size <= 0

  # topics are looked up through the import id of their first post
  first_post_id = rows.first['topic_first_post_id']
  lookup = topic_lookup_from_imported_post_id(first_post_id)
  return phpbb_link unless lookup

  topic = Topic.find_by_id(lookup[:topic_id])
  topic ? "#{NEW_SITE_PREFIX}/t/#{topic.slug}/#{topic.id}" : phpbb_link
end
# Returns the (memoized) regular expression that matches absolute links to
# topics of the old forum, e.g.
# "http://oldsite.example.com/forums/viewtopic.php?f=26&t=3412".
# The first capture group contains the phpBB topic id.
#
# The site prefix is escaped with Regexp.escape and the "?" that starts the
# query string is matched literally; unescaped it would only make the "p" of
# "php" optional.
def internal_url_regexp
  @internal_url_regexp ||= Regexp.new(
    "http(?:s)?://#{Regexp.escape(ORIGINAL_SITE_PREFIX)}/viewtopic\\.php\\?(?:\\S*)t=(\\d+)"
  )
end
# This step is done separately because it can take multiple attempts to get right (because of
# missing files, wrong paths, authorized extensions, etc.).
#
# Sets the authorized extensions site setting to AUTHORIZED_EXTENSIONS, then walks over every
# post and replaces each phpBB "[attachment=N]...[/attachment]" tag with the HTML of a newly
# created upload. The file is looked up in phpbb_attachments by the post's import id and the
# real filename and read from PHPBB_BASE_DIR/files. Tags that cannot be resolved are left
# unchanged and counted as failed. Changed posts are saved as a revision without bumping.
def import_attachments
  setting = AUTHORIZED_EXTENSIONS.join('|')
  SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions

  r = /\[attachment=[\d]+\]<\!-- [\w]+ --\>([^<]+)<\!-- [\w]+ --\>\[\/attachment\]/

  user = Discourse.system_user

  current_count = 0
  total_count = Post.count
  success_count = 0
  fail_count = 0

  puts '', "Importing attachments...", ''

  Post.find_each do |post|
    current_count += 1
    print_status current_count, total_count

    # posts that were not created by the importer have no import id
    import_id = post.custom_fields['import_id'].to_s

    new_raw = post.raw.gsub(r) do |s|
      real_filename = Regexp.last_match(1)

      # note: currently, we do not import PM attachments.
      # If this should be desired, this has to be fixed,
      # otherwise, the SQL state coughs up an error for the
      # clause "WHERE post_msg_id = pm12345"...
      next s if import_id.empty? || import_id.start_with?('pm:')

      # The filename comes from the post text, so it has to be escaped
      # before it is embedded into the SQL statement.
      sql = "SELECT physical_filename,
                    mimetype
             FROM phpbb_attachments
             WHERE post_msg_id = #{import_id.to_i}
               AND real_filename = '#{@client.escape(real_filename)}';"

      begin
        results = mysql_query(sql)
      rescue Mysql2::Error => e
        puts "SQL Error"
        puts e.message
        puts sql
        fail_count += 1
        next s
      end

      row = results.first
      unless row
        puts "Couldn't find phpbb_attachments record for post.id = #{post.id}, import_id = #{import_id}, real_filename = #{real_filename}"
        fail_count += 1
        next s
      end

      filename = File.join(PHPBB_BASE_DIR, 'files', row['physical_filename'])
      unless File.exist?(filename)
        puts "Attachment file doesn't exist: #{filename}"
        fail_count += 1
        next s
      end

      upload = create_upload(user.id, filename, real_filename)

      if upload.nil? || !upload.valid?
        puts "Upload not valid :("
        puts upload.errors.inspect if upload
        fail_count += 1
        next s
      end

      success_count += 1

      html_for_upload(upload, real_filename)
    end

    if new_raw != post.raw
      PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: 'Migrate from PHPBB3' })
    end
  end

  puts '', ''
  puts "succeeded: #{success_count}"
  puts " failed: #{fail_count}" if fail_count > 0
  puts ''
end
# Reads the avatar settings from the phpBB configuration table and stores
# them in instance variables:
#   avatar_gallery_path -> @avatar_gallery_path
#   avatar_path         -> @avatar_path
#   avatar_salt         -> @avatar_salt (base filename for uploaded avatars)
# The paths are relative to the phpBB install path. When the table yields no
# rows a warning is printed and nothing is set.
def phpbb_read_config
  config_rows = mysql_query("SELECT config_name, config_value
                             FROM phpbb_config;")

  if config_rows.size < 1
    puts "could not read config... no avatars and attachments will be imported!"
    return
  end

  config_rows.each do |row|
    case row['config_name']
    when 'avatar_gallery_path'
      @avatar_gallery_path = row['config_value']
    when 'avatar_path'
      @avatar_path = row['config_value']
    when 'avatar_salt'
      @avatar_salt = row['config_value']
    end
  end
end
# Create the full path to the phpBB avatar specified by avatar_type and filename.
#
# avatar_type - 1 (uploaded avatar), 2 (hotlinked avatar) or 3 (gallery avatar)
# filename    - the avatar filename (or URL for hotlinked avatars) stored by
#               phpBB. For uploaded avatars the string is modified in place.
#
# Returns the path for types 1 and 3, the object returned by
# FileHelper.download for type 2, or nil when the avatar could not be
# downloaded, is too big or the type is unknown.
def phpbb_avatar_fullpath(avatar_type, filename)
  case avatar_type
  when 1 # uploaded avatar
    filename.gsub!(/_[0-9]+\./, '.') # we need 1337.jpg, not 1337_2983745.jpg
    File.join(PHPBB_BASE_DIR, @avatar_path, "#{@avatar_salt}_#{filename}")
  when 3 # gallery avatar
    File.join(PHPBB_BASE_DIR, @avatar_gallery_path, filename)
  when 2 # hotlinked avatar
    max_size = SiteSetting.max_image_size_kb.kilobytes

    begin
      hotlinked = FileHelper.download(filename, max_size, "discourse-hotlinked")
    rescue StandardError => err
      puts "Error downloading avatar: #{err.message}. Skipping..."
      return nil
    end

    if !hotlinked
      Rails.logger.error("There was an error while downloading '#{filename}' locally.")
      nil
    elsif hotlinked.size > max_size
      Rails.logger.error("Failed to pull hotlinked image: #{filename} - Image is bigger than #{max_size}")
      nil
    else
      hotlinked
    end
  else
    # double quotes are required here, otherwise the type is not interpolated
    puts "Invalid avatar type #{avatar_type}, skipping"
    nil
  end
end
# Executes +sql+ on the phpBB database connection stored in @client and
# returns the result of Mysql2::Client#query. Row caching is switched off
# via the cache_rows: false option.
def mysql_query(sql)
@client.query(sql, cache_rows: false)
end
# The importer expects exactly one argument: the path to an existing
# settings file. Otherwise the usage is printed and the script exits with
# status 1. File.exist? is used because File.exists? is deprecated and no
# longer available in Ruby 3.2+.
if ARGV.length != 1 || !File.exist?(ARGV[0])
  STDERR.puts '', 'Usage of phpBB3 importer:', 'bundle exec ruby phpbb3.rb <path/to/settings.yml>'
  STDERR.puts '', "Use the settings file from #{File.expand_path('phpbb3/settings.yml', File.dirname(__FILE__))} as an example."
  exit 1
end
ImportScripts::PhpBB3.new.perform
# Entry point of the importer: loads the settings file given as first command
# line argument, connects to the phpBB database and runs the import.
module ImportScripts
module PhpBB3
require_relative 'phpbb3/support/settings'
require_relative 'phpbb3/database/database'
# ARGV[0] is the path to the settings file
@settings = Settings.load(ARGV[0])
# We need to load the gem files for ruby-bbcode-to-md and the database adapter
# (e.g. mysql2) before bundler gets initialized by the base importer.
# Otherwise we get an error since those gems are not always in the Gemfile.
require 'ruby-bbcode-to-md' if @settings.use_bbcode_to_md
begin
# Database.create raises UnsupportedVersionError when the detected phpBB
# version is neither 3.0.x nor 3.1.x; the message is printed and the script
# exits with status 1.
@database = Database.create(@settings.database)
rescue UnsupportedVersionError => error
STDERR.puts '', error.message
exit 1
end
# The importer (which requires the base importer) is loaded last, after the
# gems above have been required.
require_relative 'phpbb3/importer'
Importer.new(@settings, @database).perform
end
end

View file

@ -0,0 +1,56 @@
require 'mysql2'
module ImportScripts::PhpBB3
  # Connects to the phpBB database, detects the phpBB version stored in its
  # config table and builds the database class for that version.
  class Database
    # Convenience wrapper around Database.new(...).create_database.
    #
    # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings]
    def self.create(database_settings)
      new(database_settings).create_database
    end

    # Stores the settings and opens the database connection.
    #
    # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings]
    def initialize(database_settings)
      @database_settings = database_settings
      @database_client = create_database_client
    end

    # Loads and instantiates the database class matching the phpBB version.
    #
    # @return [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
    # @raise [UnsupportedVersionError] if the version starts with neither "3.0" nor "3.1"
    def create_database
      version = get_phpbb_version

      case
      when version.start_with?('3.0')
        require_relative 'database_3_0'
        Database_3_0.new(@database_client, @database_settings)
      when version.start_with?('3.1')
        require_relative 'database_3_1'
        Database_3_1.new(@database_client, @database_settings)
      else
        message = "Unsupported version (#{version}) of phpBB detected.\n"
        message << 'Currently only 3.0.x and 3.1.x are supported by this importer.'
        raise UnsupportedVersionError, message
      end
    end

    protected

    # Opens the MySQL connection described by the database settings.
    def create_database_client
      connection_options = {
        host: @database_settings.host,
        username: @database_settings.username,
        password: @database_settings.password,
        database: @database_settings.schema
      }
      Mysql2::Client.new(connection_options)
    end

    # Reads the value of the 'version' entry from the phpBB config table.
    def get_phpbb_version
      sql = <<-SQL
        SELECT config_value
        FROM #{@database_settings.table_prefix}_config
        WHERE config_name = 'version'
      SQL

      rows = @database_client.query(sql, cache_rows: false, symbolize_keys: true)
      rows.first[:config_value]
    end
  end

  # Raised when the detected phpBB version is not supported by the importer.
  class UnsupportedVersionError < RuntimeError
  end
end

View file

@ -0,0 +1,333 @@
require_relative 'database_base'
require_relative '../support/constants'
module ImportScripts::PhpBB3
  # All SQL queries the importer runs against a phpBB 3.0.x database.
  # Table names are built from the configured table prefix; queries with an
  # +offset+ parameter return at most @batch_size rows.
  class Database_3_0 < DatabaseBase
    # Number of users whose user_type is not USER_TYPE_IGNORE.
    def count_users
      count(<<-SQL)
        SELECT COUNT(*) AS count
        FROM #{@table_prefix}_users u
          JOIN #{@table_prefix}_groups g ON g.group_id = u.group_id
        WHERE u.user_type != #{Constants::USER_TYPE_IGNORE}
      SQL
    end

    # One batch of users together with the name of their default group and
    # the data of a currently active ban (if any).
    def fetch_users(offset)
      query(<<-SQL)
        SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip,
          u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason,
          u.user_posts, u.user_website, u.user_from, u.user_birthday, u.user_avatar_type, u.user_avatar
        FROM #{@table_prefix}_users u
          JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id)
          LEFT OUTER JOIN #{@table_prefix}_banlist b ON (
            u.user_id = b.ban_userid AND b.ban_exclude = 0 AND
            (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP())
          )
        WHERE u.user_type != #{Constants::USER_TYPE_IGNORE}
        ORDER BY u.user_id ASC
        LIMIT #{@batch_size}
        OFFSET #{offset}
      SQL
    end

    # Number of distinct, non-empty usernames stored directly on posts.
    def count_anonymous_users
      count(<<-SQL)
        SELECT COUNT(DISTINCT post_username) AS count
        FROM #{@table_prefix}_posts
        WHERE post_username <> ''
      SQL
    end

    # One batch of usernames stored directly on posts, each with the time of
    # the earliest post using that name.
    def fetch_anonymous_users(offset)
      query(<<-SQL)
        SELECT post_username, MIN(post_time) AS first_post_time
        FROM #{@table_prefix}_posts
        WHERE post_username <> ''
        GROUP BY post_username
        ORDER BY post_username ASC
        LIMIT #{@batch_size}
        OFFSET #{offset}
      SQL
    end

    # All forums that are not of type "link", each with the time of the
    # earliest topic in it, ordered by parent and position (left_id).
    # The configured table prefix is used here like in every other query.
    def fetch_categories
      query(<<-SQL)
        SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_desc, x.first_post_time
        FROM #{@table_prefix}_forums f
          LEFT OUTER JOIN (
            SELECT MIN(topic_time) AS first_post_time, forum_id
            FROM #{@table_prefix}_topics
            GROUP BY forum_id
          ) x ON (f.forum_id = x.forum_id)
        WHERE f.forum_type != #{Constants::FORUM_TYPE_LINK}
        ORDER BY f.parent_id ASC, f.left_id ASC
      SQL
    end

    # Total number of posts.
    def count_posts
      count(<<-SQL)
        SELECT COUNT(*) AS count
        FROM #{@table_prefix}_posts
      SQL
    end

    # One batch of posts joined with the data of their topic. poll_end is
    # poll_start + poll_length, or NULL when poll_length is not positive.
    def fetch_posts(offset)
      query(<<-SQL)
        SELECT p.post_id, p.topic_id, t.forum_id, t.topic_title, t.topic_first_post_id, p.poster_id,
          p.post_text, p.post_time, p.post_username, t.topic_status, t.topic_type, t.poll_title,
          CASE WHEN t.poll_length > 0 THEN t.poll_start + t.poll_length ELSE NULL END AS poll_end,
          t.poll_max_options, p.post_attachment
        FROM #{@table_prefix}_posts p
          JOIN #{@table_prefix}_topics t ON (p.topic_id = t.topic_id)
        ORDER BY p.post_id ASC
        LIMIT #{@batch_size}
        OFFSET #{offset}
      SQL
    end

    # Id of the first post of the given topic, or nil when no such topic
    # exists (e.g. a link pointing to a deleted topic).
    def get_first_post_id(topic_id)
      row = query(<<-SQL).first
        SELECT topic_first_post_id
        FROM #{@table_prefix}_topics
        WHERE topic_id = #{topic_id}
      SQL

      row && row[:topic_first_post_id]
    end

    # The poll options of a topic, ordered by option id.
    def fetch_poll_options(topic_id)
      query(<<-SQL)
        SELECT poll_option_id, poll_option_text, poll_option_total
        FROM #{@table_prefix}_poll_options
        WHERE topic_id = #{topic_id}
        ORDER BY poll_option_id
      SQL
    end

    # The votes cast in the poll of a topic.
    def fetch_poll_votes(topic_id)
      # this query ignores votes from users that do not exist anymore
      query(<<-SQL)
        SELECT u.user_id, v.poll_option_id
        FROM #{@table_prefix}_poll_votes v
          JOIN #{@table_prefix}_users u ON (v.vote_user_id = u.user_id)
        WHERE v.topic_id = #{topic_id}
      SQL
    end

    # The larger of: the number of distinct voters and the highest vote total
    # of a single poll option.
    def count_voters(topic_id)
      # anonymous voters can't be counted, but lets try to make the count look "correct" anyway
      count(<<-SQL)
        SELECT MAX(count) AS count
        FROM (
          SELECT COUNT(DISTINCT vote_user_id) AS count
          FROM #{@table_prefix}_poll_votes
          WHERE topic_id = #{topic_id}
          UNION
          SELECT MAX(poll_option_total) AS count
          FROM #{@table_prefix}_poll_options
          WHERE topic_id = #{topic_id}
        ) x
      SQL
    end

    # The largest filesize stored in the attachments table (0 if it is empty).
    def get_max_attachment_size
      query(<<-SQL).first[:filesize]
        SELECT IFNULL(MAX(filesize), 0) AS filesize
        FROM #{@table_prefix}_attachments
      SQL
    end

    # The attachments of a post (or of a private message when topic_id is 0,
    # which is how the message queries below identify message attachments).
    def fetch_attachments(topic_id, post_id)
      query(<<-SQL)
        SELECT physical_filename, real_filename
        FROM #{@table_prefix}_attachments
        WHERE topic_id = #{topic_id} AND post_msg_id = #{post_id}
        ORDER BY filetime DESC, post_msg_id ASC
      SQL
    end

    # Number of private messages. With +use_fixed_messages+ the rows of the
    # table created by #calculate_fixed_messages are counted instead.
    def count_messages(use_fixed_messages)
      if use_fixed_messages
        count(<<-SQL)
          SELECT COUNT(*) AS count
          FROM #{@table_prefix}_import_privmsgs
        SQL
      else
        count(<<-SQL)
          SELECT COUNT(*) AS count
          FROM #{@table_prefix}_privmsgs
        SQL
      end
    end

    # One batch of private messages with their attachment count. With
    # +use_fixed_messages+ the root message id is taken from the table created
    # by #calculate_fixed_messages, otherwise from the root_level column.
    def fetch_messages(use_fixed_messages, offset)
      if use_fixed_messages
        query(<<-SQL)
          SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text,
            IFNULL(a.attachment_count, 0) AS attachment_count
          FROM #{@table_prefix}_privmsgs m
            JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
            LEFT OUTER JOIN (
              SELECT post_msg_id, COUNT(*) AS attachment_count
              FROM #{@table_prefix}_attachments
              WHERE topic_id = 0
              GROUP BY post_msg_id
            ) a ON (m.msg_id = a.post_msg_id)
          ORDER BY i.root_msg_id ASC, m.msg_id ASC
          LIMIT #{@batch_size}
          OFFSET #{offset}
        SQL
      else
        query(<<-SQL)
          SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
            m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count
          FROM #{@table_prefix}_privmsgs m
            LEFT OUTER JOIN (
              SELECT post_msg_id, COUNT(*) AS attachment_count
              FROM #{@table_prefix}_attachments
              WHERE topic_id = 0
              GROUP BY post_msg_id
            ) a ON (m.msg_id = a.post_msg_id)
          ORDER BY m.root_level ASC, m.msg_id ASC
          LIMIT #{@batch_size}
          OFFSET #{offset}
        SQL
      end
    end

    # The to_address values of a message and of all messages that have it as
    # their root message.
    def fetch_message_participants(msg_id, use_fixed_messages)
      if use_fixed_messages
        query(<<-SQL)
          SELECT m.to_address
          FROM #{@table_prefix}_privmsgs m
            JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
          WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id}
        SQL
      else
        query(<<-SQL)
          SELECT m.to_address
          FROM #{@table_prefix}_privmsgs m
          WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id}
        SQL
      end
    end

    # (Re)builds the <prefix>_import_privmsgs table that maps each private
    # message to its calculated root message. A temporary helper table is
    # created for the calculation and dropped again afterwards.
    def calculate_fixed_messages
      drop_temp_import_message_table
      create_temp_import_message_table
      fill_temp_import_message_table

      drop_import_message_table
      create_import_message_table
      fill_import_message_table

      drop_temp_import_message_table
    end

    # Total number of bookmarks.
    def count_bookmarks
      count(<<-SQL)
        SELECT COUNT(*) AS count
        FROM #{@table_prefix}_bookmarks
      SQL
    end

    # One batch of bookmarks: the user id and the id of the first post of the
    # bookmarked topic.
    def fetch_bookmarks(offset)
      query(<<-SQL)
        SELECT b.user_id, t.topic_first_post_id
        FROM #{@table_prefix}_bookmarks b
          JOIN #{@table_prefix}_topics t ON (b.topic_id = t.topic_id)
        ORDER BY b.user_id ASC, b.topic_id ASC
        LIMIT #{@batch_size}
        OFFSET #{offset}
      SQL
    end

    # A single row with the phpBB config values needed by the importer:
    # phpbb_version, avatar_gallery_path, avatar_path, avatar_salt,
    # smilies_path and attachment_path (config name 'upload_path').
    def get_config_values
      query(<<-SQL).first
        SELECT
          (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'version') AS phpbb_version,
          (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_gallery_path') AS avatar_gallery_path,
          (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_path') AS avatar_path,
          (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_salt') AS avatar_salt,
          (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'smilies_path') AS smilies_path,
          (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
      SQL
    end

    protected

    def drop_temp_import_message_table
      query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs_temp")
    end

    def create_temp_import_message_table
      query(<<-SQL)
        CREATE TABLE #{@table_prefix}_import_privmsgs_temp (
          msg_id MEDIUMINT(8) NOT NULL,
          root_msg_id MEDIUMINT(8) NOT NULL,
          recipient_id MEDIUMINT(8),
          normalized_subject VARCHAR(255) NOT NULL,
          PRIMARY KEY (msg_id)
        )
      SQL
    end

    # this removes duplicate messages, converts the to_address to a number
    # and stores the message_subject in lowercase and without the prefix "Re: "
    def fill_temp_import_message_table
      query(<<-SQL)
        INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject)
        SELECT m.msg_id, m.root_level,
          CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN
            CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER)
          ELSE NULL END AS recipient_id,
          LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN
            SUBSTRING(m.message_subject, 5)
          ELSE m.message_subject END) AS normalized_subject
        FROM #{@table_prefix}_privmsgs m
        WHERE NOT EXISTS (
          SELECT 1
          FROM #{@table_prefix}_privmsgs x
          WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
            AND x.to_address = m.to_address AND x.message_time = m.message_time
        )
      SQL
    end

    def drop_import_message_table
      query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs")
    end

    def create_import_message_table
      query(<<-SQL)
        CREATE TABLE #{@table_prefix}_import_privmsgs (
          msg_id MEDIUMINT(8) NOT NULL,
          root_msg_id MEDIUMINT(8) NOT NULL,
          PRIMARY KEY (msg_id),
          INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id)
        )
      SQL
    end

    # this tries to calculate the actual root_level (= msg_id of the first message in a
    # private conversation) based on subject, time, author and recipient
    def fill_import_message_table
      query(<<-SQL)
        INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
        SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
          COALESCE((
            SELECT a.msg_id
            FROM #{@table_prefix}_privmsgs a
              JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
            WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
                   (a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
              AND b.normalized_subject = i.normalized_subject
              AND a.msg_id <> m.msg_id
              AND a.message_time < m.message_time
            ORDER BY a.message_time ASC
            LIMIT 1
          ), 0) ELSE i.root_msg_id END AS root_msg_id
        FROM #{@table_prefix}_privmsgs m
          JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
      SQL
    end
  end
end

View file

@ -0,0 +1,26 @@
require_relative 'database_3_0'
# same path as used by database_3_0
require_relative '../support/constants'

module ImportScripts::PhpBB3
  # Queries for phpBB 3.1.x. Only the user query differs from 3.0.x: website
  # and location are read from the profile fields table.
  class Database_3_1 < Database_3_0
    # One batch of users, see Database_3_0#fetch_users. The profile fields
    # are joined with an outer join so that users without a row in
    # profile_fields_data are still returned (with NULL website/location) and
    # the result stays consistent with the inherited #count_users.
    def fetch_users(offset)
      query(<<-SQL)
        SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip,
          u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason,
          u.user_posts, f.pf_phpbb_website AS user_website, f.pf_phpbb_location AS user_from,
          u.user_birthday, u.user_avatar_type, u.user_avatar
        FROM #{@table_prefix}_users u
          LEFT OUTER JOIN #{@table_prefix}_profile_fields_data f ON (u.user_id = f.user_id)
          JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id)
          LEFT OUTER JOIN #{@table_prefix}_banlist b ON (
            u.user_id = b.ban_userid AND b.ban_exclude = 0 AND
            (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP())
          )
        WHERE u.user_type != #{Constants::USER_TYPE_IGNORE}
        ORDER BY u.user_id ASC
        LIMIT #{@batch_size}
        OFFSET #{offset}
      SQL
    end
  end
end

View file

@ -0,0 +1,24 @@
module ImportScripts::PhpBB3
  # Common base of the version specific database classes: keeps the database
  # client plus the batch size and table prefix taken from the settings and
  # offers helpers for running queries.
  class DatabaseBase
    # @param database_client [Mysql2::Client]
    # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings]
    def initialize(database_client, database_settings)
      @database_client = database_client
      @batch_size = database_settings.batch_size
      @table_prefix = database_settings.table_prefix
    end

    protected

    # Executes a database query. Rows are not cached and their keys are symbols.
    def query(sql)
      query_options = { cache_rows: false, symbolize_keys: true }
      @database_client.query(sql, query_options)
    end

    # Executes a database query and returns the value of the 'count' column
    # of its first row.
    def count(sql)
      first_row = query(sql).first
      first_row[:count]
    end
  end
end

View file

@ -0,0 +1,152 @@
require_relative '../base'
require_relative 'support/settings'
require_relative 'database/database'
require_relative 'importers/importer_factory'
module ImportScripts::PhpBB3
class Importer < ImportScripts::Base
# @param settings [ImportScripts::PhpBB3::Settings]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
def initialize(settings, database)
@settings = settings
super()
@database = database
@php_config = database.get_config_values
@importers = ImporterFactory.new(@database, @lookup, @uploader, @settings, @php_config)
end
def perform
super if settings_check_successful?
end
protected
def execute
puts '', "importing from phpBB #{@php_config[:phpbb_version]}"
import_users
import_anonymous_users if @settings.import_anonymous_users
import_categories
import_posts
import_private_messages if @settings.import_private_messages
import_bookmarks if @settings.import_bookmarks
end
def get_site_settings_for_import
settings = super
max_file_size_kb = @database.get_max_attachment_size
settings[:max_image_size_kb] = [max_file_size_kb, SiteSetting.max_image_size_kb].max
settings[:max_attachment_size_kb] = [max_file_size_kb, SiteSetting.max_attachment_size_kb].max
settings
end
def settings_check_successful?
true
end
def import_users
puts '', 'creating users'
total_count = @database.count_users
importer = @importers.user_importer
batches do |offset|
rows = @database.fetch_users(offset)
break if rows.size < 1
create_users(rows, total: total_count, offset: offset) do |row|
importer.map_user(row)
end
end
end
def import_anonymous_users
puts '', 'creating anonymous users'
total_count = @database.count_anonymous_users
importer = @importers.user_importer
batches do |offset|
rows = @database.fetch_anonymous_users(offset)
break if rows.size < 1
create_users(rows, total: total_count, offset: offset) do |row|
importer.map_anonymous_user(row)
end
end
end
def import_categories
puts '', 'creating categories'
rows = @database.fetch_categories
importer = @importers.category_importer
create_categories(rows) do |row|
importer.map_category(row)
end
end
def import_posts
puts '', 'creating topics and posts'
total_count = @database.count_posts
importer = @importers.post_importer
batches do |offset|
rows = @database.fetch_posts(offset)
break if rows.size < 1
create_posts(rows, total: total_count, offset: offset) do |row|
importer.map_post(row)
end
end
end
# Imports private messages batch by batch. When the fix_private_messages
# setting is enabled, the database first pre-calculates the fixed messages
# and the same flag is passed on to the count and fetch queries.
# Stops at the first empty batch.
def import_private_messages
  fix_messages = @settings.fix_private_messages

  if fix_messages
    puts '', 'fixing private messages'
    @database.calculate_fixed_messages
  end

  puts '', 'creating private messages'

  message_count = @database.count_messages(fix_messages)
  message_importer = @importers.message_importer

  batches do |offset|
    batch = @database.fetch_messages(fix_messages, offset)
    break if batch.size.zero?

    create_posts(batch, total: message_count, offset: offset) do |message_row|
      message_importer.map_message(message_row)
    end
  end
end
# Imports bookmarks batch by batch. Each batch is handed to create_bookmarks
# with a block that maps a row via the bookmark importer.
# Stops at the first empty batch.
def import_bookmarks
  puts '', 'creating bookmarks'

  bookmark_count = @database.count_bookmarks
  bookmark_importer = @importers.bookmark_importer

  batches do |offset|
    batch = @database.fetch_bookmarks(offset)
    break if batch.size.zero?

    create_bookmarks(batch, total: bookmark_count, offset: offset) do |bookmark_row|
      bookmark_importer.map_bookmark(bookmark_row)
    end
  end
end
# Intentionally a no-op. Presumably overrides a step of the base import
# script - verify against the superclass.
def update_last_seen_at
# no need for this since the importer sets last_seen_at for each user during the import
end
# Returns the value of the use_bbcode_to_md import setting.
def use_bbcode_to_md?
@settings.use_bbcode_to_md
end
# Delegates to the superclass implementation using the batch size configured
# in the database settings. The block given to this method is passed on to
# super implicitly.
def batches
super(@settings.database.batch_size)
end
end
end

View file

@ -0,0 +1,36 @@
module ImportScripts::PhpBB3
# Uploads the files attached to phpBB posts and private messages.
class AttachmentImporter
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param uploader [ImportScripts::Uploader]
  # @param settings [ImportScripts::PhpBB3::Settings]
  # @param phpbb_config [Hash]
  def initialize(database, uploader, settings, phpbb_config)
    @database = database
    @uploader = uploader
    @attachment_path = File.join(settings.base_dir, phpbb_config[:attachment_path])
  end

  # Uploads all attachments that the database returns for the given topic and
  # post. Failed uploads are reported on stdout and skipped.
  #
  # @return [Array, nil] the uploader's HTML for every successful upload,
  #   or nil when there are no attachment rows
  def import_attachments(user_id, post_id, topic_id = 0)
    attachment_rows = @database.fetch_attachments(topic_id, post_id)
    return nil if attachment_rows.size < 1

    html_snippets = []

    attachment_rows.each do |attachment_row|
      path = File.join(@attachment_path, attachment_row[:physical_filename])
      filename = CGI.unescapeHTML(attachment_row[:real_filename])
      upload = @uploader.create_upload(user_id, path, filename)

      if upload && upload.valid?
        html_snippets << @uploader.html_for_upload(upload, filename)
        next
      end

      puts "Failed to upload #{path}"
      puts upload.errors.inspect if upload
    end

    html_snippets
  end
end
end

View file

@ -0,0 +1,107 @@
module ImportScripts::PhpBB3
# Imports phpBB avatars (uploaded, gallery or remote) as Discourse user avatars.
class AvatarImporter
  # @param uploader [ImportScripts::Uploader]
  # @param settings [ImportScripts::PhpBB3::Settings]
  # @param phpbb_config [Hash]
  def initialize(uploader, settings, phpbb_config)
    @uploader = uploader
    @settings = settings

    @uploaded_avatar_path = File.join(settings.base_dir, phpbb_config[:avatar_path])
    @gallery_path = File.join(settings.base_dir, phpbb_config[:avatar_gallery_path])
    @avatar_salt = phpbb_config[:avatar_salt]
  end

  # Uploads the avatar described by the row and assigns it to the user.
  # Does nothing when the avatar type is disabled in the settings, when the
  # user already has an uploaded avatar or when no avatar file can be located.
  # Failures are logged instead of raised.
  #
  # @param user the user that receives the avatar
  # @param row the phpBB user row (reads :user_avatar_type and :user_avatar)
  def import_avatar(user, row)
    avatar_type = row[:user_avatar_type]
    return unless is_avatar_importable?(user, avatar_type)

    path = get_avatar_path(avatar_type, row[:user_avatar])
    return if path.nil?

    begin
      filename = "avatar#{File.extname(path)}"
      upload = @uploader.create_upload(user.id, path, filename)

      # Other importers in this file treat a nil result of create_upload as a
      # failed upload, so guard against it here instead of raising NoMethodError.
      if upload && upload.persisted?
        user.import_mode = false
        user.create_user_avatar
        user.import_mode = true
        user.user_avatar.update(custom_upload_id: upload.id)
        user.update(uploaded_avatar_id: upload.id)
      else
        Rails.logger.error("Could not persist avatar for user #{user.username}")
      end
    rescue SystemCallError => err
      Rails.logger.error("Could not import avatar for user #{user.username}: #{err.message}")
    end
  end

  protected

  # An avatar is imported when its type is enabled in the settings and the
  # user does not have an uploaded avatar yet.
  def is_avatar_importable?(user, avatar_type)
    is_allowed_avatar_type?(avatar_type) && user.uploaded_avatar_id.blank?
  end

  # Resolves the avatar for the given type: a local path for uploaded and
  # gallery avatars, the result of #download_avatar for remote avatars.
  #
  # @return the location of the avatar, or nil when it is unavailable
  def get_avatar_path(avatar_type, filename)
    case avatar_type
    when Constants::AVATAR_TYPE_UPLOADED
      # we need 1337.jpg, not 1337_2983745.jpg
      # (non-mutating gsub: the filename belongs to the caller's row)
      get_uploaded_path(filename.gsub(/_[0-9]+\./, '.'))
    when Constants::AVATAR_TYPE_GALLERY
      get_gallery_path(filename)
    when Constants::AVATAR_TYPE_REMOTE
      download_avatar(filename)
    else
      Rails.logger.error("Invalid avatar type #{avatar_type}. Skipping...")
      nil
    end
  end

  # Tries to download the remote avatar.
  #
  # @return whatever FileHelper.download returned, or nil when the download
  #   failed or the file exceeds the maximum image size
  def download_avatar(url)
    max_image_size_bytes = SiteSetting.max_image_size_kb.kilobytes

    begin
      avatar_file = FileHelper.download(url, max_image_size_bytes, 'discourse-avatar')
    rescue StandardError => err
      warn "Error downloading avatar: #{err.message}. Skipping..."
      return nil
    end

    unless avatar_file
      Rails.logger.error("There was an error while downloading '#{url}' locally.")
      return nil
    end

    return avatar_file if avatar_file.size <= max_image_size_bytes

    # Report the limit in KB, matching the unit named in the message.
    Rails.logger.error("Failed to download remote avatar: #{url} - Image is larger than #{SiteSetting.max_image_size_kb} KB")
    nil
  end

  # Uploaded avatars are stored as "<avatar salt>_<filename>".
  def get_uploaded_path(filename)
    File.join(@uploaded_avatar_path, "#{@avatar_salt}_#{filename}")
  end

  def get_gallery_path(filename)
    File.join(@gallery_path, filename)
  end

  # Looks up the setting that enables the given avatar type.
  # Unknown avatar types are never allowed.
  def is_allowed_avatar_type?(avatar_type)
    case avatar_type
    when Constants::AVATAR_TYPE_UPLOADED
      @settings.import_uploaded_avatars
    when Constants::AVATAR_TYPE_REMOTE
      @settings.import_remote_avatars
    when Constants::AVATAR_TYPE_GALLERY
      @settings.import_gallery_avatars
    else
      false
    end
  end
end
end

View file

@ -0,0 +1,10 @@
module ImportScripts::PhpBB3
# Maps phpBB bookmark rows to the attributes used for creating bookmarks.
class BookmarkImporter
  # @param row the phpBB bookmark row (reads :user_id and :topic_first_post_id)
  # @return [Hash] the imported user id and the imported id of the topic's first post
  def map_bookmark(row)
    { user_id: row[:user_id], post_id: row[:topic_first_post_id] }
  end
end
end

View file

@ -0,0 +1,47 @@
module ImportScripts::PhpBB3
# Maps phpBB forums to Discourse categories.
class CategoryImporter
  # @param lookup [ImportScripts::LookupContainer]
  # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
  def initialize(lookup, text_processor)
    @lookup = lookup
    @text_processor = text_processor
  end

  # @param row the phpBB forum row
  # @return [Hash] the category attributes; :post_create_action updates the
  #   category description topic after the category has been created
  def map_category(row)
    parent_id = @lookup.category_id_from_imported_category_id(row[:parent_id])

    {
      id: row[:forum_id],
      name: CGI.unescapeHTML(row[:forum_name]),
      parent_category_id: parent_id,
      post_create_action: proc { |category| update_category_description(category, row) }
    }
  end

  protected

  # Sets the creation date and the text of the category description topic.
  # Does nothing when the row has neither a description nor a first post time.
  #
  # @param category [Category]
  def update_category_description(category, row)
    return if row[:forum_desc].blank? && row[:first_post_time].blank?

    topic = category.topic
    post = topic.first_post

    apply_creation_date(topic, post, row[:first_post_time]) if row[:first_post_time].present?
    apply_description(post, row[:forum_desc]) if row[:forum_desc].present?
  end

  # Dates the description topic and its first post to the given timestamp.
  def apply_creation_date(topic, post, first_post_time)
    created_at = Time.zone.at(first_post_time)

    topic.created_at = created_at
    topic.save

    post.created_at = created_at
    post.save
  end

  # Revises the description post as the system user without bumping the topic.
  def apply_description(post, forum_desc)
    post.revise(
      Discourse.system_user,
      { raw: @text_processor.process_raw_text(forum_desc) },
      { revised_at: post.created_at, bypass_bump: true }
    )
  end
end
end

View file

@ -0,0 +1,69 @@
require_relative 'attachment_importer'
require_relative 'avatar_importer'
require_relative 'bookmark_importer'
require_relative 'category_importer'
require_relative 'message_importer'
require_relative 'poll_importer'
require_relative 'post_importer'
require_relative 'user_importer'
require_relative '../support/smiley_processor'
require_relative '../support/text_processor'
module ImportScripts::PhpBB3
# Creates the individual importers and wires up their collaborators.
# Every call builds a new importer; only the text processor is memoized.
class ImporterFactory
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param lookup [ImportScripts::LookupContainer]
  # @param uploader [ImportScripts::Uploader]
  # @param settings [ImportScripts::PhpBB3::Settings]
  # @param phpbb_config [Hash]
  def initialize(database, lookup, uploader, settings, phpbb_config)
    @database, @lookup, @uploader = database, lookup, uploader
    @settings, @phpbb_config = settings, phpbb_config
  end

  # @return [UserImporter] a new user importer with its own avatar importer
  def user_importer
    UserImporter.new(avatar_importer, @settings)
  end

  # @return [CategoryImporter]
  def category_importer
    CategoryImporter.new(@lookup, text_processor)
  end

  # @return [PostImporter]
  def post_importer
    PostImporter.new(@lookup, text_processor, attachment_importer, poll_importer, @settings)
  end

  # @return [MessageImporter]
  def message_importer
    MessageImporter.new(@database, @lookup, text_processor, attachment_importer, @settings)
  end

  # @return [BookmarkImporter]
  def bookmark_importer
    BookmarkImporter.new
  end

  protected

  def attachment_importer
    AttachmentImporter.new(@database, @uploader, @settings, @phpbb_config)
  end

  def avatar_importer
    AvatarImporter.new(@uploader, @settings, @phpbb_config)
  end

  def poll_importer
    PollImporter.new(@lookup, @database, text_processor)
  end

  # The text processor is created on first use and shared by all importers
  # built by this factory.
  def text_processor
    @text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings)
  end

  def smiley_processor
    SmileyProcessor.new(@uploader, @settings, @phpbb_config)
  end
end
end

View file

@ -0,0 +1,83 @@
module ImportScripts::PhpBB3
# Maps phpBB private messages to Discourse private message topics and posts.
class MessageImporter
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param lookup [ImportScripts::LookupContainer]
  # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
  # @param attachment_importer [ImportScripts::PhpBB3::AttachmentImporter]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(database, lookup, text_processor, attachment_importer, settings)
    @database = database
    @lookup = lookup
    @text_processor = text_processor
    @attachment_importer = attachment_importer
    @settings = settings
  end

  # Maps a private message row. Messages with a root_msg_id of 0 start a new
  # private message topic, all others become replies in the topic of their
  # root message. Authors that were not imported fall back to the system user.
  #
  # @param row the phpBB private message row
  # @return [Hash, nil] the post attributes, or nil when the message is skipped
  def map_message(row)
    user_id = @lookup.user_id_from_imported_user_id(row[:author_id]) || Discourse.system_user.id
    attachments = import_attachments(row, user_id)

    mapped = {
      id: "pm:#{row[:msg_id]}",
      user_id: user_id,
      created_at: Time.zone.at(row[:message_time]),
      raw: @text_processor.process_private_msg(row[:message_text], attachments)
    }

    if row[:root_msg_id] == 0
      map_first_message(row, mapped)
    else
      map_other_message(row, mapped)
    end
  end

  protected

  # Imports the attachments of the message when attachment import is enabled
  # and the row reports at least one attachment; returns nil otherwise.
  def import_attachments(row, user_id)
    if @settings.import_attachments && row[:attachment_count] > 0
      @attachment_importer.import_attachments(user_id, row[:msg_id])
    end
  end

  # Adds the attributes of a new private message topic. Returns nil (and the
  # message is skipped) when none of the recipients could be resolved.
  def map_first_message(row, mapped)
    mapped[:title] = CGI.unescapeHTML(row[:message_subject])
    mapped[:archetype] = Archetype.private_message
    mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id])

    if mapped[:target_usernames].empty? # pm with yourself?
      puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
      return nil
    end

    mapped
  end

  # Attaches a reply to the topic of its root message. Returns nil (and the
  # message is skipped) when the root message has not been imported.
  def map_other_message(row, mapped)
    parent_msg_id = "pm:#{row[:root_msg_id]}"
    parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)

    if parent.blank?
      puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
      return nil
    end

    mapped[:topic_id] = parent[:topic_id]
    mapped
  end

  # Find the users who are part of this private message.
  # Found from the to_address of phpbb_privmsgs, by looking at
  # all the rows with the same root_msg_id.
  # to_address looks like this: "u_91:u_1234:u_200"
  # The "u_" prefix is discarded and the rest is a user_id.
  #
  # @return [Array<String>] usernames of the imported recipients, without the
  #   author; empty when there are no participants
  def get_usernames(msg_id, author_id)
    participants = @database.fetch_message_participants(msg_id, @settings.fix_private_messages)

    # flat_map/map instead of flatten!/map!: the bang variants return nil
    # when nothing changed, which broke the chain for empty participant lists.
    import_user_ids = participants
      .flat_map { |participant| participant[:to_address].split(':') }
      .uniq
      .map { |address| address[2..-1] }

    usernames = import_user_ids.map do |import_user_id|
      next if import_user_id.to_s == author_id.to_s

      @lookup.find_user_by_import_id(import_user_id).try(:username)
    end

    usernames.compact
  end
end
end

View file

@ -0,0 +1,155 @@
module ImportScripts::PhpBB3
# Converts phpBB polls into the raw text and custom fields used by the
# Discourse poll plugin.
class PollImporter
  POLL_PLUGIN_NAME = 'poll'

  # @param lookup [ImportScripts::LookupContainer]
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
  def initialize(lookup, database, text_processor)
    @lookup = lookup
    @database = database
    @text_processor = text_processor

    # The names of the default poll and of the custom fields are read from
    # constants on the singleton class of the poll plugin instance.
    poll_plugin = Discourse.plugins.find { |p| p.metadata.name == POLL_PLUGIN_NAME }.singleton_class
    @default_poll_name = poll_plugin.const_get(:DEFAULT_POLL_NAME)
    @polls_field = poll_plugin.const_get(:POLLS_CUSTOM_FIELD)
    @votes_field = poll_plugin.const_get(:VOTES_CUSTOM_FIELD)
  end

  # Builds the poll text and the poll/vote custom fields for a topic.
  #
  # @param topic_id the phpBB topic id
  # @param poll [ImportScripts::PhpBB3::Poll]
  # @return [Hash, nil] a hash with :raw and :custom_fields, or nil when no
  #   default poll could be extracted from the generated poll text
  def map_poll(topic_id, poll)
    options = get_poll_options(topic_id)
    poll_text = get_poll_text(options, poll)
    extracted_poll = extract_default_poll(topic_id, poll_text)

    if extracted_poll.nil?
      Rails.logger.warn("Could not extract the poll of topic with id #{topic_id}. Skipping poll...")
      return nil
    end

    update_poll(extracted_poll, options, topic_id, poll)

    mapped_poll = {
      raw: poll_text,
      custom_fields: {}
    }

    add_polls_field(mapped_poll[:custom_fields], extracted_poll)
    add_vote_fields(mapped_poll[:custom_fields], topic_id, poll)
    mapped_poll
  end

  protected

  # Loads the poll options of the topic. Options with identical text are
  # merged into one option that carries all original ids and the summed votes.
  #
  # @return [Array<Hash>] hashes with the keys :ids, :text and :votes
  def get_poll_options(topic_id)
    rows = @database.fetch_poll_options(topic_id)
    options_by_text = {}

    rows.each do |row|
      option_text = @text_processor.process_raw_text(row[:poll_option_text]).delete("\n")

      if options_by_text.key?(option_text)
        # phpBB allows duplicate options (why?!) - we need to merge them
        option = options_by_text[option_text]
        option[:ids] << row[:poll_option_id]
        option[:votes] += row[:poll_option_total]
      else
        options_by_text[option_text] = {
          ids: [row[:poll_option_id]],
          text: option_text,
          votes: row[:poll_option_total]
        }
      end
    end

    options_by_text.values
  end

  # Renders the poll title followed by a [poll] BBCode block that lists all
  # options; polls that allow more than one option become multiple-choice polls.
  #
  # @param options [Array]
  # @param poll [ImportScripts::PhpBB3::Poll]
  def get_poll_text(options, poll)
    poll_text = "#{poll.title}\n"

    if poll.max_options > 1
      poll_text << "[poll type=multiple max=#{poll.max_options}]"
    else
      poll_text << '[poll]'
    end

    options.each do |option|
      poll_text << "\n- #{option[:text]}"
    end

    poll_text << "\n[/poll]"
  end

  # Extracts the polls from the text and picks the one with the default name.
  #
  # @return the extracted default poll, or nil when there is none
  def extract_default_poll(topic_id, poll_text)
    extracted_polls = DiscoursePoll::Poll.extract(poll_text, topic_id)
    extracted_polls.find { |poll| poll['name'] == @default_poll_name }
  end

  # Copies voter count, status and per-option votes into the extracted poll
  # and records which extracted option id belongs to which phpBB option ids.
  #
  # @param poll [ImportScripts::PhpBB3::Poll]
  def update_poll(default_poll, imported_options, topic_id, poll)
    default_poll['voters'] = @database.count_voters(topic_id) # this includes anonymous voters

    # Despite its name, Poll#has_ended? in this file is true while the poll is
    # still running (no end date or an end date in the future).
    default_poll['status'] = poll.has_ended? ? :open : :closed

    default_poll['options'].each_with_index do |option, index|
      imported_option = imported_options[index]
      option['votes'] = imported_option[:votes]
      poll.add_option_id(imported_option[:ids], option['id'])
    end
  end

  def add_polls_field(custom_fields, default_poll)
    custom_fields[@polls_field] = {@default_poll_name => default_poll}
  end

  # Adds one custom field per voter that lists the option ids the user voted
  # for. Votes with an unknown option or user are skipped; a single warning
  # per topic is logged for them.
  #
  # @param custom_fields [Hash]
  # @param poll [ImportScripts::PhpBB3::Poll]
  def add_vote_fields(custom_fields, topic_id, poll)
    rows = @database.fetch_poll_votes(topic_id)
    warned = false

    rows.each do |row|
      option_id = poll.option_id_from_imported_option_id(row[:poll_option_id])
      user_id = @lookup.user_id_from_imported_user_id(row[:user_id])

      if option_id.present? && user_id.present?
        key = "#{@votes_field}-#{user_id}"

        if custom_fields.key?(key)
          votes = custom_fields[key][@default_poll_name]
        else
          votes = []
          custom_fields[key] = {@default_poll_name => votes}
        end

        votes << option_id
      elsif !warned
        warned = true
        Rails.logger.warn("Topic with id #{topic_id} has invalid votes.")
      end
    end
  end
end
# Holds the attributes of a phpBB poll and the mapping from phpBB option ids
# to the option ids of the extracted poll.
class Poll
  attr_reader :title, :max_options

  # @param title the poll title
  # @param max_options the maximum number of options a user may select
  # @param end_timestamp the end of the poll as Unix timestamp, or nil
  def initialize(title, max_options, end_timestamp)
    @title = title
    @max_options = max_options
    @end_timestamp = end_timestamp
    @option_ids = {}
  end

  # NOTE: despite its name this returns true when the poll has no end
  # timestamp or when its end lies in the future. The poll importer in this
  # file maps a true result to an open poll.
  def has_ended?
    return true if @end_timestamp.nil?

    Time.zone.at(@end_timestamp) > Time.now
  end

  # Registers the same extracted option id for each of the given phpBB option ids.
  def add_option_id(imported_ids, option_id)
    imported_ids.each do |imported_id|
      @option_ids[imported_id] = option_id
    end
  end

  # @return the registered option id, or nil for an unknown phpBB option id
  def option_id_from_imported_option_id(imported_id)
    @option_ids[imported_id]
  end
end
end

View file

@ -0,0 +1,79 @@
module ImportScripts::PhpBB3
# Maps phpBB posts to Discourse topics and posts.
class PostImporter
  # @param lookup [ImportScripts::LookupContainer]
  # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
  # @param attachment_importer [ImportScripts::PhpBB3::AttachmentImporter]
  # @param poll_importer [ImportScripts::PhpBB3::PollImporter]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(lookup, text_processor, attachment_importer, poll_importer, settings)
    @lookup = lookup
    @text_processor = text_processor
    @attachment_importer = attachment_importer
    @poll_importer = poll_importer
    @settings = settings
  end

  # Maps a post row. The first post of a topic creates the topic, every other
  # post becomes a reply. Posts carrying a post_username are looked up by that
  # name instead of the poster id; unknown authors fall back to the system user.
  #
  # @param row the phpBB post row
  # @return [Hash, nil] the post attributes, or nil when the post is skipped
  def map_post(row)
    imported_user_id =
      if row[:post_username].blank?
        row[:poster_id]
      else
        row[:post_username]
      end

    user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || Discourse.system_user.id
    first_post = row[:post_id] == row[:topic_first_post_id]
    attachments = import_attachments(row, user_id)

    mapped = {
      id: row[:post_id],
      user_id: user_id,
      created_at: Time.zone.at(row[:post_time]),
      raw: @text_processor.process_post(row[:post_text], attachments)
    }

    first_post ? map_first_post(row, mapped) : map_other_post(row, mapped)
  end

  protected

  # Imports the attachments of the post when attachment import is enabled and
  # the row reports at least one attachment; returns nil otherwise.
  def import_attachments(row, user_id)
    return nil unless @settings.import_attachments && row[:post_attachment] > 0

    @attachment_importer.import_attachments(user_id, row[:post_id], row[:topic_id])
  end

  # Adds the topic attributes: category, title (at most 255 characters),
  # pinning for every topic type other than normal, and the poll if enabled.
  def map_first_post(row, mapped)
    topic_type = row[:topic_type]

    mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id])
    mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255]
    mapped[:pinned_at] = mapped[:created_at] unless topic_type == Constants::POST_NORMAL
    mapped[:pinned_globally] = topic_type == Constants::POST_GLOBAL

    add_poll(row, mapped) if @settings.import_polls
    mapped
  end

  # Attaches a reply to the topic of the first post. Returns nil (and the
  # post is skipped) when that first post has not been imported.
  def map_other_post(row, mapped)
    parent = @lookup.topic_lookup_from_imported_post_id(row[:topic_first_post_id])

    unless parent.blank?
      mapped[:topic_id] = parent[:topic_id]
      return mapped
    end

    puts "Parent post #{row[:topic_first_post_id]} doesn't exist. Skipping #{row[:post_id]}: #{row[:topic_title][0..40]}"
    nil
  end

  # Prepends the poll text to the post and stores the poll's custom fields.
  # Does nothing when the row has no poll title or no poll could be mapped.
  def add_poll(row, mapped_post)
    return if row[:poll_title].blank?

    poll = Poll.new(row[:poll_title], row[:poll_max_options], row[:poll_end])
    mapped_poll = @poll_importer.map_poll(row[:topic_id], poll)
    return unless mapped_poll.present?

    mapped_post[:raw] = mapped_poll[:raw] << "\n" << mapped_post[:raw]
    mapped_post[:custom_fields] = mapped_poll[:custom_fields]
  end
end
end

View file

@ -0,0 +1,97 @@
require_relative '../support/constants'
module ImportScripts::PhpBB3
# Maps phpBB users (and anonymous posters) to Discourse users.
class UserImporter
  # @param avatar_importer [ImportScripts::PhpBB3::AvatarImporter]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(avatar_importer, settings)
    @avatar_importer = avatar_importer
    @settings = settings
  end

  # Maps a phpBB user row. Users whose inactive reason is INACTIVE_REGISTER
  # are imported as inactive and unapproved; all other users are active and
  # approved by the system user. The :post_create_action suspends banned or
  # deactivated users and imports the avatar.
  #
  # @param row the phpBB user row
  # @return [Hash] the user attributes
  def map_user(row)
    active = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER
    last_visit = row[:user_lastvisit] == 0 ? row[:user_regdate] : row[:user_lastvisit]

    {
      id: row[:user_id],
      email: row[:user_email],
      username: row[:username],
      name: @settings.username_as_name ? row[:username] : '',
      created_at: Time.zone.at(row[:user_regdate]),
      last_seen_at: Time.zone.at(last_visit),
      registration_ip_address: parse_ip_address(row),
      active: active,
      trust_level: row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1],
      approved: active,
      approved_by_id: active ? Discourse.system_user.id : nil,
      approved_at: active ? Time.now : nil,
      moderator: row[:group_name] == Constants::GROUP_MODERATORS,
      admin: row[:group_name] == Constants::GROUP_ADMINISTRATORS,
      website: row[:user_website],
      location: row[:user_from],
      date_of_birth: parse_birthdate(row),
      post_create_action: proc do |user|
        suspend_user(user, row)
        @avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present?
      end
    }
  end

  # Maps an anonymous poster. The post username doubles as import id; the
  # created user is active and approved, and the :post_create_action suspends
  # it with email notifications disabled.
  #
  # @param row the row of the anonymous poster (reads :post_username and :first_post_time)
  # @return [Hash] the user attributes
  def map_anonymous_user(row)
    anonymous_name = row[:post_username]

    {
      id: anonymous_name,
      email: "anonymous_no_email_#{anonymous_name}",
      username: anonymous_name,
      name: '',
      created_at: Time.zone.at(row[:first_post_time]),
      active: true,
      trust_level: TrustLevel[0],
      approved: true,
      approved_by_id: Discourse.system_user.id,
      approved_at: Time.now,
      post_create_action: proc do |user|
        row[:user_inactive_reason] = Constants::INACTIVE_MANUAL
        row[:ban_reason] = 'Anonymous user from phpBB3' # TODO i18n
        suspend_user(user, row, true)
      end
    }
  end

  protected

  # @return [IPAddr, nil] the registration IP, or nil when it cannot be parsed
  def parse_ip_address(row)
    IPAddr.new(row[:user_ip])
  rescue StandardError
    nil
  end

  # Parses the birthday, which is expected as day-month-year and may contain
  # spaces.
  #
  # @return [Date, nil] the date of birth, or nil when it is blank or invalid
  def parse_birthdate(row)
    birthday = row[:user_birthday]
    return nil if birthday.blank?

    begin
      Date.strptime(birthday.delete(' '), '%d-%m-%Y')
    rescue StandardError
      nil
    end
  end

  # Suspends the user if it is currently banned.
  # Manually deactivated accounts are suspended for 200 years starting now;
  # banned accounts keep their ban period (200 years for bans without end).
  # Optionally disables all email notifications of the user.
  def suspend_user(user, row, disable_email = false)
    if row[:user_inactive_reason] == Constants::INACTIVE_MANUAL
      user.suspended_at = Time.now
      user.suspended_till = 200.years.from_now
      ban_reason = row[:ban_reason].blank? ? 'Account deactivated by administrator' : row[:ban_reason] # TODO i18n
    elsif row[:ban_start].present?
      user.suspended_at = Time.zone.at(row[:ban_start])
      user.suspended_till = row[:ban_end] > 0 ? Time.zone.at(row[:ban_end]) : 200.years.from_now
      ban_reason = row[:ban_reason]
    else
      return
    end

    if disable_email
      user.email_digests = false
      user.email_private_messages = false
      user.email_direct = false
      user.email_always = false
    end

    unless user.save
      Rails.logger.error("Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}")
      return
    end

    StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
  end
end
end

View file

@ -0,0 +1,59 @@
# This is an example settings file for the phpBB3 importer.
database:
type: MySQL # currently only MySQL is supported - more to come soon
host: localhost
username: root
password:
schema: phpbb
table_prefix: phpbb # Usually all table names start with phpbb. Change this, if your forum is using a different prefix.
batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine.
import:
# Enable this option if you want to have a better conversion of BBCodes to Markdown.
# WARNING: This can slow down your import.
use_bbcode_to_md: false
# This is the path to the root directory of your current phpBB installation (or a copy of it).
# The importer expects to find the /files and /images directories within the base directory.
# This is only needed if you want to import avatars, attachments or custom smilies.
phpbb_base_dir: /var/www/phpbb
site_prefix:
# this is needed for rewriting internal links in posts
original: oldsite.example.com/forums # without http(s)://
new: http://discourse.example.com # with http:// or https://
avatars:
uploaded: true # import uploaded avatars
gallery: true # import the predefined avatars phpBB offers
remote: false # WARNING: This can considerably slow down your import. It will try to download remote avatars.
# When true: Anonymous users are imported as suspended users. They can't log in and have no email address.
# When false: The system user will be used for all anonymous users.
anonymous_users: true
# By default all the following things get imported. You can disable them by setting them to false.
bookmarks: true
attachments: true
private_messages: true
polls: true
# This tries to fix Private Messages that were imported from phpBB2 to phpBB3.
# You should enable this option if you see duplicate messages or lots of related
# messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer'
# should be one topic named 'Importer' and consist of 3 posts).
fix_private_messages: false
# When true: each imported user will have the original username from phpBB as its name
# When false: the name of each user will be blank
username_as_name: false
# Map Emojis to smilies used in phpBB. Most of the default smilies already have a mapping, but you can override
# the mappings here, if you don't like some of them.
# The mapping syntax is: emoji_name: 'smiley_in_phpbb'
# Or map multiple smilies to one Emoji: emoji_name: ['smiley1', 'smiley2']
emojis:
# here are two example mappings...
smiley: [':D', ':-D', ':grin:']
heart: ':love:'

View file

@ -0,0 +1,35 @@
module ImportScripts::PhpBB3
# Numeric and string constants that mirror values stored in the phpBB database.
class Constants
# Reasons why a user account is inactive; the user importer in this file
# compares them against the user_inactive_reason column.
ACTIVE_USER = 0
INACTIVE_REGISTER = 1 # Newly registered account
INACTIVE_PROFILE = 2 # Profile details changed
INACTIVE_MANUAL = 3 # Account deactivated by administrator
INACTIVE_REMIND = 4 # Forced user account reactivation
# Group names; the user importer compares them against the group_name column
# to decide who becomes admin or moderator.
GROUP_ADMINISTRATORS = 'ADMINISTRATORS'
GROUP_MODERATORS = 'GLOBAL_MODERATORS'
# https://wiki.phpbb.com/Table.phpbb_users
USER_TYPE_NORMAL = 0
USER_TYPE_INACTIVE = 1
USER_TYPE_IGNORE = 2
USER_TYPE_FOUNDER = 3
# Avatar types; the avatar importer compares them against the
# user_avatar_type column.
AVATAR_TYPE_UPLOADED = 1
AVATAR_TYPE_REMOTE = 2
AVATAR_TYPE_GALLERY = 3
# Forum types. Not referenced by the importer classes in this file;
# presumably used by the database queries - verify.
FORUM_TYPE_CATEGORY = 0
FORUM_TYPE_POST = 1
FORUM_TYPE_LINK = 2
# Topic states. Not referenced by the importer classes in this file;
# presumably used by the database queries - verify.
TOPIC_UNLOCKED = 0
TOPIC_LOCKED = 1
TOPIC_MOVED = 2
# Topic types; the post importer compares them against the topic_type column
# to decide whether (and how) a topic gets pinned.
POST_NORMAL = 0
POST_STICKY = 1
POST_ANNOUNCE = 2
POST_GLOBAL = 3
end
end

View file

@ -0,0 +1,78 @@
require 'yaml'
module ImportScripts::PhpBB3
# Holds the import settings read from the settings YAML file.
class Settings
  # Loads the settings from a YAML file.
  #
  # @param filename [String] path of the YAML file
  # @return [Settings]
  def self.load(filename)
    yaml = YAML::load_file(filename)
    Settings.new(yaml)
  end

  attr_reader :import_anonymous_users
  attr_reader :import_attachments
  attr_reader :import_private_messages
  attr_reader :import_polls
  attr_reader :import_bookmarks

  attr_reader :import_uploaded_avatars
  attr_reader :import_remote_avatars
  attr_reader :import_gallery_avatars

  attr_reader :fix_private_messages
  attr_reader :use_bbcode_to_md

  attr_reader :original_site_prefix
  attr_reader :new_site_prefix
  attr_reader :base_dir

  attr_reader :username_as_name
  attr_reader :emojis

  attr_reader :database

  # @param yaml [Hash] the parsed settings with the top-level keys
  #   'import' and 'database'
  def initialize(yaml)
    import_settings = yaml['import']
    @import_anonymous_users = import_settings['anonymous_users']
    @import_attachments = import_settings['attachments']
    @import_private_messages = import_settings['private_messages']
    @import_polls = import_settings['polls']
    @import_bookmarks = import_settings['bookmarks']

    avatar_settings = import_settings['avatars']
    @import_uploaded_avatars = avatar_settings['uploaded']
    @import_remote_avatars = avatar_settings['remote']
    @import_gallery_avatars = avatar_settings['gallery']

    @fix_private_messages = import_settings['fix_private_messages']
    @use_bbcode_to_md = import_settings['use_bbcode_to_md']

    @original_site_prefix = import_settings['site_prefix']['original']
    @new_site_prefix = import_settings['site_prefix']['new']
    @base_dir = import_settings['phpbb_base_dir']

    @username_as_name = import_settings['username_as_name']

    # An "emojis:" key without any mappings is parsed as nil. Always expose a
    # Hash so that the mappings can be iterated without nil checks.
    @emojis = import_settings['emojis'] || {}

    @database = DatabaseSettings.new(yaml['database'])
  end
end
# Holds the database connection settings read from the settings YAML file.
class DatabaseSettings
  attr_reader :type, :host, :username, :password, :schema, :table_prefix, :batch_size

  # @param yaml the 'database' section of the settings; each attribute is
  #   read from the key of the same name
  def initialize(yaml)
    %w[type host username password schema table_prefix batch_size].each do |key|
      instance_variable_set("@#{key}", yaml[key])
    end
  end
end
end

View file

@ -0,0 +1,90 @@
module ImportScripts::PhpBB3
# Replaces phpBB smilies with Emojis or, for smilies without a mapping,
# with uploaded and embedded images.
class SmileyProcessor
  # @param uploader [ImportScripts::Uploader]
  # @param settings [ImportScripts::PhpBB3::Settings]
  # @param phpbb_config [Hash]
  def initialize(uploader, settings, phpbb_config)
    @uploader = uploader
    @smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])

    @smiley_map = {}
    add_default_smilies
    add_configured_smilies(settings.emojis)
  end

  # Replaces all smilies in the text in place. Smilies without a mapping are
  # uploaded as image; if that fails the smiley code itself is used. Either
  # result is remembered for later occurrences of the same smiley.
  #
  # @param text [String] the text, which gets modified
  # @return [String, nil] the text, or nil when nothing was replaced
  def replace_smilies(text)
    # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
    text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/) do
      match = Regexp.last_match
      smiley = match[1]

      @smiley_map.fetch(smiley) do
        upload_smiley(smiley, match[2], match[3], match[4]) || smiley_as_text(smiley)
      end
    end
  end

  protected

  # Registers the built-in mappings from phpBB smiley codes to Emojis.
  def add_default_smilies
    {
      ':smiley:' => [':D', ':-D', ':grin:'],
      ':smile:' => [':)', ':-)', ':smile:'],
      ':wink:' => [';)', ';-)', ':wink:'],
      ':frowning:' => [':(', ':-(', ':sad:'],
      ':astonished:' => [':o', ':-o', ':eek:'],
      ':open_mouth:' => [':shock:'],
      ':confused:' => [':?', ':-?', ':???:'],
      ':sunglasses:' => ['8-)', ':cool:'],
      ':laughing:' => [':lol:'],
      ':angry:' => [':x', ':-x', ':mad:'],
      ':stuck_out_tongue:' => [':P', ':-P', ':razz:'],
      ':blush:' => [':oops:'],
      ':cry:' => [':cry:'],
      ':imp:' => [':evil:'],
      ':smiling_imp:' => [':twisted:'],
      ':unamused:' => [':roll:'],
      ':exclamation:' => [':!:'],
      ':question:' => [':?:'],
      ':bulb:' => [':idea:'],
      ':arrow_right:' => [':arrow:'],
      ':neutral_face:' => [':|', ':-|']
    }.each do |emoji, smiley_codes|
      smiley_codes.each { |smiley_code| @smiley_map[smiley_code] = emoji }
    end
  end

  # Registers the mappings from the settings; they override default mappings
  # of the same smiley. Each emoji name maps to one smiley or a list of smilies.
  def add_configured_smilies(emojis)
    emojis.each do |emoji_name, smiley_codes|
      emoji = ":#{emoji_name}:"
      Array.wrap(smiley_codes).each { |smiley_code| @smiley_map[smiley_code] = emoji }
    end
  end

  # Uploads the smiley image as the system user and remembers the embedded
  # image HTML for the smiley.
  #
  # @return [String, nil] the image HTML, or nil when the upload failed
  def upload_smiley(smiley, path, alt_text, title)
    path = File.join(@smilies_path, path)
    upload = @uploader.create_upload(Discourse::SYSTEM_USER_ID, path, File.basename(path))

    if upload.nil? || !upload.valid?
      puts "Failed to upload #{path}"
      puts upload.errors.inspect if upload
      return nil
    end

    @smiley_map[smiley] = embedded_image_html(upload, alt_text, title)
  end

  # Builds the <img> tag; width and height are capped at the maximum image
  # dimensions of the site settings.
  def embedded_image_html(upload, alt_text, title)
    width = [upload.width, SiteSetting.max_image_width].compact.min
    height = [upload.height, SiteSetting.max_image_height].compact.min
    %Q[<img src="#{upload.url}" width="#{width}" height="#{height}" alt="#{alt_text}" title="#{title}"/>]
  end

  # Remembers the smiley code itself as replacement and returns it.
  def smiley_as_text(smiley)
    @smiley_map[smiley] = smiley
  end
end
end

View file

@ -0,0 +1,133 @@
module ImportScripts::PhpBB3
# Converts the raw text of phpBB posts, private messages and descriptions
# into text that can be imported into Discourse.
class TextProcessor
  # @param lookup [ImportScripts::LookupContainer]
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(lookup, database, smiley_processor, settings)
    @lookup = lookup
    @database = database
    @smiley_processor = smiley_processor

    @new_site_prefix = settings.new_site_prefix
    create_internal_link_regexps(settings.original_site_prefix)
  end

  # Unescapes HTML entities, strips the hashes from BBCode tags and converts
  # smilies, links and lists. The given string is not modified.
  #
  # @param raw [String] the text as stored by phpBB
  # @return [String] the processed text
  def process_raw_text(raw)
    text = raw.dup
    text = CGI.unescapeHTML(text)

    clean_bbcodes(text)
    process_smilies(text)
    process_links(text)
    process_lists(text)

    text
  end

  # Processes the text of a post and embeds its attachments.
  #
  # @param raw [String] the post text as stored by phpBB
  # @param attachments [Array, nil] HTML of the uploaded attachments
  def process_post(raw, attachments)
    text = process_raw_text(raw)
    text = process_attachments(text, attachments) if attachments.present?
    text
  end

  # Processes the text of a private message and embeds its attachments.
  #
  # @param raw [String] the message text as stored by phpBB
  # @param attachments [Array, nil] HTML of the uploaded attachments
  def process_private_msg(raw, attachments)
    text = process_raw_text(raw)
    text = process_attachments(text, attachments) if attachments.present?
    text
  end

  protected

  def clean_bbcodes(text)
    # Many phpbb bbcode tags have a hash attached to them. Examples:
    #   [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
    #   [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
    text.gsub!(/:(?:\w{8})\]/, ']')
  end

  def process_smilies(text)
    @smiley_processor.replace_smilies(text)
  end

  # Rewrites internal forum links to the imported topics and converts the
  # remaining HTML links into Markdown links. Modifies the text in place.
  def process_links(text)
    # Internal forum links can have these forms:
    # for topics: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?f=26&amp;t=3412">viewtopic.php?f=26&amp;t=3412</a><!-- l -->
    # for posts: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?p=1732#p1732">viewtopic.php?p=1732#p1732</a><!-- l -->
    text.gsub!(@long_internal_link_regexp) do |link|
      replace_internal_link(link, $1, $2)
    end

    # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
    # The quantifiers are lazy and the attributes may not contain ">", so that
    # each link is converted on its own when a line contains several links.
    text.gsub!(/<!-- \w --><a(?:[^>]+?)href="(\S+?)"(?:[^>]*?)>(.+?)<\/a><!-- \w -->/i, '[\2](\1)')

    # Replace internal forum links that aren't in the <!-- l --> format
    text.gsub!(@short_internal_link_regexp) do |link|
      replace_internal_link(link, $1, $2)
    end

    # phpBB shortens link text like this, which breaks our markdown processing:
    #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
    #
    # Work around it for now:
    text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
  end

  # Dispatches to the topic or post replacement, depending on which id the
  # link regexp captured.
  def replace_internal_link(link, import_topic_id, import_post_id)
    if import_post_id.nil?
      replace_internal_topic_link(link, import_topic_id)
    else
      replace_internal_post_link(link, import_post_id)
    end
  end

  # A topic link is resolved through the first post of the topic. The link is
  # left unchanged when the first post is unknown.
  def replace_internal_topic_link(link, import_topic_id)
    import_post_id = @database.get_first_post_id(import_topic_id)
    return link if import_post_id.nil?

    replace_internal_post_link(link, import_post_id)
  end

  # @return [String] the URL of the imported post, or the unchanged link when
  #   the post has not been imported
  def replace_internal_post_link(link, import_post_id)
    topic = @lookup.topic_lookup_from_imported_post_id(import_post_id)
    topic ? "#{@new_site_prefix}#{topic[:url]}" : link
  end

  def process_lists(text)
    # convert list tags to ul and list=1 tags to ol
    # list=a is not supported, so handle it like list=1
    # list=9 and list=x have the same result as list=1 and list=a
    text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
    text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')

    # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
    text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
  end

  # This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse.
  # All attachments that haven't been referenced in the text are appended to the end of the text.
  #
  # @param text [String] the processed text
  # @param attachments [Array] HTML of the uploaded attachments
  # @return [String] the text with embedded attachments
  def process_attachments(text, attachments)
    # (\d+) captures the complete index; a repeated single-digit group would
    # only keep the last digit and mix up attachments with an index >= 10.
    attachment_regexp = /\[attachment=(\d+)\]<!-- [\w]+ -->([^<]+)<!-- [\w]+ -->\[\/attachment\]?/i
    unreferenced_attachments = attachments.dup

    text = text.gsub(attachment_regexp) do
      index = $1.to_i
      real_filename = $2
      unreferenced_attachments[index] = nil
      attachments.fetch(index, real_filename)
    end

    unreferenced_attachments = unreferenced_attachments.compact
    text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
    text
  end

  # Builds the regexps that detect links to topics (capture group 1) and
  # posts (capture group 2) of the original forum.
  #
  # @param original_site_prefix [String] host and path of the original forum
  #   without the protocol
  def create_internal_link_regexps(original_site_prefix)
    # Escape all regexp metacharacters of the prefix, not just the dots.
    host = Regexp.escape(original_site_prefix)
    link_regex = "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)"

    # Lazy quantifiers and "[^>]" keep a match within a single link when a
    # line contains several internal links.
    @long_internal_link_regexp = Regexp.new(%Q|<!-- l --><a(?:[^>]+?)href="#{link_regex}"(?:[^>]*?)>(?:.*?)</a><!-- l -->|, Regexp::IGNORECASE)
    @short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE)
  end
end
end