mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-28 18:17:29 -05:00
1cb45861c5
- Extensive refactoring of the existing importer - Configuration of import with settings.yml instead of editing code - Supports importing from phpBB 3.0.x and 3.1.x - Imports all attachments (not just the ones embedded with [attachment]) from posts and private messages - Imports all existing attachments without the need to configure allowed file extensions or file sizes - Imports polls - Imports bookmarks - Imports sticky topics and (global) announcements as pinned topics - Imports categories in the original order and sets the content of the category description topic - Sets the creation date of category description topics to the creation date of the first topic in each category - Imports additional user attributes: last seen date, registration IP address, website, date of birth, location - Optionally set the user's name to its username - Users that didn't activate their account in phpBB3 are imported as inactive users - All imported, active users are automatically approved - Users that were deactivated in phpBB3 get suspended for 200 years during the import - Anonymous user can be imported as suspended users instead of the system user - Forums of type "link" are not imported as categories anymore - Internal links to posts get rewritten during the import (previously only links to topics got rewritten) - Ordered lists with BBCode [list=a] (which are unsupported in Discourse) get imported as if they would be [list=1] - Importing of avatars, attachments, private messages, polls and bookmarks can be disabled via configuration file - Optional fixing of private messages for forums that have been upgraded from phpBB2 prevents the import of duplicate messages and tries to group related messages into topics - Table prefix (default: phpbb) is configurable - Most of phpBB's default smilies are mapped to Emojis and all other smilies get uploaded and embedded as images. Smiley mappings can be added or overridden in the settings.yml file.
333 lines
11 KiB
Ruby
333 lines
11 KiB
Ruby
require_relative 'database_base'
|
|
require_relative '../support/constants'
|
|
|
|
module ImportScripts::PhpBB3
|
|
class Database_3_0 < DatabaseBase
|
|
def count_users
|
|
count(<<-SQL)
|
|
SELECT COUNT(*) AS count
|
|
FROM #{@table_prefix}_users u
|
|
JOIN #{@table_prefix}_groups g ON g.group_id = u.group_id
|
|
WHERE u.user_type != #{Constants::USER_TYPE_IGNORE}
|
|
SQL
|
|
end
|
|
|
|
def fetch_users(offset)
|
|
query(<<-SQL)
|
|
SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip,
|
|
u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason,
|
|
u.user_posts, u.user_website, u.user_from, u.user_birthday, u.user_avatar_type, u.user_avatar
|
|
FROM #{@table_prefix}_users u
|
|
JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id)
|
|
LEFT OUTER JOIN #{@table_prefix}_banlist b ON (
|
|
u.user_id = b.ban_userid AND b.ban_exclude = 0 AND
|
|
(b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP())
|
|
)
|
|
WHERE u.user_type != #{Constants::USER_TYPE_IGNORE}
|
|
ORDER BY u.user_id ASC
|
|
LIMIT #{@batch_size}
|
|
OFFSET #{offset}
|
|
SQL
|
|
end
|
|
|
|
def count_anonymous_users
|
|
count(<<-SQL)
|
|
SELECT COUNT(DISTINCT post_username) AS count
|
|
FROM #{@table_prefix}_posts
|
|
WHERE post_username <> ''
|
|
SQL
|
|
end
|
|
|
|
def fetch_anonymous_users(offset)
|
|
query(<<-SQL)
|
|
SELECT post_username, MIN(post_time) AS first_post_time
|
|
FROM #{@table_prefix}_posts
|
|
WHERE post_username <> ''
|
|
GROUP BY post_username
|
|
ORDER BY post_username ASC
|
|
LIMIT #{@batch_size}
|
|
OFFSET #{offset}
|
|
SQL
|
|
end
|
|
|
|
def fetch_categories
|
|
query(<<-SQL)
|
|
SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_name, f.forum_desc, x.first_post_time
|
|
FROM phpbb_forums f
|
|
LEFT OUTER JOIN (
|
|
SELECT MIN(topic_time) AS first_post_time, forum_id
|
|
FROM phpbb_topics
|
|
GROUP BY forum_id
|
|
) x ON (f.forum_id = x.forum_id)
|
|
WHERE f.forum_type != #{Constants::FORUM_TYPE_LINK}
|
|
ORDER BY f.parent_id ASC, f.left_id ASC
|
|
SQL
|
|
end
|
|
|
|
def count_posts
|
|
count(<<-SQL)
|
|
SELECT COUNT(*) AS count
|
|
FROM #{@table_prefix}_posts
|
|
SQL
|
|
end
|
|
|
|
def fetch_posts(offset)
|
|
query(<<-SQL)
|
|
SELECT p.post_id, p.topic_id, t.forum_id, t.topic_title, t.topic_first_post_id, p.poster_id,
|
|
p.post_text, p.post_time, p.post_username, t.topic_status, t.topic_type, t.poll_title,
|
|
CASE WHEN t.poll_length > 0 THEN t.poll_start + t.poll_length ELSE NULL END AS poll_end,
|
|
t.poll_max_options, p.post_attachment
|
|
FROM #{@table_prefix}_posts p
|
|
JOIN #{@table_prefix}_topics t ON (p.topic_id = t.topic_id)
|
|
ORDER BY p.post_id ASC
|
|
LIMIT #{@batch_size}
|
|
OFFSET #{offset}
|
|
SQL
|
|
end
|
|
|
|
def get_first_post_id(topic_id)
|
|
query(<<-SQL).first[:topic_first_post_id]
|
|
SELECT topic_first_post_id
|
|
FROM #{@table_prefix}_topics
|
|
WHERE topic_id = #{topic_id}
|
|
SQL
|
|
end
|
|
|
|
def fetch_poll_options(topic_id)
|
|
query(<<-SQL)
|
|
SELECT poll_option_id, poll_option_text, poll_option_total
|
|
FROM #{@table_prefix}_poll_options
|
|
WHERE topic_id = #{topic_id}
|
|
ORDER BY poll_option_id
|
|
SQL
|
|
end
|
|
|
|
def fetch_poll_votes(topic_id)
|
|
# this query ignores votes from users that do not exist anymore
|
|
query(<<-SQL)
|
|
SELECT u.user_id, v.poll_option_id
|
|
FROM #{@table_prefix}_poll_votes v
|
|
JOIN #{@table_prefix}_users u ON (v.vote_user_id = u.user_id)
|
|
WHERE v.topic_id = #{topic_id}
|
|
SQL
|
|
end
|
|
|
|
def count_voters(topic_id)
|
|
# anonymous voters can't be counted, but lets try to make the count look "correct" anyway
|
|
count(<<-SQL)
|
|
SELECT MAX(count) AS count
|
|
FROM (
|
|
SELECT COUNT(DISTINCT vote_user_id) AS count
|
|
FROM #{@table_prefix}_poll_votes
|
|
WHERE topic_id = #{topic_id}
|
|
UNION
|
|
SELECT MAX(poll_option_total) AS count
|
|
FROM #{@table_prefix}_poll_options
|
|
WHERE topic_id = #{topic_id}
|
|
) x
|
|
SQL
|
|
end
|
|
|
|
def get_max_attachment_size
|
|
query(<<-SQL).first[:filesize]
|
|
SELECT IFNULL(MAX(filesize), 0) AS filesize
|
|
FROM #{@table_prefix}_attachments
|
|
SQL
|
|
end
|
|
|
|
def fetch_attachments(topic_id, post_id)
|
|
query(<<-SQL)
|
|
SELECT physical_filename, real_filename
|
|
FROM #{@table_prefix}_attachments
|
|
WHERE topic_id = #{topic_id} AND post_msg_id = #{post_id}
|
|
ORDER BY filetime DESC, post_msg_id ASC
|
|
SQL
|
|
end
|
|
|
|
def count_messages(use_fixed_messages)
|
|
if use_fixed_messages
|
|
count(<<-SQL)
|
|
SELECT COUNT(*) AS count
|
|
FROM #{@table_prefix}_import_privmsgs
|
|
SQL
|
|
else
|
|
count(<<-SQL)
|
|
SELECT COUNT(*) AS count
|
|
FROM #{@table_prefix}_privmsgs
|
|
SQL
|
|
end
|
|
end
|
|
|
|
def fetch_messages(use_fixed_messages, offset)
|
|
if use_fixed_messages
|
|
query(<<-SQL)
|
|
SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text,
|
|
IFNULL(a.attachment_count, 0) AS attachment_count
|
|
FROM #{@table_prefix}_privmsgs m
|
|
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
|
|
LEFT OUTER JOIN (
|
|
SELECT post_msg_id, COUNT(*) AS attachment_count
|
|
FROM #{@table_prefix}_attachments
|
|
WHERE topic_id = 0
|
|
GROUP BY post_msg_id
|
|
) a ON (m.msg_id = a.post_msg_id)
|
|
ORDER BY i.root_msg_id ASC, m.msg_id ASC
|
|
LIMIT #{@batch_size}
|
|
OFFSET #{offset}
|
|
SQL
|
|
else
|
|
query(<<-SQL)
|
|
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
|
|
m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count
|
|
FROM #{@table_prefix}_privmsgs m
|
|
LEFT OUTER JOIN (
|
|
SELECT post_msg_id, COUNT(*) AS attachment_count
|
|
FROM #{@table_prefix}_attachments
|
|
WHERE topic_id = 0
|
|
GROUP BY post_msg_id
|
|
) a ON (m.msg_id = a.post_msg_id)
|
|
ORDER BY m.root_level ASC, m.msg_id ASC
|
|
LIMIT #{@batch_size}
|
|
OFFSET #{offset}
|
|
SQL
|
|
end
|
|
end
|
|
|
|
def fetch_message_participants(msg_id, use_fixed_messages)
|
|
if use_fixed_messages
|
|
query(<<-SQL)
|
|
SELECT m.to_address
|
|
FROM #{@table_prefix}_privmsgs m
|
|
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
|
|
WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id}
|
|
SQL
|
|
else
|
|
query(<<-SQL)
|
|
SELECT m.to_address
|
|
FROM #{@table_prefix}_privmsgs m
|
|
WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id}
|
|
SQL
|
|
end
|
|
end
|
|
|
|
def calculate_fixed_messages
|
|
drop_temp_import_message_table
|
|
create_temp_import_message_table
|
|
fill_temp_import_message_table
|
|
|
|
drop_import_message_table
|
|
create_import_message_table
|
|
fill_import_message_table
|
|
|
|
drop_temp_import_message_table
|
|
end
|
|
|
|
def count_bookmarks
|
|
count(<<-SQL)
|
|
SELECT COUNT(*) AS count
|
|
FROM #{@table_prefix}_bookmarks
|
|
SQL
|
|
end
|
|
|
|
def fetch_bookmarks(offset)
|
|
query(<<-SQL)
|
|
SELECT b.user_id, t.topic_first_post_id
|
|
FROM #{@table_prefix}_bookmarks b
|
|
JOIN #{@table_prefix}_topics t ON (b.topic_id = t.topic_id)
|
|
ORDER BY b.user_id ASC, b.topic_id ASC
|
|
LIMIT #{@batch_size}
|
|
OFFSET #{offset}
|
|
SQL
|
|
end
|
|
|
|
def get_config_values
|
|
query(<<-SQL).first
|
|
SELECT
|
|
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'version') AS phpbb_version,
|
|
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_gallery_path') AS avatar_gallery_path,
|
|
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_path') AS avatar_path,
|
|
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_salt') AS avatar_salt,
|
|
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'smilies_path') AS smilies_path,
|
|
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
|
|
SQL
|
|
end
|
|
|
|
protected
|
|
|
|
def drop_temp_import_message_table
|
|
query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs_temp")
|
|
end
|
|
|
|
def create_temp_import_message_table
|
|
query(<<-SQL)
|
|
CREATE TABLE #{@table_prefix}_import_privmsgs_temp (
|
|
msg_id MEDIUMINT(8) NOT NULL,
|
|
root_msg_id MEDIUMINT(8) NOT NULL,
|
|
recipient_id MEDIUMINT(8),
|
|
normalized_subject VARCHAR(255) NOT NULL,
|
|
PRIMARY KEY (msg_id)
|
|
)
|
|
SQL
|
|
end
|
|
|
|
# this removes duplicate messages, converts the to_address to a number
|
|
# and stores the message_subject in lowercase and without the prefix "Re: "
|
|
def fill_temp_import_message_table
|
|
query(<<-SQL)
|
|
INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject)
|
|
SELECT m.msg_id, m.root_level,
|
|
CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN
|
|
CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER)
|
|
ELSE NULL END AS recipient_id,
|
|
LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN
|
|
SUBSTRING(m.message_subject, 5)
|
|
ELSE m.message_subject END) AS normalized_subject
|
|
FROM #{@table_prefix}_privmsgs m
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM #{@table_prefix}_privmsgs x
|
|
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
|
|
AND x.to_address = m.to_address AND x.message_time = m.message_time
|
|
)
|
|
SQL
|
|
end
|
|
|
|
def drop_import_message_table
|
|
query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs")
|
|
end
|
|
|
|
def create_import_message_table
|
|
query(<<-SQL)
|
|
CREATE TABLE #{@table_prefix}_import_privmsgs (
|
|
msg_id MEDIUMINT(8) NOT NULL,
|
|
root_msg_id MEDIUMINT(8) NOT NULL,
|
|
PRIMARY KEY (msg_id),
|
|
INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id)
|
|
)
|
|
SQL
|
|
end
|
|
|
|
# this tries to calculate the actual root_level (= msg_id of the first message in a
|
|
# private conversation) based on subject, time, author and recipient
|
|
def fill_import_message_table
|
|
query(<<-SQL)
|
|
INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
|
|
SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
|
|
COALESCE((
|
|
SELECT a.msg_id
|
|
FROM #{@table_prefix}_privmsgs a
|
|
JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
|
|
WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
|
|
(a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
|
|
AND b.normalized_subject = i.normalized_subject
|
|
AND a.msg_id <> m.msg_id
|
|
AND a.message_time < m.message_time
|
|
ORDER BY a.message_time ASC
|
|
LIMIT 1
|
|
), 0) ELSE i.root_msg_id END AS root_msg_id
|
|
FROM #{@table_prefix}_privmsgs m
|
|
JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
|
|
SQL
|
|
end
|
|
end
|
|
end
|