mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-23 23:58:31 -05:00
Merge pull request #3948 from discoursehosting/origin/nabble-importer
Extended Nabble importer functionality
This commit is contained in:
commit
ed7d64c87e
1 changed file with 141 additions and 7 deletions
|
@ -1,5 +1,38 @@
|
|||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||
require 'pg'
|
||||
require_relative 'base/uploader'
|
||||
|
||||
=begin
|
||||
if you want to create mock users for posts made by anonymous participants,
|
||||
run the following SQL prior to importing.
|
||||
|
||||
-- first attribute any anonymous posts to existing users (if any)
|
||||
|
||||
UPDATE node
|
||||
SET owner_id = p.user_id, anonymous_name = NULL
|
||||
FROM ( SELECT lower(name) AS name, user_id FROM user_ ) p
|
||||
WHERE p.name = lower(node.anonymous_name)
|
||||
AND owner_id IS NULL;
|
||||
|
||||
-- then create mock users
|
||||
|
||||
INSERT INTO user_ (email, name, joined, registered)
|
||||
SELECT lower(anonymous_name) || '@dummy.com', MIN(anonymous_name), MIN(when_created), MIN(when_created)
|
||||
FROM node
|
||||
WHERE anonymous_name IS NOT NULL
|
||||
GROUP BY lower(anonymous_name);
|
||||
|
||||
-- then move these posts to the new users
|
||||
-- (yes, this is intentionally the same query as the first one)
|
||||
|
||||
UPDATE node
|
||||
SET owner_id = p.user_id, anonymous_name = NULL
|
||||
FROM ( SELECT lower(name) AS name, user_id FROM user_ ) p
|
||||
WHERE p.name = lower(node.anonymous_name)
|
||||
AND owner_id IS NULL;
|
||||
|
||||
=end
|
||||
|
||||
|
||||
class ImportScripts::Nabble < ImportScripts::Base
|
||||
# CHANGE THESE BEFORE RUNNING THE IMPORTER
|
||||
|
@ -15,6 +48,7 @@ class ImportScripts::Nabble < ImportScripts::Base
|
|||
@tagmap = []
|
||||
@td = PG::TextDecoder::TimestampWithTimeZone.new
|
||||
@client = PG.connect(dbname: DB_NAME)
|
||||
@uploader = ImportScripts::Uploader.new
|
||||
end
|
||||
|
||||
def execute
|
||||
|
@ -42,17 +76,45 @@ class ImportScripts::Nabble < ImportScripts::Base
|
|||
|
||||
next if all_records_exist? :users, users.map {|u| u["user_id"].to_i}
|
||||
|
||||
create_users(users, total: total_count, offset: offset) do |user|
|
||||
create_users(users, total: total_count, offset: offset) do |row|
|
||||
{
|
||||
id: user["user_id"],
|
||||
email: user["email"] || (SecureRandom.hex << "@domain.com"),
|
||||
created_at: Time.zone.at(@td.decode(user["joined"])),
|
||||
name: user["name"]
|
||||
id: row["user_id"],
|
||||
email: row["email"] || (SecureRandom.hex << "@domain.com"),
|
||||
created_at: Time.zone.at(@td.decode(row["joined"])),
|
||||
name: row["name"],
|
||||
post_create_action: proc do |user|
|
||||
import_avatar(user, row["user_id"])
|
||||
end
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Imports the Nabble avatar of one user and attaches it to the Discourse user.
#
# user   - the Discourse user record the avatar belongs to.
# org_id - the user's id in the Nabble database (file_avatar.user_id).
#
# The 'avatar100.png' row for org_id is read from file_avatar; when there is
# no such row the method returns without doing anything. Otherwise the image
# is written to /tmp/nab/avatar<org_id>, turned into an upload through
# @uploader and linked to the user. When the upload is not persisted only an
# error is logged.
def import_avatar(user, org_id)
  rows = @client.exec("SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1")
  return if rows.ntuples < 1

  filename = "avatar#{org_id}"
  path = File.join('/tmp/nab', filename)

  # the bytea column arrives in escaped text form and is decoded before writing
  File.open(path, 'wb') do |file|
    file.write(PG::Connection.unescape_bytea(rows[0]['content']))
  end

  upload = @uploader.create_upload(user.id, path, filename)

  unless upload.persisted?
    return Rails.logger.error("Could not persist avatar for user #{user.username}")
  end

  # create_user_avatar runs with import mode switched off; it is switched
  # back on before the avatar and user records are updated
  user.import_mode = false
  user.create_user_avatar
  user.import_mode = true

  user.user_avatar.update(custom_upload_id: upload.id)
  user.update(uploaded_avatar_id: upload.id)
end
|
||||
|
||||
def parse_email(msg)
|
||||
receiver = Email::Receiver.new(msg, skip_sanity_check: true)
|
||||
mail = Mail.read_from_string(msg)
|
||||
|
@ -87,6 +149,8 @@ class ImportScripts::Nabble < ImportScripts::Base
|
|||
create_posts(topics, total: topic_count, offset: offset) do |t|
|
||||
raw = body_from(t)
|
||||
next unless raw
|
||||
raw = process_content(raw)
|
||||
raw = process_attachments(raw, t['node_id'])
|
||||
|
||||
{ id: t['node_id'],
|
||||
title: t['subject'],
|
||||
|
@ -94,7 +158,7 @@ class ImportScripts::Nabble < ImportScripts::Base
|
|||
created_at: Time.zone.at(@td.decode(t["when_created"])),
|
||||
category: CATEGORY_ID,
|
||||
raw: raw,
|
||||
cook_method: Post.cook_methods[:email] }
|
||||
cook_method: Post.cook_methods[:regular] }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -105,6 +169,60 @@ class ImportScripts::Nabble < ImportScripts::Base
|
|||
puts "Skipped #{p['node_id']}"
|
||||
end
|
||||
|
||||
# Converts Nabble-specific markup in a post body to what Discourse expects.
#
# txt - String post body; it is modified in place.
#
# Steps, in order:
#   1. <quote author="X"> / </quote> become [quote="X"] / [/quote].
#   2. <raw>...</raw> becomes the inner text indented by four spaces
#      (via String#indent) with a newline before and after it.
#   3. A newline directly followed by '#' is replaced by ' #', so the line
#      is joined to the previous one and is not rendered as a heading.
#
# Returns txt.
def process_content(txt)
  quote_tags = {
    /\<quote author="(.*?)"\>/ => '[quote="\1"]',
    /\<\/quote\>/ => '[/quote]'
  }
  quote_tags.each { |tag, bbcode| txt.gsub!(tag, bbcode) }

  txt.gsub!(/\<raw\>(.*?)\<\/raw\>/m) do
    code = Regexp.last_match(1).indent(4)
    "\n#{code}\n"
  end

  # lines starting with # are comments in the source forum, not headings
  txt.gsub!(/\n#/m, ' #')

  # In the languagetool forum quite a lot of XML was not marked as raw, so
  # <rule...>...</rule> and <category...>...</category> can be treated as raw.
  # Uncomment below if you want to use this.
  #
  # txt.gsub!(/<rule(.*?)>(.*?<\/rule>)/m) do
  #   c = Regexp.last_match(2).indent(4)
  #   "\n    <rule#{Regexp.last_match(1)}>#{c}\n"
  # end
  # txt.gsub!(/<category(.*?)>(.*?<\/category>)/m) do
  #   c = Regexp.last_match(2).indent(4)
  #   "\n    <category#{Regexp.last_match(1)}>#{c}\n"
  # end

  txt
end
|
||||
|
||||
# Replaces Nabble attachment markup in a post body with upload markup.
#
# Two tag forms are handled:
#   <nabble_img src="NAME" ...>             -> @uploader.embedded_image_html
#   <nabble_a href="NAME">LABEL</nabble_a>  -> @uploader.attachment_html
#
# txt    - String post body; it is modified in place.
# postid - node_id of the post the attachments belong to.
#
# A tag whose file cannot be found in the database is left untouched instead
# of aborting the import.
#
# Returns txt.
def process_attachments(txt, postid)
  txt.gsub!(/<nabble_img src="(.*?)" (.*?)>/m) do |tag|
    name = Regexp.last_match[1]
    upload = upload_node_file(name, postid)
    upload ? @uploader.embedded_image_html(upload) : tag
  end

  txt.gsub!(/<nabble_a href="(.*?)">(.*?)<\/nabble_a>/m) do |tag|
    name = Regexp.last_match[1]
    upload = upload_node_file(name, postid)
    upload ? @uploader.attachment_html(upload, name) : tag
  end

  txt
end

# Reads the file +name+ attached to node +postid+ from file_node, writes it
# under /tmp/nab and creates an upload from it (owner id 0, as before).
#
# Returns the upload, or nil when the database has no such file.
def upload_node_file(name, postid)
  # name comes straight from post markup, so it is bound as a parameter
  # rather than interpolated into the SQL text
  res = @client.exec_params(
    'SELECT content FROM file_node WHERE name = $1 AND node_id = $2',
    [name, postid]
  )

  if res.ntuples < 1
    puts "Attachment #{name} of node #{postid} not found, markup left unchanged"
    return nil
  end

  # keep only the last path component so the markup cannot point outside /tmp/nab
  safe_name = File.basename(name)
  path = File.join('/tmp/nab', safe_name)

  File.open(path, 'wb') do |file|
    file.write(PG::Connection.unescape_bytea(res[0]['content']))
  end

  @uploader.create_upload(0, path, safe_name)
end
|
||||
|
||||
def import_replies
|
||||
puts "", "creating topic replies"
|
||||
|
||||
|
@ -143,15 +261,31 @@ class ImportScripts::Nabble < ImportScripts::Base
|
|||
|
||||
raw = body_from(p)
|
||||
next unless raw
|
||||
raw = process_content(raw)
|
||||
raw = process_attachments(raw, id)
|
||||
{ id: id,
|
||||
topic_id: topic_id,
|
||||
user_id: user_id_from_imported_user_id(p['owner_id']) || Discourse::SYSTEM_USER_ID,
|
||||
created_at: Time.zone.at(@td.decode(p["when_created"])),
|
||||
raw: raw,
|
||||
cook_method: Post.cook_methods[:email] }
|
||||
cook_method: Post.cook_methods[:regular] }
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class String
  # Returns a copy of the string in which every line, including empty ones,
  # is prefixed with +char+ repeated +count+ times. An empty string is
  # returned unchanged, and nothing is appended after a trailing newline.
  #
  # count - number of times +char+ is repeated in front of each line.
  # char  - the prefix unit (default: a single space).
  #
  # NOTE(review): ActiveSupport also ships a String#indent with a different
  # signature; inside Rails this definition presumably replaces it - confirm
  # nothing else relies on the ActiveSupport behaviour.
  def indent(count, char = ' ')
    gsub(/([^\n]*)(\n|$)/) do
      captures = Regexp.last_match
      body = captures[1]
      line_end = captures[2]

      # gsub yields one extra, completely empty match at the very end of the
      # string; it must not receive a prefix
      if body == "" && line_end == ""
        ""
      else
        [char * count, body, line_end].join
      end
    end
  end
end
|
||||
|
||||
|
||||
# Script entry point: build the importer and call perform on it as soon as this file is loaded.
ImportScripts::Nabble.new.perform
|
||||
|
|
Loading…
Reference in a new issue