diff --git a/script/import_scripts/nabble.rb b/script/import_scripts/nabble.rb index 78e484975..ccf8d495e 100644 --- a/script/import_scripts/nabble.rb +++ b/script/import_scripts/nabble.rb @@ -1,5 +1,38 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb") require 'pg' +require_relative 'base/uploader' + +=begin + if you want to create mock users for posts made by anonymous participants, + run the following SQL prior to importing. + +-- first attribute any anonymous posts to existing users (if any) + +UPDATE node +SET owner_id = p.user_id, anonymous_name = NULL +FROM ( SELECT lower(name) AS name, user_id FROM user_ ) p +WHERE p.name = lower(node.anonymous_name) + AND owner_id IS NULL; + +-- then create mock users + +INSERT INTO user_ (email, name, joined, registered) + SELECT lower(anonymous_name) || '@dummy.com', MIN(anonymous_name), MIN(when_created), MIN(when_created) + FROM node + WHERE anonymous_name IS NOT NULL + GROUP BY lower(anonymous_name); + +-- then move these posts to the new users +-- (yes, this is the same query as the first one indeed) + +UPDATE node +SET owner_id = p.user_id, anonymous_name = NULL +FROM ( SELECT lower(name) AS name, user_id FROM user_ ) p +WHERE p.name = lower(node.anonymous_name) + AND owner_id IS NULL; + +=end + class ImportScripts::Nabble < ImportScripts::Base # CHANGE THESE BEFORE RUNNING THE IMPORTER @@ -15,6 +48,7 @@ class ImportScripts::Nabble < ImportScripts::Base @tagmap = [] @td = PG::TextDecoder::TimestampWithTimeZone.new @client = PG.connect(dbname: DB_NAME) + @uploader = ImportScripts::Uploader.new end def execute @@ -42,17 +76,45 @@ class ImportScripts::Nabble < ImportScripts::Base next if all_records_exist? :users, users.map {|u| u["user_id"].to_i} - create_users(users, total: total_count, offset: offset) do |user| + create_users(users, total: total_count, offset: offset) do |row| { - id: user["user_id"], - email: user["email"] || (SecureRandom.hex << "@domain.com"), - created_at: Time.zone.at(@td.decode(user["joined"])), - name: user["name"] + id: row["user_id"], + email: row["email"] || (SecureRandom.hex << "@domain.com"), + created_at: Time.zone.at(@td.decode(row["joined"])), + name: row["name"], + post_create_action: proc do |user| + import_avatar(user, row["user_id"]) + end } end end end + def import_avatar(user, org_id) + filename = 'avatar' + org_id.to_s + path = File.join('/tmp/nab', filename) + res = @client.exec("SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1") + return if res.ntuples() < 1 + + binary = res[0]['content'] + File.open(path, 'wb') { |f| + f.write(PG::Connection.unescape_bytea(binary)) + } + + upload = @uploader.create_upload(user.id, path, filename) + + if upload.persisted? + user.import_mode = false + user.create_user_avatar + user.import_mode = true + user.user_avatar.update(custom_upload_id: upload.id) + user.update(uploaded_avatar_id: upload.id) + else + Rails.logger.error("Could not persist avatar for user #{user.username}") + end + + end + def parse_email(msg) receiver = Email::Receiver.new(msg, skip_sanity_check: true) mail = Mail.read_from_string(msg) @@ -87,6 +149,8 @@ class ImportScripts::Nabble < ImportScripts::Base create_posts(topics, total: topic_count, offset: offset) do |t| raw = body_from(t) next unless raw + raw = process_content(raw) + raw = process_attachments(raw, t['node_id']) { id: t['node_id'], title: t['subject'], @@ -94,7 +158,7 @@ class ImportScripts::Nabble < ImportScripts::Base created_at: Time.zone.at(@td.decode(t["when_created"])), category: CATEGORY_ID, raw: raw, - cook_method: Post.cook_methods[:email] } + cook_method: Post.cook_methods[:regular] } end end end @@ -105,6 +169,60 @@ class ImportScripts::Nabble < ImportScripts::Base puts "Skipped #{p['node_id']}" end + def process_content(txt) + txt.gsub! /\/, '[quote="\1"]' + txt.gsub! /\<\/quote\>/, '[/quote]' + txt.gsub!(/\(.*?)\<\/raw\>/m) do |match| + c = Regexp.last_match[1].indent(4); + "\n#{c}\n" + end + + # lines starting with # are comments, not headings, insert a space to prevent markdown + txt.gsub! /\n#/m, ' #' + + # in the languagetool forum, quite a lot of XML was not marked as raw + # so we treat ... and ... as raw + + # uncomment below if you want to use this + + #txt.gsub!(/(.*?<\/rule>)/m) do |match| + # c = Regexp.last_match[2].indent(4); + # "\n #{c}\n" + #end + #txt.gsub!(/(.*?<\/category>)/m) do |match| + # c = Regexp.last_match[2].indent(4); + # "\n #{c}\n" + #end + txt + end + + def process_attachments(txt, postid) + txt.gsub!(//m) do |match| + basename = Regexp.last_match[1] + fn = File.join('/tmp/nab', basename) + + binary = @client.exec("SELECT content FROM file_node WHERE name='#{basename}' AND node_id = #{postid}")[0]['content'] + File.open(fn, 'wb') { |f| + f.write(PG::Connection.unescape_bytea(binary)) + } + upload = @uploader.create_upload(0, fn, basename) + @uploader.embedded_image_html(upload) + end + + txt.gsub!(/(.*?)<\/nabble_a>/m) do |match| + basename = Regexp.last_match[1] + fn = File.join('/tmp/nab', basename) + + binary = @client.exec("SELECT content FROM file_node WHERE name='#{basename}' AND node_id = #{postid}")[0]['content'] + File.open(fn, 'wb') { |f| + f.write(PG::Connection.unescape_bytea(binary)) + } + upload = @uploader.create_upload(0, fn, basename) + @uploader.attachment_html(upload, basename) + end + txt + end + def import_replies puts "", "creating topic replies" @@ -143,15 +261,31 @@ class ImportScripts::Nabble < ImportScripts::Base raw = body_from(p) next unless raw + raw = process_content(raw) + raw = process_attachments(raw, id) { id: id, topic_id: topic_id, user_id: user_id_from_imported_user_id(p['owner_id']) || Discourse::SYSTEM_USER_ID, created_at: Time.zone.at(@td.decode(p["when_created"])), raw: raw, - cook_method: Post.cook_methods[:email] } + cook_method: Post.cook_methods[:regular] } end end end end +class String + def indent(count, char = ' ') + gsub(/([^\n]*)(\n|$)/) do |match| + last_iteration = ($1 == "" && $2 == "") + line = "" + line << (char * count) unless last_iteration + line << $1 + line << $2 + line + end + end +end + + ImportScripts::Nabble.new.perform