FIX: sfn importer

This commit is contained in:
Régis Hanol 2015-04-17 16:32:01 +02:00
parent 78f5fe190a
commit 58a7faad01

View file

@ -1,49 +1,77 @@
# custom importer for www.sfn.org, feel free to borrow ideas
require 'mysql2'
require "csv"
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Sfn < ImportScripts::Base
BATCH_SIZE = 1000
BATCH_SIZE = 100_000
MIN_CREATED_AT = "2003-11-01"
# Adds no setup of its own; simply hands construction over to the
# ImportScripts::Base initializer.
def initialize
super
end
# Entry point of the import: runs every stage in a fixed order and returns
# whatever the final stage returns.
#
# The external user data is loaded first because import_users reads
# @external_users; topics resolve imported categories and posts resolve
# imported topics, so those stages follow in that order.
def execute
  stages = %i[
    load_external_users
    import_users
    import_categories
    import_topics
    import_posts
  ]

  # map + last keeps the return value of the last stage, as a plain
  # sequence of calls would.
  stages.map { |stage| send(stage) }.last
end
# Builds @external_users, a hash keyed by ContactKey whose values are
# { email:, full_name: } taken from a semicolon-separated CSV export.
# import_users falls back on this data when the database row itself has no
# e-mail address or name.
#
# The CSV identifies people by Personify id, so a Personify id -> ContactKey
# lookup (@personify_id_to_contact_key) is built from the Contact table first.
#
# csv_path - location of the CSV export. Columns read: 0 = Personify id,
#            1..3 = name parts, 4 = e-mail. Defaults to the path the script
#            has always used, so existing callers are unaffected.
def load_external_users(csv_path = "/Users/zogstrip/Downloads/sfn.csv")
  puts "", "loading external users..."

  @personify_id_to_contact_key = {}

  contacts = mysql_query <<-SQL
      SELECT ContactKey AS "contact_key",
             PersonifyID AS "personify_id"
        FROM Contact
  SQL

  contacts.each do |contact|
    # PersonifyID may list several comma-separated ids; only the first is used.
    # to_s guards against NULL, and ids that end up empty are skipped because
    # they could never match a CSV row anyway.
    personify_id = contact["personify_id"].to_s.split(",").first
    next if personify_id.nil? || personify_id.empty?

    @personify_id_to_contact_key[personify_id] = contact["contact_key"]
  end

  @external_users = {}

  CSV.foreach(csv_path, col_sep: ";") do |row|
    next unless @personify_id_to_contact_key.key?(row[0])

    contact_key = @personify_id_to_contact_key[row[0]]

    # CSV yields nil for empty unquoted fields (and values_at yields nil for
    # missing trailing columns), so normalise each part before stripping and
    # drop blank parts to avoid doubled spaces in the joined name.
    name_parts = row.values_at(1, 2, 3).map { |part| part.to_s.strip }
    full_name = name_parts.reject(&:empty?).join(" ")

    @external_users[contact_key] = { email: row[4], full_name: full_name }
  end
end
def import_users
puts "", "importing users..."
user_count = mysql_query <<-SQL
SELECT COUNT(DISTINCT cm.ContactKey) AS "count"
FROM CommunityMember cm
LEFT JOIN EgroupSubscription es ON es.ContactKey = cm.ContactKey
WHERE LENGTH(COALESCE(es.EmailAddr_, "")) > 5
SELECT COUNT(ContactKey) AS "count" FROM Contact
SQL
user_count = user_count.first["count"]
batches(BATCH_SIZE) do |offset|
users = mysql_query <<-SQL
SELECT cm.ContactKey AS "id",
cm.InvitedOn AS "created_at",
es.EmailAddr_ AS "email",
es.FullName_ AS "name",
SELECT c.ContactKey AS "id",
c.Bio AS "bio",
c.ProfileImage AS "avatar"
FROM CommunityMember cm
LEFT JOIN EgroupSubscription es ON es.ContactKey = cm.ContactKey
LEFT JOIN Contact c ON c.ContactKey = cm.ContactKey
WHERE LENGTH(COALESCE(es.EmailAddr_, "")) > 5
GROUP BY cm.ContactKey
ORDER BY "created_at"
c.ProfileImage AS "avatar",
es.EmailAddr_ AS "email",
es.FullName_ AS "full_name",
GREATEST('#{MIN_CREATED_AT}', COALESCE(cm.InvitedOn, '#{MIN_CREATED_AT}')) AS "created_at"
FROM Contact c
LEFT JOIN EgroupSubscription es ON es.ContactKey = c.ContactKey
LEFT JOIN CommunityMember cm ON cm.ContactKey = c.ContactKey
GROUP BY c.ContactKey
ORDER BY cm.InvitedOn
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
@ -51,11 +79,18 @@ class ImportScripts::Sfn < ImportScripts::Base
break if users.size < 1
create_users(users, total: user_count, offset: offset) do |user|
external_user = @external_users[user["id"]]
email = user["email"].presence || external_user.try(:[], :email)
full_name = user["full_name"].presence || external_user.try(:[], :full_name)
bio = (user["bio"] || "")[0..250]
next if email.blank?
{
id: user["id"],
name: user["name"],
email: user["email"],
bio_raw: user["bio"],
email: email,
name: full_name,
bio_raw: bio,
created_at: user["created_at"],
post_create_action: proc do |newuser|
next if user["avatar"].blank?
@ -90,6 +125,7 @@ class ImportScripts::Sfn < ImportScripts::Base
"LATP Fellows",
"Mid and Advanced Career",
"Neurobiology of Disease Workshop",
"Neuronline Champions",
"Neuroscience 2015",
"Neuroscience Scholars Program",
"NSP Associates",
@ -118,6 +154,7 @@ class ImportScripts::Sfn < ImportScripts::Base
"{CDF80A92-925A-46DD-A867-8558FA72D016}" => "LATP Fellows",
"{E71E237B-7C23-4596-AECA-655BD8ED50DB}" => "Mid and Advanced Career",
"{1D674C38-17CB-4C48-826A-D465AC3F8948}" => "Neurobiology of Disease Workshop",
"{80C5835E-974E-4D44-BA01-C2C4F8BA91D7}" => "Neuronline Champions",
"{3D4F885B-0037-403B-83DD-62FAA8E81DF1}" => "Neuroscience 2015",
"{9ACC3B40-E4A3-4FFD-AADC-C8403EB6231D}" => "Neuroscience 2015",
"{9FC30FFB-E450-4361-8844-0266C3D96868}" => "Neuroscience Scholars Program",
@ -170,6 +207,7 @@ class ImportScripts::Sfn < ImportScripts::Base
FROM EgroupMessages
WHERE ParentId_ = 0
AND ApprovedRejectedPendingInd = "Approved"
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
SQL
topic_count = topic_count.first["count"]
@ -185,7 +223,8 @@ class ImportScripts::Sfn < ImportScripts::Base
FROM EgroupMessages
WHERE ParentId_ = 0
AND ApprovedRejectedPendingInd = "Approved"
ORDER BY "created_at"
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
ORDER BY CreatStamp_
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
@ -194,12 +233,17 @@ class ImportScripts::Sfn < ImportScripts::Base
create_posts(topics, total: topic_count, offset: offset) do |topic|
next unless category_id = CATEGORY_MAPPING[topic["category_id"]]
title = topic["title"][0..250]
raw = cleanup_raw(topic["raw"])
next if raw.blank?
{
id: topic["id"],
category: category_id_from_imported_category_id(category_id),
user_id: user_id_from_imported_user_id(topic["user_id"]) || Discourse::SYSTEM_USER_ID,
title: topic["title"][0..250],
raw: cleanup_raw(topic["raw"]),
title: title,
raw: raw,
created_at: topic["created_at"],
}
end
@ -214,6 +258,7 @@ class ImportScripts::Sfn < ImportScripts::Base
FROM EgroupMessages
WHERE ParentId_ > 0
AND ApprovedRejectedPendingInd = "Approved"
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
SQL
posts_count = posts_count.first["count"]
@ -228,7 +273,8 @@ class ImportScripts::Sfn < ImportScripts::Base
FROM EgroupMessages
WHERE ParentId_ > 0
AND ApprovedRejectedPendingInd = "Approved"
ORDER BY "created_at"
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
ORDER BY CreatStamp_
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
@ -237,6 +283,10 @@ class ImportScripts::Sfn < ImportScripts::Base
create_posts(posts, total: posts_count, offset: offset) do |post|
next unless parent = topic_lookup_from_imported_post_id(post["topic_id"])
raw = cleanup_raw(post["raw"])
next if raw.blank?
{
id: post["id"],
topic_id: parent[:topic_id],