mirror of
https://github.com/codeninjasllc/discourse.git
synced 2025-02-25 07:54:11 -05:00
FIX: sfn importer
This commit is contained in:
parent
78f5fe190a
commit
58a7faad01
1 changed file with 74 additions and 24 deletions
|
@ -1,49 +1,77 @@
|
||||||
# custom importer for www.sfn.org, feel free to borrow ideas
|
# custom importer for www.sfn.org, feel free to borrow ideas
|
||||||
|
|
||||||
require 'mysql2'
|
require "csv"
|
||||||
|
require "mysql2"
|
||||||
|
|
||||||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||||
|
|
||||||
class ImportScripts::Sfn < ImportScripts::Base
|
class ImportScripts::Sfn < ImportScripts::Base
|
||||||
|
|
||||||
BATCH_SIZE = 1000
|
BATCH_SIZE = 100_000
|
||||||
|
MIN_CREATED_AT = "2003-11-01"
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super
|
super
|
||||||
end
|
end
|
||||||
|
|
||||||
def execute
|
def execute
|
||||||
|
load_external_users
|
||||||
import_users
|
import_users
|
||||||
import_categories
|
import_categories
|
||||||
import_topics
|
import_topics
|
||||||
import_posts
|
import_posts
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Builds the lookup tables used to fill in user details from an external
# CSV export.
#
# Populates two instance variables:
#   @personify_id_to_contact_key - first comma-separated PersonifyID of each
#                                  Contact row => that row's ContactKey
#   @external_users              - ContactKey => { email:, full_name: } for
#                                  every CSV row whose first column matches
#                                  a known PersonifyID
#
# The CSV is read with ";" as the column separator. Its location defaults to
# the original hard-coded path and can be overridden with the
# SFN_EXTERNAL_USERS_CSV environment variable.
#
# NOTE(review): the CSV layout (id; three name parts; email) is inferred from
# how row[0]..row[4] are used here - confirm against the real export.
def load_external_users
  puts "", "loading external users..."

  @personify_id_to_contact_key = {}

  contacts = mysql_query <<-SQL
    SELECT ContactKey AS "contact_key",
           PersonifyID AS "personify_id"
      FROM Contact
  SQL

  contacts.each do |contact|
    # PersonifyID may be NULL or empty; such contacts cannot be matched
    # against the CSV, so skip them instead of raising / storing a nil key.
    personify_id = contact["personify_id"].to_s.split(",").first
    next if personify_id.nil? || personify_id.empty?

    @personify_id_to_contact_key[personify_id] = contact["contact_key"]
  end

  @external_users = {}

  csv_path = ENV.fetch("SFN_EXTERNAL_USERS_CSV", "/Users/zogstrip/Downloads/sfn.csv")

  CSV.foreach(csv_path, col_sep: ";") do |row|
    next unless @personify_id_to_contact_key.key?(row[0])

    id = @personify_id_to_contact_key[row[0]]

    # Empty CSV fields are parsed as nil, so coerce before stripping and drop
    # blank name parts to avoid doubled spaces in the joined name.
    name_parts = row.values_at(1, 2, 3).map { |part| part.to_s.strip }
    full_name = name_parts.reject(&:empty?).join(" ")

    @external_users[id] = { email: row[4], full_name: full_name }
  end
end
|
||||||
|
|
||||||
def import_users
|
def import_users
|
||||||
puts "", "importing users..."
|
puts "", "importing users..."
|
||||||
|
|
||||||
user_count = mysql_query <<-SQL
|
user_count = mysql_query <<-SQL
|
||||||
SELECT COUNT(DISTINCT cm.ContactKey) AS "count"
|
SELECT COUNT(ContactKey) AS "count" FROM Contact
|
||||||
FROM CommunityMember cm
|
|
||||||
LEFT JOIN EgroupSubscription es ON es.ContactKey = cm.ContactKey
|
|
||||||
WHERE LENGTH(COALESCE(es.EmailAddr_, "")) > 5
|
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
user_count = user_count.first["count"]
|
user_count = user_count.first["count"]
|
||||||
|
|
||||||
batches(BATCH_SIZE) do |offset|
|
batches(BATCH_SIZE) do |offset|
|
||||||
users = mysql_query <<-SQL
|
users = mysql_query <<-SQL
|
||||||
SELECT cm.ContactKey AS "id",
|
SELECT c.ContactKey AS "id",
|
||||||
cm.InvitedOn AS "created_at",
|
|
||||||
es.EmailAddr_ AS "email",
|
|
||||||
es.FullName_ AS "name",
|
|
||||||
c.Bio AS "bio",
|
c.Bio AS "bio",
|
||||||
c.ProfileImage AS "avatar"
|
c.ProfileImage AS "avatar",
|
||||||
FROM CommunityMember cm
|
es.EmailAddr_ AS "email",
|
||||||
LEFT JOIN EgroupSubscription es ON es.ContactKey = cm.ContactKey
|
es.FullName_ AS "full_name",
|
||||||
LEFT JOIN Contact c ON c.ContactKey = cm.ContactKey
|
GREATEST('#{MIN_CREATED_AT}', COALESCE(cm.InvitedOn, '#{MIN_CREATED_AT}')) AS "created_at"
|
||||||
WHERE LENGTH(COALESCE(es.EmailAddr_, "")) > 5
|
FROM Contact c
|
||||||
GROUP BY cm.ContactKey
|
LEFT JOIN EgroupSubscription es ON es.ContactKey = c.ContactKey
|
||||||
ORDER BY "created_at"
|
LEFT JOIN CommunityMember cm ON cm.ContactKey = c.ContactKey
|
||||||
|
GROUP BY c.ContactKey
|
||||||
|
ORDER BY cm.InvitedOn
|
||||||
LIMIT #{BATCH_SIZE}
|
LIMIT #{BATCH_SIZE}
|
||||||
OFFSET #{offset}
|
OFFSET #{offset}
|
||||||
SQL
|
SQL
|
||||||
|
@ -51,11 +79,18 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
break if users.size < 1
|
break if users.size < 1
|
||||||
|
|
||||||
create_users(users, total: user_count, offset: offset) do |user|
|
create_users(users, total: user_count, offset: offset) do |user|
|
||||||
|
external_user = @external_users[user["id"]]
|
||||||
|
email = user["email"].presence || external_user.try(:[], :email)
|
||||||
|
full_name = user["full_name"].presence || external_user.try(:[], :full_name)
|
||||||
|
bio = (user["bio"] || "")[0..250]
|
||||||
|
|
||||||
|
next if email.blank?
|
||||||
|
|
||||||
{
|
{
|
||||||
id: user["id"],
|
id: user["id"],
|
||||||
name: user["name"],
|
email: email,
|
||||||
email: user["email"],
|
name: full_name,
|
||||||
bio_raw: user["bio"],
|
bio_raw: bio,
|
||||||
created_at: user["created_at"],
|
created_at: user["created_at"],
|
||||||
post_create_action: proc do |newuser|
|
post_create_action: proc do |newuser|
|
||||||
next if user["avatar"].blank?
|
next if user["avatar"].blank?
|
||||||
|
@ -90,6 +125,7 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
"LATP Fellows",
|
"LATP Fellows",
|
||||||
"Mid and Advanced Career",
|
"Mid and Advanced Career",
|
||||||
"Neurobiology of Disease Workshop",
|
"Neurobiology of Disease Workshop",
|
||||||
|
"Neuronline Champions",
|
||||||
"Neuroscience 2015",
|
"Neuroscience 2015",
|
||||||
"Neuroscience Scholars Program",
|
"Neuroscience Scholars Program",
|
||||||
"NSP Associates",
|
"NSP Associates",
|
||||||
|
@ -118,6 +154,7 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
"{CDF80A92-925A-46DD-A867-8558FA72D016}" => "LATP Fellows",
|
"{CDF80A92-925A-46DD-A867-8558FA72D016}" => "LATP Fellows",
|
||||||
"{E71E237B-7C23-4596-AECA-655BD8ED50DB}" => "Mid and Advanced Career",
|
"{E71E237B-7C23-4596-AECA-655BD8ED50DB}" => "Mid and Advanced Career",
|
||||||
"{1D674C38-17CB-4C48-826A-D465AC3F8948}" => "Neurobiology of Disease Workshop",
|
"{1D674C38-17CB-4C48-826A-D465AC3F8948}" => "Neurobiology of Disease Workshop",
|
||||||
|
"{80C5835E-974E-4D44-BA01-C2C4F8BA91D7}" => "Neuronline Champions",
|
||||||
"{3D4F885B-0037-403B-83DD-62FAA8E81DF1}" => "Neuroscience 2015",
|
"{3D4F885B-0037-403B-83DD-62FAA8E81DF1}" => "Neuroscience 2015",
|
||||||
"{9ACC3B40-E4A3-4FFD-AADC-C8403EB6231D}" => "Neuroscience 2015",
|
"{9ACC3B40-E4A3-4FFD-AADC-C8403EB6231D}" => "Neuroscience 2015",
|
||||||
"{9FC30FFB-E450-4361-8844-0266C3D96868}" => "Neuroscience Scholars Program",
|
"{9FC30FFB-E450-4361-8844-0266C3D96868}" => "Neuroscience Scholars Program",
|
||||||
|
@ -170,6 +207,7 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
FROM EgroupMessages
|
FROM EgroupMessages
|
||||||
WHERE ParentId_ = 0
|
WHERE ParentId_ = 0
|
||||||
AND ApprovedRejectedPendingInd = "Approved"
|
AND ApprovedRejectedPendingInd = "Approved"
|
||||||
|
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
topic_count = topic_count.first["count"]
|
topic_count = topic_count.first["count"]
|
||||||
|
@ -185,7 +223,8 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
FROM EgroupMessages
|
FROM EgroupMessages
|
||||||
WHERE ParentId_ = 0
|
WHERE ParentId_ = 0
|
||||||
AND ApprovedRejectedPendingInd = "Approved"
|
AND ApprovedRejectedPendingInd = "Approved"
|
||||||
ORDER BY "created_at"
|
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
|
||||||
|
ORDER BY CreatStamp_
|
||||||
LIMIT #{BATCH_SIZE}
|
LIMIT #{BATCH_SIZE}
|
||||||
OFFSET #{offset}
|
OFFSET #{offset}
|
||||||
SQL
|
SQL
|
||||||
|
@ -194,12 +233,17 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
|
|
||||||
create_posts(topics, total: topic_count, offset: offset) do |topic|
|
create_posts(topics, total: topic_count, offset: offset) do |topic|
|
||||||
next unless category_id = CATEGORY_MAPPING[topic["category_id"]]
|
next unless category_id = CATEGORY_MAPPING[topic["category_id"]]
|
||||||
|
|
||||||
|
title = topic["title"][0..250]
|
||||||
|
raw = cleanup_raw(topic["raw"])
|
||||||
|
next if raw.blank?
|
||||||
|
|
||||||
{
|
{
|
||||||
id: topic["id"],
|
id: topic["id"],
|
||||||
category: category_id_from_imported_category_id(category_id),
|
category: category_id_from_imported_category_id(category_id),
|
||||||
user_id: user_id_from_imported_user_id(topic["user_id"]) || Discourse::SYSTEM_USER_ID,
|
user_id: user_id_from_imported_user_id(topic["user_id"]) || Discourse::SYSTEM_USER_ID,
|
||||||
title: topic["title"][0..250],
|
title: title,
|
||||||
raw: cleanup_raw(topic["raw"]),
|
raw: raw,
|
||||||
created_at: topic["created_at"],
|
created_at: topic["created_at"],
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
@ -214,6 +258,7 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
FROM EgroupMessages
|
FROM EgroupMessages
|
||||||
WHERE ParentId_ > 0
|
WHERE ParentId_ > 0
|
||||||
AND ApprovedRejectedPendingInd = "Approved"
|
AND ApprovedRejectedPendingInd = "Approved"
|
||||||
|
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
posts_count = posts_count.first["count"]
|
posts_count = posts_count.first["count"]
|
||||||
|
@ -228,7 +273,8 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
FROM EgroupMessages
|
FROM EgroupMessages
|
||||||
WHERE ParentId_ > 0
|
WHERE ParentId_ > 0
|
||||||
AND ApprovedRejectedPendingInd = "Approved"
|
AND ApprovedRejectedPendingInd = "Approved"
|
||||||
ORDER BY "created_at"
|
AND (CrosspostFromMessageKey IS NULL OR CrosspostFromMessageKey = '{00000000-0000-0000-0000-000000000000}')
|
||||||
|
ORDER BY CreatStamp_
|
||||||
LIMIT #{BATCH_SIZE}
|
LIMIT #{BATCH_SIZE}
|
||||||
OFFSET #{offset}
|
OFFSET #{offset}
|
||||||
SQL
|
SQL
|
||||||
|
@ -237,6 +283,10 @@ class ImportScripts::Sfn < ImportScripts::Base
|
||||||
|
|
||||||
create_posts(posts, total: posts_count, offset: offset) do |post|
|
create_posts(posts, total: posts_count, offset: offset) do |post|
|
||||||
next unless parent = topic_lookup_from_imported_post_id(post["topic_id"])
|
next unless parent = topic_lookup_from_imported_post_id(post["topic_id"])
|
||||||
|
|
||||||
|
raw = cleanup_raw(post["raw"])
|
||||||
|
next if raw.blank?
|
||||||
|
|
||||||
{
|
{
|
||||||
id: post["id"],
|
id: post["id"],
|
||||||
topic_id: parent[:topic_id],
|
topic_id: parent[:topic_id],
|
||||||
|
|
Loading…
Reference in a new issue