mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-30 10:58:31 -05:00
FEATURE: Zoho importer
This commit is contained in:
parent
80bdc7333c
commit
08e10c2a9d
3 changed files with 269 additions and 1 deletions
|
@ -366,7 +366,6 @@ class ImportScripts::Base
|
||||||
end
|
end
|
||||||
|
|
||||||
new_category = create_category(params, params[:id])
|
new_category = create_category(params, params[:id])
|
||||||
@lookup.add_category(params[:id], new_category)
|
|
||||||
|
|
||||||
created += 1
|
created += 1
|
||||||
end
|
end
|
||||||
|
@ -396,6 +395,8 @@ class ImportScripts::Base
|
||||||
new_category.custom_fields["import_id"] = import_id if import_id
|
new_category.custom_fields["import_id"] = import_id if import_id
|
||||||
new_category.save!
|
new_category.save!
|
||||||
|
|
||||||
|
@lookup.add_category(import_id, new_category)
|
||||||
|
|
||||||
post_create_action.try(:call, new_category)
|
post_create_action.try(:call, new_category)
|
||||||
|
|
||||||
new_category
|
new_category
|
||||||
|
@ -645,6 +646,23 @@ class ImportScripts::Base
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Back-dates every user's signup date to the time of their earliest post.
#
# Iterates over all users with User.find_each. A user without any posts is
# left unchanged. Progress is reported through print_status after each user,
# whether or not the user was modified.
def update_user_signup_date_based_on_first_post
  puts "", "setting users' signup date based on the date of their first post"

  total_count = User.count
  progress_count = 0

  User.find_each do |user|
    # Only the earliest timestamp is needed, so ask for that single value
    # instead of loading and instantiating a whole post record.
    first_posted_at = user.posts.minimum(:created_at)

    if first_posted_at
      user.created_at = first_posted_at
      user.save!
    end

    progress_count += 1
    print_status(progress_count, total_count)
  end
end
|
||||||
|
|
||||||
def html_for_upload(upload, display_filename)
|
def html_for_upload(upload, display_filename)
|
||||||
@uploader.html_for_upload(upload, display_filename)
|
@uploader.html_for_upload(upload, display_filename)
|
||||||
end
|
end
|
||||||
|
|
75
script/import_scripts/base/csv_helper.rb
Normal file
75
script/import_scripts/base/csv_helper.rb
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
require 'csv'

module ImportScripts
  # Helpers for reading CSV exports in which a quoted field may span several
  # physical lines of the file.
  module CsvHelper
    # Gives access to the cells of the current CSV row through reader methods
    # named after the header columns (lower-cased, with every run of non-word
    # characters replaced by a single underscore).
    class RowResolver
      # Points the resolver at a new array of cell values.
      def load(row)
        @row = row
      end

      # Builds a resolver on a fresh anonymous subclass, so the readers
      # generated for one header row are not defined on RowResolver itself.
      def self.create(cols)
        Class.new(RowResolver).new(cols)
      end

      # cols - Array of header cell strings; a nil entry (empty header cell)
      #        gets no reader.
      def initialize(cols)
        cols.each_with_index do |col, idx|
          next if col.nil? # an empty header cell has no name to derive a reader from

          self.class.send(:define_method, col.downcase.gsub(/[\W]/, '_').squeeze('_')) do
            @row[idx]
          end
        end
      end
    end

    # Reads +filename+ and yields one RowResolver per data row.
    #
    # The first complete row is treated as the header and is not yielded.
    # The SAME resolver object is yielded every time, re-loaded with the
    # current row's cells, so callers that keep rows must copy them.
    #
    # Physical lines are stripped and joined with "\n" until the number of
    # double quotes seen is even, i.e. until a quoted field that contains line
    # breaks is complete. Blank lines between records are ignored. Rows that
    # CSV cannot parse are reported on stdout and skipped.
    #
    # filename - path of the CSV file.
    # col_sep  - column separator handed to CSV.parse.
    def csv_parse(filename, col_sep = ',')
      resolver = nil
      pending = String.new
      quote_count = 0

      # Block form so the file handle is closed even if the caller's block raises.
      File.open(filename) do |file|
        file.each_line do |line|
          line.strip!

          # A blank line outside of a quoted field carries no record; parsing
          # it would produce a nil row.
          next if pending.empty? && line.empty?

          pending << "\n" unless pending.empty?
          pending << line
          quote_count += line.count('"')

          # this row continues on a new line. don't parse until we have the whole row.
          next if quote_count.odd?

          # The record is complete: take it and reset the accumulators once,
          # whatever happens to the record below.
          record = pending
          record_quotes = quote_count
          pending = String.new
          quote_count = 0

          begin
            cells = CSV.parse(record, col_sep: col_sep)[0]
          rescue CSV::MalformedCSVError => e
            puts e.message
            puts "*" * 100
            puts "Bad row skipped, line is: #{line}"
            puts
            puts record
            puts
            puts "double quote count is : #{record_quotes}"
            puts "*" * 100
            next
          end

          next if cells.nil?

          if resolver.nil?
            # First well-formed row: the header.
            resolver = RowResolver.create(cells)
            next
          end

          resolver.load(cells)
          yield resolver
        end
      end
    end
  end
end
|
175
script/import_scripts/zoho.rb
Normal file
175
script/import_scripts/zoho.rb
Normal file
|
@ -0,0 +1,175 @@
|
||||||
|
# Import from Zoho.
|
||||||
|
# Be sure to get the posts CSV file, AND the user list csv file with people's email addresses.
|
||||||
|
# You may need to contact Zoho support for the user list.
|
||||||
|
#
|
||||||
|
# * Zoho data doesn't indicate which users are admins or moderators, so you'll need to grant
|
||||||
|
# those privileges manually after the import finishes.
|
||||||
|
# * The posts and users csv files don't seem to have consistent usernames, and sometimes use
|
||||||
|
# full names instead of usernames. This may cause duplicate users with slightly different
|
||||||
|
# usernames to be created.
|
||||||
|
|
||||||
|
require 'csv'
|
||||||
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||||
|
require File.expand_path(File.dirname(__FILE__) + "/base/csv_helper.rb")
|
||||||
|
|
||||||
|
# Call it like this:
|
||||||
|
# bundle exec ruby script/import_scripts/zoho.rb <path-to-csv-files>
|
||||||
|
# Imports users, categories, topics and posts from the CSV files of a Zoho
# export (users.csv and posts.csv inside the directory given to the
# constructor).
class ImportScripts::Zoho < ImportScripts::Base

  include ImportScripts::CsvHelper

  # Not referenced anywhere in this class; kept for compatibility.
  BATCH_SIZE = 1000

  # Category names that mean "filed directly under the forum": such posts go
  # into the parent category and no sub-category is created for them.
  UNCATEGORIZED_NAMES = ["Uncategorized", "Uncategorised"].freeze

  # path - directory that contains users.csv and posts.csv.
  def initialize(path)
    @path = path
    @all_posts = []
    @categories = {} # key is the parent category, value is an array of sub-categories
    @topic_mapping = {} # key is a topic permalink, value is the id of the created topic
    @current_row = nil # row being imported; read by the created_post callback
    super()
  end

  def execute
    import_users
    import_posts
    update_tl0
    update_user_signup_date_based_on_first_post
  end

  # Reduces a Zoho user name to letters, digits, "_", "." and "-".
  # An empty CSV cell is parsed as nil, so nil is treated like an empty
  # string and yields "" instead of raising.
  def cleanup_zoho_username(s)
    s.to_s.strip.gsub(/[^A-Za-z0-9_\.\-]/, '')
  end

  # Creates one user per row of users.csv (column 0: user name, column 1: email).
  def import_users
    puts "", "Importing users"
    create_users( CSV.parse(File.read(File.join(@path, 'users.csv'))) ) do |u|
      username = cleanup_zoho_username(u[0])
      {
        id: username,
        username: username,
        email: u[1],
        # Placeholder only: execute later back-dates every signup to the
        # user's first post via update_user_signup_date_based_on_first_post.
        created_at: Time.zone.now
      }
    end
  end

  # Reads posts.csv, creates the categories it mentions, then creates the
  # topics and replies.
  def import_posts
    # 0 Forum Name
    # 1 Category Name
    # 2 Topic Title
    # 3 Permalink
    # 4 Posted Time
    # 5 Content
    # 6 Author
    # 7 Attachments
    # 8 Votes

    puts "", "Parsing posts CSV"

    csv_parse(File.join(@path, "posts.csv")) do |row|
      # csv_parse yields the same resolver object for every row, so keep a copy.
      @all_posts << row.dup

      subcategories = (@categories[row.forum_name] ||= [])
      subcategories << row.category_name unless subcategories.include?(row.category_name)
    end

    puts "", "Creating categories"

    # Create categories
    @categories.each do |parent, subcats|
      c = create_category({ name: parent }, parent)
      subcats.each do |subcat|
        next if UNCATEGORIZED_NAMES.include?(subcat)
        create_category({ name: subcat, parent_category_id: c.id }, "#{parent}:#{subcat}")
      end
    end

    puts "", "Creating topics and posts"

    created, skipped = create_posts(@all_posts, total: @all_posts.size) do |row|
      @current_row = row

      # fetch user
      username = cleanup_zoho_username(row.author)

      next if username.blank? # no author for this post, so skip

      posted_at = Time.zone.parse(row.posted_time)
      user_id = user_id_from_imported_user_id(username)

      if user_id.nil?
        # user CSV file didn't have a user with this username. create it now with an invalid email address.
        u = create_user(
          { id: username,
            username: username,
            email: "#{username}@example.com",
            created_at: posted_at },
          username
        )
        user_id = u.id
      end

      topic_id = @topic_mapping[row.permalink]

      if topic_id.nil?
        # First row seen for this permalink: it becomes the topic.
        category_import_id =
          if UNCATEGORIZED_NAMES.include?(row.category_name)
            row.forum_name
          else
            "#{row.forum_name}:#{row.category_name}"
          end

        # create topic
        {
          id: import_post_id(row),
          user_id: user_id,
          category: category_id_from_imported_category_id(category_import_id),
          title: row.topic_title,
          raw: row.content,
          created_at: posted_at
        }
        # created_post callback will be called
      else
        # A topic already exists for this permalink: import the row as a reply.
        {
          id: import_post_id(row),
          user_id: user_id,
          raw: row.content,
          created_at: posted_at,
          topic_id: topic_id
        }
      end
    end

    puts ""
    puts "Created: #{created}"
    puts "Skipped: #{skipped}"
    puts ""
  end

  # Remembers the topic id of the first post created for the current row's
  # permalink, so later rows with the same permalink are imported as replies.
  def created_post(post)
    unless @topic_mapping[@current_row.permalink]
      @topic_mapping[@current_row.permalink] = post.topic_id
    end
  end

  def import_post_id(row)
    # Try to make up a unique id based on the data Zoho gives us.
    # The posted_time seems to be the same for all posts in a topic, so we can't use that.
    Digest::SHA1.hexdigest "#{row.permalink}:#{row.content}"
  end

end
|
||||||
|
|
||||||
|
# Entry point: the first command-line argument must name an existing
# directory that holds the Zoho CSV files. Otherwise print the usage (preceded
# by an error when a non-existent directory was given) and exit with status 1.
import_dir = ARGV[0]

if import_dir.nil? || !Dir.exist?(import_dir)
  puts "", "ERROR! Dir #{import_dir} not found.", "" if import_dir

  puts "", "Usage:", "", " bundle exec ruby script/import_scripts/zoho.rb DIRNAME", ""
  exit 1
end

ImportScripts::Zoho.new(import_dir).perform
|
Loading…
Reference in a new issue