mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-27 09:36:19 -05:00
Add Socialcast importer
This commit is contained in:
parent
4a2f0e772c
commit
f7f8226b4c
12 changed files with 8854 additions and 0 deletions
21
script/import_scripts/socialcast/README.md
Normal file
21
script/import_scripts/socialcast/README.md
Normal file
|
@ -0,0 +1,21 @@
|
|||
|
||||
To get started, copy `test/config.ex.yml` to `config.yml`, and then update the properties for your Socialcast instance.
|
||||
|
||||
This importer uses the [Socialcast API](https://socialcast.github.io/socialcast/apidoc.html).
|
||||
|
||||
```
|
||||
domain: 'my-socialcast-domain'
|
||||
username: 'my-socialcast-username'
|
||||
password: 'my-socialcast-password'
|
||||
```
|
||||
|
||||
Create the directory that will hold the exported JSON files: `mkdir output`
|
||||
Then run `ruby export.rb /path/to/config.yml`
|
||||
|
||||
Create a category named "Socialcast Import" or all topics will be imported into
|
||||
the "Uncategorized" category.
|
||||
|
||||
Topics will be tagged with the names of the groups they were originally posted
|
||||
in on Socialcast.
|
||||
|
||||
To run the import, run `ruby import.rb`
|
50
script/import_scripts/socialcast/create_title.rb
Normal file
50
script/import_scripts/socialcast/create_title.rb
Normal file
|
@ -0,0 +1,50 @@
|
|||
require 'uri'
|
||||
|
||||
# Derives a short, readable title from the free-form body of a message.
#
# The only public entry point is CreateTitle.from_body; the remaining class
# methods are implementation details and are made private explicitly.
class CreateTitle

  # Candidate lines and final titles shorter than this are rejected.
  MIN_LENGTH = 20
  # Only the first MAX_LENGTH characters of a line are considered for a title.
  MAX_LENGTH = 80

  # Builds a title from +body+.
  #
  # body - String message text (nil is tolerated).
  #
  # Returns the title String, or nil when no line of at least MIN_LENGTH
  # characters remains after mentions, URLs and stray punctuation are removed,
  # or when the resulting title is shorter than MIN_LENGTH.
  def self.from_body(body)
    return if body.nil?

    title = remove_mentions(body)
    title = remove_urls(title)
    title = remove_stray_punctuation(title)
    title = first_long_line(title)
    return unless title

    sentences = complete_sentences(title)
    title = sentences ? sentences[1] : complete_words(title)

    title unless title.nil? || title.size < MIN_LENGTH
  end

  # Strips "@name" mentions.
  def self.remove_mentions(text)
    text.gsub(/@[\w]*/, '')
  end

  # Strips URLs that use one of the listed schemes.
  def self.remove_urls(text)
    text.gsub(URI::regexp(['http', 'https', 'mailto', 'ftp', 'ldap', 'ldaps']), '')
  end

  # Turns a lone punctuation character surrounded by whitespace into a line
  # break, so the text on either side becomes a separate title candidate.
  def self.remove_stray_punctuation(text)
    text.gsub(/\s+?[^a-zA-Z0-9\s]\s+/, "\n")
  end

  # Returns the first line with at least MIN_LENGTH characters (stripped),
  # or nil when there is none.
  def self.first_long_line(text)
    lines = text.split("\n").select { |t| t.strip.size >= MIN_LENGTH }
    return if lines.empty?
    lines[0].strip
  end

  # Matches the longest run of complete sentences within the first MAX_LENGTH
  # characters. Returns MatchData (group 1 holds the sentences) or nil.
  # A trailing space is appended so a sentence ending exactly at the cut-off
  # is still followed by a non-word character.
  def self.complete_sentences(text)
    /(^.*[\S]{2,}[.!?:]+)\W/.match(text[0...MAX_LENGTH] + ' ')
  end

  # Truncates long text at the last whitespace before MAX_LENGTH characters
  # and appends an ellipsis; shorter text is returned unchanged.
  def self.complete_words(text)
    return text[0...MAX_LENGTH].rpartition(/\s/)[0] + "..." if text.size >= MAX_LENGTH
    text
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers
  # have to be hidden with private_class_method.
  private_class_method :remove_mentions, :remove_urls, :remove_stray_punctuation,
                       :first_long_line, :complete_sentences, :complete_words
end
|
58
script/import_scripts/socialcast/export.rb
Normal file
58
script/import_scripts/socialcast/export.rb
Normal file
|
@ -0,0 +1,58 @@
|
|||
require 'yaml'
|
||||
require 'fileutils'
|
||||
require_relative 'socialcast_api'
|
||||
|
||||
# Reads the Socialcast connection settings from a YAML file and stores them
# in @domain, @username and @password.
#
# file - path to the config file. A relative path is resolved against this
#        script's directory; an absolute path is used as given (File.join
#        would have glued it onto the script directory instead).
def load_config file
  config = YAML::load_file(File.expand_path(file, __dir__))
  @domain = config['domain']
  @username = config['username']
  @password = config['password']
end
|
||||
|
||||
# Creates the API client from the loaded settings, makes sure both output
# directories exist, then exports users followed by messages.
def export
  @api = SocialcastApi.new(@domain, @username, @password)
  ["output/users", "output/messages"].each { |dir| create_dir(dir) }
  export_users
  export_messages
end
|
||||
|
||||
# Writes every user returned by the API to output/users/<id>.json, one page
# at a time, starting at +page+ and stopping at the first empty page.
#
# Files are written below this script's directory, the same place create_dir
# creates the output folders, so the export works from any working directory.
# Pages are walked in a loop rather than by recursion so the call stack does
# not grow with the number of pages.
#
# page - first page number to request (default 1).
#
# Returns nil.
def export_users(page=1)
  loop do
    users = @api.list_users({page: page})
    break if users.empty?

    users.each do |user|
      # contact_info may be absent; print an empty line rather than crash.
      puts((user['contact_info'] || {})['email'])
      target = File.join(__dir__, "output/users/#{user['id']}.json")
      # The block form of File.open closes the file for us.
      File.open(target, 'w') { |f| f.write user.to_json }
    end

    page += 1
  end
end
|
||||
|
||||
# Writes every message returned by the API to output/messages/<id>.json, one
# page at a time, starting at +page+ and stopping at the first empty page.
# For each message a short preview is printed: the first line of its title
# (or of its body when the title is blank), limited to 51 characters.
#
# Files are written below this script's directory, the same place create_dir
# creates the output folders.
#
# page - first page number to request (default 1).
#
# Returns nil.
def export_messages(page=1)
  loop do
    messages = @api.list_messages({page: page})
    break if messages.empty?

    messages.each do |message|
      # to_s guards against a null title/body in the API response.
      title = message['title'].to_s
      title = message['body'].to_s if title.empty?
      # The original split on '\n' (single quotes: a literal backslash + n),
      # which never matched a real line break.
      preview = title.lines.first.to_s.chomp[0..50]

      puts "#{message['id']}: #{preview}"
      target = File.join(__dir__, "output/messages/#{message['id']}.json")
      File.open(target, 'w') { |f| f.write message.to_json }
    end

    page += 1
  end
end
|
||||
|
||||
# Ensures that +path+, taken relative to this script's directory, exists as a
# directory, creating intermediate directories as needed.
def create_dir(path)
  target = File.join(__dir__, path)
  FileUtils.mkdir_p(target) unless File.directory?(target)
end
|
||||
|
||||
# Script entry point: the first command-line argument is the config file path.
# Fail with a usage message instead of an opaque TypeError when it is missing.
config_path = ARGV.shift
abort "Usage: ruby export.rb /path/to/config.yml" unless config_path
load_config config_path
export
|
102
script/import_scripts/socialcast/import.rb
Normal file
102
script/import_scripts/socialcast/import.rb
Normal file
|
@ -0,0 +1,102 @@
|
|||
require_relative './socialcast_message.rb'
|
||||
require_relative './socialcast_user.rb'
|
||||
require 'set'
|
||||
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
|
||||
|
||||
# Imports Socialcast users and messages from the JSON files found in USERS_DIR
# and MESSAGES_DIR, using the helpers inherited from ImportScripts::Base
# (create_user, create_post, print_status and the imported-id lookups).
class ImportScripts::Socialcast < ImportScripts::Base

  MESSAGES_DIR = "output/messages"
  USERS_DIR = "output/users"

  def initialize
    super
    @system_user = Discourse.system_user
  end

  # Imports users first, then messages (message import looks up the imported
  # user ids), and finally deletes all EmailToken records.
  def execute
    puts "", "Importing Socialcast Users..."
    import_users
    puts "", "Importing Socialcast Messages..."
    import_messages
    EmailToken.delete_all
    puts "", "Done"
  end

  # Imports every message file as a topic plus one post per comment.
  # Messages for which no title can be derived are skipped. +imported+ counts
  # only messages whose topic exists afterwards (created now or earlier), so
  # the final "Skipped" figure also includes topics that failed to be created.
  def import_messages
    topics = 0
    imported = 0
    total = count_files(MESSAGES_DIR)
    Dir.foreach(MESSAGES_DIR) do |filename|
      next if filename == '.' || filename == '..'
      topics += 1
      message_json = File.read MESSAGES_DIR + '/' + filename
      message = SocialcastMessage.new(message_json)
      next unless message.title
      created_topic = import_topic message.topic
      if created_topic
        import_posts message.replies, created_topic.topic_id
        imported += 1
      end
      print_status topics, total
    end
    puts "", "Imported #{imported} topics. Skipped #{total - imported}."
  end

  # Creates one user per file in USERS_DIR.
  def import_users
    users = 0
    total = count_files(USERS_DIR)
    Dir.foreach(USERS_DIR) do |filename|
      next if filename == '.' || filename == '..'
      user_json = File.read USERS_DIR + '/' + filename
      user = SocialcastUser.new(user_json).user
      create_user user, user[:id]
      users += 1
      print_status users, total
    end
  end

  # Number of entries in +path+, not counting '.' and '..'.
  def count_files(path)
    Dir.foreach(path).select { |f| f != '.' && f != '..' }.count
  end

  # Creates the first post of a topic, or finds it when it was imported before.
  #
  # topic - Hash as built by SocialcastMessage#topic.
  #
  # Returns the Post, or nil when creation failed. The failure value returned
  # by create_post is only logged; it is not handed back, because the caller
  # calls #topic_id on whatever it receives.
  def import_topic topic
    if post_id = post_id_from_imported_post_id(topic[:id])
      return Post.find(post_id) # already imported this topic
    end

    # Fall back to user id -1 when the author was not imported.
    topic[:user_id] = user_id_from_imported_user_id(topic[:author_id]) || -1
    topic[:category] = 'Socialcast Import'

    post = create_post(topic, topic[:id])
    return post if post.is_a?(Post)

    puts "Error creating topic #{topic[:id]}. Skipping."
    puts post.inspect
    nil
  end

  # Imports each reply Hash in +posts+ into the topic with id +topic_id+.
  def import_posts posts, topic_id
    posts.each do |post|
      import_post post, topic_id
    end
  end

  # Creates a single reply unless it was already imported; failures are
  # logged and otherwise ignored.
  def import_post post, topic_id
    return if post_id_from_imported_post_id(post[:id]) # already imported
    post[:topic_id] = topic_id
    post[:user_id] = user_id_from_imported_user_id(post[:author_id]) || -1
    new_post = create_post post, post[:id]
    unless new_post.is_a?(Post)
      puts "Error creating post #{post[:id]}. Skipping."
      puts new_post.inspect
    end
  end

end
|
||||
|
||||
# Run the importer only when this file is executed directly, not when it is
# required from another script.
ImportScripts::Socialcast.new.perform if __FILE__ == $PROGRAM_NAME
|
39
script/import_scripts/socialcast/socialcast_api.rb
Normal file
39
script/import_scripts/socialcast/socialcast_api.rb
Normal file
|
@ -0,0 +1,39 @@
|
|||
require 'base64'
|
||||
require 'json'
|
||||
require 'rest-client'
|
||||
|
||||
# Minimal client for the Socialcast REST API using HTTP Basic authentication.
class SocialcastApi

  attr_accessor :domain, :username, :password

  # domain   - subdomain of the Socialcast instance ("demo" for
  #            demo.socialcast.com).
  # username - login used for Basic authentication.
  # password - password used for Basic authentication.
  def initialize domain, username, password
    @domain = domain
    @username = username
    @password = password
  end

  # Root URL of the API for the configured domain.
  def base_url
    "https://#{@domain}.socialcast.com/api"
  end

  # Request headers: Basic credentials and a JSON Accept header.
  def headers
    # strict_encode64 emits no line feeds. encode64 wraps its output every
    # 60 characters, which put a newline inside the header value for long
    # credentials.
    encoded = Base64.strict_encode64 "#{@username}:#{@password}"
    {:Authorization => "Basic #{encoded}", :Accept => "application/json"}
  end

  # Performs a GET on +url+ and returns the parsed JSON body.
  def request url
    JSON.parse(RestClient.get(url, headers))
  end

  # Returns one page of users ordered by id.
  #
  # opts - Hash; :page selects the page (default 1).
  def list_users(opts={})
    list 'users', opts
  end

  # Returns one page of messages ordered by id.
  #
  # opts - Hash; :page selects the page (default 1).
  def list_messages(opts={})
    list 'messages', opts
  end

  private

  # Fetches one page of +resource+ and returns the array stored under the
  # response key of the same name, sorted by id. sort_by is used because a
  # one-argument block passed to sort acts as a comparator and does not order
  # by id.
  def list(resource, opts)
    page = opts[:page] || 1
    response = request "#{base_url}/#{resource}?page=#{page}"
    response[resource].sort_by { |item| item['id'] }
  end

end
|
63
script/import_scripts/socialcast/socialcast_message.rb
Normal file
63
script/import_scripts/socialcast/socialcast_message.rb
Normal file
|
@ -0,0 +1,63 @@
|
|||
require 'json'
|
||||
require 'cgi'
|
||||
require 'time'
|
||||
require_relative 'create_title.rb'
|
||||
|
||||
# Wraps one exported Socialcast message (a JSON document) and converts it
# into the hashes used by the importer: one topic plus one post per comment.
class SocialcastMessage

  # message_json - String containing the JSON of a single message.
  def initialize message_json
    @parsed_json = JSON.parse message_json
  end

  # Returns a Hash describing the topic: :id, :author_id, :title, :raw,
  # :created_at, and :tags (only when the message belongs to a group).
  def topic
    group_name = group
    topic = {}
    topic[:id] = @parsed_json['id']
    topic[:author_id] = @parsed_json['user']['id']
    topic[:title] = title
    topic[:raw] = @parsed_json['body']
    topic[:created_at] = Time.parse @parsed_json['created_at']
    topic[:tags] = [group_name] if group_name
    topic
  end

  # Title derived from the message body, or nil when none can be built.
  def title
    CreateTitle.from_body @parsed_json['body']
  end

  # Name of the group the message was posted in, or nil.
  def group
    @parsed_json['group']['groupname'] if @parsed_json['group']
  end

  def url
    @parsed_json['url']
  end

  def message_type
    @parsed_json['message_type']
  end

  # Returns one post Hash per comment; an empty Array when the message has
  # no "comments" entry (missing or null) instead of raising.
  def replies
    (@parsed_json['comments'] || []).map { |comment| post_from_comment(comment) }
  end

  # Converts a comment Hash into a post Hash with :id, :author_id, :raw and
  # :created_at.
  def post_from_comment(comment)
    post = {}
    post[:id] = comment['id']
    post[:author_id] = comment['user']['id']
    post[:raw] = comment['text']
    post[:created_at] = Time.parse comment['created_at']
    post
  end

  private

  # Decodes HTML entities; nil is passed through.
  def unescape html
    return nil unless html
    CGI.unescapeHTML html
  end
end
|
24
script/import_scripts/socialcast/socialcast_user.rb
Normal file
24
script/import_scripts/socialcast/socialcast_user.rb
Normal file
|
@ -0,0 +1,24 @@
|
|||
require 'json'
|
||||
require 'cgi'
|
||||
require 'time'
|
||||
|
||||
# Wraps one exported Socialcast user (a JSON document) and converts it into
# the Hash handed to the importer.
class SocialcastUser

  # user_json - String containing the JSON of a single user.
  def initialize user_json
    @parsed_json = JSON.parse user_json
  end

  # Returns a Hash with :id, :name, :username, :email and :staged (always
  # true). When the user has no contact_info, or its email is missing or
  # blank, a placeholder address "<id>@noemail.com" is used.
  def user
    contact_info = @parsed_json['contact_info'] || {}
    email = contact_info['email']
    email = "#{@parsed_json['id']}@noemail.com" if email.nil? || email.strip.empty?

    user = {}
    user[:id] = @parsed_json['id']
    user[:name] = @parsed_json['name']
    user[:username] = @parsed_json['username']
    user[:email] = email
    user[:staged] = true
    user
  end

end
|
3
script/import_scripts/socialcast/test/config.ex.yml
Normal file
3
script/import_scripts/socialcast/test/config.ex.yml
Normal file
|
@ -0,0 +1,3 @@
|
|||
domain: 'demo'
|
||||
username: 'emily@socialcast.com'
|
||||
password: 'demo'
|
111
script/import_scripts/socialcast/test/test_create_title.rb
Normal file
111
script/import_scripts/socialcast/test/test_create_title.rb
Normal file
|
@ -0,0 +1,111 @@
|
|||
require 'minitest/autorun'
|
||||
require_relative '../create_title.rb'
|
||||
|
||||
# Exercises CreateTitle.from_body against sample message bodies.
class TestCreateTitle < Minitest::Test

  def test_create_title_1
    body = "@GreatCheerThreading \nWhere can I find information on how GCTS stacks up against the competition? What are the key differentiators?"
    expected = "Where can I find information on how GCTS stacks up against the competition?"
    assert_title expected, body
  end

  def test_create_title_2
    body = "GCTS in 200 stores across town. How many threads per inch would you guess? @GreatCheerThreading"
    expected = "GCTS in 200 stores across town. How many threads per inch would you guess?"
    assert_title expected, body
  end

  def test_create_title_3
    body = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness running through the cotton fibers."
    expected = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness..."
    assert_title expected, body
  end

  def test_create_title_4
    body = "Great Cheer Threads® for GCTS Platinum Partners |\n Rules And Spools"
    expected = "Great Cheer Threads® for GCTS Platinum Partners"
    assert_title expected, body
  end

  def test_create_title_5
    body = "One sentence. Two sentence. Three sentence. Four is going to go on and on for more words than we want."
    expected = "One sentence. Two sentence. Three sentence."
    assert_title expected, body
  end

  def test_create_title_6
    body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?\n\n//cc @RD @GreatCheerThreading"
    expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?"
    assert_title expected, body
  end

  def test_create_title_6b
    body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
    expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site..."
    assert_title expected, body
  end

  def test_create_title_6c
    body = "Anyone know of any invite codes for www.greatcheer.io?! (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
    expected = "Anyone know of any invite codes for www.greatcheer.io?!"
    assert_title expected, body
  end

  def test_create_title_7
    body = "@GreatCheerThreading \n\nDoes anyone know what the plan is to move to denser 1.2 threads for GCTS?\n\nI have a customer interested in the higher thread counts offered in 1.2."
    expected = "Does anyone know what the plan is to move to denser 1.2 threads for GCTS?"
    assert_title expected, body
  end

  def test_create_title_8
    body = "@GreatCheerThreading @FabricWeavingWorldwide \n\nI was just chatting with a customer, after receiving this email:\n\n\"Ours is more of a ‘conceptual’ question. We have too much fiber"
    expected = "I was just chatting with a customer, after receiving this email:"
    assert_title expected, body
  end

  def test_create_title_9
    body = "Hi,\n\nDoes anyone have a PPT deck on whats new in cotton (around 10 or so slides) nothing to detailed as per what we have in the current 1.x version?\n\nI am not after a what's coming in cotton 2"
    expected = "Does anyone have a PPT deck on whats new in cotton (around 10 or so slides)..."
    assert_title expected, body
  end

  def test_create_title_10
    body = "foo\nbar\nbaz"
    expected = nil
    assert_title expected, body
  end

  def test_create_title_11
    body = "Hi Guys,\nI'm working with #gtcs and one of the things we're playing with is TC. What better tool to demo and use than our own \nhttps://greatcheerthreading.com/themostthreads/cool-stuff\n\nThis used to work great in 2013,"
    expected = "I'm working with #gtcs and one of the things we're playing with is TC."
    assert_title expected, body
  end

  def test_create_title_12
    body = ""
    expected = nil
    assert_title expected, body
  end

  def test_create_title_13
    body = "Embroidered TC ... http://blogs.greatcheerthreading.com/thread/embroidering-the-threads-is-just-the-beginning\n@SoftStuff @TightWeave and team hopefully can share their thoughts on this recent post."
    expected = "and team hopefully can share their thoughts on this recent post."
    assert_title expected, body
  end

  private

  # Asserts that CreateTitle.from_body(body) yields +expected+.
  # A nil expectation goes through assert_nil, because assert_equal(nil, ...)
  # is deprecated in Minitest 5.
  def assert_title(expected, body)
    title = CreateTitle.from_body body
    if expected.nil?
      assert_nil title
    else
      assert_equal expected, title
    end
  end

end
|
8272
script/import_scripts/socialcast/test/test_data.rb
Normal file
8272
script/import_scripts/socialcast/test/test_data.rb
Normal file
File diff suppressed because it is too large
Load diff
84
script/import_scripts/socialcast/test/test_socialcast_api.rb
Normal file
84
script/import_scripts/socialcast/test/test_socialcast_api.rb
Normal file
|
@ -0,0 +1,84 @@
|
|||
require 'minitest/autorun'
|
||||
require 'yaml'
|
||||
require_relative '../socialcast_api.rb'
|
||||
require_relative './test_data.rb'
|
||||
|
||||
# Tests for SocialcastApi. The list_* tests call the API of the instance
# configured in config.ex.yml and compare the results with the fixtures
# USERS, MESSAGES and MESSAGES_PG_2.
class TestSocialcastApi < Minitest::Test

  DEBUG = false

  # Loads the sample configuration and builds a fresh client for each test.
  # This replaces the former initialize override, which also read two keys
  # that were never used.
  def setup
    config = YAML::load_file(File.join(__dir__, 'config.ex.yml'))
    @domain = config['domain']
    @username = config['username']
    @password = config['password']
    @socialcast = SocialcastApi.new @domain, @username, @password
  end

  def test_initialize
    assert_equal @domain, @socialcast.domain
    assert_equal @username, @socialcast.username
    assert_equal @password, @socialcast.password
  end

  def test_base_url
    assert_equal 'https://demo.socialcast.com/api', @socialcast.base_url
  end

  def test_headers
    headers = @socialcast.headers
    assert_equal 'Basic ZW1pbHlAc29jaWFsY2FzdC5jb206ZGVtbw==', headers[:Authorization]
    assert_equal 'application/json', headers[:Accept]
  end

  # Both sides are ordered by id here, so the comparison does not depend on
  # the order in which the client returns the records.
  def test_list_users
    users = @socialcast.list_users.sort_by { |u| u['id'] }
    expected = JSON.parse(USERS)['users'].sort_by { |u| u['id'] }
    assert_equal 15, users.size
    assert_equal expected[0], users[0]
  end

  def test_list_users_next_page
    users = @socialcast.list_users({page: 2})
    assert_equal 0, users.size
  end

  def test_list_messages
    messages = @socialcast.list_messages.sort_by { |m| m['id'] }
    expected = JSON.parse(MESSAGES)['messages'].sort_by { |m| m['id'] }
    assert_equal 20, messages.size
    check_keys expected[0], messages[0]
  end

  def test_messages_next_page
    messages = @socialcast.list_messages({page: 2}).sort_by { |m| m['id'] }
    expected = JSON.parse(MESSAGES_PG_2)['messages'].sort_by { |m| m['id'] }
    assert_equal 20, messages.size
    check_keys expected[0], messages[0]
  end

  private

  # Asserts that every key of +expected+ is also present in +actual+.
  def check_keys expected, actual
    expected.keys.each do |k|
      assert actual.key?(k), "Key #{k.inspect} not found in actual keys: #{actual.keys}"
    end
  end

  # Prints +message+ preceded by the caller's location when +show+ is true or
  # DEBUG is enabled.
  def debug message, show=false
    if show || DEBUG
      puts '### ' + caller[0]
      puts ''
      puts message
      puts ''
      puts ''
    end
  end
end
|
27
script/import_scripts/socialcast/title.rb
Normal file
27
script/import_scripts/socialcast/title.rb
Normal file
|
@ -0,0 +1,27 @@
|
|||
require_relative './socialcast_message.rb'
|
||||
require_relative './socialcast_user.rb'
|
||||
require 'set'
|
||||
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
|
||||
|
||||
MESSAGES_DIR = "output/messages"
|
||||
|
||||
# Prints a Markdown link "[title](url)" for every exported message that yields
# a title, followed by a summary of how many messages did and did not.
def titles
  total = count_files(MESSAGES_DIR)
  topics = 0
  Dir.foreach(MESSAGES_DIR) do |filename|
    next if ['.', '..'].include?(filename)
    message = SocialcastMessage.new(File.read(MESSAGES_DIR + '/' + filename))
    next unless message.title
    #puts "#{filename}, #{message.replies.size}, #{message.topic[:raw].size}, #{message.message_type}, #{message.title}"
    puts "[#{message.title}](#{message.url})"
    topics += 1
  end
  puts "", "Imported #{topics} topics. Skipped #{total - topics}."
end
|
||||
|
||||
# Number of entries in the directory +path+, leaving out '.' and '..'.
def count_files(path)
  (Dir.entries(path) - ['.', '..']).size
end
|
||||
|
||||
# Print the title report when the script is run.
titles
|
Loading…
Reference in a new issue