Add Socialcast importer

This commit is contained in:
David McClure 2016-05-25 23:54:38 -07:00
parent 4a2f0e772c
commit f7f8226b4c
12 changed files with 8854 additions and 0 deletions

View file

@ -0,0 +1,21 @@
To get started, copy the config.ex.yml to config.yml, and then update the properties for your Socialcast instance.
This importer uses the [Socialcast API](https://socialcast.github.io/socialcast/apidoc.html).
```
domain: 'my-socialcast-domain'
username: 'my-socialcast-username'
password: 'my-socialcast-password'
```
Create the directory that will hold the exported JSON files: `mkdir output`
Then run `ruby export.rb /path/to/config.yml`
Before importing, create a category named "Socialcast Import"; otherwise all
topics will be imported into the "Uncategorized" category.
Topics will be tagged with the names of the groups they were originally posted
in on Socialcast.
To run the import, run `ruby import.rb`

View file

@ -0,0 +1,50 @@
require 'uri'
# Derives a short topic title from the free-form body of a Socialcast message.
#
# The body is cleaned (mentions and URLs removed, stray punctuation turned into
# line breaks), the first line of at least MIN_LENGTH characters is picked, and
# that line is cut down to whole sentences or whole words within MAX_LENGTH
# characters.
class CreateTitle
  # Lines and titles shorter than this are rejected.
  MIN_LENGTH = 20
  # Only this many leading characters of the chosen line are considered.
  MAX_LENGTH = 80
  # URL schemes that are stripped from the body.
  URL_SCHEMES = %w[http https mailto ftp ldap ldaps].freeze

  # Builds a title from +body+.
  #
  # body - the message text (String), or nil.
  #
  # Returns the title String, or nil when the body is nil, has no line of at
  # least MIN_LENGTH characters, or the resulting title is shorter than that.
  def self.from_body(body)
    # A message without a body cannot produce a title; previously this raised.
    return if body.nil?

    text = remove_mentions(body)
    text = remove_urls(text)
    text = remove_stray_punctuation(text)
    line = first_long_line(text)
    return unless line

    sentences = complete_sentences(line)
    title = sentences ? sentences[1] : complete_words(line)
    title unless title.nil? || title.size < MIN_LENGTH
  end

  # Removes "@name" style mentions.
  def self.remove_mentions(text)
    text.gsub(/@[\w]*/, '')
  end

  # Removes URLs that use one of URL_SCHEMES.
  def self.remove_urls(text)
    text.gsub(URI.regexp(URL_SCHEMES), '')
  end

  # Replaces a single non-alphanumeric character surrounded by whitespace
  # (for example " | ") with a line break, so it acts as a separator.
  def self.remove_stray_punctuation(text)
    text.gsub(/\s+?[^a-zA-Z0-9\s]\s+/, "\n")
  end

  # Returns the first line with at least MIN_LENGTH characters (stripped),
  # or nil when there is none.
  def self.first_long_line(text)
    lines = text.split("\n").select { |t| t.strip.size >= MIN_LENGTH }
    return if lines.empty?
    lines[0].strip
  end

  # Matches the longest run of complete sentences within the first MAX_LENGTH
  # characters. Returns a MatchData whose group 1 is that run, or nil.
  def self.complete_sentences(text)
    # The appended space lets a sentence that ends exactly at the cut-off
    # satisfy the trailing \W.
    /(^.*[\S]{2,}[.!?:]+)\W/.match(text[0...MAX_LENGTH] + ' ')
  end

  # Truncates long text at the last whitespace before MAX_LENGTH characters and
  # appends "..."; shorter text is returned unchanged.
  def self.complete_words(text)
    return text[0...MAX_LENGTH].rpartition(/\s/)[0] + "..." if text.size >= MAX_LENGTH
    text
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers are
  # hidden explicitly.
  private_class_method :remove_mentions, :remove_urls, :remove_stray_punctuation,
                       :first_long_line, :complete_sentences, :complete_words
end

View file

@ -0,0 +1,58 @@
require 'yaml'
require 'fileutils'
require_relative 'socialcast_api'
# Reads the Socialcast connection settings from a YAML file into @domain,
# @username and @password.
#
# file - path to the YAML config. Absolute paths are used as given; relative
#        paths are resolved against this script's directory (File.join used
#        to prepend the script directory even to absolute paths).
def load_config(file)
  config = YAML.load_file(File.expand_path(file, __dir__))
  @domain = config['domain']
  @username = config['username']
  @password = config['password']
end
# Runs the whole export: builds the API client from the loaded settings, makes
# sure both output directories exist, then dumps users followed by messages.
def export
  @api = SocialcastApi.new(@domain, @username, @password)
  ["output/users", "output/messages"].each { |dir| create_dir(dir) }
  export_users
  export_messages
end
# Writes every user returned by the API to output/users/<id>.json, one page at
# a time, until the API returns an empty page.
#
# page - the page number to start from (default 1).
#
# Pages are walked in a loop rather than by recursion so the stack does not
# grow with the number of pages.
def export_users(page = 1)
  loop do
    users = @api.list_users({ page: page })
    return if users.empty?

    users.each do |user|
      # Progress output only; a user without contact info must not abort the export.
      puts((user['contact_info'] || {})['email'])
      # The block form of File.open closes the file itself.
      File.open("output/users/#{user['id']}.json", 'w') { |f| f.write(user.to_json) }
    end
    page += 1
  end
end
# Writes every message returned by the API to output/messages/<id>.json, one
# page at a time, until the API returns an empty page. For each message a short
# "<id>: <preview>" progress line is printed.
#
# page - the page number to start from (default 1).
#
# Pages are walked in a loop rather than by recursion so the stack does not
# grow with the number of pages.
def export_messages(page = 1)
  loop do
    messages = @api.list_messages({ page: page })
    return if messages.empty?

    messages.each do |message|
      # The preview is the title, or the body when there is no title; nil is
      # treated like an empty string.
      text = message['title'].to_s
      text = message['body'].to_s if text.empty?
      # "\n" must be double-quoted: '\n' is a backslash followed by "n", so the
      # text was never split on real line breaks.
      preview = text.split("\n").first.to_s[0..50]
      puts "#{message['id']}: #{preview}"
      # The block form of File.open closes the file itself.
      File.open("output/messages/#{message['id']}.json", 'w') { |f| f.write(message.to_json) }
    end
    page += 1
  end
end
# Creates +path+ (and any missing parents) if it does not exist yet.
#
# path - directory path, resolved against the current working directory, the
#        same way the export writes its "output/..." files. It used to be
#        resolved against the script's directory, so the directories and the
#        files written into them could end up in different places.
def create_dir(path)
  # mkdir_p is a no-op for directories that already exist.
  FileUtils.mkdir_p(path)
end
# Script entry point: the first command-line argument names the config file.
config_file = ARGV.shift
load_config(config_file)
export

View file

@ -0,0 +1,102 @@
require_relative './socialcast_message.rb'
require_relative './socialcast_user.rb'
require 'set'
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
# Imports the JSON files found under USERS_DIR and MESSAGES_DIR into Discourse.
# Both directories are relative to the current working directory.
#
# NOTE(review): create_user, create_post, print_status and the
# *_from_imported_*_id lookups are not defined here; presumably they come from
# ImportScripts::Base — confirm against base.rb.
class ImportScripts::Socialcast < ImportScripts::Base

  MESSAGES_DIR = "output/messages"
  USERS_DIR = "output/users"

  def initialize
    super
    @system_user = Discourse.system_user
  end

  # Imports users first, so message authors can be resolved, then messages.
  def execute
    puts "", "Importing Socialcast Users..."
    import_users
    puts "", "Importing Socialcast Messages..."
    import_messages
    EmailToken.delete_all
    puts "", "Done"
  end

  # Imports every message file as a topic plus one post per comment. Messages
  # for which no title can be derived are skipped, and only topics that were
  # actually created (or had been imported before) count as imported.
  def import_messages
    topics = 0
    imported = 0
    total = count_files(MESSAGES_DIR)
    Dir.foreach(MESSAGES_DIR) do |filename|
      next if filename == '.' || filename == '..'
      topics += 1
      message = SocialcastMessage.new(File.read(File.join(MESSAGES_DIR, filename)))
      next unless message.title
      created_topic = import_topic(message.topic)
      if created_topic
        import_posts(message.replies, created_topic.topic_id)
        imported += 1
      end
      print_status topics, total
    end
    puts "", "Imported #{imported} topics. Skipped #{total - imported}."
  end

  # Creates one user per file in USERS_DIR.
  def import_users
    users = 0
    total = count_files(USERS_DIR)
    Dir.foreach(USERS_DIR) do |filename|
      next if filename == '.' || filename == '..'
      user = SocialcastUser.new(File.read(File.join(USERS_DIR, filename))).user
      create_user user, user[:id]
      users += 1
      print_status users, total
    end
  end

  # Number of entries in +path+, not counting "." and "..".
  def count_files(path)
    Dir.foreach(path).count { |f| f != '.' && f != '..' }
  end

  # Imports +topic+ (a hash as built by SocialcastMessage#topic).
  #
  # Returns the topic's first Post — the existing one when the topic was
  # imported before — or nil when creation failed. The non-Post result of a
  # failed create_post used to be returned as-is, and the caller then called
  # topic_id on it.
  def import_topic(topic)
    if post_id = post_id_from_imported_post_id(topic[:id])
      return Post.find(post_id) # already imported this topic
    end

    # Authors that were not imported fall back to user id -1.
    topic[:user_id] = user_id_from_imported_user_id(topic[:author_id]) || -1
    topic[:category] = 'Socialcast Import'
    post = create_post(topic, topic[:id])
    return post if post.is_a?(Post)

    puts "Error creating topic #{topic[:id]}. Skipping."
    puts post.inspect
    nil
  end

  # Imports each entry of +posts+ as a reply in the topic +topic_id+.
  def import_posts(posts, topic_id)
    posts.each { |post| import_post(post, topic_id) }
  end

  # Imports a single reply unless it was imported before; failures are logged
  # and skipped.
  def import_post(post, topic_id)
    return if post_id_from_imported_post_id(post[:id]) # already imported

    post[:topic_id] = topic_id
    post[:user_id] = user_id_from_imported_user_id(post[:author_id]) || -1
    new_post = create_post(post, post[:id])
    return if new_post.is_a?(Post)

    puts "Error creating post #{post[:id]}. Skipping."
    puts new_post.inspect
  end

end
# Run the importer only when this file is executed directly, not when it is
# loaded by another script.
ImportScripts::Socialcast.new.perform if __FILE__ == $0

View file

@ -0,0 +1,39 @@
require 'base64'
require 'json'
require 'rest-client'
class SocialcastApi
attr_accessor :domain, :username, :password
def initialize domain, username, password
@domain = domain
@username = username
@password = password
end
def base_url
"https://#{@domain}.socialcast.com/api"
end
def headers
encoded = Base64.encode64 "#{@username}:#{@password}"
{:Authorization => "Basic #{encoded.strip!}", :Accept => "application/json"}
end
def request url
JSON.parse(RestClient.get url, headers)
end
def list_users(opts={})
page = opts[:page] ? opts[:page] : 1
response = request "#{base_url}/users?page=#{page}"
response['users'].sort {|u| u['id']}
end
def list_messages(opts={})
page = opts[:page] ? opts[:page] : 1
response = request "#{base_url}/messages?page=#{page}"
response['messages'].sort {|m| m['id']}
end
end

View file

@ -0,0 +1,63 @@
require 'json'
require 'cgi'
require 'time'
require_relative 'create_title.rb'
# Wraps one exported Socialcast message (a JSON document) and converts it into
# the hashes the importer uses for a topic and its replies.
class SocialcastMessage

  # message_json - the message as a JSON string.
  def initialize(message_json)
    @parsed_json = JSON.parse(message_json)
  end

  # Hash describing the topic: :id, :author_id, :title, :raw, :created_at and,
  # when the message belongs to a group, :tags holding the group name.
  def topic
    topic = {}
    topic[:id] = @parsed_json['id']
    topic[:author_id] = author_id(@parsed_json)
    topic[:title] = title
    topic[:raw] = @parsed_json['body']
    topic[:created_at] = Time.parse @parsed_json['created_at']
    topic[:tags] = [group] if group
    topic
  end

  # Title derived from the message body; nil when none can be derived.
  def title
    CreateTitle.from_body @parsed_json['body']
  end

  # Name of the group the message was posted in, or nil.
  def group
    @parsed_json['group']['groupname'] if @parsed_json['group']
  end

  def url
    @parsed_json['url']
  end

  def message_type
    @parsed_json['message_type']
  end

  # One post hash per comment; empty when the message has no comments key.
  def replies
    (@parsed_json['comments'] || []).map { |comment| post_from_comment(comment) }
  end

  # Hash describing a reply: :id, :author_id, :raw and :created_at.
  def post_from_comment(comment)
    post = {}
    post[:id] = comment['id']
    post[:author_id] = author_id(comment)
    post[:raw] = comment['text']
    post[:created_at] = Time.parse comment['created_at']
    post
  end

  private

  # Id of the record's user, or nil when the record carries no user, so that a
  # missing author does not abort the conversion.
  def author_id(record)
    (record['user'] || {})['id']
  end

  def unescape(html)
    return nil unless html
    CGI.unescapeHTML html
  end
end

View file

@ -0,0 +1,24 @@
require 'json'
require 'cgi'
require 'time'
# Wraps one exported Socialcast user (a JSON document) and converts it into the
# hash the importer uses to create a user.
class SocialcastUser

  # user_json - the user as a JSON string.
  def initialize(user_json)
    @parsed_json = JSON.parse(user_json)
  end

  # Hash with :id, :name, :username, :email and :staged (always true).
  # Users without a usable email address get "<id>@noemail.com".
  def user
    id = @parsed_json['id']
    # contact_info may be absent, and the email may be missing or blank.
    email = (@parsed_json['contact_info'] || {})['email']
    email = "#{id}@noemail.com" if email.to_s.strip.empty?

    {
      id: id,
      name: @parsed_json['name'],
      username: @parsed_json['username'],
      email: email,
      staged: true
    }
  end
end

View file

@ -0,0 +1,3 @@
# Example connection settings: copy this file to config.yml and replace the
# values with those of your own Socialcast instance.
# domain is the subdomain of socialcast.com; the API client builds
# https://<domain>.socialcast.com/api from it.
# NOTE(review): the API tests load this file and assert on these exact demo
# values, so change them in a copy rather than here.
domain: 'demo'
username: 'emily@socialcast.com'
password: 'demo'

View file

@ -0,0 +1,111 @@
require 'minitest/autorun'
require_relative '../create_title.rb'
# Exercises CreateTitle.from_body with message bodies and the titles expected
# for them. Cases that expect no title use assert_nil, because
# assert_equal(nil, ...) is deprecated in Minitest.
class TestCreateTitle < Minitest::Test

  def test_create_title_1
    body = "@GreatCheerThreading \nWhere can I find information on how GCTS stacks up against the competition? What are the key differentiators?"
    expected = "Where can I find information on how GCTS stacks up against the competition?"
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_2
    body = "GCTS in 200 stores across town. How many threads per inch would you guess? @GreatCheerThreading"
    expected = "GCTS in 200 stores across town. How many threads per inch would you guess?"
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_3
    body = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness running through the cotton fibers."
    expected = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness..."
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_4
    body = "Great Cheer Threads® for GCTS Platinum Partners |\n Rules And Spools"
    expected = "Great Cheer Threads® for GCTS Platinum Partners"
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_5
    body = "One sentence. Two sentence. Three sentence. Four is going to go on and on for more words than we want."
    expected = "One sentence. Two sentence. Three sentence."
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_6
    body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?\n\n//cc @RD @GreatCheerThreading"
    expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?"
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_6b
    body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
    expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site..."
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_6c
    body = "Anyone know of any invite codes for www.greatcheer.io?! (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
    expected = "Anyone know of any invite codes for www.greatcheer.io?!"
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_7
    body = "@GreatCheerThreading \n\nDoes anyone know what the plan is to move to denser 1.2 threads for GCTS?\n\nI have a customer interested in the higher thread counts offered in 1.2."
    expected = "Does anyone know what the plan is to move to denser 1.2 threads for GCTS?"
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_8
    body = "@GreatCheerThreading @FabricWeavingWorldwide \n\nI was just chatting with a customer, after receiving this email:\n\n\"Ours is more of a conceptual question. We have too much fiber"
    expected = "I was just chatting with a customer, after receiving this email:"
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  def test_create_title_9
    body = "Hi,\n\nDoes anyone have a PPT deck on whats new in cotton (around 10 or so slides) nothing to detailed as per what we have in the current 1.x version?\n\nI am not after a what's coming in cotton 2"
    expected = "Does anyone have a PPT deck on whats new in cotton (around 10 or so slides)..."
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  # No line is long enough to become a title.
  def test_create_title_10
    body = "foo\nbar\nbaz"
    title = CreateTitle.from_body body
    assert_nil(title)
  end

  def test_create_title_11
    body = "Hi Guys,\nI'm working with #gtcs and one of the things we're playing with is TC. What better tool to demo and use than our own \nhttps://greatcheerthreading.com/themostthreads/cool-stuff\n\nThis used to work great in 2013,"
    expected = "I'm working with #gtcs and one of the things we're playing with is TC."
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end

  # An empty body yields no title.
  def test_create_title_12
    body = ""
    title = CreateTitle.from_body body
    assert_nil(title)
  end

  def test_create_title_13
    body = "Embroidered TC ... http://blogs.greatcheerthreading.com/thread/embroidering-the-threads-is-just-the-beginning\n@SoftStuff @TightWeave and team hopefully can share their thoughts on this recent post."
    expected = "and team hopefully can share their thoughts on this recent post."
    title = CreateTitle.from_body body
    assert_equal(expected, title)
  end
end

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,84 @@
require 'minitest/autorun'
require 'yaml'
require_relative '../socialcast_api.rb'
require_relative './test_data.rb'
# Tests for SocialcastApi, configured from config.ex.yml next to this file.
# The list_* tests call the API client for real and compare the responses with
# the USERS / MESSAGES / MESSAGES_PG_2 fixtures.
# NOTE(review): the fixtures presumably come from test_data.rb — confirm.
class TestSocialcastApi < Minitest::Test

  DEBUG = false

  def initialize(args)
    config = YAML.load_file(File.join(__dir__, 'config.ex.yml'))
    @domain = config['domain']
    @username = config['username']
    @password = config['password']
    @kb_id = config['kb_id']
    @question_id = config['question_id']
    super args
  end

  def setup
    @socialcast = SocialcastApi.new @domain, @username, @password
  end

  def test_initialize
    assert_equal @domain, @socialcast.domain
    assert_equal @username, @socialcast.username
    assert_equal @password, @socialcast.password
  end

  def test_base_url
    assert_equal 'https://demo.socialcast.com/api', @socialcast.base_url
  end

  def test_headers
    headers = @socialcast.headers
    assert_equal 'Basic ZW1pbHlAc29jaWFsY2FzdC5jb206ZGVtbw==', headers[:Authorization]
    assert_equal 'application/json', headers[:Accept]
  end

  # The record with the lowest id is compared on both sides, so the tests do
  # not depend on the order in which the client returns records. The previous
  # `sort { |u| u['id'] }` took a one-argument block, which is not a valid
  # comparator.
  def test_list_users
    users = @socialcast.list_users
    expected = JSON.parse(USERS)['users']
    assert_equal 15, users.size
    assert_equal lowest_id(expected), lowest_id(users)
  end

  def test_list_users_next_page
    users = @socialcast.list_users({page: 2})
    assert_equal 0, users.size
  end

  def test_list_messages
    messages = @socialcast.list_messages
    expected = JSON.parse(MESSAGES)['messages']
    assert_equal 20, messages.size
    check_keys lowest_id(expected), lowest_id(messages)
  end

  def test_messages_next_page
    messages = @socialcast.list_messages({page: 2})
    expected = JSON.parse(MESSAGES_PG_2)['messages']
    assert_equal 20, messages.size
    check_keys lowest_id(expected), lowest_id(messages)
  end

  private

  # The record with the smallest 'id'.
  def lowest_id(records)
    records.min_by { |record| record['id'] }
  end

  # Asserts that every key of +expected+ is also present in +actual+.
  def check_keys(expected, actual)
    expected.keys.each do |k|
      assert actual.key?(k), "Key '#{k}' not found in actual keys: #{actual.keys}"
    end
  end

  # Prints +message+ with the caller's location when DEBUG or +show+ is set.
  def debug(message, show = false)
    if show || DEBUG
      puts '### ' + caller[0]
      puts ''
      puts message
      puts ''
      puts ''
    end
  end
end

View file

@ -0,0 +1,27 @@
require_relative './socialcast_message.rb'
require_relative './socialcast_user.rb'
require 'set'
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
MESSAGES_DIR = "output/messages"

# Prints a markdown link "[title](url)" for every exported message that yields
# a title, followed by a summary of how many were listed and how many skipped.
def titles
  listed = 0
  total = count_files(MESSAGES_DIR)
  Dir.foreach(MESSAGES_DIR) do |entry|
    next if ['.', '..'].include?(entry)
    message = SocialcastMessage.new(File.read(MESSAGES_DIR + '/' + entry))
    heading = message.title
    next unless heading
    #puts "#{filename}, #{message.replies.size}, #{message.topic[:raw].size}, #{message.message_type}, #{message.title}"
    puts "[#{heading}](#{message.url})"
    listed += 1
  end
  puts "", "Imported #{listed} topics. Skipped #{total - listed}."
end
# Number of entries in +path+, not counting the "." and ".." pseudo-entries.
def count_files(path)
  Dir.entries(path).count { |entry| entry != '.' && entry != '..' }
end
# Script entry point: the title list is printed whenever this file is loaded.
titles