mirror of
https://github.com/codeninjasllc/discourse.git
synced 2025-04-29 23:43:55 -04:00
Updated import for TypePad
This commit is contained in:
parent
44888f7acc
commit
d3f1eb395d
7 changed files with 158 additions and 26 deletions
app/models
config
lib
spec/components
|
@ -10,6 +10,9 @@ class TopicEmbed < ActiveRecord::Base
|
||||||
def self.import(user, url, title, contents)
|
def self.import(user, url, title, contents)
|
||||||
return unless url =~ /^https?\:\/\//
|
return unless url =~ /^https?\:\/\//
|
||||||
|
|
||||||
|
if SiteSetting.embed_truncate
|
||||||
|
contents = first_paragraph_from(contents)
|
||||||
|
end
|
||||||
contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
|
contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
|
||||||
|
|
||||||
embed = TopicEmbed.where(embed_url: url).first
|
embed = TopicEmbed.where(embed_url: url).first
|
||||||
|
@ -34,6 +37,7 @@ class TopicEmbed < ActiveRecord::Base
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
absolutize_urls(url, contents)
|
||||||
post = embed.post
|
post = embed.post
|
||||||
# Update the topic if it changed
|
# Update the topic if it changed
|
||||||
if content_sha1 != embed.content_sha1
|
if content_sha1 != embed.content_sha1
|
||||||
|
@ -63,7 +67,7 @@ class TopicEmbed < ActiveRecord::Base
|
||||||
prefix = "#{uri.scheme}://#{uri.host}"
|
prefix = "#{uri.scheme}://#{uri.host}"
|
||||||
prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443
|
prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443
|
||||||
|
|
||||||
fragment = Nokogiri::HTML.fragment(contents)
|
fragment = Nokogiri::HTML.fragment("<div>#{contents}</div>")
|
||||||
fragment.css('a').each do |a|
|
fragment.css('a').each do |a|
|
||||||
href = a['href']
|
href = a['href']
|
||||||
if href.present? && href.start_with?('/')
|
if href.present? && href.start_with?('/')
|
||||||
|
@ -76,14 +80,28 @@ class TopicEmbed < ActiveRecord::Base
|
||||||
a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}"
|
a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
fragment.at('div').inner_html
|
||||||
fragment.to_html
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.topic_id_for_embed(embed_url)
|
def self.topic_id_for_embed(embed_url)
|
||||||
TopicEmbed.where(embed_url: embed_url).pluck(:topic_id).first
|
TopicEmbed.where(embed_url: embed_url).pluck(:topic_id).first
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.first_paragraph_from(html)
|
||||||
|
doc = Nokogiri::HTML(html)
|
||||||
|
|
||||||
|
result = ""
|
||||||
|
doc.css('p').each do |p|
|
||||||
|
if p.text.present?
|
||||||
|
result << p.to_s
|
||||||
|
return result if result.size >= 100
|
||||||
|
end
|
||||||
|
end
|
||||||
|
return result unless result.blank?
|
||||||
|
|
||||||
|
# If there is no first paragraph, return the first div (onebox)
|
||||||
|
doc.css('div').first
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# == Schema Information
|
# == Schema Information
|
||||||
|
|
|
@ -49,7 +49,7 @@ en:
|
||||||
|
|
||||||
loading: "Loading Discussion..."
|
loading: "Loading Discussion..."
|
||||||
permalink: "Permalink"
|
permalink: "Permalink"
|
||||||
imported_from: "Discussion topic for the original blog entry at: %{link}"
|
imported_from: "This is a companion discussion topic for the original blog entry at: %{link}"
|
||||||
in_reply_to: "in reply to %{username}"
|
in_reply_to: "in reply to %{username}"
|
||||||
replies:
|
replies:
|
||||||
one: "1 reply"
|
one: "1 reply"
|
||||||
|
@ -859,6 +859,7 @@ en:
|
||||||
feed_polling_enabled: "Whether to import a RSS/ATOM feed as posts"
|
feed_polling_enabled: "Whether to import a RSS/ATOM feed as posts"
|
||||||
feed_polling_url: "URL of RSS/ATOM feed to import"
|
feed_polling_url: "URL of RSS/ATOM feed to import"
|
||||||
embed_by_username: "Discourse username of the user who creates the topics"
|
embed_by_username: "Discourse username of the user who creates the topics"
|
||||||
|
embed_truncate: "Truncate the imported posts"
|
||||||
embed_category: "Category of created topics"
|
embed_category: "Category of created topics"
|
||||||
embed_post_limit: "Maximum number of posts to embed"
|
embed_post_limit: "Maximum number of posts to embed"
|
||||||
tos_accept_required: "If enabled, users will need to check a box on the signup form to confirm that they accept the terms of service. Edit 'Signup Form: Terms of Service Message' in the Content tab to change the message."
|
tos_accept_required: "If enabled, users will need to check a box on the signup form to confirm that they accept the terms of service. Edit 'Signup Form: Terms of Service Message' in the Content tab to change the message."
|
||||||
|
|
|
@ -403,6 +403,7 @@ embedding:
|
||||||
embed_by_username: ''
|
embed_by_username: ''
|
||||||
embed_category: ''
|
embed_category: ''
|
||||||
embed_post_limit: 100
|
embed_post_limit: 100
|
||||||
|
embed_truncate: false
|
||||||
|
|
||||||
legal:
|
legal:
|
||||||
tos_url:
|
tos_url:
|
||||||
|
|
|
@ -209,6 +209,7 @@ class PostCreator
|
||||||
end
|
end
|
||||||
|
|
||||||
def rollback_if_host_spam_detected
|
def rollback_if_host_spam_detected
|
||||||
|
return if @opts[:skip_validations]
|
||||||
if @post.has_host_spam?
|
if @post.has_host_spam?
|
||||||
@post.errors.add(:base, I18n.t(:spamming_host))
|
@post.errors.add(:base, I18n.t(:spamming_host))
|
||||||
@errors = @post.errors
|
@errors = @post.errors
|
||||||
|
|
|
@ -1,14 +1,20 @@
|
||||||
|
require 'open-uri'
|
||||||
|
|
||||||
class Typepad < Thor
|
class Typepad < Thor
|
||||||
desc "import", "Imports posts from a Disqus XML export"
|
desc "import", "Imports posts from a Disqus XML export"
|
||||||
method_option :file, aliases: '-f', required: true, desc: "The typepad file to import"
|
method_option :file, aliases: '-f', required: true, desc: "The typepad file to import"
|
||||||
method_option :dry_run, required: false, desc: "Just output what will be imported rather than doing it"
|
method_option :dry_run, required: false, desc: "Just output what will be imported rather than doing it"
|
||||||
method_option :post_as, aliases: '-p', required: true, desc: "The Discourse username to post as"
|
method_option :post_as, aliases: '-p', required: true, desc: "The Discourse username to post as"
|
||||||
|
method_option :google_api, aliases: '-g', required: false, desc: "The google plus API key to use to fetch usernames"
|
||||||
|
|
||||||
|
|
||||||
def import
|
def import
|
||||||
require './config/environment'
|
require './config/environment'
|
||||||
|
|
||||||
email_blacklist = SiteSetting.email_domains_blacklist
|
backup_settings = {}
|
||||||
|
%w(email_domains_blacklist).each do |s|
|
||||||
|
backup_settings[s] = SiteSetting.send(s)
|
||||||
|
end
|
||||||
|
|
||||||
user = User.where(username_lower: options[:post_as].downcase).first
|
user = User.where(username_lower: options[:post_as].downcase).first
|
||||||
if user.nil?
|
if user.nil?
|
||||||
|
@ -24,44 +30,56 @@ class Typepad < Thor
|
||||||
inside_block = true
|
inside_block = true
|
||||||
entry = ""
|
entry = ""
|
||||||
|
|
||||||
n = 0
|
|
||||||
entries = []
|
entries = []
|
||||||
File.open(options[:file]).each_line do |l|
|
File.open(options[:file]).each_line do |l|
|
||||||
l = l.scrub
|
l = l.scrub
|
||||||
|
|
||||||
if l =~ /^--------$/
|
if l =~ /^--------$/
|
||||||
entries << process_entry(entry)
|
parsed_entry = process_entry(entry)
|
||||||
|
if parsed_entry
|
||||||
|
puts "Parsed #{parsed_entry[:title]}"
|
||||||
|
entries << parsed_entry
|
||||||
|
end
|
||||||
entry = ""
|
entry = ""
|
||||||
else
|
else
|
||||||
entry << l
|
entry << l
|
||||||
end
|
end
|
||||||
break if entries.size > 5
|
|
||||||
end
|
end
|
||||||
|
|
||||||
entries.compact!
|
entries.each_with_index do |e,i|
|
||||||
|
if e[:title] =~ /Head/
|
||||||
|
puts "#{i}: #{e[:title]}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
RateLimiter.disable
|
RateLimiter.disable
|
||||||
|
|
||||||
SiteSetting.email_domains_blacklist = ""
|
SiteSetting.email_domains_blacklist = ""
|
||||||
|
|
||||||
puts "import it"
|
puts "Importing #{entries.size} entries"
|
||||||
puts entries.size
|
entries.each_with_index do |entry, idx|
|
||||||
entries.each do |entry|
|
puts "Importing (#{idx+1}/#{entries.size})"
|
||||||
|
next if entry[:body].blank?
|
||||||
|
|
||||||
|
puts entry[:unique_url]
|
||||||
post = TopicEmbed.import(user, entry[:unique_url], entry[:title], entry[:body])
|
post = TopicEmbed.import(user, entry[:unique_url], entry[:title], entry[:body])
|
||||||
if post.present?
|
if post.present?
|
||||||
|
post.update_column(:created_at, entry[:date])
|
||||||
|
post.topic.update_column(:created_at, entry[:date])
|
||||||
|
post.topic.update_column(:bumped_at, entry[:date])
|
||||||
entry[:comments].each do |c|
|
entry[:comments].each do |c|
|
||||||
username = c[:author]
|
username = c[:author]
|
||||||
if c[:email].present?
|
|
||||||
|
if c[:email].present? && c[:email] != "none@unknown.com"
|
||||||
email = c[:email]
|
email = c[:email]
|
||||||
post_user = User.where(email: email).first
|
post_user = User.where(email: email).first
|
||||||
if post_user.blank?
|
if post_user.blank?
|
||||||
post_user = User.create!(email: email, username: UserNameSuggester.suggest(username))
|
post_user = User.create!(name: c[:name], email: email, username: UserNameSuggester.suggest(username))
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
suggested = UserNameSuggester.suggest(username)
|
post_user = User.where(username: username).first
|
||||||
post_user = User.where(username: suggested)
|
|
||||||
if post_user.blank?
|
if post_user.blank?
|
||||||
post_user = User.create!(email: "#{suggested}@no-email-found.com", username: UserNameSuggester.suggest(username))
|
suggested = UserNameSuggester.suggest(username)
|
||||||
|
post_user = User.create!(name: c[:name], email: "#{suggested}@no-email-found.com", username: suggested)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -69,16 +87,24 @@ class Typepad < Thor
|
||||||
topic_id: post.topic_id,
|
topic_id: post.topic_id,
|
||||||
raw: c[:body],
|
raw: c[:body],
|
||||||
cooked: c[:body],
|
cooked: c[:body],
|
||||||
created_at: Time.now
|
created_at: c[:date],
|
||||||
|
skip_validations: true
|
||||||
}
|
}
|
||||||
post = PostCreator.new(post_user, attrs).create
|
begin
|
||||||
|
post = PostCreator.new(post_user, attrs).create
|
||||||
|
puts post.errors.inspect if post.id.blank?
|
||||||
|
rescue => ex
|
||||||
|
puts "Error creating post: #{ex.inspect}"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
ensure
|
ensure
|
||||||
RateLimiter.enable
|
RateLimiter.enable
|
||||||
SiteSetting.email_domains_blacklist = email_blacklist
|
backup_settings.each do |s, v|
|
||||||
|
SiteSetting.send("#{s.to_s}=", v)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
@ -92,14 +118,14 @@ class Typepad < Thor
|
||||||
def parse_meta_data(section)
|
def parse_meta_data(section)
|
||||||
result = {}
|
result = {}
|
||||||
section.split(/\n/).each do |l|
|
section.split(/\n/).each do |l|
|
||||||
if l =~ /^([^:]+)\: (.*)$/
|
if l =~ /^([A-Z\ ]+)\: (.*)$/
|
||||||
key, value = Regexp.last_match[1], Regexp.last_match[2]
|
key, value = Regexp.last_match[1], Regexp.last_match[2]
|
||||||
clean_type!(key)
|
clean_type!(key)
|
||||||
value.strip!
|
value.strip!
|
||||||
result[key.to_sym] = value
|
result[key.to_sym] = value
|
||||||
else
|
else
|
||||||
result[:body] ||= ""
|
result[:body] ||= ""
|
||||||
result[:body] << l
|
result[:body] << l << "\n"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
result
|
result
|
||||||
|
@ -123,6 +149,7 @@ class Typepad < Thor
|
||||||
sections = entry.split(/-----/)
|
sections = entry.split(/-----/)
|
||||||
entry = parse_meta_data(sections[0]).slice(:date, :title, :unique_url)
|
entry = parse_meta_data(sections[0]).slice(:date, :title, :unique_url)
|
||||||
entry[:comments] = []
|
entry[:comments] = []
|
||||||
|
entry[:date] = entry[:date] ? DateTime.strptime(entry[:date], "%m/%d/%Y") : Time.now
|
||||||
sections[1..-1].each do |s|
|
sections[1..-1].each do |s|
|
||||||
type, value = parse_section(s)
|
type, value = parse_section(s)
|
||||||
case type
|
case type
|
||||||
|
@ -130,12 +157,90 @@ class Typepad < Thor
|
||||||
entry[type] = value
|
entry[type] = value
|
||||||
when :comment
|
when :comment
|
||||||
comment = parse_comment(value).slice(:author, :email, :url, :body, :date)
|
comment = parse_comment(value).slice(:author, :email, :url, :body, :date)
|
||||||
|
|
||||||
|
if options[:google_api] && comment[:author] =~ /plus.google.com\/(\d+)/
|
||||||
|
gplus_id = Regexp.last_match[1]
|
||||||
|
from_redis = $redis.get("gplus:#{gplus_id}")
|
||||||
|
if from_redis.blank?
|
||||||
|
json = ::JSON.parse(open("https://www.googleapis.com/plus/v1/people/#{gplus_id}?key=#{options[:google_api]}").read)
|
||||||
|
from_redis = json['displayName']
|
||||||
|
$redis.set("gplus:#{gplus_id}", from_redis)
|
||||||
|
end
|
||||||
|
comment[:author] = from_redis
|
||||||
|
end
|
||||||
|
|
||||||
|
if comment[:author] =~ /([^\.]+)\.wordpress\.com/
|
||||||
|
comment[:author] = Regexp.last_match[1]
|
||||||
|
end
|
||||||
|
|
||||||
|
if comment[:author] =~ /([^\.]+)\.blogspot\.com/
|
||||||
|
comment[:author] = Regexp.last_match[1]
|
||||||
|
end
|
||||||
|
|
||||||
|
if comment[:author] =~ /twitter.com\/([a-zA-Z0-9]+)/
|
||||||
|
comment[:author] = Regexp.last_match[1]
|
||||||
|
end
|
||||||
|
|
||||||
|
if comment[:author] =~ /www.facebook.com\/profile.php\?id=(\d+)/
|
||||||
|
fb_id = Regexp.last_match[1]
|
||||||
|
from_redis = $redis.get("fb:#{fb_id}")
|
||||||
|
if from_redis.blank?
|
||||||
|
json = ::JSON.parse(open("http://graph.facebook.com/#{fb_id}").read)
|
||||||
|
from_redis = json['username']
|
||||||
|
$redis.set("fb:#{fb_id}", from_redis)
|
||||||
|
end
|
||||||
|
comment[:author] = from_redis
|
||||||
|
end
|
||||||
|
|
||||||
|
comment[:name] = comment[:author]
|
||||||
|
if comment[:author]
|
||||||
|
comment[:author].gsub!(/^[_\.]+/, '')
|
||||||
|
comment[:author].gsub!(/[_\.]+$/, '')
|
||||||
|
|
||||||
|
if comment[:author].size < 12
|
||||||
|
comment[:author].gsub!(/ /, '_')
|
||||||
|
else
|
||||||
|
segments = []
|
||||||
|
current = ""
|
||||||
|
|
||||||
|
last_upper = nil
|
||||||
|
comment[:author].each_char do |c|
|
||||||
|
is_upper = /[[:upper:]]/.match(c)
|
||||||
|
|
||||||
|
if (current.size > 1 && is_upper != last_upper)
|
||||||
|
segments << current
|
||||||
|
current = ""
|
||||||
|
end
|
||||||
|
last_upper = is_upper
|
||||||
|
|
||||||
|
if c == " " || c == "." || c == "_" || c == "-"
|
||||||
|
segments << current
|
||||||
|
current = ""
|
||||||
|
else
|
||||||
|
current << c
|
||||||
|
end
|
||||||
|
end
|
||||||
|
segments.delete_if {|s| s.nil? || s.size < 2}
|
||||||
|
segments << current
|
||||||
|
|
||||||
|
comment[:author] = segments[0]
|
||||||
|
if segments.size > 1 && segments[1][0] =~ /[a-zA-Z]/
|
||||||
|
comment[:author] << segments[1][0]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
comment[:author] = "commenter" if comment[:author].blank?
|
||||||
|
comment[:author] = "codinghorror" if comment[:author] == "Jeff Atwood" || comment[:author] == "JeffAtwood" || comment[:author] == "Jeff_Atwood"
|
||||||
|
|
||||||
|
comment[:date] = comment[:date] ? DateTime.strptime(comment[:date], "%m/%d/%Y") : Time.now
|
||||||
entry[:comments] << comment if comment[:body].present?
|
entry[:comments] << comment if comment[:body].present?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
entry[:title] && entry[:body] ? entry : nil
|
entry[:title] && entry[:body] ? entry : nil
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,8 +19,7 @@ module UserNameSuggester
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.find_available_username_based_on(name)
|
def self.find_available_username_based_on(name)
|
||||||
sanitize_username!(name)
|
name = rightsize_username(sanitize_username!(name))
|
||||||
name = rightsize_username(name)
|
|
||||||
i = 1
|
i = 1
|
||||||
attempt = name
|
attempt = name
|
||||||
until User.username_available?(attempt)
|
until User.username_available?(attempt)
|
||||||
|
@ -33,12 +32,15 @@ module UserNameSuggester
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.sanitize_username!(name)
|
def self.sanitize_username!(name)
|
||||||
|
name = ActiveSupport::Inflector.transliterate(name)
|
||||||
name.gsub!(/^[^[:alnum:]]+|\W+$/, "")
|
name.gsub!(/^[^[:alnum:]]+|\W+$/, "")
|
||||||
name.gsub!(/\W+/, "_")
|
name.gsub!(/\W+/, "_")
|
||||||
|
name.gsub!(/^\_+/, '')
|
||||||
|
name
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.rightsize_username(name)
|
def self.rightsize_username(name)
|
||||||
name.ljust(User.username_length.begin, '1')[0, User.username_length.end]
|
name.ljust(User.username_length.begin, '1')[0, User.username_length.end]
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -22,6 +22,10 @@ describe UserNameSuggester do
|
||||||
UserNameSuggester.suggest("Darth%^Vader").should == 'Darth_Vader'
|
UserNameSuggester.suggest("Darth%^Vader").should == 'Darth_Vader'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "transliterates some characters" do
|
||||||
|
UserNameSuggester.suggest("Jørn").should == 'Jorn'
|
||||||
|
end
|
||||||
|
|
||||||
it 'adds 1 to an existing username' do
|
it 'adds 1 to an existing username' do
|
||||||
user = Fabricate(:user)
|
user = Fabricate(:user)
|
||||||
UserNameSuggester.suggest(user.username).should == "#{user.username}1"
|
UserNameSuggester.suggest(user.username).should == "#{user.username}1"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue