mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-23 23:58:31 -05:00
Updated import for TypePad
This commit is contained in:
parent
44888f7acc
commit
d3f1eb395d
7 changed files with 158 additions and 26 deletions
|
@ -10,6 +10,9 @@ class TopicEmbed < ActiveRecord::Base
|
|||
def self.import(user, url, title, contents)
|
||||
return unless url =~ /^https?\:\/\//
|
||||
|
||||
if SiteSetting.embed_truncate
|
||||
contents = first_paragraph_from(contents)
|
||||
end
|
||||
contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
|
||||
|
||||
embed = TopicEmbed.where(embed_url: url).first
|
||||
|
@ -34,6 +37,7 @@ class TopicEmbed < ActiveRecord::Base
|
|||
end
|
||||
end
|
||||
else
|
||||
absolutize_urls(url, contents)
|
||||
post = embed.post
|
||||
# Update the topic if it changed
|
||||
if content_sha1 != embed.content_sha1
|
||||
|
@ -63,7 +67,7 @@ class TopicEmbed < ActiveRecord::Base
|
|||
prefix = "#{uri.scheme}://#{uri.host}"
|
||||
prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443
|
||||
|
||||
fragment = Nokogiri::HTML.fragment(contents)
|
||||
fragment = Nokogiri::HTML.fragment("<div>#{contents}</div>")
|
||||
fragment.css('a').each do |a|
|
||||
href = a['href']
|
||||
if href.present? && href.start_with?('/')
|
||||
|
@ -76,14 +80,28 @@ class TopicEmbed < ActiveRecord::Base
|
|||
a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}"
|
||||
end
|
||||
end
|
||||
|
||||
fragment.to_html
|
||||
fragment.at('div').inner_html
|
||||
end
|
||||
|
||||
# Looks up the topic linked to an embedded URL.
#
# Queries TopicEmbed rows whose `embed_url` equals the argument, plucks
# their `topic_id` values and returns the first one.
#
# @param embed_url [String] the URL the embed was imported from
# @return the first matching `topic_id`, or nil when no row matches
def self.topic_id_for_embed(embed_url)
  matching_embeds = TopicEmbed.where(embed_url: embed_url)
  topic_ids = matching_embeds.pluck(:topic_id)
  topic_ids.first
end
|
||||
|
||||
# Extracts a short excerpt from a blob of HTML.
#
# The HTML is parsed with Nokogiri and every <p> whose text is present is
# appended (as markup) to the result. Collection stops as soon as the
# accumulated markup is at least 100 characters long. If no non-blank
# paragraph exists, the markup of the first <div> is used instead
# (e.g. a onebox).
#
# @param html [String] the HTML to excerpt
# @return [String] the excerpt markup; "" when the document has neither a
#   non-blank <p> nor a <div>. Always a String, so callers can safely
#   append to it with `<<`.
def self.first_paragraph_from(html)
  doc = Nokogiri::HTML(html)

  result = ""
  doc.css('p').each do |paragraph|
    # Skip empty / whitespace-only paragraphs.
    next unless paragraph.text.present?

    result << paragraph.to_s
    return result if result.size >= 100
  end
  return result unless result.blank?

  # If there is no first paragraph, fall back to the first div (onebox).
  # `to_s` turns the node into markup and maps a missing div (nil) to "",
  # so the caller never receives a Nokogiri node or nil.
  doc.css('div').first.to_s
end
|
||||
end
|
||||
|
||||
# == Schema Information
|
||||
|
|
|
@ -49,7 +49,7 @@ en:
|
|||
|
||||
loading: "Loading Discussion..."
|
||||
permalink: "Permalink"
|
||||
imported_from: "Discussion topic for the original blog entry at: %{link}"
|
||||
imported_from: "This is a companion discussion topic for the original blog entry at: %{link}"
|
||||
in_reply_to: "in reply to %{username}"
|
||||
replies:
|
||||
one: "1 reply"
|
||||
|
@ -859,6 +859,7 @@ en:
|
|||
feed_polling_enabled: "Whether to import a RSS/ATOM feed as posts"
|
||||
feed_polling_url: "URL of RSS/ATOM feed to import"
|
||||
embed_by_username: "Discourse username of the user who creates the topics"
|
||||
embed_truncate: "Truncate the imported posts"
|
||||
embed_category: "Category of created topics"
|
||||
embed_post_limit: "Maximum number of posts to embed"
|
||||
tos_accept_required: "If enabled, users will need to check a box on the signup form to confirm that they accept the terms of service. Edit 'Signup Form: Terms of Service Message' in the Content tab to change the message."
|
||||
|
|
|
@ -403,6 +403,7 @@ embedding:
|
|||
embed_by_username: ''
|
||||
embed_category: ''
|
||||
embed_post_limit: 100
|
||||
embed_truncate: false
|
||||
|
||||
legal:
|
||||
tos_url:
|
||||
|
|
|
@ -209,6 +209,7 @@ class PostCreator
|
|||
end
|
||||
|
||||
def rollback_if_host_spam_detected
|
||||
return if @opts[:skip_validations]
|
||||
if @post.has_host_spam?
|
||||
@post.errors.add(:base, I18n.t(:spamming_host))
|
||||
@errors = @post.errors
|
||||
|
|
|
@ -1,14 +1,20 @@
|
|||
require 'open-uri'
|
||||
|
||||
class Typepad < Thor
|
||||
desc "import", "Imports posts from a Disqus XML export"
|
||||
method_option :file, aliases: '-f', required: true, desc: "The typepad file to import"
|
||||
method_option :dry_run, required: false, desc: "Just output what will be imported rather than doing it"
|
||||
method_option :post_as, aliases: '-p', required: true, desc: "The Discourse username to post as"
|
||||
method_option :google_api, aliases: '-g', required: false, desc: "The google plus API key to use to fetch usernames"
|
||||
|
||||
|
||||
def import
|
||||
require './config/environment'
|
||||
|
||||
email_blacklist = SiteSetting.email_domains_blacklist
|
||||
backup_settings = {}
|
||||
%w(email_domains_blacklist).each do |s|
|
||||
backup_settings[s] = SiteSetting.send(s)
|
||||
end
|
||||
|
||||
user = User.where(username_lower: options[:post_as].downcase).first
|
||||
if user.nil?
|
||||
|
@ -24,44 +30,56 @@ class Typepad < Thor
|
|||
inside_block = true
|
||||
entry = ""
|
||||
|
||||
n = 0
|
||||
entries = []
|
||||
File.open(options[:file]).each_line do |l|
|
||||
l = l.scrub
|
||||
|
||||
if l =~ /^--------$/
|
||||
entries << process_entry(entry)
|
||||
parsed_entry = process_entry(entry)
|
||||
if parsed_entry
|
||||
puts "Parsed #{parsed_entry[:title]}"
|
||||
entries << parsed_entry
|
||||
end
|
||||
entry = ""
|
||||
else
|
||||
entry << l
|
||||
end
|
||||
break if entries.size > 5
|
||||
end
|
||||
|
||||
entries.compact!
|
||||
entries.each_with_index do |e,i|
|
||||
if e[:title] =~ /Head/
|
||||
puts "#{i}: #{e[:title]}"
|
||||
end
|
||||
end
|
||||
|
||||
RateLimiter.disable
|
||||
|
||||
SiteSetting.email_domains_blacklist = ""
|
||||
|
||||
puts "import it"
|
||||
puts entries.size
|
||||
entries.each do |entry|
|
||||
puts "Importing #{entries.size} entries"
|
||||
entries.each_with_index do |entry, idx|
|
||||
puts "Importing (#{idx+1}/#{entries.size})"
|
||||
next if entry[:body].blank?
|
||||
|
||||
puts entry[:unique_url]
|
||||
post = TopicEmbed.import(user, entry[:unique_url], entry[:title], entry[:body])
|
||||
if post.present?
|
||||
post.update_column(:created_at, entry[:date])
|
||||
post.topic.update_column(:created_at, entry[:date])
|
||||
post.topic.update_column(:bumped_at, entry[:date])
|
||||
entry[:comments].each do |c|
|
||||
username = c[:author]
|
||||
if c[:email].present?
|
||||
|
||||
if c[:email].present? && c[:email] != "none@unknown.com"
|
||||
email = c[:email]
|
||||
post_user = User.where(email: email).first
|
||||
if post_user.blank?
|
||||
post_user = User.create!(email: email, username: UserNameSuggester.suggest(username))
|
||||
post_user = User.create!(name: c[:name], email: email, username: UserNameSuggester.suggest(username))
|
||||
end
|
||||
else
|
||||
suggested = UserNameSuggester.suggest(username)
|
||||
post_user = User.where(username: suggested)
|
||||
post_user = User.where(username: username).first
|
||||
if post_user.blank?
|
||||
post_user = User.create!(email: "#{suggested}@no-email-found.com", username: UserNameSuggester.suggest(username))
|
||||
suggested = UserNameSuggester.suggest(username)
|
||||
post_user = User.create!(name: c[:name], email: "#{suggested}@no-email-found.com", username: suggested)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -69,16 +87,24 @@ class Typepad < Thor
|
|||
topic_id: post.topic_id,
|
||||
raw: c[:body],
|
||||
cooked: c[:body],
|
||||
created_at: Time.now
|
||||
created_at: c[:date],
|
||||
skip_validations: true
|
||||
}
|
||||
post = PostCreator.new(post_user, attrs).create
|
||||
begin
|
||||
post = PostCreator.new(post_user, attrs).create
|
||||
puts post.errors.inspect if post.id.blank?
|
||||
rescue => ex
|
||||
puts "Error creating post: #{ex.inspect}"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
ensure
|
||||
RateLimiter.enable
|
||||
SiteSetting.email_domains_blacklist = email_blacklist
|
||||
backup_settings.each do |s, v|
|
||||
SiteSetting.send("#{s.to_s}=", v)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -92,14 +118,14 @@ class Typepad < Thor
|
|||
def parse_meta_data(section)
|
||||
result = {}
|
||||
section.split(/\n/).each do |l|
|
||||
if l =~ /^([^:]+)\: (.*)$/
|
||||
if l =~ /^([A-Z\ ]+)\: (.*)$/
|
||||
key, value = Regexp.last_match[1], Regexp.last_match[2]
|
||||
clean_type!(key)
|
||||
value.strip!
|
||||
result[key.to_sym] = value
|
||||
else
|
||||
result[:body] ||= ""
|
||||
result[:body] << l
|
||||
result[:body] << l << "\n"
|
||||
end
|
||||
end
|
||||
result
|
||||
|
@ -123,6 +149,7 @@ class Typepad < Thor
|
|||
sections = entry.split(/-----/)
|
||||
entry = parse_meta_data(sections[0]).slice(:date, :title, :unique_url)
|
||||
entry[:comments] = []
|
||||
entry[:date] = entry[:date] ? DateTime.strptime(entry[:date], "%m/%d/%Y") : Time.now
|
||||
sections[1..-1].each do |s|
|
||||
type, value = parse_section(s)
|
||||
case type
|
||||
|
@ -130,12 +157,90 @@ class Typepad < Thor
|
|||
entry[type] = value
|
||||
when :comment
|
||||
comment = parse_comment(value).slice(:author, :email, :url, :body, :date)
|
||||
|
||||
if options[:google_api] && comment[:author] =~ /plus.google.com\/(\d+)/
|
||||
gplus_id = Regexp.last_match[1]
|
||||
from_redis = $redis.get("gplus:#{gplus_id}")
|
||||
if from_redis.blank?
|
||||
json = ::JSON.parse(open("https://www.googleapis.com/plus/v1/people/#{gplus_id}?key=#{options[:google_api]}").read)
|
||||
from_redis = json['displayName']
|
||||
$redis.set("gplus:#{gplus_id}", from_redis)
|
||||
end
|
||||
comment[:author] = from_redis
|
||||
end
|
||||
|
||||
if comment[:author] =~ /([^\.]+)\.wordpress\.com/
|
||||
comment[:author] = Regexp.last_match[1]
|
||||
end
|
||||
|
||||
if comment[:author] =~ /([^\.]+)\.blogspot\.com/
|
||||
comment[:author] = Regexp.last_match[1]
|
||||
end
|
||||
|
||||
if comment[:author] =~ /twitter.com\/([a-zA-Z0-9]+)/
|
||||
comment[:author] = Regexp.last_match[1]
|
||||
end
|
||||
|
||||
if comment[:author] =~ /www.facebook.com\/profile.php\?id=(\d+)/
|
||||
fb_id = Regexp.last_match[1]
|
||||
from_redis = $redis.get("fb:#{fb_id}")
|
||||
if from_redis.blank?
|
||||
json = ::JSON.parse(open("http://graph.facebook.com/#{fb_id}").read)
|
||||
from_redis = json['username']
|
||||
$redis.set("fb:#{fb_id}", from_redis)
|
||||
end
|
||||
comment[:author] = from_redis
|
||||
end
|
||||
|
||||
comment[:name] = comment[:author]
|
||||
if comment[:author]
|
||||
comment[:author].gsub!(/^[_\.]+/, '')
|
||||
comment[:author].gsub!(/[_\.]+$/, '')
|
||||
|
||||
if comment[:author].size < 12
|
||||
comment[:author].gsub!(/ /, '_')
|
||||
else
|
||||
segments = []
|
||||
current = ""
|
||||
|
||||
last_upper = nil
|
||||
comment[:author].each_char do |c|
|
||||
is_upper = /[[:upper:]]/.match(c)
|
||||
|
||||
if (current.size > 1 && is_upper != last_upper)
|
||||
segments << current
|
||||
current = ""
|
||||
end
|
||||
last_upper = is_upper
|
||||
|
||||
if c == " " || c == "." || c == "_" || c == "-"
|
||||
segments << current
|
||||
current = ""
|
||||
else
|
||||
current << c
|
||||
end
|
||||
end
|
||||
segments.delete_if {|s| s.nil? || s.size < 2}
|
||||
segments << current
|
||||
|
||||
comment[:author] = segments[0]
|
||||
if segments.size > 1 && segments[1][0] =~ /[a-zA-Z]/
|
||||
comment[:author] << segments[1][0]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
comment[:author] = "commenter" if comment[:author].blank?
|
||||
comment[:author] = "codinghorror" if comment[:author] == "Jeff Atwood" || comment[:author] == "JeffAtwood" || comment[:author] == "Jeff_Atwood"
|
||||
|
||||
comment[:date] = comment[:date] ? DateTime.strptime(comment[:date], "%m/%d/%Y") : Time.now
|
||||
entry[:comments] << comment if comment[:body].present?
|
||||
end
|
||||
end
|
||||
|
||||
entry[:title] && entry[:body] ? entry : nil
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
|
|
@ -19,8 +19,7 @@ module UserNameSuggester
|
|||
end
|
||||
|
||||
def self.find_available_username_based_on(name)
|
||||
sanitize_username!(name)
|
||||
name = rightsize_username(name)
|
||||
name = rightsize_username(sanitize_username!(name))
|
||||
i = 1
|
||||
attempt = name
|
||||
until User.username_available?(attempt)
|
||||
|
@ -33,8 +32,11 @@ module UserNameSuggester
|
|||
end
|
||||
|
||||
# Produces a username-safe version of `name`.
#
# The name is first passed through ActiveSupport::Inflector.transliterate;
# the substitutions below are then applied, in order, to the value that
# call returned:
#   1. strip leading non-alphanumerics and trailing non-word characters,
#   2. collapse each remaining run of non-word characters into one "_",
#   3. strip any leading underscores.
#
# Callers should use the return value.
#
# @param name [String] the raw name to clean up
# @return [String] the sanitized name
def self.sanitize_username!(name)
  substitutions = [
    [/^[^[:alnum:]]+|\W+$/, ""],
    [/\W+/, "_"],
    [/^\_+/, '']
  ]

  ActiveSupport::Inflector.transliterate(name).tap do |cleaned|
    substitutions.each { |pattern, replacement| cleaned.gsub!(pattern, replacement) }
  end
end
|
||||
|
||||
def self.rightsize_username(name)
|
||||
|
|
|
@ -22,6 +22,10 @@ describe UserNameSuggester do
|
|||
UserNameSuggester.suggest("Darth%^Vader").should == 'Darth_Vader'
|
||||
end
|
||||
|
||||
it "transliterates some characters" do
|
||||
UserNameSuggester.suggest("Jørn").should == 'Jorn'
|
||||
end
|
||||
|
||||
it 'adds 1 to an existing username' do
|
||||
user = Fabricate(:user)
|
||||
UserNameSuggester.suggest(user.username).should == "#{user.username}1"
|
||||
|
|
Loading…
Reference in a new issue