diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb
index 99498db26..2be0005f8 100644
--- a/app/models/topic_embed.rb
+++ b/app/models/topic_embed.rb
@@ -10,6 +10,9 @@ class TopicEmbed < ActiveRecord::Base
def self.import(user, url, title, contents)
return unless url =~ /^https?\:\/\//
+ if SiteSetting.embed_truncate
+ contents = first_paragraph_from(contents)
+ end
contents << "\n
\n#{I18n.t('embed.imported_from', link: "#{url}")}\n"
embed = TopicEmbed.where(embed_url: url).first
@@ -34,6 +37,7 @@ class TopicEmbed < ActiveRecord::Base
end
end
else
+ absolutize_urls(url, contents)
post = embed.post
# Update the topic if it changed
if content_sha1 != embed.content_sha1
@@ -63,7 +67,7 @@ class TopicEmbed < ActiveRecord::Base
prefix = "#{uri.scheme}://#{uri.host}"
prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443
- fragment = Nokogiri::HTML.fragment(contents)
+ fragment = Nokogiri::HTML.fragment("#{contents}
")
fragment.css('a').each do |a|
href = a['href']
if href.present? && href.start_with?('/')
@@ -76,14 +80,28 @@ class TopicEmbed < ActiveRecord::Base
a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}"
end
end
-
- fragment.to_html
+ fragment.at('div').inner_html
end
+ # Returns the topic_id of the first TopicEmbed row whose embed_url equals
+ # the given URL, or nil when no row matches.
def self.topic_id_for_embed(embed_url)
TopicEmbed.where(embed_url: embed_url).pluck(:topic_id).first
end
+  # Extracts a short excerpt from an HTML document for truncated embeds.
+  #
+  # Collects non-empty <p> elements in document order and stops as soon as
+  # the collected markup is at least 100 characters long. When the document
+  # has no non-empty paragraph, falls back to the first <div> (e.g. a onebox).
+  #
+  # html - String of HTML to excerpt.
+  #
+  # Returns a String of HTML. It is empty when the document has neither a
+  # non-empty paragraph nor a div, so callers can always append to it.
+  def self.first_paragraph_from(html)
+    doc = Nokogiri::HTML(html)
+
+    result = ""
+    doc.css('p').each do |p|
+      if p.text.present?
+        result << p.to_s
+        return result if result.size >= 100
+      end
+    end
+    return result unless result.blank?
+
+    # If there is no first paragraph, return the first div (onebox).
+    # Serialize it (nil.to_s => "") rather than handing back the Nokogiri
+    # node or nil: import appends to the result with String#<<.
+    doc.css('div').first.to_s
+  end
end
# == Schema Information
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 0c5298f1b..c0d15c96c 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -49,7 +49,7 @@ en:
loading: "Loading Discussion..."
permalink: "Permalink"
- imported_from: "Discussion topic for the original blog entry at: %{link}"
+ imported_from: "This is a companion discussion topic for the original blog entry at: %{link}"
in_reply_to: "in reply to %{username}"
replies:
one: "1 reply"
@@ -859,6 +859,7 @@ en:
feed_polling_enabled: "Whether to import a RSS/ATOM feed as posts"
feed_polling_url: "URL of RSS/ATOM feed to import"
embed_by_username: "Discourse username of the user who creates the topics"
+ embed_truncate: "Truncate the imported posts"
embed_category: "Category of created topics"
embed_post_limit: "Maximum number of posts to embed"
tos_accept_required: "If enabled, users will need to check a box on the signup form to confirm that they accept the terms of service. Edit 'Signup Form: Terms of Service Message' in the Content tab to change the message."
diff --git a/config/site_settings.yml b/config/site_settings.yml
index 1902b8b8f..1e77b9acd 100644
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@@ -403,6 +403,7 @@ embedding:
embed_by_username: ''
embed_category: ''
embed_post_limit: 100
+ embed_truncate: false
legal:
tos_url:
diff --git a/lib/post_creator.rb b/lib/post_creator.rb
index 29bfd6281..25de03753 100644
--- a/lib/post_creator.rb
+++ b/lib/post_creator.rb
@@ -209,6 +209,7 @@ class PostCreator
end
def rollback_if_host_spam_detected
+ return if @opts[:skip_validations]
if @post.has_host_spam?
@post.errors.add(:base, I18n.t(:spamming_host))
@errors = @post.errors
diff --git a/lib/tasks/typepad.thor b/lib/tasks/typepad.thor
index 0adc59ce3..71d3fa79f 100644
--- a/lib/tasks/typepad.thor
+++ b/lib/tasks/typepad.thor
@@ -1,14 +1,20 @@
+require 'open-uri'
+
class Typepad < Thor
desc "import", "Imports posts from a Disqus XML export"
method_option :file, aliases: '-f', required: true, desc: "The typepad file to import"
method_option :dry_run, required: false, desc: "Just output what will be imported rather than doing it"
method_option :post_as, aliases: '-p', required: true, desc: "The Discourse username to post as"
+ method_option :google_api, aliases: '-g', required: false, desc: "The google plus API key to use to fetch usernames"
def import
require './config/environment'
- email_blacklist = SiteSetting.email_domains_blacklist
+ backup_settings = {}
+ %w(email_domains_blacklist).each do |s|
+ backup_settings[s] = SiteSetting.send(s)
+ end
user = User.where(username_lower: options[:post_as].downcase).first
if user.nil?
@@ -24,44 +30,56 @@ class Typepad < Thor
inside_block = true
entry = ""
- n = 0
entries = []
File.open(options[:file]).each_line do |l|
l = l.scrub
if l =~ /^--------$/
- entries << process_entry(entry)
+ parsed_entry = process_entry(entry)
+ if parsed_entry
+ puts "Parsed #{parsed_entry[:title]}"
+ entries << parsed_entry
+ end
entry = ""
else
entry << l
end
- break if entries.size > 5
end
- entries.compact!
+ entries.each_with_index do |e,i|
+ if e[:title] =~ /Head/
+ puts "#{i}: #{e[:title]}"
+ end
+ end
RateLimiter.disable
-
SiteSetting.email_domains_blacklist = ""
- puts "import it"
- puts entries.size
- entries.each do |entry|
+ puts "Importing #{entries.size} entries"
+ entries.each_with_index do |entry, idx|
+ puts "Importing (#{idx+1}/#{entries.size})"
+ next if entry[:body].blank?
+
+ puts entry[:unique_url]
post = TopicEmbed.import(user, entry[:unique_url], entry[:title], entry[:body])
if post.present?
+ post.update_column(:created_at, entry[:date])
+ post.topic.update_column(:created_at, entry[:date])
+ post.topic.update_column(:bumped_at, entry[:date])
entry[:comments].each do |c|
username = c[:author]
- if c[:email].present?
+
+ if c[:email].present? && c[:email] != "none@unknown.com"
email = c[:email]
post_user = User.where(email: email).first
if post_user.blank?
- post_user = User.create!(email: email, username: UserNameSuggester.suggest(username))
+ post_user = User.create!(name: c[:name], email: email, username: UserNameSuggester.suggest(username))
end
else
- suggested = UserNameSuggester.suggest(username)
- post_user = User.where(username: suggested)
+ post_user = User.where(username: username).first
if post_user.blank?
- post_user = User.create!(email: "#{suggested}@no-email-found.com", username: UserNameSuggester.suggest(username))
+ suggested = UserNameSuggester.suggest(username)
+ post_user = User.create!(name: c[:name], email: "#{suggested}@no-email-found.com", username: suggested)
end
end
@@ -69,16 +87,24 @@ class Typepad < Thor
topic_id: post.topic_id,
raw: c[:body],
cooked: c[:body],
- created_at: Time.now
+ created_at: c[:date],
+ skip_validations: true
}
- post = PostCreator.new(post_user, attrs).create
+ begin
+ post = PostCreator.new(post_user, attrs).create
+ puts post.errors.inspect if post.id.blank?
+ rescue => ex
+ puts "Error creating post: #{ex.inspect}"
+ end
end
end
end
ensure
RateLimiter.enable
- SiteSetting.email_domains_blacklist = email_blacklist
+ backup_settings.each do |s, v|
+ SiteSetting.send("#{s.to_s}=", v)
+ end
end
private
@@ -92,14 +118,14 @@ class Typepad < Thor
def parse_meta_data(section)
result = {}
section.split(/\n/).each do |l|
- if l =~ /^([^:]+)\: (.*)$/
+ if l =~ /^([A-Z\ ]+)\: (.*)$/
key, value = Regexp.last_match[1], Regexp.last_match[2]
clean_type!(key)
value.strip!
result[key.to_sym] = value
else
result[:body] ||= ""
- result[:body] << l
+ result[:body] << l << "\n"
end
end
result
@@ -123,6 +149,7 @@ class Typepad < Thor
sections = entry.split(/-----/)
entry = parse_meta_data(sections[0]).slice(:date, :title, :unique_url)
entry[:comments] = []
+ entry[:date] = entry[:date] ? DateTime.strptime(entry[:date], "%m/%d/%Y") : Time.now
sections[1..-1].each do |s|
type, value = parse_section(s)
case type
@@ -130,12 +157,90 @@ class Typepad < Thor
entry[type] = value
when :comment
comment = parse_comment(value).slice(:author, :email, :url, :body, :date)
+
+ if options[:google_api] && comment[:author] =~ /plus.google.com\/(\d+)/
+ gplus_id = Regexp.last_match[1]
+ from_redis = $redis.get("gplus:#{gplus_id}")
+ if from_redis.blank?
+ json = ::JSON.parse(open("https://www.googleapis.com/plus/v1/people/#{gplus_id}?key=#{options[:google_api]}").read)
+ from_redis = json['displayName']
+ $redis.set("gplus:#{gplus_id}", from_redis)
+ end
+ comment[:author] = from_redis
+ end
+
+ if comment[:author] =~ /([^\.]+)\.wordpress\.com/
+ comment[:author] = Regexp.last_match[1]
+ end
+
+ if comment[:author] =~ /([^\.]+)\.blogspot\.com/
+ comment[:author] = Regexp.last_match[1]
+ end
+
+ if comment[:author] =~ /twitter.com\/([a-zA-Z0-9]+)/
+ comment[:author] = Regexp.last_match[1]
+ end
+
+ if comment[:author] =~ /www.facebook.com\/profile.php\?id=(\d+)/
+ fb_id = Regexp.last_match[1]
+ from_redis = $redis.get("fb:#{fb_id}")
+ if from_redis.blank?
+ json = ::JSON.parse(open("http://graph.facebook.com/#{fb_id}").read)
+ from_redis = json['username']
+ $redis.set("fb:#{fb_id}", from_redis)
+ end
+ comment[:author] = from_redis
+ end
+
+ comment[:name] = comment[:author]
+ if comment[:author]
+ comment[:author].gsub!(/^[_\.]+/, '')
+ comment[:author].gsub!(/[_\.]+$/, '')
+
+ if comment[:author].size < 12
+ comment[:author].gsub!(/ /, '_')
+ else
+ segments = []
+ current = ""
+
+ last_upper = nil
+ comment[:author].each_char do |c|
+ is_upper = /[[:upper:]]/.match(c)
+
+ if (current.size > 1 && is_upper != last_upper)
+ segments << current
+ current = ""
+ end
+ last_upper = is_upper
+
+ if c == " " || c == "." || c == "_" || c == "-"
+ segments << current
+ current = ""
+ else
+ current << c
+ end
+ end
+ segments.delete_if {|s| s.nil? || s.size < 2}
+ segments << current
+
+ comment[:author] = segments[0]
+ if segments.size > 1 && segments[1][0] =~ /[a-zA-Z]/
+ comment[:author] << segments[1][0]
+ end
+ end
+ end
+
+ comment[:author] = "commenter" if comment[:author].blank?
+ comment[:author] = "codinghorror" if comment[:author] == "Jeff Atwood" || comment[:author] == "JeffAtwood" || comment[:author] == "Jeff_Atwood"
+
+ comment[:date] = comment[:date] ? DateTime.strptime(comment[:date], "%m/%d/%Y") : Time.now
entry[:comments] << comment if comment[:body].present?
end
end
entry[:title] && entry[:body] ? entry : nil
end
+
end
diff --git a/lib/user_name_suggester.rb b/lib/user_name_suggester.rb
index 17f97885c..8c5fc8c85 100644
--- a/lib/user_name_suggester.rb
+++ b/lib/user_name_suggester.rb
@@ -19,8 +19,7 @@ module UserNameSuggester
end
def self.find_available_username_based_on(name)
- sanitize_username!(name)
- name = rightsize_username(name)
+ name = rightsize_username(sanitize_username!(name))
i = 1
attempt = name
until User.username_available?(attempt)
@@ -33,12 +32,15 @@ module UserNameSuggester
end
+ # Builds a username-safe string from name: transliterates it, strips
+ # non-alphanumerics at line starts and non-word characters at line ends,
+ # replaces each remaining run of non-word characters with "_", and finally
+ # drops underscores at line starts.
+ #
+ # Returns the sanitized string. Callers must use the return value: the
+ # local is rebound to the transliterated string on the first line and the
+ # gsub! calls operate on that string (presumably a new object rather than
+ # the caller's argument; confirm against ActiveSupport).
def self.sanitize_username!(name)
+ name = ActiveSupport::Inflector.transliterate(name)
name.gsub!(/^[^[:alnum:]]+|\W+$/, "")
name.gsub!(/\W+/, "_")
+ name.gsub!(/^\_+/, '')
+ # gsub! returns nil when nothing was replaced, so return name explicitly.
+ name
end
+ # Fits name to the bounds given by User.username_length: right-pads it
+ # with "1" up to User.username_length.begin characters, then keeps at most
+ # the first User.username_length.end characters.
def self.rightsize_username(name)
name.ljust(User.username_length.begin, '1')[0, User.username_length.end]
end
-end
\ No newline at end of file
+end
diff --git a/spec/components/user_name_suggester_spec.rb b/spec/components/user_name_suggester_spec.rb
index a70be9afd..7cd0b16b7 100644
--- a/spec/components/user_name_suggester_spec.rb
+++ b/spec/components/user_name_suggester_spec.rb
@@ -22,6 +22,10 @@ describe UserNameSuggester do
UserNameSuggester.suggest("Darth%^Vader").should == 'Darth_Vader'
end
+ it "transliterates some characters" do
+ UserNameSuggester.suggest("Jørn").should == 'Jorn'
+ end
+
it 'adds 1 to an existing username' do
user = Fabricate(:user)
UserNameSuggester.suggest(user.username).should == "#{user.username}1"