mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-23 23:58:31 -05:00
Improvements to importing a mailing list
This commit is contained in:
parent
0ea54e9255
commit
3b35972d25
1 changed files with 42 additions and 10 deletions
|
@ -37,6 +37,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
||||||
topics = []
|
topics = []
|
||||||
|
|
||||||
topic_lookup = {}
|
topic_lookup = {}
|
||||||
|
topic_titles = {}
|
||||||
replies = []
|
replies = []
|
||||||
|
|
||||||
all_messages do |mail, filename|
|
all_messages do |mail, filename|
|
||||||
|
@ -44,21 +45,55 @@ class ImportScripts::Mbox < ImportScripts::Base
|
||||||
|
|
||||||
msg_id = mail['Message-ID'].to_s
|
msg_id = mail['Message-ID'].to_s
|
||||||
reply_to = mail['In-Reply-To'].to_s
|
reply_to = mail['In-Reply-To'].to_s
|
||||||
|
title = clean_title(mail['Subject'].to_s)
|
||||||
|
|
||||||
if reply_to.present?
|
if reply_to.present?
|
||||||
topic = topic_lookup[reply_to] || reply_to
|
topic = topic_lookup[reply_to] || reply_to
|
||||||
topic_lookup[msg_id] = topic
|
topic_lookup[msg_id] = topic
|
||||||
replies << {id: msg_id, topic: topic, file: filename}
|
replies << {id: msg_id, topic: topic, file: filename, title: title}
|
||||||
else
|
else
|
||||||
topics << {id: msg_id, file: filename}
|
topics << {id: msg_id, file: filename, title: title}
|
||||||
|
topic_titles[title] ||= msg_id
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Replies without parents should be hoisted to topics
|
||||||
|
to_hoist = []
|
||||||
|
replies.each do |r|
|
||||||
|
to_hoist << r if !topic_lookup[r[:topic]]
|
||||||
|
end
|
||||||
|
|
||||||
|
to_hoist.each do |h|
|
||||||
|
replies.delete(h)
|
||||||
|
topics << {id: h[:id], file: h[:file], title: h[:title]}
|
||||||
|
topic_titles[h[:title]] ||= h[:id]
|
||||||
|
end
|
||||||
|
|
||||||
|
# Topics with duplicate replies should be replies
|
||||||
|
to_group = []
|
||||||
|
topics.each do |t|
|
||||||
|
first = topic_titles[t[:title]]
|
||||||
|
to_group << t if first && first != t[:id]
|
||||||
|
end
|
||||||
|
|
||||||
|
to_group.each do |t|
|
||||||
|
topics.delete(t)
|
||||||
|
replies << {id: t[:id], topic: topic_titles[t[:title]], file: t[:file], title: t[:title]}
|
||||||
|
end
|
||||||
|
|
||||||
File.write(USER_INDEX_PATH, {users: users}.to_json)
|
File.write(USER_INDEX_PATH, {users: users}.to_json)
|
||||||
File.write(TOPIC_INDEX_PATH, {topics: topics}.to_json)
|
File.write(TOPIC_INDEX_PATH, {topics: topics}.to_json)
|
||||||
File.write(REPLY_INDEX_PATH, {replies: replies}.to_json)
|
File.write(REPLY_INDEX_PATH, {replies: replies}.to_json)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Normalizes a message subject so that replies share a title with the
# message they answer.
#
# Every leading "Re:" marker is removed (case-insensitively), including
# stacked ones such as "Re: RE: subject". Text after the leading markers is
# left untouched, so a "Re:" in the middle of a subject is preserved.
#
# @param title [String] the raw subject line
# @return [String] the subject without its leading reply markers
def clean_title(title)
  # \A anchors to the start of the whole string; ^ would also match after
  # any embedded newline. The repeated group consumes chained prefixes.
  title.sub(/\A\s*(?:re:\s*)+/i, '')
end
|
||||||
|
|
||||||
|
# Strips the Google Groups subscription footer from a message body.
#
# @param raw [String] the message body text
# @return [String] a copy of the body with every occurrence of the footer
#   removed; bodies without the footer are returned unchanged
def clean_raw(raw)
  # Literal periods are escaped so they match only a period, and each line
  # break accepts an optional carriage return so CRLF-delimited mail is
  # cleaned as well as LF-delimited mail.
  footer = %r{-- \r?\nYou received this message because you are subscribed to the Google Groups "[^"]*" group\.\r?\nTo unsubscribe from this group and stop receiving emails from it, send an email to [^+@]+\+unsubscribe@googlegroups\.com\.\r?\nFor more options, visit https://groups\.google\.com/groups/opt_out\.}
  raw.gsub(footer, '')
end
|
||||||
|
|
||||||
def import_users
|
def import_users
|
||||||
puts "", "importing users"
|
puts "", "importing users"
|
||||||
|
|
||||||
|
@ -100,7 +135,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
||||||
topics = all_topics[offset..offset+BATCH_SIZE-1]
|
topics = all_topics[offset..offset+BATCH_SIZE-1]
|
||||||
break if topics.nil?
|
break if topics.nil?
|
||||||
|
|
||||||
next if all_records_exist? :posts, topics.map {|t| t['id'].to_i}
|
next if all_records_exist? :posts, topics.map {|t| t['id']}
|
||||||
|
|
||||||
create_posts(topics, total: topic_count, offset: offset) do |t|
|
create_posts(topics, total: topic_count, offset: offset) do |t|
|
||||||
raw_email = File.read(t['file'])
|
raw_email = File.read(t['file'])
|
||||||
|
@ -116,11 +151,11 @@ class ImportScripts::Mbox < ImportScripts::Base
|
||||||
title = mail.subject.gsub(/\[[^\]]+\]+/, '').strip
|
title = mail.subject.gsub(/\[[^\]]+\]+/, '').strip
|
||||||
|
|
||||||
{ id: t['id'],
|
{ id: t['id'],
|
||||||
title: title,
|
title: clean_title(title),
|
||||||
user_id: user_id_from_imported_user_id(mail.from.first) || Discourse::SYSTEM_USER_ID,
|
user_id: user_id_from_imported_user_id(mail.from.first) || Discourse::SYSTEM_USER_ID,
|
||||||
created_at: mail.date,
|
created_at: mail.date,
|
||||||
category: CATEGORY_ID,
|
category: CATEGORY_ID,
|
||||||
raw: raw,
|
raw: clean_raw(raw),
|
||||||
cook_method: Post.cook_methods[:email] }
|
cook_method: Post.cook_methods[:email] }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -129,9 +164,6 @@ class ImportScripts::Mbox < ImportScripts::Base
|
||||||
def import_replies
|
def import_replies
|
||||||
puts "", "creating topic replies"
|
puts "", "creating topic replies"
|
||||||
|
|
||||||
all_topics = ::JSON.parse(File.read(TOPIC_INDEX_PATH))['topics']
|
|
||||||
topic_count = all_topics.size
|
|
||||||
|
|
||||||
replies = ::JSON.parse(File.read(REPLY_INDEX_PATH))['replies']
|
replies = ::JSON.parse(File.read(REPLY_INDEX_PATH))['replies']
|
||||||
post_count = replies.size
|
post_count = replies.size
|
||||||
|
|
||||||
|
@ -139,7 +171,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
||||||
posts = replies[offset..offset+BATCH_SIZE-1]
|
posts = replies[offset..offset+BATCH_SIZE-1]
|
||||||
break if posts.nil?
|
break if posts.nil?
|
||||||
|
|
||||||
next if all_records_exist? :posts, posts.map {|p| p['id'].to_i}
|
next if all_records_exist? :posts, posts.map {|p| p['id']}
|
||||||
|
|
||||||
create_posts(posts, total: post_count, offset: offset) do |p|
|
create_posts(posts, total: post_count, offset: offset) do |p|
|
||||||
parent_id = p['topic']
|
parent_id = p['topic']
|
||||||
|
@ -161,7 +193,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
||||||
topic_id: topic_id,
|
topic_id: topic_id,
|
||||||
user_id: user_id_from_imported_user_id(mail.from.first) || Discourse::SYSTEM_USER_ID,
|
user_id: user_id_from_imported_user_id(mail.from.first) || Discourse::SYSTEM_USER_ID,
|
||||||
created_at: mail.date,
|
created_at: mail.date,
|
||||||
raw: raw,
|
raw: clean_raw(raw),
|
||||||
cook_method: Post.cook_methods[:email] }
|
cook_method: Post.cook_methods[:email] }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue