Support for mapping multiple mbox imports into categories

This commit is contained in:
Robin Ward 2016-06-28 16:35:19 -04:00
parent 61ce5c210c
commit 8e5a22ba5d

View file

@ -5,13 +5,19 @@ class ImportScripts::Mbox < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER # CHANGE THESE BEFORE RUNNING THE IMPORTER
BATCH_SIZE = 1000 BATCH_SIZE = 1000
CATEGORY_ID = 6
MBOX_DIR = File.expand_path("~/import/site") MBOX_DIR = File.expand_path("~/import/site")
# Remove to not split individual files # Remove to not split individual files
SPLIT_AT = /^From (.*) at/ SPLIT_AT = /^From (.*) at/
# Will create a category if it doesn't exist
CATEGORY_MAPPINGS = {
"default" => "uncategorized",
# ex: "jobs-folder" => "jobs"
}
def execute def execute
import_categories
create_email_indices create_email_indices
create_user_indices create_user_indices
massage_indices massage_indices
@ -20,6 +26,14 @@ class ImportScripts::Mbox < ImportScripts::Base
import_replies import_replies
end end
def import_categories
mappings = CATEGORY_MAPPINGS.values - ['uncategorized']
create_categories(mappings) do |c|
{id: c, name: c}
end
end
def open_db def open_db
SQLite3::Database.new("#{MBOX_DIR}/index.db") SQLite3::Database.new("#{MBOX_DIR}/index.db")
end end
@ -43,6 +57,12 @@ class ImportScripts::Mbox < ImportScripts::Base
def all_messages def all_messages
files = Dir["#{MBOX_DIR}/messages/*"] files = Dir["#{MBOX_DIR}/messages/*"]
CATEGORY_MAPPINGS.keys.each do |k|
files << Dir["#{MBOX_DIR}/#{k}/*"]
end
files.flatten!
files.each_with_index do |f, idx| files.each_with_index do |f, idx|
if SPLIT_AT.present? if SPLIT_AT.present?
msg = "" msg = ""
@ -52,7 +72,7 @@ class ImportScripts::Mbox < ImportScripts::Base
if line =~ SPLIT_AT if line =~ SPLIT_AT
if !msg.empty? if !msg.empty?
mail = Mail.read_from_string(msg) mail = Mail.read_from_string(msg)
yield mail yield mail, f
print_status(idx, files.size) print_status(idx, files.size)
msg = "" msg = ""
end end
@ -62,14 +82,14 @@ class ImportScripts::Mbox < ImportScripts::Base
if !msg.empty? if !msg.empty?
mail = Mail.read_from_string(msg) mail = Mail.read_from_string(msg)
yield mail yield mail, f
print_status(idx, files.size) print_status(idx, files.size)
msg = "" msg = ""
end end
else else
raw = File.read(f) raw = File.read(f)
mail = Mail.read_from_string(raw) mail = Mail.read_from_string(raw)
yield mail yield mail, f
print_status(idx, files.size) print_status(idx, files.size)
end end
@ -155,7 +175,8 @@ class ImportScripts::Mbox < ImportScripts::Base
title VARCHAR(255) NOT NULL, title VARCHAR(255) NOT NULL,
reply_to VARCHAR(955) NULL, reply_to VARCHAR(955) NULL,
email_date DATETIME NOT NULL, email_date DATETIME NOT NULL,
message TEXT NOT NULL message TEXT NOT NULL,
category VARCHAR(255) NOT NULL
); );
SQL SQL
@ -164,7 +185,12 @@ class ImportScripts::Mbox < ImportScripts::Base
puts "", "creating indices" puts "", "creating indices"
all_messages do |mail| all_messages do |mail, filename|
directory = filename.sub("#{MBOX_DIR}/", '').split("/")[0]
category = CATEGORY_MAPPINGS[directory] || CATEGORY_MAPPINGS['default'] || 'uncategorized'
msg_id = mail['Message-ID'].to_s msg_id = mail['Message-ID'].to_s
# Many ways to get a name # Many ways to get a name
@ -174,9 +200,16 @@ class ImportScripts::Mbox < ImportScripts::Base
reply_to = mail['In-Reply-To'].to_s reply_to = mail['In-Reply-To'].to_s
email_date = mail['date'].to_s email_date = mail['date'].to_s
db.execute "INSERT OR IGNORE INTO emails (msg_id, from_email, from_name, title, reply_to, email_date, message) db.execute "INSERT OR IGNORE INTO emails (msg_id,
VALUES (?, ?, ?, ?, ?, ?, ?)", from_email,
[msg_id, from_email, from_name, title, reply_to, email_date, mail.to_s] from_name,
title,
reply_to,
email_date,
message,
category)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
[msg_id, from_email, from_name, title, reply_to, email_date, mail.to_s, category]
end end
ensure ensure
db.close db.close
@ -273,7 +306,8 @@ class ImportScripts::Mbox < ImportScripts::Base
from_name, from_name,
title, title,
email_date, email_date,
message message,
category
FROM emails FROM emails
WHERE reply_to IS NULL") WHERE reply_to IS NULL")
@ -320,7 +354,7 @@ class ImportScripts::Mbox < ImportScripts::Base
title: clean_title(title), title: clean_title(title),
user_id: user_id_from_imported_user_id(from_email) || Discourse::SYSTEM_USER_ID, user_id: user_id_from_imported_user_id(from_email) || Discourse::SYSTEM_USER_ID,
created_at: mail.date, created_at: mail.date,
category: CATEGORY_ID, category: t[6],
raw: clean_raw(raw), raw: clean_raw(raw),
cook_method: Post.cook_methods[:email] } cook_method: Post.cook_methods[:email] }
end end