Support for mapping multiple mbox imports into categories

This commit is contained in:
Robin Ward 2016-06-28 16:35:19 -04:00
parent 61ce5c210c
commit 8e5a22ba5d

View file

@ -5,13 +5,19 @@ class ImportScripts::Mbox < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
# NOTE(review): BATCH_SIZE is not referenced in the visible code; presumably
# the number of records handled per batch - confirm against the rest of the file.
BATCH_SIZE = 1000
# NOTE(review): the only visible use is the older `category: CATEGORY_ID` line;
# it appears to be superseded by CATEGORY_MAPPINGS below - confirm before relying on it.
CATEGORY_ID = 6
# Root directory of the import. It holds index.db, the messages/ directory and
# one sub-directory per key of CATEGORY_MAPPINGS.
MBOX_DIR = File.expand_path("~/import/site")
# A line matching this pattern starts a new message inside a file.
# Remove to not split individual files (each file is then read as one message).
SPLIT_AT = /^From (.*) at/
# Maps a directory name under MBOX_DIR (key) to a category name (value).
# The "default" entry is the fallback for mail found in a directory that has
# no mapping of its own.
# Will create a category if it doesn't exist
CATEGORY_MAPPINGS = {
"default" => "uncategorized",
# ex: "jobs-folder" => "jobs"
}
def execute
import_categories
create_email_indices
create_user_indices
massage_indices
@ -20,6 +26,14 @@ class ImportScripts::Mbox < ImportScripts::Base
import_replies
end
# Hands every category name configured in CATEGORY_MAPPINGS to
# create_categories.
#
# 'uncategorized' is excluded from the list. Names that occur more than once
# (several folders mapped to the same category) are collapsed so each name is
# submitted a single time. The name is used both as the import id and as the
# category name.
def import_categories
  names = CATEGORY_MAPPINGS.values.uniq - ['uncategorized']

  create_categories(names) do |name|
    { id: name, name: name }
  end
end
# Opens the SQLite index database stored as index.db directly under MBOX_DIR
# and returns the resulting SQLite3::Database object.
def open_db
  index_path = "#{MBOX_DIR}/index.db"
  SQLite3::Database.new(index_path)
end
@ -43,6 +57,12 @@ class ImportScripts::Mbox < ImportScripts::Base
def all_messages
files = Dir["#{MBOX_DIR}/messages/*"]
CATEGORY_MAPPINGS.keys.each do |k|
files << Dir["#{MBOX_DIR}/#{k}/*"]
end
files.flatten!
files.each_with_index do |f, idx|
if SPLIT_AT.present?
msg = ""
@ -52,7 +72,7 @@ class ImportScripts::Mbox < ImportScripts::Base
if line =~ SPLIT_AT
if !msg.empty?
mail = Mail.read_from_string(msg)
yield mail
yield mail, f
print_status(idx, files.size)
msg = ""
end
@ -62,14 +82,14 @@ class ImportScripts::Mbox < ImportScripts::Base
if !msg.empty?
mail = Mail.read_from_string(msg)
yield mail
yield mail, f
print_status(idx, files.size)
msg = ""
end
else
raw = File.read(f)
mail = Mail.read_from_string(raw)
yield mail
yield mail, f
print_status(idx, files.size)
end
@ -155,7 +175,8 @@ class ImportScripts::Mbox < ImportScripts::Base
title VARCHAR(255) NOT NULL,
reply_to VARCHAR(955) NULL,
email_date DATETIME NOT NULL,
message TEXT NOT NULL
message TEXT NOT NULL,
category VARCHAR(255) NOT NULL
);
SQL
@ -164,7 +185,12 @@ class ImportScripts::Mbox < ImportScripts::Base
puts "", "creating indices"
all_messages do |mail|
all_messages do |mail, filename|
directory = filename.sub("#{MBOX_DIR}/", '').split("/")[0]
category = CATEGORY_MAPPINGS[directory] || CATEGORY_MAPPINGS['default'] || 'uncategorized'
msg_id = mail['Message-ID'].to_s
# Many ways to get a name
@ -174,9 +200,16 @@ class ImportScripts::Mbox < ImportScripts::Base
reply_to = mail['In-Reply-To'].to_s
email_date = mail['date'].to_s
db.execute "INSERT OR IGNORE INTO emails (msg_id, from_email, from_name, title, reply_to, email_date, message)
VALUES (?, ?, ?, ?, ?, ?, ?)",
[msg_id, from_email, from_name, title, reply_to, email_date, mail.to_s]
db.execute "INSERT OR IGNORE INTO emails (msg_id,
from_email,
from_name,
title,
reply_to,
email_date,
message,
category)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
[msg_id, from_email, from_name, title, reply_to, email_date, mail.to_s, category]
end
ensure
db.close
@ -273,7 +306,8 @@ class ImportScripts::Mbox < ImportScripts::Base
from_name,
title,
email_date,
message
message,
category
FROM emails
WHERE reply_to IS NULL")
@ -320,7 +354,7 @@ class ImportScripts::Mbox < ImportScripts::Base
title: clean_title(title),
user_id: user_id_from_imported_user_id(from_email) || Discourse::SYSTEM_USER_ID,
created_at: mail.date,
category: CATEGORY_ID,
category: t[6],
raw: clean_raw(raw),
cook_method: Post.cook_methods[:email] }
end