From 0d0225133cf01a1bb676901a40a351c43cf9f117 Mon Sep 17 00:00:00 2001
From: riking <rikingcoding@gmail.com>
Date: Tue, 26 Aug 2014 17:30:12 -0700
Subject: [PATCH 1/5] FIX: Failed incoming emails could create empty topics

A failure condition is eliminated where a topic would be created, but post
creation would fail, leaving the forum with a topic without any posts.
By asking PostCreator to create the topic instead, inside of its
transaction, this failure condition is eliminated.

Additionally, attachments are restored to working status. Previously,
the attachment code would build up the post raw, but then drop it and
not do anything with the result (creating orphaned uploads). By actually
placing the raw value back in the options hash, it is included in the
created post.
---
 lib/email/receiver.rb | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb
index 3845a9798..ff3db75f9 100644
--- a/lib/email/receiver.rb
+++ b/lib/email/receiver.rb
@@ -94,7 +94,6 @@ module Email
       {type: :invalid, obj: nil}
     end
 
-    private
 
     def parse_body
       html = nil
@@ -168,37 +167,37 @@ module Email
 [/quote]"
     end
 
+    private
+
     def create_reply
-      create_post_with_attachments(email_log.user, @body, @email_log.topic_id, @email_log.post.post_number)
+      create_post_with_attachments(@email_log.user,
+                                   raw: @body,
+                                   topic_id: @email_log.topic_id,
+                                   reply_to_post_number: @email_log.post.post_number)
     end
 
     def create_new_topic
-      topic = TopicCreator.new(
-        @user,
-        Guardian.new(@user),
-        category: @category_id,
-        title: @message.subject,
-      ).create
-
-      post = create_post_with_attachments(@user, @body, topic.id)
+      post = create_post_with_attachments(@user,
+                                          raw: @body,
+                                          title: @message.subject,
+                                          category: @category_id)
 
       EmailLog.create(
         email_type: "topic_via_incoming_email",
-        to_address: @message.to.first,
-        topic_id: topic.id,
+        to_address: @message.from.first, # pick from address because we want the user's email
+        topic_id: post.topic.id,
         user_id: @user.id,
       )
 
       post
     end
 
-    def create_post_with_attachments(user, raw, topic_id, reply_to_post_number=nil)
+    def create_post_with_attachments(user, post_opts={})
       options = {
-        raw: raw,
-        topic_id: topic_id,
         cooking_options: { traditional_markdown_linebreaks: true },
-      }
-      options[:reply_to_post_number] = reply_to_post_number if reply_to_post_number
+      }.merge(post_opts)
+
+      raw = options[:raw]
 
       # deal with attachments
       @message.attachments.each do |attachment|
@@ -215,9 +214,10 @@ module Email
         ensure
           tmp.close!
         end
-
       end
 
+      options[:raw] = raw
+
       create_post(user, options)
     end
 
@@ -232,9 +232,11 @@ module Email
     def create_post(user, options)
       creator = PostCreator.new(user, options)
       post = creator.create
+
       if creator.errors.present?
         raise InvalidPost, creator.errors.full_messages.join("\n")
       end
+
       post
     end
 

From cb55ef47027e8994cbd093b6938989057f03cc9d Mon Sep 17 00:00:00 2001
From: riking <rikingcoding@gmail.com>
Date: Tue, 26 Aug 2014 12:31:47 -0700
Subject: [PATCH 2/5] Add Email::HtmlCleaner for email processing

This class is in charge of stripping out most of the crap from the HTML
portion of emails that email clients generate, so that it can be sanely
post-processed for signatures and quoting boundaries.
---
 lib/email/html_cleaner.rb | 120 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 lib/email/html_cleaner.rb

diff --git a/lib/email/html_cleaner.rb b/lib/email/html_cleaner.rb
new file mode 100644
index 000000000..19e4b4173
--- /dev/null
+++ b/lib/email/html_cleaner.rb
@@ -0,0 +1,120 @@
+module Email
+  # HtmlCleaner cleans up the extremely dirty HTML that many email clients
+  # generate by stripping out any excess divs or spans, removing styling in
+  # the process (which also makes the html more suitable to be parsed as
+  # Markdown).
+  class HtmlCleaner
+    # Elements to hoist all children out of
+    HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
+    # Node types to always delete
+    HTML_DELETE_ELEMENT_TYPES = [Nokogiri::XML::Node::DTD_NODE,
+                                 Nokogiri::XML::Node::COMMENT_NODE,
+                                 ]
+
+    # Private variables:
+    #   @doc - nokogiri document
+    #   @out - same as @doc, but only if trimming has occured
+    def initialize(html)
+      if String === html
+        @doc = Nokogiri::HTML(html)
+      else
+        @doc = html
+      end
+    end
+
+    class << self
+      # Email::HtmlCleaner.trim(inp, opts={})
+      #
+      # Arguments:
+      #   inp - Either a HTML string or a Nokogiri document.
+      # Options:
+      #   :return => :doc, :string
+      #     Specify the desired return type.
+      #     Defaults to the type of the input.
+      #     A value of :string is equivalent to calling get_document_text()
+      #     on the returned document.
+      def trim(inp, opts={})
+        cleaner = HtmlCleaner.new(inp)
+
+        opts[:return] ||= ((String === inp) ? :string : :doc)
+
+        if opts[:return] == :string
+          cleaner.output_html
+        else
+          cleaner.output_document
+        end
+      end
+
+      # Email::HtmlCleaner.get_document_text(doc)
+      #
+      # Get the body portion of the document, including html, as a string.
+      def get_document_text(doc)
+        body = doc.xpath('//body')
+        if body
+          body.inner_html
+        else
+          doc.inner_html
+        end
+      end
+    end
+
+    def output_document
+      @out ||= begin
+                 doc = @doc
+                 trim_process_node doc
+                 add_newlines doc
+                 doc
+      end
+    end
+
+    def output_html
+      HtmlCleaner.get_document_text(output_document)
+    end
+
+    private
+
+    def add_newlines(doc)
+      doc.xpath('//br').each do |br|
+        br.replace(Nokogiri::XML::Text.new("\n", doc))
+      end
+    end
+
+    def trim_process_node(node)
+      if should_hoist?(node)
+        hoisted = trim_hoist_element node
+        hoisted.each { |child| trim_process_node child }
+      elsif should_delete?(node)
+        node.remove
+      else
+        if children = node.children
+          children.each { |child| trim_process_node child }
+        end
+      end
+
+      node
+    end
+
+    def trim_hoist_element(element)
+      hoisted = []
+      element.children.each do |child|
+        element.before(child)
+        hoisted << child
+      end
+      element.remove
+      hoisted
+    end
+
+    def should_hoist?(node)
+      return false unless node.element?
+      HTML_HOIST_ELEMENTS.include? node.name
+    end
+
+    def should_delete?(node)
+      return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
+      return true if node.element? && node.name == 'head'
+      return true if node.text? && node.text.strip.blank?
+
+      false
+    end
+  end
+end

From 0a09593f3b6193010c0d19a20df8dd7d0d56cb52 Mon Sep 17 00:00:00 2001
From: riking <rikingcoding@gmail.com>
Date: Tue, 26 Aug 2014 17:31:51 -0700
Subject: [PATCH 3/5] FIX: Prefer HTML in incoming emails, heavily refactor
 email receiver

This commit heavily refactors Email::Receiver to both better handle
different emails and improve testability.

A primary focus of the refactor is reducing the usage of class
variables, in favor of actually passing parameters - making it possible
for multiple tests to use the same Receiver instance.

The EmailLog reported when a topic is created is reflected to put the
user's email in the to_address field, instead of the system address.

The discourse_email_parser function is renamed to
discourse_email_trimmer, and additional stopping conditions are added to
make up for EmailReplyParser's inability to deal with html at the start
of a line.

The force_encoding calls are refactored out to a 'fix_charset' method.

parse_body is renamed to select_body, and the scrub_html method is
dropped in favor of the new HtmlCleaner class.

A new parse_body method is added, which performs the job of the removed
lines of code in the 'process' method.

EmailUnparsableError is redefined again, to be encoding errors (when the
declared encoding is not what was delivered).
---
 lib/email/receiver.rb | 108 +++++++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 49 deletions(-)

diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb
index ff3db75f9..c644950ce 100644
--- a/lib/email/receiver.rb
+++ b/lib/email/receiver.rb
@@ -1,3 +1,4 @@
+require 'email/html_cleaner'
 #
 # Handles an incoming message
 #
@@ -26,20 +27,12 @@ module Email
     def process
       raise EmptyEmailError if @raw.blank?
 
-      @message = Mail.new(@raw)
+      message = Mail.new(@raw)
 
-      # First remove the known discourse stuff.
-      parse_body
-      raise EmptyEmailError if @body.blank?
-
-      # Then run the github EmailReplyParser on it in case we didn't catch it
-      @body = EmailReplyParser.read(@body).visible_text.force_encoding('UTF-8')
-
-      discourse_email_parser
-      raise EmailUnparsableError if @body.blank?
+      body = parse_body message
 
       dest_info = {type: :invalid, obj: nil}
-      @message.to.each do |to_address|
+      message.to.each do |to_address|
         if dest_info[:type] == :invalid
           dest_info = check_address to_address
         end
@@ -47,6 +40,10 @@ module Email
 
       raise BadDestinationAddress if dest_info[:type] == :invalid
 
+      # TODO get to a state where we can remove this
+      @message = message
+      @body = body
+
       if dest_info[:type] == :category
         raise BadDestinationAddress unless SiteSetting.email_in
         category = dest_info[:obj]
@@ -74,6 +71,8 @@ module Email
 
         create_reply
       end
+    rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
+      raise EmailUnparsableError.new(e)
     end
 
     def check_address(address)
@@ -94,56 +93,63 @@ module Email
       {type: :invalid, obj: nil}
     end
 
+    def parse_body(message)
+      body = select_body message
+      raise EmptyEmailError if body.strip.blank?
 
-    def parse_body
+      body = discourse_email_trimmer body
+      raise EmptyEmailError if body.strip.blank?
+
+      body = EmailReplyParser.parse_reply body
+      raise EmptyEmailError if body.strip.blank?
+
+      body
+    end
+
+    def select_body(message)
       html = nil
-
-      # If the message is multipart, find the best type for our purposes
-      if @message.multipart?
-        if p = @message.text_part
-          @body = p.charset ? p.body.decoded.force_encoding(p.charset).encode("UTF-8").to_s : p.body.to_s
-          return @body
-        elsif p = @message.html_part
-          html = p.charset ? p.body.decoded.force_encoding(p.charset).encode("UTF-8").to_s : p.body.to_s
+      # If the message is multipart, return that part (favor html)
+      if message.multipart?
+        html = fix_charset message.html_part
+        text = fix_charset message.text_part
+        # TODO picking text if available may be better
+        if text && !html
+          return text
         end
+      elsif message.content_type =~ /text\/html/
+        html = fix_charset message
       end
 
-      if @message.content_type =~ /text\/html/
-        if defined? @message.charset
-          html = @message.body.decoded.force_encoding(@message.charset).encode("UTF-8").to_s
-        else
-          html = @message.body.to_s
-        end
+      if html
+        body = HtmlCleaner.new(html).output_html
+      else
+        body = fix_charset message
       end
 
-      if html.present?
-        @body = scrub_html(html)
-        return @body
-      end
-
-      @body = @message.charset ? @message.body.decoded.force_encoding(@message.charset).encode("UTF-8").to_s.strip : @message.body.to_s
-
       # Certain trigger phrases that means we didn't parse correctly
-      @body = nil if @body =~ /Content\-Type\:/ ||
-                     @body =~ /multipart\/alternative/ ||
-                     @body =~ /text\/plain/
+      if body =~ /Content\-Type\:/ || body =~ /multipart\/alternative/ || body =~ /text\/plain/
+        raise EmptyEmailError
+      end
 
-      @body
+      body
     end
 
-    def scrub_html(html)
-      # If we have an HTML message, strip the markup
-      doc = Nokogiri::HTML(html)
+    # Force encoding to UTF-8 on a Mail::Message or Mail::Part
+    def fix_charset(object)
+      return nil if object.nil?
 
-      # Blackberry is annoying in that it only provides HTML. We can easily extract it though
-      content = doc.at("#BB10_response_div")
-      return content.text if content.present?
-
-      doc.xpath("//text()").text
+      if object.charset
+        object.body.decoded.force_encoding(object.charset).encode("UTF-8").to_s
+      else
+        object.body.to_s
+      end
     end
 
-    def discourse_email_parser
-      lines = @body.scrub.lines.to_a
+    REPLYING_HEADER_LABELS = ['From', 'Sent', 'To', 'Subject', 'Reply To']
+    REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |lbl| "#{lbl}:" })
+
+    def discourse_email_trimmer(body)
+      lines = body.scrub.lines.to_a
       range_end = 0
 
       lines.each_with_index do |l, idx|
@@ -154,11 +160,15 @@ module Email
                  # Let's try it and see how well it works.
                  (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/)
 
+        # Headers on subsequent lines
+        break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
+        # Headers on the same line
+        break if REPLYING_HEADER_LABELS.count { |lbl| l.include? lbl } >= 3
+
         range_end = idx
       end
 
-      @body = lines[0..range_end].join
-      @body.strip!
+      lines[0..range_end].join.strip
     end
 
     def wrap_body_in_quote(user_email)

From 1c9f6159cd6a03a4dc74b0912f475fc73c39d613 Mon Sep 17 00:00:00 2001
From: riking <rikingcoding@gmail.com>
Date: Tue, 26 Aug 2014 17:08:53 -0700
Subject: [PATCH 4/5] Update the Receiver and PollMailbox specs for the changes

Tests are both added, moved, and deleted.

Add test for topic not being created

Move html_only.eml to parse_body testing section
---
 spec/components/email/receiver_spec.rb     | 271 ++++++++++++++-------
 spec/fixtures/emails/boundary.eml          |   4 +-
 spec/fixtures/emails/multiline_wrote.eml   |  23 --
 spec/fixtures/emails/multipart.eml         |  67 -----
 spec/fixtures/emails/paragraphs.cooked     |   7 +
 spec/fixtures/emails/too_many_mentions.eml |  31 +++
 spec/fixtures/emails/too_short.eml         |  21 ++
 spec/jobs/poll_mailbox_spec.rb             |   4 +-
 spec/spec_helper.rb                        |   1 +
 9 files changed, 247 insertions(+), 182 deletions(-)
 delete mode 100644 spec/fixtures/emails/multiline_wrote.eml
 delete mode 100644 spec/fixtures/emails/multipart.eml
 create mode 100644 spec/fixtures/emails/paragraphs.cooked
 create mode 100644 spec/fixtures/emails/too_many_mentions.eml
 create mode 100644 spec/fixtures/emails/too_short.eml

diff --git a/spec/components/email/receiver_spec.rb b/spec/components/email/receiver_spec.rb
index 71bc6a4d5..c5ef25543 100644
--- a/spec/components/email/receiver_spec.rb
+++ b/spec/components/email/receiver_spec.rb
@@ -8,124 +8,225 @@ describe Email::Receiver do
   before do
     SiteSetting.reply_by_email_address = "reply+%{reply_key}@appmail.adventuretime.ooo"
     SiteSetting.email_in = false
+    SiteSetting.title = "Discourse"
   end
 
-  describe 'invalid emails' do
+  describe 'parse_body' do
+    def test_parse_body(mail_string)
+      Email::Receiver.new(nil).parse_body(Mail::Message.new mail_string)
+    end
+
     it "raises EmptyEmailError if the message is blank" do
-      expect { Email::Receiver.new("").process }.to raise_error(Email::Receiver::EmptyEmailError)
+      expect { test_parse_body("") }.to raise_error(Email::Receiver::EmptyEmailError)
     end
 
     it "raises EmptyEmailError if the message is not an email" do
-      expect { Email::Receiver.new("asdf" * 30).process}.to raise_error(Email::Receiver::EmptyEmailError)
+      expect { test_parse_body("asdf" * 30) }.to raise_error(Email::Receiver::EmptyEmailError)
     end
 
-    it "raises EmailUnparsableError if there is no reply content" do
-      expect { Email::Receiver.new(fixture_file("emails/no_content_reply.eml")).process}.to raise_error(Email::Receiver::EmailUnparsableError)
+    it "raises EmptyEmailError if there is no reply content" do
+      expect { test_parse_body(fixture_file("emails/no_content_reply.eml")) }.to raise_error(Email::Receiver::EmptyEmailError)
     end
-  end
 
-  describe "with multipart" do
-    let(:reply_below) { fixture_file("emails/multipart.eml") }
-    let(:receiver) { Email::Receiver.new(reply_below) }
-
-    it "processes correctly" do
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq(
-"So presumably all the quoted garbage and my (proper) signature will get
-stripped from my reply?")
+    pending "raises EmailUnparsableError if the headers are corrupted" do
+      expect { ; }.to raise_error(Email::Receiver::EmailUnparsableError)
     end
-  end
 
-  describe "html only" do
-    let(:reply_below) { fixture_file("emails/html_only.eml") }
-    let(:receiver) { Email::Receiver.new(reply_below) }
-
-    it "processes correctly" do
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq("The EC2 instance - I've seen that there tends to be odd and " +
-                                  "unrecommended settings on the Bitnami installs that I've checked out.")
+    it "can parse the html section" do
+      test_parse_body(fixture_file("emails/html_only.eml")).should == "The EC2 instance - I've seen that there tends to be odd and " +
+          "unrecommended settings on the Bitnami installs that I've checked out."
     end
-  end
 
-  describe "it supports a dutch reply" do
-    let(:dutch) { fixture_file("emails/dutch.eml") }
-    let(:receiver) { Email::Receiver.new(dutch) }
-
-    it "processes correctly" do
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq("Dit is een antwoord in het Nederlands.")
+    it "supports a Dutch reply" do
+      test_parse_body(fixture_file("emails/dutch.eml")).should == "Dit is een antwoord in het Nederlands."
     end
-  end
 
-  describe "It supports a non english reply" do
-    let(:hebrew) { fixture_file("emails/hebrew.eml") }
-    let(:receiver) { Email::Receiver.new(hebrew) }
-
-    it "processes correctly" do
+    it "supports a Hebrew reply" do
       I18n.expects(:t).with('user_notifications.previous_discussion').returns('כלטוב')
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq("שלום")
+
+      # The force_encoding call is only needed for the test - it is passed on fine to the cooked post
+      test_parse_body(fixture_file("emails/hebrew.eml")).force_encoding("UTF-8").should == "שלום"
     end
-  end
 
-  describe "It supports a non UTF-8 reply" do
-    let(:big5) { fixture_file("emails/big5.eml") }
-    let(:receiver) { Email::Receiver.new(big5) }
-
-    it "processes correctly" do
+    it "supports a BIG5-encoded reply" do
       I18n.expects(:t).with('user_notifications.previous_discussion').returns('媽!我上電視了!')
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq("媽!我上電視了!")
+
+      # The force_encoding call is only needed for the test - it is passed on fine to the cooked post
+      test_parse_body(fixture_file("emails/big5.eml")).force_encoding("UTF-8").should == "媽!我上電視了!"
     end
-  end
 
-  describe "via" do
-    let(:wrote) { fixture_file("emails/via_line.eml") }
-    let(:receiver) { Email::Receiver.new(wrote) }
+    it "removes 'via' lines if they match the site title" do
+      SiteSetting.title = "Discourse"
 
-    it "removes via lines if we know them" do
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq("Hello this email has content!")
+      test_parse_body(fixture_file("emails/via_line.eml")).should == "Hello this email has content!"
     end
-  end
 
-  describe "if wrote is on a second line" do
-    let(:wrote) { fixture_file("emails/multiline_wrote.eml") }
-    let(:receiver) { Email::Receiver.new(wrote) }
-
-    it "processes correctly" do
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq("Thanks!")
+    it "removes the 'Previous Discussion' marker" do
+      test_parse_body(fixture_file("emails/previous.eml")).should == "This will not include the previous discussion that is present in this email."
     end
-  end
 
-  describe "remove previous discussion" do
-    let(:previous) { fixture_file("emails/previous.eml") }
-    let(:receiver) { Email::Receiver.new(previous) }
-
-    it "processes correctly" do
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq("This will not include the previous discussion that is present in this email.")
-    end
-  end
-
-  describe "multiple paragraphs" do
-    let(:paragraphs) { fixture_file("emails/paragraphs.eml") }
-    let(:receiver) { Email::Receiver.new(paragraphs) }
-
-    it "processes correctly" do
-      expect { receiver.process}.to raise_error(Email::Receiver::EmailLogNotFound)
-      expect(receiver.body).to eq(
+    it "handles multiple paragraphs" do
+      test_parse_body(fixture_file("emails/paragraphs.eml")).
+          should == (
 "Is there any reason the *old* candy can't be be kept in silos while the new candy
 is imported into *new* silos?
 
 The thing about candy is it stays delicious for a long time -- we can just keep
 it there without worrying about it too much, imo.
 
-Thanks for listening.")
+Thanks for listening."
+      )
     end
   end
 
+  describe "posting replies" do
+    let(:reply_key) { raise "Override this in a lower describe block" }
+    let(:email_raw) { raise "Override this in a lower describe block" }
+    # ----
+    let(:receiver) { Email::Receiver.new(email_raw) }
+    let(:post) { create_post }
+    let(:topic) { post.topic }
+    let(:posting_user) { post.user }
+    let(:replying_user_email) { 'jake@adventuretime.ooo' }
+    let(:replying_user) { Fabricate(:user, email: replying_user_email, trust_level: 2)}
+    let(:email_log) { EmailLog.new(reply_key: reply_key,
+                                   post: post,
+                                   post_id: post.id,
+                                   topic_id: post.topic_id,
+                                   email_type: 'user_posted',
+                                   user: replying_user,
+                                   user_id: replying_user.id,
+                                   to_address: replying_user_email
+    ) }
+
+    before do
+      email_log.save
+    end
+
+    # === Success Posting ===
+
+    describe "valid_reply.eml" do
+      let!(:reply_key) { '59d8df8370b7e95c5a49fbf86aeb2c93' }
+      let!(:email_raw) { fixture_file("emails/valid_reply.eml") }
+
+      it "creates a post with the correct content" do
+        start_count = topic.posts.count
+
+        receiver.process
+
+        topic.posts.count.should == (start_count + 1)
+        topic.posts.last.cooked.strip.should == fixture_file("emails/valid_reply.cooked").strip
+      end
+    end
+
+    describe "paragraphs.eml" do
+      let!(:reply_key) { '59d8df8370b7e95c5a49fbf86aeb2c93' }
+      let!(:email_raw) { fixture_file("emails/paragraphs.eml") }
+
+      it "cooks multiple paragraphs with traditional Markdown linebreaks" do
+        start_count = topic.posts.count
+
+        receiver.process
+
+        topic.posts.count.should == (start_count + 1)
+        topic.posts.last.cooked.strip.should == fixture_file("emails/paragraphs.cooked").strip
+        topic.posts.last.cooked.should_not match /<br/
+      end
+    end
+
+    describe "attachment.eml" do
+      let!(:reply_key) { '636ca428858779856c226bb145ef4fad' }
+      let!(:email_raw) {
+        fixture_file("emails/attachment.eml")
+        .gsub("TO", "reply+#{reply_key}@appmail.adventuretime.ooo")
+        .gsub("FROM", replying_user_email)
+      }
+
+      let(:upload_sha) { '04df605be528d03876685c52166d4b063aabb78a' }
+
+      it "creates a post with an attachment" do
+        start_count = topic.posts.count
+        Upload.find_by(sha1: upload_sha).try :destroy
+
+        receiver.process
+
+        topic.posts.count.should == (start_count + 1)
+        topic.posts.last.cooked.should match /<img src=['"](\/uploads\/default\/\d+\/\w{16}\.png)['"] width=['"]289['"] height=['"]126['"]>/
+        Upload.find_by(sha1: upload_sha).should_not be_nil
+      end
+
+    end
+
+    # === Failure Conditions ===
+
+    describe "too_short.eml" do
+      let!(:reply_key) { '636ca428858779856c226bb145ef4fad' }
+      let!(:email_raw) {
+        fixture_file("emails/too_short.eml")
+        .gsub("TO", "reply+#{reply_key}@appmail.adventuretime.ooo")
+        .gsub("FROM", replying_user_email)
+        .gsub("SUBJECT", "re: [Discourse Meta] eviltrout posted in 'Adventure Time Sux'")
+      }
+
+      it "raises an InvalidPost error" do
+        SiteSetting.min_post_length = 5
+        expect { receiver.process }.to raise_error(Email::Receiver::InvalidPost)
+      end
+    end
+
+    describe "too_many_mentions.eml" do
+      let!(:reply_key) { '636ca428858779856c226bb145ef4fad' }
+      let!(:email_raw) { fixture_file("emails/too_many_mentions.eml") }
+
+      it "raises an InvalidPost error" do
+        SiteSetting.max_mentions_per_post = 10
+        (1..11).each do |i|
+          Fabricate(:user, username: "user#{i}").save
+        end
+
+        expect { receiver.process }.to raise_error(Email::Receiver::InvalidPost)
+      end
+    end
+
+  end
+
+  describe "posting a new topic" do
+    let(:category_destination) { raise "Override this in a lower describe block" }
+    let(:email_raw) { raise "Override this in a lower describe block" }
+    let(:allow_strangers) { false }
+    # ----
+    let(:receiver) { Email::Receiver.new(email_raw) }
+    let(:user_email) { 'jake@adventuretime.ooo' }
+    let(:user) { Fabricate(:user, email: user_email, trust_level: 2)}
+    let(:category) { Fabricate(:category, email_in: category_destination, email_in_allow_strangers: allow_strangers) }
+
+    before do
+      SiteSetting.email_in = true
+      user.save
+      category.save
+    end
+
+    describe "too_short.eml" do
+      let!(:category_destination) { 'incoming+amazing@appmail.adventuretime.ooo' }
+      let(:email_raw) {
+        fixture_file("emails/too_short.eml")
+        .gsub("TO", category_destination)
+        .gsub("FROM", user_email)
+        .gsub("SUBJECT", "A long subject that passes the checks")
+      }
+
+      it "does not create a topic if the post fails" do
+        before_topic_count = Topic.count
+
+        expect { receiver.process }.to raise_error(Email::Receiver::InvalidPost)
+
+        Topic.count.should == before_topic_count
+      end
+
+    end
+
+  end
+
   def fill_email(mail, from, to, body = nil, subject = nil)
     result = mail.gsub("FROM", from).gsub("TO", to)
     if body
@@ -181,12 +282,6 @@ greatest show ever created. Everyone should watch it.
 
         expect(receiver.body).to eq(reply_body)
         expect(receiver.email_log).to eq(email_log)
-
-        attachment_email = fixture_file("emails/attachment.eml")
-        attachment_email = fill_email(attachment_email, "test@test.com", to)
-        r = Email::Receiver.new(attachment_email)
-        expect { r.process }.to_not raise_error
-        expect(r.body).to match(/here is an image attachment\n<img src='\/uploads\/default\/\d+\/\w{16}\.png' width='289' height='126'>\n/)
       end
 
     end
diff --git a/spec/fixtures/emails/boundary.eml b/spec/fixtures/emails/boundary.eml
index 92eb4347f..1250fe498 100644
--- a/spec/fixtures/emails/boundary.eml
+++ b/spec/fixtures/emails/boundary.eml
@@ -18,7 +18,7 @@ Content-Type: text/plain; charset=ISO-8859-1
 I'll look into it, thanks!
 
 
-On Wednesday, June 19, 2013, jake via Adventure Time wrote:
+On Wednesday, June 19, 2013, jake via Discourse wrote:
 
 > jake mentioned you in 'peppermint butler is missing' on Adventure
 > Time:
@@ -58,4 +58,4 @@ p>
 ime.ooo/user_preferences" target=3D"_blank">user preferences</a>.</p>
 </blockquote>
 
---001a11c206a073876a04df81d2a9--
\ No newline at end of file
+--001a11c206a073876a04df81d2a9--
diff --git a/spec/fixtures/emails/multiline_wrote.eml b/spec/fixtures/emails/multiline_wrote.eml
deleted file mode 100644
index 0829990dc..000000000
--- a/spec/fixtures/emails/multiline_wrote.eml
+++ /dev/null
@@ -1,23 +0,0 @@
-
-Delivered-To: discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org
-Received: by 10.194.216.104 with SMTP id op8csp80593wjc;
-        Wed, 24 Jul 2013 07:59:14 -0700 (PDT)
-Return-Path: <walter.white@googlemail.com>
-References: <topic/5043@discourse.org> <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
-From: Walter White <walter.white@googlemail.com>
-In-Reply-To: <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
-Mime-Version: 1.0 (1.0)
-Date: Wed, 24 Jul 2013 15:59:10 +0100
-Message-ID: <4597127794206131679@unknownmsgid>
-Subject: Re: [Discourse] new reply to your post in 'Crystal Blue'
-To: walter via Discourse <reply+cd480e301683c9902891f15968bf07a5@appmail.adventuretime.ooo>
-Content-Type: multipart/alternative; boundary=001a11c20edc15a39304e2432790
-
-Thanks!
-
-On 24 Jul 2013, at 15:58, walter via Discourse <info@discourse.org>
-wrote:
-
-   walter <http://discourse.org/users/walter>  July 24
-
-You look great today Walter.
diff --git a/spec/fixtures/emails/multipart.eml b/spec/fixtures/emails/multipart.eml
deleted file mode 100644
index b61f9fa48..000000000
--- a/spec/fixtures/emails/multipart.eml
+++ /dev/null
@@ -1,67 +0,0 @@
-Message-ID: <51C22E52.1030509@darthvader.ca>
-Date: Wed, 19 Jun 2013 18:18:58 -0400
-From: Anakin Skywalker <evildad@darthvader.ca>
-User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130510 Thunderbird/17.0.6
-MIME-Version: 1.0
-To: Han Solo via Death Star <reply+603775f8f5f68006461890a3eadf94cf@appmail.adventuretime.ooo>
-Subject: Re: [Death Star] [PM] re: Regarding your post in "Site Customization
- not working"
-References: <51d23d33f41fb_5f4e4b35d7d60798@xwing.mail>
-In-Reply-To: <51d23d33f41fb_5f4e4b35d7d60798@xwing.mail>
-Content-Type: multipart/alternative;
- boundary="------------070503080300090900010604"
-
-This is a multi-part message in MIME format.
---------------070503080300090900010604
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 7bit
-
-So presumably all the quoted garbage and my (proper) signature will get
-stripped from my reply?
-
---
-Anakin Skywalker               | `One of the main causes of the fall of
-evildad@darthvader.ca          | the Roman Empire was that, lacking zero,
-                               | they had no way to indicate successful
-                               | termination of their C programs.' - Firth
-
-
---------------070503080300090900010604
-Content-Type: text/html; charset=UTF-8
-Content-Transfer-Encoding: 7bit
-
-<html>
-  <head>
-    <meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
-  </head>
-  <body bgcolor="#FFFFFF" text="#000000">
-    <div class="moz-cite-prefix">On 13-06-19 06:14 PM, Han Solo via
-      Death Star wrote:<br>
-    </div>
-    <blockquote
-      cite="mid:51d23d33f41fb_5f4e4b35d7d60798@xwing.mail"
-      type="cite">
-      <p>Han Solo just sent you a private message</p>
-      <hr>
-      <p>I got it here! Yay it worked!</p>
-      <hr>
-      <p>Please visit this link to respond: <a moz-do-not-send="true"
-href="http://darthvader.ca/t/regarding-your-post-in-site-customization-not-working/7641/2">http://darthvader.ca/t/regarding-your-post-in-site-customization-not-working/7641/2</a></p>
-      <p>To unsubscribe from these emails, visit your <a
-          moz-do-not-send="true"
-          href="http://darthvader.ca/user_preferences">user
-          preferences</a>.</p>
-    </blockquote>
-    So presumably all the quoted garbage and my (proper) signature will
-    get stripped from my reply?<br>
-    <br>
-    <pre class="moz-signature" cols="72">--
-Anakin Skywalker               | `One of the main causes of the fall of
-<a class="moz-txt-link-abbreviated" href="mailto:evildad@darthvader.ca">evildad@darthvader.ca</a>       | the Roman Empire was that, lacking zero,
-                            | they had no way to indicate successful
-                            | termination of their C programs.' - Firth
-</pre>
-  </body>
-</html>
-
---------------070503080300090900010604--
diff --git a/spec/fixtures/emails/paragraphs.cooked b/spec/fixtures/emails/paragraphs.cooked
new file mode 100644
index 000000000..da83260e0
--- /dev/null
+++ b/spec/fixtures/emails/paragraphs.cooked
@@ -0,0 +1,7 @@
+<p>Is there any reason the <em>old</em> candy can't be be kept in silos while the new candy
+is imported into <em>new</em> silos?</p>
+
+<p>The thing about candy is it stays delicious for a long time -- we can just keep
+it there without worrying about it too much, imo.</p>
+
+<p>Thanks for listening.</p>
\ No newline at end of file
diff --git a/spec/fixtures/emails/too_many_mentions.eml b/spec/fixtures/emails/too_many_mentions.eml
new file mode 100644
index 000000000..9cc7b75c9
--- /dev/null
+++ b/spec/fixtures/emails/too_many_mentions.eml
@@ -0,0 +1,31 @@
+Return-Path: <jake@adventuretime.ooo>
+Received: from iceking.adventuretime.ooo ([unix socket]) by iceking (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA; Thu, 13 Jun 2013 17:03:50 -0400
+Received: from mail-ie0-x234.google.com (mail-ie0-x234.google.com [IPv6:2607:f8b0:4001:c03::234]) by iceking.adventuretime.ooo (8.14.3/8.14.3/Debian-9.4) with ESMTP id r5DL3nFJ016967 (version=TLSv1/SSLv3 cipher=RC4-SHA bits=128 verify=NOT) for <reply+59d8df8370b7e95c5a49fbf86aeb2c93@discourse.example.com>; Thu, 13 Jun 2013 17:03:50 -0400
+Received: by mail-ie0-f180.google.com with SMTP id f4so21977375iea.25 for <reply+59d8df8370b7e95c5a49fbf86aeb2c93@discourse.example.com>; Thu, 13 Jun 2013 14:03:48 -0700
+Received: by 10.0.0.1 with HTTP; Thu, 13 Jun 2013 14:03:48 -0700
+Date: Thu, 13 Jun 2013 17:03:48 -0400
+From: Jake the Dog <jake@adventuretime.ooo>
+To: reply+636ca428858779856c226bb145ef4fad@appmail.adventuretime.ooo
+Message-ID: <CADkmRc+rNGAGGbV2iE5p918UVy4UyJqVcXRO2=otppgzduJSg@mail.gmail.com>
+Subject: re: [Discourse Meta] eviltrout posted in 'Adventure Time Sux'
+Mime-Version: 1.0
+Content-Type: text/plain;
+ charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+X-Sieve: CMU Sieve 2.2
+X-Received: by 10.0.0.1 with SMTP id n7mr11234144ipb.85.1371157428600; Thu,
+ 13 Jun 2013 14:03:48 -0700 (PDT)
+X-Scanned-By: MIMEDefang 2.69 on IPv6:2001:470:1d:165::1
+
+
+@user1
+@user2
+@user3
+@user4
+@user5
+@user6
+@user7
+@user8
+@user9
+@user10
+@user11
\ No newline at end of file
diff --git a/spec/fixtures/emails/too_short.eml b/spec/fixtures/emails/too_short.eml
new file mode 100644
index 000000000..54fed0f98
--- /dev/null
+++ b/spec/fixtures/emails/too_short.eml
@@ -0,0 +1,21 @@
+Return-Path: <jake@adventuretime.ooo>
+Received: from iceking.adventuretime.ooo ([unix socket]) by iceking (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA; Thu, 13 Jun 2013 17:03:50 -0400
+Received: from mail-ie0-x234.google.com (mail-ie0-x234.google.com [IPv6:2607:f8b0:4001:c03::234]) by iceking.adventuretime.ooo (8.14.3/8.14.3/Debian-9.4) with ESMTP id r5DL3nFJ016967 (version=TLSv1/SSLv3 cipher=RC4-SHA bits=128 verify=NOT) for <reply+59d8df8370b7e95c5a49fbf86aeb2c93@discourse.example.com>; Thu, 13 Jun 2013 17:03:50 -0400
+Received: by mail-ie0-f180.google.com with SMTP id f4so21977375iea.25 for <reply+59d8df8370b7e95c5a49fbf86aeb2c93@discourse.example.com>; Thu, 13 Jun 2013 14:03:48 -0700
+Received: by 10.0.0.1 with HTTP; Thu, 13 Jun 2013 14:03:48 -0700
+Date: Thu, 13 Jun 2013 17:03:48 -0400
+From: Jake the Dog <FROM>
+To: TO
+Message-ID: <CADkmRc+rNGAGGbV2iE5p918UVy4UyJqVcXRO2=otppgzduJSg@mail.gmail.com>
+Subject: SUBJECT
+Mime-Version: 1.0
+Content-Type: text/plain;
+ charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+X-Sieve: CMU Sieve 2.2
+X-Received: by 10.0.0.1 with SMTP id n7mr11234144ipb.85.1371157428600; Thu,
+ 13 Jun 2013 14:03:48 -0700 (PDT)
+X-Scanned-By: MIMEDefang 2.69 on IPv6:2001:470:1d:165::1
+
+
++1
\ No newline at end of file
diff --git a/spec/jobs/poll_mailbox_spec.rb b/spec/jobs/poll_mailbox_spec.rb
index b10dcc09e..3084b2b2f 100644
--- a/spec/jobs/poll_mailbox_spec.rb
+++ b/spec/jobs/poll_mailbox_spec.rb
@@ -202,9 +202,9 @@ describe Jobs::PollMailbox do
         email.should be_deleted
       end
 
-      it "a no content reply raises an EmailUnparsableError" do
+      it "a no content reply raises an EmptyEmailError" do
         email = MockPop3EmailObject.new fixture_file('emails/no_content_reply.eml')
-        expect_exception Email::Receiver::EmailUnparsableError
+        expect_exception Email::Receiver::EmptyEmailError
 
         poller.handle_mail(email)
         email.should be_deleted
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 23c827e7d..f90de100e 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -30,6 +30,7 @@ Spork.prefork do
   # Requires supporting ruby files with custom matchers and macros, etc,
   # in spec/support/ and its subdirectories.
   Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
+  Dir[Rails.root.join("spec/fabricators/*.rb")].each {|f| require f}
 
   # let's not run seed_fu every test
   SeedFu.quiet = true if SeedFu.respond_to? :quiet

From 8ddd90daa42f7487b81b1d6bbd2d68b15b7fa42b Mon Sep 17 00:00:00 2001
From: riking <rikingcoding@gmail.com>
Date: Thu, 28 Aug 2014 12:09:42 -0700
Subject: [PATCH 5/5] Have parse_body() recover from ASCII-8BIT encoding

Added a test to make sure that the result can be passed into TextCleaner
(which expects UTF-8)
---
 lib/email/receiver.rb                  |  3 ++-
 spec/components/email/receiver_spec.rb | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb
index c644950ce..c15f8c427 100644
--- a/lib/email/receiver.rb
+++ b/lib/email/receiver.rb
@@ -95,6 +95,7 @@ module Email
 
     def parse_body(message)
       body = select_body message
+      encoding = body.encoding
       raise EmptyEmailError if body.strip.blank?
 
       body = discourse_email_trimmer body
@@ -103,7 +104,7 @@ module Email
       body = EmailReplyParser.parse_reply body
       raise EmptyEmailError if body.strip.blank?
 
-      body
+      body.force_encoding(encoding).encode("UTF-8")
     end
 
     def select_body(message)
diff --git a/spec/components/email/receiver_spec.rb b/spec/components/email/receiver_spec.rb
index c5ef25543..a04316067 100644
--- a/spec/components/email/receiver_spec.rb
+++ b/spec/components/email/receiver_spec.rb
@@ -45,14 +45,14 @@ describe Email::Receiver do
       I18n.expects(:t).with('user_notifications.previous_discussion').returns('כלטוב')
 
       # The force_encoding call is only needed for the test - it is passed on fine to the cooked post
-      test_parse_body(fixture_file("emails/hebrew.eml")).force_encoding("UTF-8").should == "שלום"
+      test_parse_body(fixture_file("emails/hebrew.eml")).should == "שלום"
     end
 
     it "supports a BIG5-encoded reply" do
       I18n.expects(:t).with('user_notifications.previous_discussion').returns('媽!我上電視了!')
 
       # The force_encoding call is only needed for the test - it is passed on fine to the cooked post
-      test_parse_body(fixture_file("emails/big5.eml")).force_encoding("UTF-8").should == "媽!我上電視了!"
+      test_parse_body(fixture_file("emails/big5.eml")).should == "媽!我上電視了!"
     end
 
     it "removes 'via' lines if they match the site title" do
@@ -77,6 +77,16 @@ it there without worrying about it too much, imo.
 Thanks for listening."
       )
     end
+
+    it "converts back to UTF-8 at the end" do
+      result = test_parse_body(fixture_file("emails/big5.eml"))
+      result.encoding.should == Encoding::UTF_8
+
+      # should not throw
+      TextCleaner.normalize_whitespaces(
+          test_parse_body(fixture_file("emails/big5.eml"))
+      )
+    end
   end
 
   describe "posting replies" do