From 0d10b5c9c442045e761b60352c301975cc7e4878 Mon Sep 17 00:00:00 2001 From: Robin Ward Date: Thu, 20 Jun 2013 12:38:03 -0400 Subject: [PATCH] More email receiving logic --- lib/email/receiver.rb | 46 +++++++++++-------- spec/components/email/receiver_spec.rb | 28 ++++++++++-- spec/fixtures/emails/boundary_email.txt | 61 ------------------------- spec/fixtures/emails/valid_reply.txt | 40 ---------------- 4 files changed, 53 insertions(+), 122 deletions(-) delete mode 100644 spec/fixtures/emails/boundary_email.txt delete mode 100644 spec/fixtures/emails/valid_reply.txt diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb index ee68ce134..fb43d77f5 100644 --- a/lib/email/receiver.rb +++ b/lib/email/receiver.rb @@ -19,7 +19,7 @@ module Email return Email::Receiver.results[:unprocessable] if @raw.blank? @message = Mail::Message.new(@raw) - parse_body + @body = EmailReplyParser.read(parse_body).visible_text return Email::Receiver.results[:unprocessable] if @body.blank? @@ -43,27 +43,37 @@ module Email private def parse_body - @body = @message.body.to_s.strip - return if @body.blank? + html = nil - # I really hate to have to do this, but there seems to be a bug in Mail::Message - # with content boundaries in emails. Until it is fixed, this hack removes stuff - # we don't want from emails bodies - content_type = @message.header['Content-Type'].to_s - if content_type.present? - boundary_match = content_type.match(/boundary\=(.*)$/) - boundary = boundary_match[1] if boundary_match && boundary_match[1].present? - if boundary.present? and @body.present? - - lines = @body.lines - lines = lines[1..-1] if lines.present? and lines[0] =~ /^--#{boundary}/ - lines = lines[1..-1] if lines.present? and lines[0] =~ /^Content-Type/ - - @body = lines.join.strip! + # If the message is multipart, find the best type for our purposes + if @message.multipart? + @message.parts.each do |p| + if p.content_type =~ /text\/plain/ + return p.body.to_s + elsif p.content_type =~ /text\/html/ + html = p.body.to_s + end end end - @body = EmailReplyParser.read(@body).visible_text + html = @message.body.to_s if @message.content_type =~ /text\/html/ + if html.present? + return scrub_html(html) + end + + return @message.body.to_s.strip + end + + def scrub_html(html) + # If we have an HTML message, strip the markup + doc = Nokogiri::HTML(html) + + # Blackberry is annoying in that it only provides HTML. We can easily + # extract it though + content = doc.at("#BB10_response_div") + return content.text if content.present? + + return doc.xpath("//text()").text end def create_reply diff --git a/spec/components/email/receiver_spec.rb b/spec/components/email/receiver_spec.rb index a9ff44bd6..15acc2e32 100644 --- a/spec/components/email/receiver_spec.rb +++ b/spec/components/email/receiver_spec.rb @@ -17,20 +17,42 @@ describe Email::Receiver do end end + describe "with multipart" do + let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/multipart.eml") } + let(:receiver) { Email::Receiver.new(reply_below) } + + it "does something" do + receiver.process + expect(receiver.body).to eq( +"So presumably all the quoted garbage and my (proper) signature will get +stripped from my reply?") + end + end + + describe "html only" do + let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/html_only.eml") } + let(:receiver) { Email::Receiver.new(reply_below) } + + it "does something" do + receiver.process + expect(receiver.body).to eq("The EC2 instance - I've seen that there tends to be odd and " + + "unrecommended settings on the Bitnami installs that I've checked out.") + end + end + describe "with a content boundary" do - let(:bounded_email) { File.read("#{Rails.root}/spec/fixtures/emails/boundary_email.txt") } + let(:bounded_email) { File.read("#{Rails.root}/spec/fixtures/emails/boundary.eml") } let(:receiver) { Email::Receiver.new(bounded_email) } it "does something" do receiver.process expect(receiver.body).to eq("I'll look into it, thanks!") end - end describe "with a valid email" do let(:reply_key) { "59d8df8370b7e95c5a49fbf86aeb2c93" } - let(:valid_reply) { File.read("#{Rails.root}/spec/fixtures/emails/valid_reply.txt") } + let(:valid_reply) { File.read("#{Rails.root}/spec/fixtures/emails/valid_reply.eml") } let(:receiver) { Email::Receiver.new(valid_reply) } let(:post) { Fabricate.build(:post) } let(:user) { Fabricate.build(:user) } diff --git a/spec/fixtures/emails/boundary_email.txt b/spec/fixtures/emails/boundary_email.txt deleted file mode 100644 index 78dba44f5..000000000 --- a/spec/fixtures/emails/boundary_email.txt +++ /dev/null @@ -1,61 +0,0 @@ - -MIME-Version: 1.0 -Received: by 10.64.14.41 with HTTP; Wed, 19 Jun 2013 06:29:41 -0700 (PDT) -In-Reply-To: <51c19490e928a_13442dd8ae892548@tree.mail> -References: <51c19490e928a_13442dd8ae892548@tree.mail> -Date: Wed, 19 Jun 2013 09:29:41 -0400 -Delivered-To: finn@adventuretime.ooo -Message-ID: -Subject: Re: [Adventure Time] jake mentioned you in 'peppermint butler is - missing' -From: Finn the Human -To: jake via Adventure Time -Content-Type: multipart/alternative; boundary=001a11c206a073876a04df81d2a9 - ---001a11c206a073876a04df81d2a9 -Content-Type: text/plain; charset=ISO-8859-1 - -I'll look into it, thanks! - - -On Wednesday, June 19, 2013, jake via Adventure Time wrote: - -> jake mentioned you in 'peppermint butler is missing' on Adventure -> Time: -> ------------------------------ -> -> yeah, just noticed this cc @jake -> ------------------------------ -> -> Please visit this link to respond: -> http://adventuretime.ooo/t/peppermint-butler-is-missing/7628/2 -> -> To unsubscribe from these emails, visit your user preferences -> . -> - ---001a11c206a073876a04df81d2a9 -Content-Type: text/html; charset=ISO-8859-1 -Content-Transfer-Encoding: quoted-printable - -I'll look into it, thanks!

On Wednesday, June 19, 2= -013, jake via Adventure Time wrote:

sa= -m mentioned you in 'Duplicate message are shown in profile' on Adve= -nture Time

- - -

yeah, just noticed this cc @eviltrout

- -

Please visit this link to respond: http= -://adventuretime.ooo/t/peppermint-butler-is-missing/7628/2 - - -

To unsubscribe from these emails, visit your user preferences.

-
- ---001a11c206a073876a04df81d2a9-- \ No newline at end of file diff --git a/spec/fixtures/emails/valid_reply.txt b/spec/fixtures/emails/valid_reply.txt deleted file mode 100644 index 1e6963899..000000000 --- a/spec/fixtures/emails/valid_reply.txt +++ /dev/null @@ -1,40 +0,0 @@ -Return-Path: -Received: from iceking.adventuretime.ooo ([unix socket]) by iceking (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA; Thu, 13 Jun 2013 17:03:50 -0400 -Received: from mail-ie0-x234.google.com (mail-ie0-x234.google.com [IPv6:2607:f8b0:4001:c03::234]) by iceking.adventuretime.ooo (8.14.3/8.14.3/Debian-9.4) with ESMTP id r5DL3nFJ016967 (version=TLSv1/SSLv3 cipher=RC4-SHA bits=128 verify=NOT) for ; Thu, 13 Jun 2013 17:03:50 -0400 -Received: by mail-ie0-f180.google.com with SMTP id f4so21977375iea.25 for ; Thu, 13 Jun 2013 14:03:48 -0700 -Received: by 10.0.0.1 with HTTP; Thu, 13 Jun 2013 14:03:48 -0700 -Date: Thu, 13 Jun 2013 17:03:48 -0400 -From: Jake the Dog -To: reply+59d8df8370b7e95c5a49fbf86aeb2c93@appmail.adventuretime.ooo -Message-ID: -Subject: re: [Discourse Meta] eviltrout posted in 'Adventure Time Sux' -Mime-Version: 1.0 -Content-Type: text/plain; - charset=ISO-8859-1 -Content-Transfer-Encoding: 7bit -X-Sieve: CMU Sieve 2.2 -X-Received: by 10.0.0.1 with SMTP id n7mr11234144ipb.85.1371157428600; Thu, - 13 Jun 2013 14:03:48 -0700 (PDT) -X-Scanned-By: MIMEDefang 2.69 on IPv6:2001:470:1d:165::1 - -I could not disagree more. I am obviously biased but adventure time is the -greatest show ever created. Everyone should watch it. - -- Jake out - - -On Sun, Jun 9, 2013 at 1:39 PM, eviltrout via Discourse Meta - wrote: -> -> -> -> eviltrout posted in 'Adventure Time Sux' on Discourse Meta: -> -> --- -> hey guys everyone knows adventure time sucks! -> -> --- -> Please visit this link to respond: http://localhost:3000/t/adventure-time-sux/1234/3 -> -> To unsubscribe from these emails, visit your [user preferences](http://localhost:3000/user_preferences). -> \ No newline at end of file