Special work to never include previous posts in parsed emails. Also a new attempt

to catch reply strings in different languages.
This commit is contained in:
Robin Ward
2013-07-24 14:22:32 -04:00
parent 0a7bbb08e1
commit 2319a824f8
5 changed files with 148 additions and 8 deletions

View File

@ -19,10 +19,18 @@ module Email
return Email::Receiver.results[:unprocessable] if @raw.blank? return Email::Receiver.results[:unprocessable] if @raw.blank?
@message = Mail::Message.new(@raw) @message = Mail::Message.new(@raw)
@body = EmailReplyParser.read(parse_body).visible_text
# First remove the known discourse stuff.
parse_body
return Email::Receiver.results[:unprocessable] if @body.blank? return Email::Receiver.results[:unprocessable] if @body.blank?
# Then run the github EmailReplyParser on it in case we didn't catch it
@body = EmailReplyParser.read(@body).visible_text
discourse_email_parser
return Email::Receiver.results[:unprocessable] if @body.blank?
@reply_key = @message.to.first @reply_key = @message.to.first
# Extract the `reply_key` from the format the site has specified # Extract the `reply_key` from the format the site has specified
@ -49,7 +57,8 @@ module Email
if @message.multipart? if @message.multipart?
@message.parts.each do |p| @message.parts.each do |p|
if p.content_type =~ /text\/plain/ if p.content_type =~ /text\/plain/
return p.body.to_s @body = p.body.to_s
return @body
elsif p.content_type =~ /text\/html/ elsif p.content_type =~ /text\/html/
html = p.body.to_s html = p.body.to_s
end end
@ -58,10 +67,11 @@ module Email
html = @message.body.to_s if @message.content_type =~ /text\/html/ html = @message.body.to_s if @message.content_type =~ /text\/html/
if html.present? if html.present?
return scrub_html(html) @body = scrub_html(html)
return @body
end end
return @message.body.to_s.strip @body = @message.body.to_s.strip
end end
def scrub_html(html) def scrub_html(html)
@ -76,8 +86,27 @@ module Email
return doc.xpath("//text()").text return doc.xpath("//text()").text
end end
# Trims @body down to the text that precedes the first line that looks like
# the start of quoted / previous content, then strips surrounding whitespace.
#
# A line is treated as the cut-off marker when it is any of:
#   * a run of 3 to 80 dashes alone on the line,
#   * the localized 'user_notifications.previous_discussion' text alone on
#     the line,
#   * a line that contains a four-digit number and a "d:dd" time fragment and
#     ends with a colon (heuristic for reply attribution lines, independent
#     of language).
#
# Mutates @body. If the very first line is itself a marker, @body becomes
# empty rather than keeping the marker line.
def discourse_email_parser
  # Escape the translation so any regex metacharacters in a locale string are
  # matched literally; build the pattern once instead of once per line.
  previous_discussion = Regexp.new("\\A\\s*" + Regexp.escape(I18n.t('user_notifications.previous_discussion')) + "\\s*\\Z")

  # take_while works whether String#lines yields an Array or an Enumerator.
  kept = @body.lines.take_while do |line|
    !(line =~ /\A\s*\-{3,80}\s*\z/ ||
      line =~ previous_discussion ||
      # This one might be controversial but so many reply lines have years, times and end with a colon.
      # Let's try it and see how well it works.
      (line =~ /\d{4}/ && line =~ /\d:\d\d/ && line =~ /\:$/))
  end

  @body = kept.join
  @body.strip!
end
def create_reply
# Try to post the body as a reply # Try to post the body as a reply
creator = PostCreator.new(email_log.user, creator = PostCreator.new(email_log.user,
raw: @body, raw: @body,

View File

@ -21,7 +21,7 @@ describe Email::Receiver do
let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/multipart.eml") } let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/multipart.eml") }
let(:receiver) { Email::Receiver.new(reply_below) } let(:receiver) { Email::Receiver.new(reply_below) }
it "does something" do it "processes correctly" do
receiver.process receiver.process
expect(receiver.body).to eq( expect(receiver.body).to eq(
"So presumably all the quoted garbage and my (proper) signature will get "So presumably all the quoted garbage and my (proper) signature will get
@ -33,18 +33,48 @@ stripped from my reply?")
let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/html_only.eml") } let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/html_only.eml") }
let(:receiver) { Email::Receiver.new(reply_below) } let(:receiver) { Email::Receiver.new(reply_below) }
it "does something" do it "processes correctly" do
receiver.process receiver.process
expect(receiver.body).to eq("The EC2 instance - I've seen that there tends to be odd and " + expect(receiver.body).to eq("The EC2 instance - I've seen that there tends to be odd and " +
"unrecommended settings on the Bitnami installs that I've checked out.") "unrecommended settings on the Bitnami installs that I've checked out.")
end end
end end
describe "it supports a dutch reply" do
  # Fixture holds a Dutch reply followed by a Dutch attribution line
  # ("Op ... schreef ... het volgende:") and the original post.
  let(:receiver) do
    raw_email = File.read("#{Rails.root}/spec/fixtures/emails/dutch.eml")
    Email::Receiver.new(raw_email)
  end

  it "processes correctly" do
    receiver.process
    expected_body = "Dit is een antwoord in het Nederlands."
    expect(receiver.body).to eq(expected_body)
  end
end
describe "if wrote is on a second line" do
  # Fixture's attribution is wrapped: the "On ... <address>" part is on one
  # line and "wrote:" sits alone on the next.
  let(:receiver) do
    raw_email = File.read("#{Rails.root}/spec/fixtures/emails/multiline_wrote.eml")
    Email::Receiver.new(raw_email)
  end

  it "processes correctly" do
    receiver.process
    expected_body = "Thanks!"
    expect(receiver.body).to eq(expected_body)
  end
end
describe "remove previous discussion" do
  # Fixture contains the reply, then a dashed separator and a
  # "Previous discussion" section listing earlier posts.
  let(:receiver) do
    raw_email = File.read("#{Rails.root}/spec/fixtures/emails/previous.eml")
    Email::Receiver.new(raw_email)
  end

  it "processes correctly" do
    receiver.process
    expected_body = "This will not include the previous discussion that is present in this email."
    expect(receiver.body).to eq(expected_body)
  end
end
describe "multiple paragraphs" do describe "multiple paragraphs" do
let(:paragraphs) { File.read("#{Rails.root}/spec/fixtures/emails/paragraphs.eml") } let(:paragraphs) { File.read("#{Rails.root}/spec/fixtures/emails/paragraphs.eml") }
let(:receiver) { Email::Receiver.new(paragraphs) } let(:receiver) { Email::Receiver.new(paragraphs) }
it "does something" do it "processes correctly" do
receiver.process receiver.process
expect(receiver.body).to eq( expect(receiver.body).to eq(
"Is there any reason the *old* candy can't be be kept in silos while the new candy "Is there any reason the *old* candy can't be be kept in silos while the new candy

20
spec/fixtures/emails/dutch.eml vendored Normal file
View File

@ -0,0 +1,20 @@
Delivered-To: discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org
Received: by 10.194.216.104 with SMTP id op8csp80593wjc;
Wed, 24 Jul 2013 07:59:14 -0700 (PDT)
Return-Path: <walter.white@googlemail.com>
References: <topic/5043@discourse.org> <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
From: Walter White <walter.white@googlemail.com>
In-Reply-To: <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
Mime-Version: 1.0 (1.0)
Date: Wed, 24 Jul 2013 15:59:10 +0100
Message-ID: <4597127794206131679@unknownmsgid>
Subject: Re: [Discourse] new reply to your post in 'Crystal Blue'
To: walter via Discourse <discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org>
Content-Type: multipart/alternative; boundary=001a11c20edc15a39304e2432790
Dit is een antwoord in het Nederlands.
Op 18 juli 2013 10:23 schreef Sander Datema het volgende:
Dit is de originele post.

View File

@ -0,0 +1,23 @@
Delivered-To: discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org
Received: by 10.194.216.104 with SMTP id op8csp80593wjc;
Wed, 24 Jul 2013 07:59:14 -0700 (PDT)
Return-Path: <walter.white@googlemail.com>
References: <topic/5043@discourse.org> <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
From: Walter White <walter.white@googlemail.com>
In-Reply-To: <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
Mime-Version: 1.0 (1.0)
Date: Wed, 24 Jul 2013 15:59:10 +0100
Message-ID: <4597127794206131679@unknownmsgid>
Subject: Re: [Discourse] new reply to your post in 'Crystal Blue'
To: walter via Discourse <discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org>
Content-Type: multipart/alternative; boundary=001a11c20edc15a39304e2432790
Thanks!
On 24 Jul 2013, at 15:58, walter via Discourse <info@discourse.org>
wrote:
walter <http://discourse.org/users/walter> July 24
You look great today Walter.

38
spec/fixtures/emails/previous.eml vendored Normal file
View File

@ -0,0 +1,38 @@
Delivered-To: discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org
Received: by 10.194.216.104 with SMTP id op8csp80593wjc;
Wed, 24 Jul 2013 07:59:14 -0700 (PDT)
Return-Path: <walter.white@googlemail.com>
References: <topic/5043@discourse.org> <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
From: Walter White <walter.white@googlemail.com>
In-Reply-To: <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
Mime-Version: 1.0 (1.0)
Date: Wed, 24 Jul 2013 15:59:10 +0100
Message-ID: <4597127794206131679@unknownmsgid>
Subject: Re: [Discourse] new reply to your post in 'Crystal Blue'
To: walter via Discourse <discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org>
Content-Type: multipart/alternative; boundary=001a11c20edc15a39304e2432790
This will not include the previous discussion that is present in this email.
------------------------------
Previous discussion
skylerwhite<http://discourse.org/users/skylerwhite> July 24
This is a reply.
fring <http://discourse.org/users/fring> July 24
This is an older reply.
hank_schrader <http://discourse.org/users/hank_schrader> July 24
Of course another reply here.
walterwhite <http://discourse.org/users/walterwhite> July 24
------------------------------
To respond, reply to this email or visit
http://discourse.org/t/crystal-blue/5043/10in
your browser.
To unsubscribe from these emails, visit your user
preferences<http://discourse.org/user_preferences>
.