diff --git a/app/models/screened_email.rb b/app/models/screened_email.rb
index f10c80a8b..f18100341 100644
--- a/app/models/screened_email.rb
+++ b/app/models/screened_email.rb
@@ -17,8 +17,45 @@ class ScreenedEmail < ActiveRecord::Base
   end
 
+  # Tells whether +email+ must be rejected because it matches a screened email.
+  #
+  # An exact match is looked up first, so a screened email keeps matching no
+  # matter how old its record is; only when there is none does the lookup fall
+  # back to the closest similar address (see find_similar).
+  # A matching record has the hit recorded through record_match!.
+  #
+  # Returns true when the matching record's action is :block, false otherwise.
   def self.should_block?(email)
-    screened_email = ScreenedEmail.find_by(email: email)
-    screened_email.record_match! if screened_email
-    screened_email && screened_email.action_type == actions[:block]
+    screened_email = ScreenedEmail.find_by(email: email) || find_similar(email)
+    return false unless screened_email
+
+    screened_email.record_match!
+    screened_email.action_type == actions[:block]
+  end
+
+  # Finds the screened email closest to +email+ among the 100 most recently
+  # created records, using the fuzzystrmatch Levenshtein distance.
+  #
+  # Returns nil when no record is within
+  # SiteSetting.levenshtein_distance_spammer_emails edits of +email+.
+  def self.find_similar(email)
+    max_distance = SiteSetting.levenshtein_distance_spammer_emails
+
+    sql = <<-SQL
+      JOIN (
+        SELECT email, levenshtein_less_equal(email, :email, :levenshtein_distance) AS distance
+        FROM screened_emails
+        ORDER BY created_at DESC
+        LIMIT 100
+      ) AS sed ON sed.email = screened_emails.email
+    SQL
+
+    recent_with_distance = ScreenedEmail.sql_fragment(sql, email: email, levenshtein_distance: max_distance)
+
+    ScreenedEmail.joins(recent_with_distance)
+                 .where("sed.distance <= ?", max_distance)
+                 .order("sed.distance ASC")
+                 .first
   end
+
+  private_class_method :find_similar
 
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 55ec23ac3..ab1220244 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -890,6 +890,8 @@ en:
     white_listed_spam_host_domains: "A pipe-delimited list of domains excluded from spam host testing, new users will be able to create an unrestricted count of posts with links to this domain"
     staff_like_weight: "Extra weighting factor given to likes when performed by staff."
 
+    levenshtein_distance_spammer_emails: "Maximum number of character edits (Levenshtein distance) by which a new email may differ from a known spammer email and still be treated as a match."
+
     reply_by_email_enabled: "Enable replying to topics via email"
     reply_by_email_address: "Template for reply by email incoming email address, for example: %{reply_key}@reply.example.com or replies+%{reply_key}@example.com"
 
diff --git a/config/site_settings.yml b/config/site_settings.yml
index 78ef03d90..c919d0c71 100644
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@@ -523,6 +523,7 @@ spam:
   white_listed_spam_host_domains:
     default: ''
     type: list
+  levenshtein_distance_spammer_emails: 2
 
 rate_limits:
   unique_posts_mins:
diff --git a/db/migrate/20140711105420_add_fuzzy_str_match_extension.rb b/db/migrate/20140711105420_add_fuzzy_str_match_extension.rb
new file mode 100644
index 000000000..0a112a4be
--- /dev/null
+++ b/db/migrate/20140711105420_add_fuzzy_str_match_extension.rb
@@ -0,0 +1,11 @@
+# Enables the PostgreSQL fuzzystrmatch extension, whose levenshtein_less_equal
+# function ScreenedEmail uses to match emails similar to screened ones.
+class AddFuzzyStrMatchExtension < ActiveRecord::Migration
+  def self.up
+    execute "CREATE EXTENSION IF NOT EXISTS fuzzystrmatch"
+  end
+
+  def self.down
+    execute "DROP EXTENSION IF EXISTS fuzzystrmatch"
+  end
+end
diff --git a/spec/models/screened_email_spec.rb b/spec/models/screened_email_spec.rb
index 3e242034b..240e8d57a 100644
--- a/spec/models/screened_email_spec.rb
+++ b/spec/models/screened_email_spec.rb
@@ -3,33 +3,34 @@ require 'spec_helper'
 describe ScreenedEmail do
 
   let(:email) { 'block@spamfromhome.org' }
+  let(:similar_email) { 'bl0ck@spamfromhome.org' }
 
   describe "new record" do
     it "sets a default action_type" do
-      described_class.create(email: email).action_type.should == described_class.actions[:block]
+      ScreenedEmail.create(email: email).action_type.should == ScreenedEmail.actions[:block]
     end
 
     it "last_match_at is null" do
       # If we manually load the table with some emails, we can see whether those emails
       # have ever been blocked by looking at last_match_at.
-      described_class.create(email: email).last_match_at.should be_nil
+      ScreenedEmail.create(email: email).last_match_at.should be_nil
     end
   end
 
   describe '#block' do
     context 'email is not being blocked' do
       it 'creates a new record with default action of :block' do
-        record = described_class.block(email)
+        record = ScreenedEmail.block(email)
         record.should_not be_new_record
         record.email.should == email
-        record.action_type.should == described_class.actions[:block]
+        record.action_type.should == ScreenedEmail.actions[:block]
       end
 
       it 'lets action_type be overriden' do
-        record = described_class.block(email, action_type: described_class.actions[:do_nothing])
+        record = ScreenedEmail.block(email, action_type: ScreenedEmail.actions[:do_nothing])
         record.should_not be_new_record
         record.email.should == email
-        record.action_type.should == described_class.actions[:do_nothing]
+        record.action_type.should == ScreenedEmail.actions[:do_nothing]
       end
     end
 
@@ -37,22 +38,34 @@ describe ScreenedEmail do
       let!(:existing) { Fabricate(:screened_email, email: email) }
 
       it "doesn't create a new record" do
-        expect { described_class.block(email) }.to_not change { described_class.count }
+        expect { ScreenedEmail.block(email) }.to_not change { ScreenedEmail.count }
       end
 
       it "returns the existing record" do
-        described_class.block(email).should == existing
+        ScreenedEmail.block(email).should == existing
       end
     end
   end
 
   describe '#should_block?' do
-    subject { described_class.should_block?(email) }
+    subject { ScreenedEmail.should_block?(email) }
 
-    it "returns false if a record with the email doesn't exist" do
+    it "returns false when there is no record with a similar email" do
       subject.should be_false
     end
 
+    it "returns true when there is a record with the email" do
+      ScreenedEmail.should_block?(email).should be_false
+      ScreenedEmail.create(email: email)
+      ScreenedEmail.should_block?(email).should be_true
+    end
+
+    it "returns true when there is a record with a similar email" do
+      ScreenedEmail.should_block?(email).should be_false
+      ScreenedEmail.create(email: similar_email)
+      ScreenedEmail.should_block?(email).should be_true
+    end
+
     shared_examples "when a ScreenedEmail record matches" do
       it "updates statistics" do
         Timecop.freeze(Time.zone.now) do
@@ -63,13 +76,13 @@ describe ScreenedEmail do
       end
     end
     context "action_type is :block" do
-      let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: described_class.actions[:block]) }
+      let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: ScreenedEmail.actions[:block]) }
       it { should be_true }
       include_examples "when a ScreenedEmail record matches"
     end
 
     context "action_type is :do_nothing" do
-      let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: described_class.actions[:do_nothing]) }
+      let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: ScreenedEmail.actions[:do_nothing]) }
       it { should be_false }
       include_examples "when a ScreenedEmail record matches"
     end