mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-30 10:58:31 -05:00
Introduction of TextSentinel to enforce title and body quality.
This commit is contained in:
parent
94e58d733e
commit
40da901e5d
12 changed files with 220 additions and 32 deletions
|
@ -38,6 +38,7 @@ class Post < ActiveRecord::Base
|
||||||
|
|
||||||
validates_presence_of :raw, :user_id, :topic_id
|
validates_presence_of :raw, :user_id, :topic_id
|
||||||
validates :raw, length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length}
|
validates :raw, length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length}
|
||||||
|
validate :raw_quality
|
||||||
validate :max_mention_validator
|
validate :max_mention_validator
|
||||||
validate :max_images_validator
|
validate :max_images_validator
|
||||||
validate :max_links_validator
|
validate :max_links_validator
|
||||||
|
@ -68,6 +69,18 @@ class Post < ActiveRecord::Base
|
||||||
self.raw.strip! if self.raw.present?
|
self.raw.strip! if self.raw.present?
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def raw_quality
|
||||||
|
|
||||||
|
sentinel = TextSentinel.new(self.raw, min_entropy: SiteSetting.body_min_entropy)
|
||||||
|
if sentinel.valid?
|
||||||
|
# It's possible the sentinel has cleaned up the title a bit
|
||||||
|
self.raw = sentinel.text
|
||||||
|
else
|
||||||
|
errors.add(:raw, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
# Stop us from posting the same thing too quickly
|
# Stop us from posting the same thing too quickly
|
||||||
def unique_post_validator
|
def unique_post_validator
|
||||||
return if SiteSetting.unique_posts_mins == 0
|
return if SiteSetting.unique_posts_mins == 0
|
||||||
|
|
|
@ -126,6 +126,11 @@ class SiteSetting < ActiveRecord::Base
|
||||||
setting(:basic_requires_read_posts, 100)
|
setting(:basic_requires_read_posts, 100)
|
||||||
setting(:basic_requires_time_spent_mins, 30)
|
setting(:basic_requires_time_spent_mins, 30)
|
||||||
|
|
||||||
|
# Entropy checks
|
||||||
|
setting(:title_min_entropy, 10)
|
||||||
|
setting(:body_min_entropy, 7)
|
||||||
|
setting(:max_word_length, 30)
|
||||||
|
|
||||||
|
|
||||||
def self.call_mothership?
|
def self.call_mothership?
|
||||||
self.enforce_global_nicknames? and self.discourse_org_access_key.present?
|
self.enforce_global_nicknames? and self.discourse_org_access_key.present?
|
||||||
|
|
|
@ -2,6 +2,7 @@ require_dependency 'slug'
|
||||||
require_dependency 'avatar_lookup'
|
require_dependency 'avatar_lookup'
|
||||||
require_dependency 'topic_view'
|
require_dependency 'topic_view'
|
||||||
require_dependency 'rate_limiter'
|
require_dependency 'rate_limiter'
|
||||||
|
require_dependency 'text_sentinel'
|
||||||
|
|
||||||
class Topic < ActiveRecord::Base
|
class Topic < ActiveRecord::Base
|
||||||
include RateLimiter::OnCreateRecord
|
include RateLimiter::OnCreateRecord
|
||||||
|
@ -18,13 +19,14 @@ class Topic < ActiveRecord::Base
|
||||||
rate_limit :limit_topics_per_day
|
rate_limit :limit_topics_per_day
|
||||||
rate_limit :limit_private_messages_per_day
|
rate_limit :limit_private_messages_per_day
|
||||||
|
|
||||||
|
validate :title_quality
|
||||||
validates_presence_of :title
|
validates_presence_of :title
|
||||||
validates :title, length: {in: SiteSetting.min_topic_title_length..SiteSetting.max_topic_title_length}
|
validates :title, length: {in: SiteSetting.min_topic_title_length..SiteSetting.max_topic_title_length}
|
||||||
|
|
||||||
serialize :meta_data, ActiveRecord::Coders::Hstore
|
serialize :meta_data, ActiveRecord::Coders::Hstore
|
||||||
|
|
||||||
validate :unique_title
|
validate :unique_title
|
||||||
validate :nuclear_option
|
|
||||||
|
|
||||||
belongs_to :category
|
belongs_to :category
|
||||||
has_many :posts
|
has_many :posts
|
||||||
|
@ -113,18 +115,20 @@ class Topic < ActiveRecord::Base
|
||||||
errors.add(:title, I18n.t(:has_already_been_used)) if finder.exists?
|
errors.add(:title, I18n.t(:has_already_been_used)) if finder.exists?
|
||||||
end
|
end
|
||||||
|
|
||||||
# This is bad, but people are screwing us on try.discourse.org - soon we'll replace with
|
|
||||||
# a much more sane validation of odd characters to allow for other languages and such.
|
|
||||||
def nuclear_option
|
|
||||||
|
|
||||||
# Let presence validation catch it if it's blank
|
def title_quality
|
||||||
return if title.blank?
|
# We don't care about quality on private messages
|
||||||
|
return if private_message?
|
||||||
|
|
||||||
title.each_char do |c|
|
sentinel = TextSentinel.new(title,
|
||||||
unless (20..126).include?(c.ord)
|
min_entropy: SiteSetting.title_min_entropy,
|
||||||
errors.add(:title, I18n.t(:invalid_characters))
|
max_word_length: SiteSetting.max_word_length,
|
||||||
return
|
remove_interior_spaces: true)
|
||||||
end
|
if sentinel.valid?
|
||||||
|
# It's possible the sentinel has cleaned up the title a bit
|
||||||
|
self.title = sentinel.text
|
||||||
|
else
|
||||||
|
errors.add(:title, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ en:
|
||||||
just_posted_that: "is too similar to what you recently posted"
|
just_posted_that: "is too similar to what you recently posted"
|
||||||
has_already_been_used: "has already been used"
|
has_already_been_used: "has already been used"
|
||||||
invalid_characters: "contains invalid characters"
|
invalid_characters: "contains invalid characters"
|
||||||
|
is_invalid: "is invalid; try to be a little more descriptive"
|
||||||
|
|
||||||
activerecord:
|
activerecord:
|
||||||
attributes:
|
attributes:
|
||||||
|
@ -302,6 +303,10 @@ en:
|
||||||
email_time_window_mins: "How many minutes we wait before sending a user mail, to give them a chance to see it first."
|
email_time_window_mins: "How many minutes we wait before sending a user mail, to give them a chance to see it first."
|
||||||
flush_timings_secs: "How frequently we flush timing data to the server, in seconds."
|
flush_timings_secs: "How frequently we flush timing data to the server, in seconds."
|
||||||
|
|
||||||
|
max_word_length: "The maximum word length in a topic title"
|
||||||
|
title_min_entropy: "The minimum entropy for a topic title"
|
||||||
|
body_min_entropy: "The minimum entropy for post body"
|
||||||
|
|
||||||
# This section is exported to the javascript for i18n in the admin section
|
# This section is exported to the javascript for i18n in the admin section
|
||||||
admin_js:
|
admin_js:
|
||||||
type_to_filter: "Type to Filter..."
|
type_to_filter: "Type to Filter..."
|
||||||
|
|
56
lib/text_sentinel.rb
Normal file
56
lib/text_sentinel.rb
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
require 'iconv'
|
||||||
|
|
||||||
|
#
|
||||||
|
# Given a string, tell us whether or not is acceptable. Also, remove stuff we don't like
|
||||||
|
# such as leading / trailing space.
|
||||||
|
#
|
||||||
|
class TextSentinel
|
||||||
|
|
||||||
|
attr_accessor :text
|
||||||
|
|
||||||
|
def self.non_symbols_regexp
|
||||||
|
/[\ -\/\[-\`\:-\@\{-\~]/m
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(text, opts=nil)
|
||||||
|
if text.present?
|
||||||
|
@text = Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(text.dup)
|
||||||
|
end
|
||||||
|
|
||||||
|
@opts = opts || {}
|
||||||
|
|
||||||
|
if @text.present?
|
||||||
|
@text.strip!
|
||||||
|
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Entropy is a number of how many unique characters the string needs.
|
||||||
|
def entropy
|
||||||
|
return 0 if @text.blank?
|
||||||
|
@entropy ||= @text.each_char.to_a.uniq.size
|
||||||
|
end
|
||||||
|
|
||||||
|
def valid?
|
||||||
|
|
||||||
|
# Blank strings are not valid
|
||||||
|
return false if @text.blank?
|
||||||
|
|
||||||
|
# Entropy check if required
|
||||||
|
return false if @opts[:min_entropy].present? and (entropy < @opts[:min_entropy])
|
||||||
|
|
||||||
|
# We don't have a comprehensive list of symbols, but this will eliminate some noise
|
||||||
|
non_symbols = @text.gsub(TextSentinel.non_symbols_regexp, '').size
|
||||||
|
return false if non_symbols == 0
|
||||||
|
|
||||||
|
# Don't allow super long strings without spaces
|
||||||
|
|
||||||
|
return false if @opts[:max_word_length] and @text =~ /\w{#{@opts[:max_word_length]},}(\s|$)/
|
||||||
|
|
||||||
|
# We don't allow all upper case content
|
||||||
|
return false if @text == @text.upcase
|
||||||
|
|
||||||
|
true
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
|
@ -11,7 +11,7 @@ describe PostCreator do
|
||||||
|
|
||||||
context 'new topic' do
|
context 'new topic' do
|
||||||
let(:category) { Fabricate(:category, user: user) }
|
let(:category) { Fabricate(:category, user: user) }
|
||||||
let(:basic_topic_params) { {title: 'hello world', raw: 'my name is fred', archetype_id: 1} }
|
let(:basic_topic_params) { {title: 'hello world topic', raw: 'my name is fred', archetype_id: 1} }
|
||||||
let(:image_sizes) { {'http://an.image.host/image.jpg' => {'width' => 111, 'height' => 222}} }
|
let(:image_sizes) { {'http://an.image.host/image.jpg' => {'width' => 111, 'height' => 222}} }
|
||||||
|
|
||||||
let(:creator) { PostCreator.new(user, basic_topic_params) }
|
let(:creator) { PostCreator.new(user, basic_topic_params) }
|
||||||
|
@ -83,7 +83,7 @@ describe PostCreator do
|
||||||
let(:target_user1) { Fabricate(:coding_horror) }
|
let(:target_user1) { Fabricate(:coding_horror) }
|
||||||
let(:target_user2) { Fabricate(:moderator) }
|
let(:target_user2) { Fabricate(:moderator) }
|
||||||
let(:post) do
|
let(:post) do
|
||||||
PostCreator.create(user, title: 'hi there',
|
PostCreator.create(user, title: 'hi there welcome to my topic',
|
||||||
raw: 'this is my awesome message',
|
raw: 'this is my awesome message',
|
||||||
archetype: Archetype.private_message,
|
archetype: Archetype.private_message,
|
||||||
target_usernames: [target_user1.username, target_user2.username].join(','))
|
target_usernames: [target_user1.username, target_user2.username].join(','))
|
||||||
|
|
|
@ -14,7 +14,7 @@ describe Search do
|
||||||
context 'post indexing observer' do
|
context 'post indexing observer' do
|
||||||
before do
|
before do
|
||||||
@category = Fabricate(:category, name: 'america')
|
@category = Fabricate(:category, name: 'america')
|
||||||
@topic = Fabricate(:topic, title: 'sam test', category: @category)
|
@topic = Fabricate(:topic, title: 'sam test topic', category: @category)
|
||||||
@post = Fabricate(:post, topic: @topic, raw: 'this <b>fun test</b> <img src="bla" title="my image">')
|
@post = Fabricate(:post, topic: @topic, raw: 'this <b>fun test</b> <img src="bla" title="my image">')
|
||||||
@indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"]
|
@indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"]
|
||||||
end
|
end
|
||||||
|
@ -29,7 +29,7 @@ describe Search do
|
||||||
end
|
end
|
||||||
|
|
||||||
it "should pick up on title updates" do
|
it "should pick up on title updates" do
|
||||||
@topic.title = "harpi"
|
@topic.title = "harpi is the new title"
|
||||||
@topic.save!
|
@topic.save!
|
||||||
@indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"]
|
@indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"]
|
||||||
|
|
||||||
|
|
96
spec/components/text_sentinel_spec.rb
Normal file
96
spec/components/text_sentinel_spec.rb
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
require 'spec_helper'
|
||||||
|
require 'text_sentinel'
|
||||||
|
require 'iconv'
|
||||||
|
|
||||||
|
describe TextSentinel do
|
||||||
|
|
||||||
|
|
||||||
|
context "entropy" do
|
||||||
|
|
||||||
|
|
||||||
|
it "returns 0 for an empty string" do
|
||||||
|
TextSentinel.new("").entropy.should == 0
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns 0 for a nil string" do
|
||||||
|
TextSentinel.new(nil).entropy.should == 0
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns 1 for a string with many leading spaces" do
|
||||||
|
TextSentinel.new((" " * 10) + "x").entropy.should == 1
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns 1 for one char, even repeated" do
|
||||||
|
TextSentinel.new("a" * 10).entropy.should == 1
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns an accurate count of many chars" do
|
||||||
|
TextSentinel.new("evil trout is evil").entropy.should == 10
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
context "cleaning up" do
|
||||||
|
|
||||||
|
it "strips leading or trailing whitespace" do
|
||||||
|
TextSentinel.new(" \t test \t ").text.should == "test"
|
||||||
|
end
|
||||||
|
|
||||||
|
it "allows utf-8 chars" do
|
||||||
|
TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛"
|
||||||
|
end
|
||||||
|
|
||||||
|
context "interior spaces" do
|
||||||
|
|
||||||
|
let(:spacey_string) { "hello there's weird spaces here." }
|
||||||
|
|
||||||
|
it "ignores intra spaces by default" do
|
||||||
|
TextSentinel.new(spacey_string).text.should == spacey_string
|
||||||
|
end
|
||||||
|
|
||||||
|
it "fixes intra spaces when enabled" do
|
||||||
|
TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == "hello there's weird spaces here."
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
context "validity" do
|
||||||
|
|
||||||
|
let(:valid_string) { "This is a cool topic about Discourse" }
|
||||||
|
|
||||||
|
it "allows a valid string" do
|
||||||
|
TextSentinel.new(valid_string).should be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
it "doesn't allow all caps topics" do
|
||||||
|
TextSentinel.new(valid_string.upcase).should_not be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
it "enforces the minimum entropy" do
|
||||||
|
TextSentinel.new(valid_string, min_entropy: 16).should be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
it "enforces the minimum entropy" do
|
||||||
|
TextSentinel.new(valid_string, min_entropy: 17).should_not be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
it "doesn't allow a long alphanumeric string with no spaces" do
|
||||||
|
TextSentinel.new("jfewjfoejwfojeojfoejofjeo38493824jfkjewfjeoifijeoijfoejofjeojfoewjfo834988394032jfiejoijofijeojfeojfojeofjewojfojeofjeowjfojeofjeojfoe3898439849032jfeijfwoijfoiewj",
|
||||||
|
max_word_length: 30).should_not be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
it "doesn't except junk symbols as a string" do
|
||||||
|
TextSentinel.new("[[[").should_not be_valid
|
||||||
|
TextSentinel.new("<<<").should_not be_valid
|
||||||
|
TextSentinel.new("{{$!").should_not be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
|
@ -11,11 +11,11 @@ describe TopicQuery do
|
||||||
let(:admin) { Fabricate(:moderator) }
|
let(:admin) { Fabricate(:moderator) }
|
||||||
|
|
||||||
context 'a bunch of topics' do
|
context 'a bunch of topics' do
|
||||||
let!(:regular_topic) { Fabricate(:topic, title: 'regular', user: creator, bumped_at: 15.minutes.ago) }
|
let!(:regular_topic) { Fabricate(:topic, title: 'this is a regular topic', user: creator, bumped_at: 15.minutes.ago) }
|
||||||
let!(:pinned_topic) { Fabricate(:topic, title: 'pinned', user: creator, pinned: true, bumped_at: 10.minutes.ago) }
|
let!(:pinned_topic) { Fabricate(:topic, title: 'this is a pinned topic', user: creator, pinned: true, bumped_at: 10.minutes.ago) }
|
||||||
let!(:archived_topic) { Fabricate(:topic, title: 'archived', user: creator, archived: true, bumped_at: 6.minutes.ago) }
|
let!(:archived_topic) { Fabricate(:topic, title: 'this is an archived topic', user: creator, archived: true, bumped_at: 6.minutes.ago) }
|
||||||
let!(:invisible_topic) { Fabricate(:topic, title: 'invisible', user: creator, visible: false, bumped_at: 5.minutes.ago) }
|
let!(:invisible_topic) { Fabricate(:topic, title: 'this is an invisible topic', user: creator, visible: false, bumped_at: 5.minutes.ago) }
|
||||||
let!(:closed_topic) { Fabricate(:topic, title: 'closed', user: creator, closed: true, bumped_at: 1.minute.ago) }
|
let!(:closed_topic) { Fabricate(:topic, title: 'this is a closed topic', user: creator, closed: true, bumped_at: 1.minute.ago) }
|
||||||
|
|
||||||
context 'list_popular' do
|
context 'list_popular' do
|
||||||
let(:topics) { topic_query.list_popular.topics }
|
let(:topics) { topic_query.list_popular.topics }
|
||||||
|
|
|
@ -334,9 +334,9 @@ describe TopicsController do
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'allows a change of title' do
|
it 'allows a change of title' do
|
||||||
xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'new title'
|
xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'this is a new title for the topic'
|
||||||
@topic.reload
|
@topic.reload
|
||||||
@topic.title.should == 'new title'
|
@topic.title.should == 'this is a new title for the topic'
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'triggers a change of category' do
|
it 'triggers a change of category' do
|
||||||
|
|
|
@ -30,7 +30,7 @@ describe PostAlertObserver do
|
||||||
context 'when editing a post' do
|
context 'when editing a post' do
|
||||||
it 'notifies a user of the revision' do
|
it 'notifies a user of the revision' do
|
||||||
lambda {
|
lambda {
|
||||||
post.revise(evil_trout, "world")
|
post.revise(evil_trout, "world is the new body of the message")
|
||||||
}.should change(post.user.notifications, :count).by(1)
|
}.should change(post.user.notifications, :count).by(1)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -5,9 +5,6 @@ require 'spec_helper'
|
||||||
describe Topic do
|
describe Topic do
|
||||||
|
|
||||||
it { should validate_presence_of :title }
|
it { should validate_presence_of :title }
|
||||||
it { should_not allow_value("x" * (SiteSetting.max_topic_title_length + 1)).for(:title) }
|
|
||||||
it { should_not allow_value("x").for(:title) }
|
|
||||||
it { should_not allow_value((" " * SiteSetting.min_topic_title_length) + "x").for(:title) }
|
|
||||||
|
|
||||||
it { should belong_to :category }
|
it { should belong_to :category }
|
||||||
it { should belong_to :user }
|
it { should belong_to :user }
|
||||||
|
@ -26,14 +23,26 @@ describe Topic do
|
||||||
|
|
||||||
it { should rate_limit }
|
it { should rate_limit }
|
||||||
|
|
||||||
context 'topic title content' do
|
context '.title_quality' do
|
||||||
|
|
||||||
|
it "strips a title when identifying length" do
|
||||||
|
Fabricate.build(:topic, title: (" " * SiteSetting.min_topic_title_length) + "x").should_not be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
it "doesn't allow a long title" do
|
||||||
|
Fabricate.build(:topic, title: "x" * (SiteSetting.max_topic_title_length + 1)).should_not be_valid
|
||||||
|
end
|
||||||
|
|
||||||
|
it "doesn't allow a short title" do
|
||||||
|
Fabricate.build(:topic, title: "x" * (SiteSetting.min_topic_title_length + 1)).should_not be_valid
|
||||||
|
end
|
||||||
|
|
||||||
it "allows a regular title with a few ascii characters" do
|
it "allows a regular title with a few ascii characters" do
|
||||||
Fabricate.build(:topic, title: "hello this is my cool topic! welcome: all;").should be_valid
|
Fabricate.build(:topic, title: "hello this is my cool topic! welcome: all;").should be_valid
|
||||||
end
|
end
|
||||||
|
|
||||||
it "doesn't allow non standard ascii" do
|
it "allows non ascii" do
|
||||||
Fabricate.build(:topic, title: "Iñtërnâtiônàlizætiøn").should_not be_valid
|
Fabricate.build(:topic, title: "Iñtërnâtiônàlizætiøn").should be_valid
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
@ -830,7 +839,7 @@ describe Topic do
|
||||||
|
|
||||||
context 'changing title' do
|
context 'changing title' do
|
||||||
before do
|
before do
|
||||||
topic.title = "new title"
|
topic.title = "new title for the topic"
|
||||||
topic.save
|
topic.save
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue