diff --git a/app/models/post.rb b/app/models/post.rb index 1957cd532..abd5b3ede 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -38,6 +38,7 @@ class Post < ActiveRecord::Base validates_presence_of :raw, :user_id, :topic_id validates :raw, length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length} + validate :raw_quality validate :max_mention_validator validate :max_images_validator validate :max_links_validator @@ -68,6 +69,18 @@ class Post < ActiveRecord::Base self.raw.strip! if self.raw.present? end + def raw_quality + + sentinel = TextSentinel.new(self.raw, min_entropy: SiteSetting.body_min_entropy) + if sentinel.valid? + # It's possible the sentinel has cleaned up the title a bit + self.raw = sentinel.text + else + errors.add(:raw, I18n.t(:is_invalid)) unless sentinel.valid? + end + end + + # Stop us from posting the same thing too quickly def unique_post_validator return if SiteSetting.unique_posts_mins == 0 diff --git a/app/models/site_setting.rb b/app/models/site_setting.rb index f5091b5d6..e7b88bed5 100644 --- a/app/models/site_setting.rb +++ b/app/models/site_setting.rb @@ -126,6 +126,11 @@ class SiteSetting < ActiveRecord::Base setting(:basic_requires_read_posts, 100) setting(:basic_requires_time_spent_mins, 30) + # Entropy checks + setting(:title_min_entropy, 10) + setting(:body_min_entropy, 7) + setting(:max_word_length, 30) + def self.call_mothership? self.enforce_global_nicknames? and self.discourse_org_access_key.present? diff --git a/app/models/topic.rb b/app/models/topic.rb index 327c910d3..6d6e162a7 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -2,6 +2,7 @@ require_dependency 'slug' require_dependency 'avatar_lookup' require_dependency 'topic_view' require_dependency 'rate_limiter' +require_dependency 'text_sentinel' class Topic < ActiveRecord::Base include RateLimiter::OnCreateRecord @@ -18,13 +19,14 @@ class Topic < ActiveRecord::Base rate_limit :limit_topics_per_day rate_limit :limit_private_messages_per_day + validate :title_quality validates_presence_of :title validates :title, length: {in: SiteSetting.min_topic_title_length..SiteSetting.max_topic_title_length} serialize :meta_data, ActiveRecord::Coders::Hstore validate :unique_title - validate :nuclear_option + belongs_to :category has_many :posts @@ -113,18 +115,20 @@ class Topic < ActiveRecord::Base errors.add(:title, I18n.t(:has_already_been_used)) if finder.exists? end - # This is bad, but people are screwing us on try.discourse.org - soon we'll replace with - # a much more sane validation of odd characters to allow for other languages and such. - def nuclear_option - - # Let presence validation catch it if it's blank - return if title.blank? - title.each_char do |c| - unless (20..126).include?(c.ord) - errors.add(:title, I18n.t(:invalid_characters)) - return - end + def title_quality + # We don't care about quality on private messages + return if private_message? + + sentinel = TextSentinel.new(title, + min_entropy: SiteSetting.title_min_entropy, + max_word_length: SiteSetting.max_word_length, + remove_interior_spaces: true) + if sentinel.valid? + # It's possible the sentinel has cleaned up the title a bit + self.title = sentinel.text + else + errors.add(:title, I18n.t(:is_invalid)) unless sentinel.valid? end end diff --git a/config/locales/en.yml b/config/locales/en.yml index a86b81ffc..7f272d78c 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -9,6 +9,7 @@ en: just_posted_that: "is too similar to what you recently posted" has_already_been_used: "has already been used" invalid_characters: "contains invalid characters" + is_invalid: "is invalid; try to be a little more descriptive" activerecord: attributes: @@ -302,6 +303,10 @@ en: email_time_window_mins: "How many minutes we wait before sending a user mail, to give them a chance to see it first." flush_timings_secs: "How frequently we flush timing data to the server, in seconds." + max_word_length: "The maximum word length in a topic title" + title_min_entropy: "The minimum entropy for a topic title" + body_min_entropy: "The minimum entropy for post body" + # This section is exported to the javascript for i18n in the admin section admin_js: type_to_filter: "Type to Filter..." @@ -689,7 +694,7 @@ en: no_posted: "You haven't posted in any topics yet." no_popular: "There are no popular topics. That's sad." - topic: + topic: create_in: 'Create {{categoryName}} Topic' create: 'Create Topic' create_long: 'Create a new Topic' diff --git a/lib/text_sentinel.rb b/lib/text_sentinel.rb new file mode 100644 index 000000000..3af20efd0 --- /dev/null +++ b/lib/text_sentinel.rb @@ -0,0 +1,56 @@ +require 'iconv' + +# +# Given a string, tell us whether or not is acceptable. Also, remove stuff we don't like +# such as leading / trailing space. +# +class TextSentinel + + attr_accessor :text + + def self.non_symbols_regexp + /[\ -\/\[-\`\:-\@\{-\~]/m + end + + def initialize(text, opts=nil) + if text.present? + @text = Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(text.dup) + end + + @opts = opts || {} + + if @text.present? + @text.strip! + @text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces] + end + end + + # Entropy is a number of how many unique characters the string needs. + def entropy + return 0 if @text.blank? + @entropy ||= @text.each_char.to_a.uniq.size + end + + def valid? + + # Blank strings are not valid + return false if @text.blank? + + # Entropy check if required + return false if @opts[:min_entropy].present? and (entropy < @opts[:min_entropy]) + + # We don't have a comprehensive list of symbols, but this will eliminate some noise + non_symbols = @text.gsub(TextSentinel.non_symbols_regexp, '').size + return false if non_symbols == 0 + + # Don't allow super long strings without spaces + + return false if @opts[:max_word_length] and @text =~ /\w{#{@opts[:max_word_length]},}(\s|$)/ + + # We don't allow all upper case content + return false if @text == @text.upcase + + true + end + +end diff --git a/spec/components/post_creator_spec.rb b/spec/components/post_creator_spec.rb index 209606210..5c5b074ec 100644 --- a/spec/components/post_creator_spec.rb +++ b/spec/components/post_creator_spec.rb @@ -11,7 +11,7 @@ describe PostCreator do context 'new topic' do let(:category) { Fabricate(:category, user: user) } - let(:basic_topic_params) { {title: 'hello world', raw: 'my name is fred', archetype_id: 1} } + let(:basic_topic_params) { {title: 'hello world topic', raw: 'my name is fred', archetype_id: 1} } let(:image_sizes) { {'http://an.image.host/image.jpg' => {'width' => 111, 'height' => 222}} } let(:creator) { PostCreator.new(user, basic_topic_params) } @@ -83,7 +83,7 @@ describe PostCreator do let(:target_user1) { Fabricate(:coding_horror) } let(:target_user2) { Fabricate(:moderator) } let(:post) do - PostCreator.create(user, title: 'hi there', + PostCreator.create(user, title: 'hi there welcome to my topic', raw: 'this is my awesome message', archetype: Archetype.private_message, target_usernames: [target_user1.username, target_user2.username].join(',')) diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index c2699d989..717f63b7c 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -14,7 +14,7 @@ describe Search do context 'post indexing observer' do before do @category = Fabricate(:category, name: 'america') - @topic = Fabricate(:topic, title: 'sam test', category: @category) + @topic = Fabricate(:topic, title: 'sam test topic', category: @category) @post = Fabricate(:post, topic: @topic, raw: 'this fun test ') @indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"] end @@ -29,7 +29,7 @@ describe Search do end it "should pick up on title updates" do - @topic.title = "harpi" + @topic.title = "harpi is the new title" @topic.save! @indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"] diff --git a/spec/components/text_sentinel_spec.rb b/spec/components/text_sentinel_spec.rb new file mode 100644 index 000000000..746739514 --- /dev/null +++ b/spec/components/text_sentinel_spec.rb @@ -0,0 +1,96 @@ +# encoding: utf-8 + +require 'spec_helper' +require 'text_sentinel' +require 'iconv' + +describe TextSentinel do + + + context "entropy" do + + + it "returns 0 for an empty string" do + TextSentinel.new("").entropy.should == 0 + end + + it "returns 0 for a nil string" do + TextSentinel.new(nil).entropy.should == 0 + end + + it "returns 1 for a string with many leading spaces" do + TextSentinel.new((" " * 10) + "x").entropy.should == 1 + end + + it "returns 1 for one char, even repeated" do + TextSentinel.new("a" * 10).entropy.should == 1 + end + + it "returns an accurate count of many chars" do + TextSentinel.new("evil trout is evil").entropy.should == 10 + end + + end + + context "cleaning up" do + + it "strips leading or trailing whitespace" do + TextSentinel.new(" \t test \t ").text.should == "test" + end + + it "allows utf-8 chars" do + TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛" + end + + context "interior spaces" do + + let(:spacey_string) { "hello there's weird spaces here." } + + it "ignores intra spaces by default" do + TextSentinel.new(spacey_string).text.should == spacey_string + end + + it "fixes intra spaces when enabled" do + TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == "hello there's weird spaces here." + end + + end + + end + + context "validity" do + + let(:valid_string) { "This is a cool topic about Discourse" } + + it "allows a valid string" do + TextSentinel.new(valid_string).should be_valid + end + + it "doesn't allow all caps topics" do + TextSentinel.new(valid_string.upcase).should_not be_valid + end + + it "enforces the minimum entropy" do + TextSentinel.new(valid_string, min_entropy: 16).should be_valid + end + + it "enforces the minimum entropy" do + TextSentinel.new(valid_string, min_entropy: 17).should_not be_valid + end + + it "doesn't allow a long alphanumeric string with no spaces" do + TextSentinel.new("jfewjfoejwfojeojfoejofjeo38493824jfkjewfjeoifijeoijfoejofjeojfoewjfo834988394032jfiejoijofijeojfeojfojeofjewojfojeofjeowjfojeofjeojfoe3898439849032jfeijfwoijfoiewj", + max_word_length: 30).should_not be_valid + end + + it "doesn't except junk symbols as a string" do + TextSentinel.new("[[[").should_not be_valid + TextSentinel.new("<<<").should_not be_valid + TextSentinel.new("{{$!").should_not be_valid + end + + + end + + +end \ No newline at end of file diff --git a/spec/components/topic_query_spec.rb b/spec/components/topic_query_spec.rb index e0494ebdb..be06ed505 100644 --- a/spec/components/topic_query_spec.rb +++ b/spec/components/topic_query_spec.rb @@ -11,11 +11,11 @@ describe TopicQuery do let(:admin) { Fabricate(:moderator) } context 'a bunch of topics' do - let!(:regular_topic) { Fabricate(:topic, title: 'regular', user: creator, bumped_at: 15.minutes.ago) } - let!(:pinned_topic) { Fabricate(:topic, title: 'pinned', user: creator, pinned: true, bumped_at: 10.minutes.ago) } - let!(:archived_topic) { Fabricate(:topic, title: 'archived', user: creator, archived: true, bumped_at: 6.minutes.ago) } - let!(:invisible_topic) { Fabricate(:topic, title: 'invisible', user: creator, visible: false, bumped_at: 5.minutes.ago) } - let!(:closed_topic) { Fabricate(:topic, title: 'closed', user: creator, closed: true, bumped_at: 1.minute.ago) } + let!(:regular_topic) { Fabricate(:topic, title: 'this is a regular topic', user: creator, bumped_at: 15.minutes.ago) } + let!(:pinned_topic) { Fabricate(:topic, title: 'this is a pinned topic', user: creator, pinned: true, bumped_at: 10.minutes.ago) } + let!(:archived_topic) { Fabricate(:topic, title: 'this is an archived topic', user: creator, archived: true, bumped_at: 6.minutes.ago) } + let!(:invisible_topic) { Fabricate(:topic, title: 'this is an invisible topic', user: creator, visible: false, bumped_at: 5.minutes.ago) } + let!(:closed_topic) { Fabricate(:topic, title: 'this is a closed topic', user: creator, closed: true, bumped_at: 1.minute.ago) } context 'list_popular' do let(:topics) { topic_query.list_popular.topics } diff --git a/spec/controllers/topics_controller_spec.rb b/spec/controllers/topics_controller_spec.rb index 961713e64..f6bac266f 100644 --- a/spec/controllers/topics_controller_spec.rb +++ b/spec/controllers/topics_controller_spec.rb @@ -334,9 +334,9 @@ describe TopicsController do end it 'allows a change of title' do - xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'new title' + xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'this is a new title for the topic' @topic.reload - @topic.title.should == 'new title' + @topic.title.should == 'this is a new title for the topic' end it 'triggers a change of category' do diff --git a/spec/models/post_alert_observer_spec.rb b/spec/models/post_alert_observer_spec.rb index a1c9de1ef..7b6ad2cf3 100644 --- a/spec/models/post_alert_observer_spec.rb +++ b/spec/models/post_alert_observer_spec.rb @@ -30,7 +30,7 @@ describe PostAlertObserver do context 'when editing a post' do it 'notifies a user of the revision' do lambda { - post.revise(evil_trout, "world") + post.revise(evil_trout, "world is the new body of the message") }.should change(post.user.notifications, :count).by(1) end end diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb index e50030ba0..af9e9461b 100644 --- a/spec/models/topic_spec.rb +++ b/spec/models/topic_spec.rb @@ -5,9 +5,6 @@ require 'spec_helper' describe Topic do it { should validate_presence_of :title } - it { should_not allow_value("x" * (SiteSetting.max_topic_title_length + 1)).for(:title) } - it { should_not allow_value("x").for(:title) } - it { should_not allow_value((" " * SiteSetting.min_topic_title_length) + "x").for(:title) } it { should belong_to :category } it { should belong_to :user } @@ -26,14 +23,26 @@ describe Topic do it { should rate_limit } - context 'topic title content' do + context '.title_quality' do + + it "strips a title when identifying length" do + Fabricate.build(:topic, title: (" " * SiteSetting.min_topic_title_length) + "x").should_not be_valid + end + + it "doesn't allow a long title" do + Fabricate.build(:topic, title: "x" * (SiteSetting.max_topic_title_length + 1)).should_not be_valid + end + + it "doesn't allow a short title" do + Fabricate.build(:topic, title: "x" * (SiteSetting.min_topic_title_length + 1)).should_not be_valid + end it "allows a regular title with a few ascii characters" do Fabricate.build(:topic, title: "hello this is my cool topic! welcome: all;").should be_valid end - it "doesn't allow non standard ascii" do - Fabricate.build(:topic, title: "Iñtërnâtiônàlizætiøn").should_not be_valid + it "allows non ascii" do + Fabricate.build(:topic, title: "Iñtërnâtiônàlizætiøn").should be_valid end end @@ -830,7 +839,7 @@ describe Topic do context 'changing title' do before do - topic.title = "new title" + topic.title = "new title for the topic" topic.save end