diff --git a/app/controllers/posts_controller.rb b/app/controllers/posts_controller.rb index 5815bb914..bdc23d53a 100644 --- a/app/controllers/posts_controller.rb +++ b/app/controllers/posts_controller.rb @@ -40,12 +40,12 @@ class PostsController < ApplicationController post.image_sizes = params[:image_sizes] if params[:image_sizes].present? guardian.ensure_can_edit!(post) - # to stay consistent with the create api, + # to stay consistent with the create api, # we should allow for title changes and category changes here # we should also move all of this to a post updater. - if post.post_number == 1 && (params[:title] || params[:post][:category]) + if post.post_number == 1 && (params[:title] || params[:post][:category]) post.topic.title = params[:title] if params[:title] - Topic.transaction do + Topic.transaction do post.topic.change_category(params[:post][:category]) post.topic.save end diff --git a/app/controllers/topics_controller.rb b/app/controllers/topics_controller.rb index 01bd068da..6fdb1bab2 100644 --- a/app/controllers/topics_controller.rb +++ b/app/controllers/topics_controller.rb @@ -56,7 +56,9 @@ class TopicsController < ApplicationController topic.change_category(params[:category]) end - render nothing: true + # this is used to return the title to the client as it may have been + # changed by "TextCleaner" + render_serialized(topic, BasicTopicSerializer) end def similar_to diff --git a/app/models/post.rb b/app/models/post.rb index e582b3bea..37923e00c 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -52,16 +52,10 @@ class Post < ActiveRecord::Base end def raw_quality - sentinel = TextSentinel.new(raw, min_entropy: SiteSetting.body_min_entropy) - if sentinel.valid? - # It's possible the sentinel has cleaned up the title a bit - self.raw = sentinel.text - else - errors.add(:raw, I18n.t(:is_invalid)) unless sentinel.valid? - end + sentinel = TextSentinel.body_sentinel(raw) + errors.add(:raw, I18n.t(:is_invalid)) unless sentinel.valid? end - # Stop us from posting the same thing too quickly def unique_post_validator return if SiteSetting.unique_posts_mins == 0 diff --git a/app/models/post_action.rb b/app/models/post_action.rb index 7fe39f615..08f7cd3db 100644 --- a/app/models/post_action.rb +++ b/app/models/post_action.rb @@ -118,12 +118,7 @@ class PostAction < ActiveRecord::Base def message_quality return if message.blank? sentinel = TextSentinel.title_sentinel(message) - if sentinel.valid? - # It's possible the sentinel has cleaned up the title a bit - self.message = sentinel.text - else - errors.add(:message, I18n.t(:is_invalid)) unless sentinel.valid? - end + errors.add(:message, I18n.t(:is_invalid)) unless sentinel.valid? end before_create do diff --git a/app/models/site_setting.rb b/app/models/site_setting.rb index ecc82ad54..2e942b613 100755 --- a/app/models/site_setting.rb +++ b/app/models/site_setting.rb @@ -42,6 +42,9 @@ class SiteSetting < ActiveRecord::Base # cf. https://github.com/discourse/discourse/pull/462#issuecomment-14991562 client_setting(:category_colors, 'BF1E2E|F1592A|F7941D|9EB83B|3AB54A|12A89D|25AAE2|0E76BD|652D90|92278F|ED207B|8C6238|231F20|808281|B3B5B4|283890') + # auto-replace rules for title + setting(:title_prettify, true) + client_setting(:max_upload_size_kb, 1024) # settings only available server side diff --git a/app/models/topic.rb b/app/models/topic.rb index a4a4107c9..b90a6d6ab 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -3,6 +3,7 @@ require_dependency 'avatar_lookup' require_dependency 'topic_view' require_dependency 'rate_limiter' require_dependency 'text_sentinel' +require_dependency 'text_cleaner' class Topic < ActiveRecord::Base include ActionView::Helpers @@ -143,8 +144,8 @@ class Topic < ActiveRecord::Base sentinel = TextSentinel.title_sentinel(title) if sentinel.valid? - # It's possible the sentinel has cleaned up the title a bit - self.title = sentinel.text + # clean up the title + self.title = TextCleaner.clean_title(sentinel.text) else errors.add(:title, I18n.t(:is_invalid)) end diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 86feab679..bfa356d74 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -495,6 +495,8 @@ en: max_upload_size_kb: "The maximum size of files we allow users to upload in kb - be sure to configure the limit in nginx (client_max_body_size) / apache or proxy as well." max_similar_results: "How many similar topics to show a user while they are composing a new topic" + title_prettify: "Prevent common title typos and errors, including all caps, lowercase first character, multiple ! and ?, extra . at end, etc." + notification_types: mentioned: "%{display_username} mentioned you in %{link}" liked: "%{display_username} liked your post in %{link}" diff --git a/config/locales/server.fr.yml b/config/locales/server.fr.yml index 31ff0c53e..dc918d69c 100644 --- a/config/locales/server.fr.yml +++ b/config/locales/server.fr.yml @@ -502,6 +502,8 @@ fr: max_upload_size_kb: "La taille maximum des fichiers que les utilisateurs peuvent envoyer en Kb - assurez-vous de configurer également cette limite dans nginx (client_max_body_size) / apache ou votre proxy." max_similar_results: "Nombre de discussions similaires à afficher lorsqu'un utilisateur est en train de créer une nouvelle discussion" + title_prettify: "Corrige les coquilles les plus communes dans les titres (intégralité du titre en majuscule, première lettre en minuscule, de multiples ! et ?, un . inutile à la fin, etc.)" + notification_types: mentioned: "%{display_username} vous a mentionné dans %{link}" liked: "%{display_username} a aimé votre message dans %{link}" diff --git a/lib/text_cleaner.rb b/lib/text_cleaner.rb new file mode 100644 index 000000000..a09d584e4 --- /dev/null +++ b/lib/text_cleaner.rb @@ -0,0 +1,45 @@ +# +# Clean up a text +# +class TextCleaner + + def self.title_options + # cf. http://meta.discourse.org/t/should-we-have-auto-replace-rules-in-titles/5687 + { + deduplicate_exclamation_marks: SiteSetting.title_prettify, + deduplicate_question_marks: SiteSetting.title_prettify, + replace_all_upper_case: SiteSetting.title_prettify, + capitalize_first_letter: SiteSetting.title_prettify, + remove_unnecessary_period: SiteSetting.title_prettify, + remove_extraneous_space: SiteSetting.title_prettify && SiteSetting.default_locale == "en", + fixes_interior_spaces: true, + strip_whitespaces: true + } + end + + def self.clean_title(title) + TextCleaner.clean(title, TextCleaner.title_options) + end + + def self.clean(text, opts = {}) + # Replace !!!!! with a single ! + text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks] + # Replace ????? with a single ? + text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks] + # Replace all-caps text with regular case letters + text.tr!('A-Z', 'a-z') if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase) + # Capitalize first letter + text.sub!(/\A([a-z])/) { |first| first.capitalize } if opts[:capitalize_first_letter] + # Remove unnecessary period at the end + text.sub!(/([^.])\.(\s*)\z/, '\1\2') if opts[:remove_unnecessary_period] + # Remove extraneous space before the end punctuation + text.sub!(/\s+([!?]\s*)\z/, '\1') if opts[:remove_extraneous_space] + # Fixes interior spaces + text.gsub!(/ +/, ' ') if opts[:fixes_interior_spaces] + # Strip whitespaces + text.strip! if opts[:strip_whitespaces] + + text + end + +end diff --git a/lib/text_sentinel.rb b/lib/text_sentinel.rb index a07232aee..4a04edb9a 100644 --- a/lib/text_sentinel.rb +++ b/lib/text_sentinel.rb @@ -1,31 +1,27 @@ # -# Given a string, tell us whether or not is acceptable. Also, remove stuff we don't like -# such as leading / trailing space. +# Given a string, tell us whether or not is acceptable. # class TextSentinel attr_accessor :text + def initialize(text, opts=nil) + @opts = opts || {} + @text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '') if text.present? + end + def self.non_symbols_regexp /[\ -\/\[-\`\:-\@\{-\~]/m end - def initialize(text, opts=nil) - @opts = opts || {} - - if text.present? - @text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '') - @text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces] - @text.strip! if @opts[:strip] - end + def self.body_sentinel(text) + TextSentinel.new(text, min_entropy: SiteSetting.body_min_entropy) end def self.title_sentinel(text) TextSentinel.new(text, min_entropy: SiteSetting.title_min_entropy, - max_word_length: SiteSetting.max_word_length, - remove_interior_spaces: true, - strip: true) + max_word_length: SiteSetting.max_word_length) end # Entropy is a number of how many unique characters the string needs. @@ -35,7 +31,6 @@ class TextSentinel end def valid? - # Blank strings are not valid return false if @text.blank? || @text.strip.blank? @@ -47,12 +42,12 @@ class TextSentinel return false if non_symbols == 0 # Don't allow super long strings without spaces - return false if @opts[:max_word_length] && @text =~ /\w{#{@opts[:max_word_length]},}(\s|$)/ # We don't allow all upper case content in english return false if (@text =~ /[A-Z]+/) && (@text == @text.upcase) + # It is valid true end diff --git a/spec/components/text_cleaner_spec.rb b/spec/components/text_cleaner_spec.rb new file mode 100644 index 000000000..6e3a60789 --- /dev/null +++ b/spec/components/text_cleaner_spec.rb @@ -0,0 +1,193 @@ +require 'spec_helper' +require 'text_cleaner' + +describe TextCleaner do + + context "exclamation marks" do + + let(:duplicated_string) { "my precious!!!!" } + let(:deduplicated_string) { "my precious!" } + + it "ignores multiple ! by default" do + TextCleaner.clean(duplicated_string).should == duplicated_string + end + + it "deduplicates ! when enabled" do + TextCleaner.clean(duplicated_string, deduplicate_exclamation_marks: true).should == deduplicated_string + end + + end + + context "question marks" do + + let(:duplicated_string) { "please help me????" } + let(:deduplicated_string) { "please help me?" } + + it "ignores multiple ? by default" do + TextCleaner.clean(duplicated_string).should == duplicated_string + end + + it "deduplicates ? when enabled" do + TextCleaner.clean(duplicated_string, deduplicate_question_marks: true).should == deduplicated_string + end + + end + + context "all upper case text" do + + let(:all_caps) { "ENTIRE TEXT IS ALL CAPS" } + let(:almost_all_caps) { "ENTIRE TEXT iS ALL CAPS" } + let(:regular_case) { "entire text is all caps" } + + it "ignores all upper case text by default" do + TextCleaner.clean(all_caps).should == all_caps + end + + it "replaces all upper case text with regular case letters when enabled" do + TextCleaner.clean(all_caps, replace_all_upper_case: true).should == regular_case + end + + it "ignores almost all upper case text when enabled" do + TextCleaner.clean(almost_all_caps, replace_all_upper_case: true).should == almost_all_caps + end + + end + + context "first letter" do + + let(:lowercased) { "this is awesome" } + let(:capitalized) { "This is awesome" } + + it "ignores first letter case by default" do + TextCleaner.clean(lowercased).should == lowercased + TextCleaner.clean(capitalized).should == capitalized + end + + it "capitalizes first letter when enabled" do + TextCleaner.clean(lowercased, capitalize_first_letter: true).should == capitalized + TextCleaner.clean(capitalized, capitalize_first_letter: true).should == capitalized + end + + end + + context "period at the end" do + + let(:with_period) { "oops." } + let(:with_periods) { "oops..." } + let(:without_period) { "oops" } + + it "ignores unnecessary period at the end by default" do + TextCleaner.clean(with_period).should == with_period + end + + it "removes unnecessary period at the end when enabled" do + TextCleaner.clean(with_period, remove_unnecessary_period: true).should == without_period + end + + it "keeps ellipsis when enabled" do + TextCleaner.clean(with_periods, remove_unnecessary_period: true).should == with_periods + end + + it "keeps trailing whitespaces when enabled" do + TextCleaner.clean(with_periods + " ", remove_unnecessary_period: true).should == with_periods + " " + end + + end + + context "extraneous space" do + + let(:with_space_exclamation) { "oops !" } + let(:without_space_exclamation) { "oops!" } + let(:with_space_question) { "oops ?" } + let(:without_space_question) { "oops?" } + + it "ignores extraneous space before the end punctuation by default" do + TextCleaner.clean(with_space_exclamation).should == with_space_exclamation + TextCleaner.clean(with_space_question).should == with_space_question + end + + it "removes extraneous space before the end punctuation when enabled" do + TextCleaner.clean(with_space_exclamation, remove_extraneous_space: true).should == without_space_exclamation + TextCleaner.clean(with_space_question, remove_extraneous_space: true).should == without_space_question + end + + it "keep trailing whitespaces when enabled" do + TextCleaner.clean(with_space_exclamation + " ", remove_extraneous_space: true).should == without_space_exclamation + " " + TextCleaner.clean(with_space_question + " ", remove_extraneous_space: true).should == without_space_question + " " + end + + end + + context "interior spaces" do + + let(:spacey_string) { "hello there's weird spaces here." } + let(:unspacey_string) { "hello there's weird spaces here." } + + it "ignores interior spaces by default" do + TextCleaner.clean(spacey_string).should == spacey_string + end + + it "fixes interior spaces when enabled" do + TextCleaner.clean(spacey_string, fixes_interior_spaces: true).should == unspacey_string + end + + end + + context "leading and trailing whitespaces" do + + let(:spacey_string) { " \t test \n " } + let(:unspacey_string) { "test" } + + it "ignores leading and trailing whitespaces by default" do + TextCleaner.clean(spacey_string).should == spacey_string + end + + it "strips leading and trailing whitespaces when enabled" do + TextCleaner.clean(spacey_string, strip_whitespaces: true).should == unspacey_string + end + + end + + context "title" do + + it "fixes interior spaces" do + TextCleaner.clean_title("Hello there").should == "Hello there" + end + + it "strips leading and trailing whitespaces" do + TextCleaner.clean_title(" \t Hello there \n ").should == "Hello there" + end + + context "title_prettify site setting is enabled" do + + before { SiteSetting.title_prettify = true } + + it "deduplicates !" do + TextCleaner.clean_title("Hello there!!!!").should == "Hello there!" + end + + it "deduplicates ?" do + TextCleaner.clean_title("Hello there????").should == "Hello there?" + end + + it "replaces all upper case text with regular case letters" do + TextCleaner.clean_title("HELLO THERE").should == "Hello there" + end + + it "capitalizes first letter" do + TextCleaner.clean_title("hello there").should == "Hello there" + end + + it "removes unnecessary period at the end" do + TextCleaner.clean_title("Hello there.").should == "Hello there" + end + + it "removes extraneous space before the end punctuation" do + TextCleaner.clean_title("Hello there ?").should == "Hello there?" + end + + end + + end + +end diff --git a/spec/components/text_sentinel_spec.rb b/spec/components/text_sentinel_spec.rb index 8706c6381..cf9e211ef 100644 --- a/spec/components/text_sentinel_spec.rb +++ b/spec/components/text_sentinel_spec.rb @@ -5,10 +5,12 @@ require 'text_sentinel' describe TextSentinel do + it "allows utf-8 chars" do + TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛" + end context "entropy" do - it "returns 0 for an empty string" do TextSentinel.new("").entropy.should == 0 end @@ -35,50 +37,6 @@ describe TextSentinel do end - context "cleaning up" do - - it "allows utf-8 chars" do - TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛" - end - - context "interior spaces" do - - let(:spacey_string) { "hello there's weird spaces here." } - let(:unspacey_string) { "hello there's weird spaces here." } - - it "ignores intra spaces by default" do - TextSentinel.new(spacey_string).text.should == spacey_string - end - - it "fixes intra spaces when enabled" do - TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == unspacey_string - end - - it "fixes intra spaces in titles" do - TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string - end - - end - - context "stripping whitespace" do - let(:spacey_string) { " \t test \t " } - let(:unspacey_string) { "test" } - - it "does not strip leading and trailing whitespace by default" do - TextSentinel.new(spacey_string).text.should == spacey_string - end - - it "strips leading and trailing whitespace when enabled" do - TextSentinel.new(spacey_string, strip: true).text.should == unspacey_string - end - - it "strips leading and trailing whitespace in titles" do - TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string - end - end - - end - context "validity" do let(:valid_string) { "This is a cool topic about Discourse" } @@ -113,8 +71,6 @@ describe TextSentinel do TextSentinel.new("{{$!").should_not be_valid end - end - end diff --git a/spec/controllers/topics_controller_spec.rb b/spec/controllers/topics_controller_spec.rb index e9927e749..5abf9da82 100644 --- a/spec/controllers/topics_controller_spec.rb +++ b/spec/controllers/topics_controller_spec.rb @@ -407,12 +407,13 @@ describe TopicsController do it 'succeeds' do xhr :put, :update, topic_id: @topic.id, slug: @topic.title response.should be_success + ::JSON.parse(response.body)['basic_topic'].should be_present end it 'allows a change of title' do - xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'this is a new title for the topic' + xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'This is a new title for the topic' @topic.reload - @topic.title.should == 'this is a new title for the topic' + @topic.title.should == 'This is a new title for the topic' end it 'triggers a change of category' do diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb index db190f836..a7597cbd8 100644 --- a/spec/models/topic_spec.rb +++ b/spec/models/topic_spec.rb @@ -111,20 +111,20 @@ describe Topic do end context 'html in title' do - let(:topic_bold) { Fabricate(:topic, title: "topic with <b>bold</b> text in its title" ) } - let(:topic_image) { Fabricate(:topic, title: "topic with <img src='something'> image in its title" ) } - let(:topic_script) { Fabricate(:topic, title: "<script>alert('title')</script> is my topic title" ) } + let(:topic_bold) { Fabricate(:topic, title: "Topic with <b>bold</b> text in its title" ) } + let(:topic_image) { Fabricate(:topic, title: "Topic with <img src='something'> image in its title" ) } + let(:topic_script) { Fabricate(:topic, title: "Topic with <script>alert('title')</script> script in its title" ) } it "escapes script contents" do - topic_script.title.should == "is my topic title" + topic_script.title.should == "Topic with script in its title" end it "escapes bold contents" do - topic_bold.title.should == "topic with bold text in its title" + topic_bold.title.should == "Topic with bold text in its title" end - it "escapes bold contents" do - topic_image.title.should == "topic with image in its title" + it "escapes image contents" do + topic_image.title.should == "Topic with image in its title" end end