Merge pull request #172 from jeremybanks/master

Do not strip leading and trailing whitespace from raw posts
This commit is contained in:
Robin Ward 2013-02-19 08:15:39 -08:00
commit ae9bee2a91
4 changed files with 45 additions and 16 deletions

View file

@ -29,7 +29,7 @@ class Post < ActiveRecord::Base
has_many :post_actions has_many :post_actions
validates_presence_of :raw, :user_id, :topic_id validates_presence_of :raw, :user_id, :topic_id
validates :raw, length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length} validates :raw, stripped_length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length}
validate :raw_quality validate :raw_quality
validate :max_mention_validator validate :max_mention_validator
validate :max_images_validator validate :max_images_validator
@ -57,10 +57,6 @@ class Post < ActiveRecord::Base
TopicUser.auto_track(self.user_id, self.topic_id, TopicUser::NotificationReasons::CREATED_POST) TopicUser.auto_track(self.user_id, self.topic_id, TopicUser::NotificationReasons::CREATED_POST)
end end
before_validation do
self.raw.strip! if self.raw.present?
end
def raw_quality def raw_quality
sentinel = TextSentinel.new(self.raw, min_entropy: SiteSetting.body_min_entropy) sentinel = TextSentinel.new(self.raw, min_entropy: SiteSetting.body_min_entropy)
@ -212,7 +208,7 @@ class Post < ActiveRecord::Base
# We only filter quotes when there is exactly 1 # We only filter quotes when there is exactly 1
return cooked unless (quote_count == 1) return cooked unless (quote_count == 1)
parent_raw = parent_post.raw.sub(/\[quote.+\/quote\]/m, '').strip parent_raw = parent_post.raw.sub(/\[quote.+\/quote\]/m, '')
if raw[parent_raw] or (parent_raw.size < SHORT_POST_CHARS) if raw[parent_raw] or (parent_raw.size < SHORT_POST_CHARS)
return cooked.sub(/\<aside.+\<\/aside\>/m, '') return cooked.sub(/\<aside.+\<\/aside\>/m, '')

View file

@ -0,0 +1,14 @@
# Validates an attribute's length as measured after leading and trailing
# whitespace is stripped, so surrounding padding does not count toward the
# limits. The attribute value itself is not modified.
#
# Options:
#   :in      - Range of permitted stripped lengths.
#   :message - optional message used in place of the I18n defaults.
class StrippedLengthValidator < ActiveModel::EachValidator
  # Adds an error on +attribute+ of +record+ when +value+ is nil, or when the
  # length of +value.strip+ falls outside options[:in].
  def validate_each(record, attribute, value)
    # A nil value cannot be measured; report it as blank and stop.
    if value.nil?
      record.errors.add attribute, (options[:message] || I18n.t('errors.messages.blank'))
      return
    end

    range = options[:in]
    minimum = range.begin
    # An exclusive range (a...b) does not include b, so the largest permitted
    # length is b - 1; an inclusive range (a..b) permits b itself.
    maximum = range.exclude_end? ? range.end - 1 : range.end
    stripped_length = value.strip.length

    if stripped_length < minimum
      record.errors.add attribute, (options[:message] || I18n.t('errors.messages.too_short', count: minimum))
    end

    if stripped_length > maximum
      record.errors.add attribute, (options[:message] || I18n.t('errors.messages.too_long', count: maximum))
    end
  end
end

View file

@ -15,8 +15,8 @@ class TextSentinel
if text.present? if text.present?
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '') @text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
@text.strip!
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces] @text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
@text.strip! if @opts[:strip]
end end
end end
@ -24,19 +24,20 @@ class TextSentinel
TextSentinel.new(text, TextSentinel.new(text,
min_entropy: SiteSetting.title_min_entropy, min_entropy: SiteSetting.title_min_entropy,
max_word_length: SiteSetting.max_word_length, max_word_length: SiteSetting.max_word_length,
remove_interior_spaces: true) remove_interior_spaces: true,
strip: true)
end end
# Entropy is a number of how many unique characters the string needs. # Entropy is a number of how many unique characters the string needs.
def entropy def entropy
return 0 if @text.blank? return 0 if @text.blank?
@entropy ||= @text.each_char.to_a.uniq.size @entropy ||= @text.strip.each_char.to_a.uniq.size
end end
def valid? def valid?
# Blank strings are not valid # Blank strings are not valid
return false if @text.blank? return false if @text.blank? || @text.strip.blank?
# Entropy check if required # Entropy check if required
return false if @opts[:min_entropy].present? and (entropy < @opts[:min_entropy]) return false if @opts[:min_entropy].present? and (entropy < @opts[:min_entropy])

View file

@ -37,10 +37,6 @@ describe TextSentinel do
context "cleaning up" do context "cleaning up" do
it "strips leading or trailing whitespace" do
TextSentinel.new(" \t test \t ").text.should == "test"
end
it "allows utf-8 chars" do it "allows utf-8 chars" do
TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛" TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛"
end end
@ -48,15 +44,37 @@ describe TextSentinel do
context "interior spaces" do context "interior spaces" do
let(:spacey_string) { "hello there's weird spaces here." } let(:spacey_string) { "hello there's weird spaces here." }
let(:unspacey_string) { "hello there's weird spaces here." }
it "ignores intra spaces by default" do it "ignores intra spaces by default" do
TextSentinel.new(spacey_string).text.should == spacey_string TextSentinel.new(spacey_string).text.should == spacey_string
end end
it "fixes intra spaces when enabled" do it "fixes intra spaces when enabled" do
TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == "hello there's weird spaces here." TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == unspacey_string
end end
it "fixes intra spaces in titles" do
TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string
end
end
context "stripping whitespace" do
let(:spacey_string) { " \t test \t " }
let(:unspacey_string) { "test" }
it "does not strip leading and trailing whitespace by default" do
TextSentinel.new(spacey_string).text.should == spacey_string
end
it "strips leading and trailing whitespace when enabled" do
TextSentinel.new(spacey_string, strip: true).text.should == unspacey_string
end
it "strips leading and trailing whitespace in titles" do
TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string
end
end end
end end