Merge pull request #302 from kirushik/better_search

Better search
This commit is contained in:
Robin Ward 2013-02-28 12:31:31 -08:00
commit 98d4f40d05
2 changed files with 53 additions and 22 deletions

View file

@ -19,10 +19,11 @@ module Search
u.username AS title,
u.email,
NULL AS color
FROM users AS u
JOIN users_search s on s.id = u.id
WHERE s.search_data @@ TO_TSQUERY('english', :query)
ORDER BY last_posted_at desc
FROM users AS u
JOIN users_search s on s.id = u.id
WHERE s.search_data @@ TO_TSQUERY(:locale, :query)
ORDER BY last_posted_at desc
LIMIT :limit
"
end
@ -36,14 +37,16 @@ module Search
FROM topics AS ft
JOIN posts AS p ON p.topic_id = ft.id AND p.post_number = 1
JOIN posts_search s on s.id = p.id
WHERE s.search_data @@ TO_TSQUERY('english', :query)
WHERE s.search_data @@ TO_TSQUERY(:locale, :query)
AND ft.deleted_at IS NULL
AND ft.visible
AND ft.archetype <> '#{Archetype.private_message}'
ORDER BY
TS_RANK_CD(TO_TSVECTOR('english', ft.title), TO_TSQUERY('english', :query)) desc,
TS_RANK_CD(search_data, TO_TSQUERY('english', :query)) desc,
bumped_at desc"
TS_RANK_CD(TO_TSVECTOR(:locale, ft.title), TO_TSQUERY(:locale, :query)) desc,
TS_RANK_CD(search_data, TO_TSQUERY(:locale, :query)) desc,
bumped_at desc
LIMIT :limit
"
end
@ -57,14 +60,16 @@ module Search
FROM topics AS ft
JOIN posts AS p ON p.topic_id = ft.id AND p.post_number <> 1
JOIN posts_search s on s.id = p.id
WHERE s.search_data @@ TO_TSQUERY('english', :query)
WHERE s.search_data @@ TO_TSQUERY(:locale, :query)
AND ft.deleted_at IS NULL and p.deleted_at IS NULL
AND ft.visible
AND ft.archetype <> '#{Archetype.private_message}'
ORDER BY
TS_RANK_CD(TO_TSVECTOR('english', ft.title), TO_TSQUERY('english', :query)) desc,
TS_RANK_CD(search_data, TO_TSQUERY('english', :query)) desc,
bumped_at desc"
TS_RANK_CD(TO_TSVECTOR(:locale, ft.title), TO_TSQUERY(:locale, :query)) desc,
TS_RANK_CD(search_data, TO_TSQUERY(:locale, :query)) desc,
bumped_at desc
LIMIT :limit
"
end
def self.category_query_sql
@ -76,15 +81,27 @@ module Search
c.color
FROM categories AS c
JOIN categories_search s on s.id = c.id
WHERE s.search_data @@ TO_TSQUERY('english', :query)
WHERE s.search_data @@ TO_TSQUERY(:locale, :query)
ORDER BY topics_month desc
LIMIT :limit
"
end
def self.current_locale_long
case I18n.locale # Currently-present in /conf/locales/* only, sorry :-( Add as needed
when :ru then 'russian'
when :fr then 'french'
when :nl then 'dutch'
when :sv then 'swedish'
else 'english'
end
end
def self.query(term, type_filter=nil)
return nil if term.blank?
sanitized_term = term.gsub(/[^0-9a-zA-Z_ ]/, '')
sanitized_term = PG::Connection.escape_string(term.gsub(/[:()&!]/,'')) # Instead of original term.gsub(/[^0-9a-zA-Z_ ]/, '')
# We are stripping only symbols taking place in FTS and simply sanitizing the rest.
# really short terms are totally pointless
return nil if sanitized_term.blank? || sanitized_term.length < self.min_search_term_length
@ -94,14 +111,13 @@ module Search
if type_filter.present?
raise Discourse::InvalidAccess.new("invalid type filter") unless Search.facets.include?(type_filter)
sql = Search.send("#{type_filter}_query_sql") << " LIMIT #{Search.per_facet * Search.facets.size}"
db_result = ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "))
sql = Search.send("#{type_filter}_query_sql")
db_result = ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "), locale: current_locale_long, limit: Search.per_facet * Search.facets.size)
else
db_result = []
[user_query_sql, category_query_sql, topic_query_sql].each do |sql|
sql << " LIMIT " << (Search.per_facet + 1).to_s
db_result += ActiveRecord::Base.exec_sql(sql , query: terms.join(" & ")).to_a
db_result += ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "),locale: current_locale_long, limit: (Search.per_facet + 1)).to_a
end
end
@ -118,8 +134,8 @@ module Search
end
if expected_topics > 0
tmp = ActiveRecord::Base.exec_sql "#{post_query_sql} limit :per_facet",
query: terms.join(" & "), per_facet: expected_topics * 3
tmp = ActiveRecord::Base.exec_sql post_query_sql,
query: terms.join(" & "), locale: current_locale_long, limit: expected_topics * 3
topic_ids = Set.new db_result.map{|r| r["id"]}

View file

@ -1,3 +1,5 @@
# encoding: utf-8
require 'spec_helper'
require 'search'
@ -70,8 +72,7 @@ describe Search do
end
it 'escapes non alphanumeric characters' do
ActiveRecord::Base.expects(:exec_sql).never
Search.query(':!$').should be_blank
Search.query(':!$);}]>@\#\"\'').should be_blank # There are at least three levels of sanitation for Search.query!
end
it 'works when given two terms with spaces' do
@ -123,6 +124,20 @@ describe Search do
end
context 'cyrillic topic' do
let!(:cyrillic_topic) { Fabricate(:topic) do
user
title { sequence(:title) { |i| "Тестовая запись #{i}" } }
end
}
let!(:post) {Fabricate(:post, topic: cyrillic_topic, user: cyrillic_topic.user)}
let(:result) { first_of_type(Search.query('запись'), 'topic') }
it 'finds something when given cyrillic query' do
result.should be_present
end
end
context 'categories' do
let!(:category) { Fabricate(:category) }