FIX: performance of CategoryDetailedSerializer and Category.update_stats on large databases

This commit is contained in:
Neil Lalonde 2014-03-19 10:14:05 -04:00
parent 42ea20b439
commit 3ea477b17d
2 changed files with 91 additions and 26 deletions

View file

@ -97,9 +97,53 @@ class Category < ActiveRecord::Base
end end
end end
def self.update_stats
topics_with_post_count = Topic
.select("topics.category_id, COUNT(*) topic_count, SUM(topics.posts_count) post_count")
.where("topics.id NOT IN (select cc.topic_id from categories cc WHERE topic_id IS NOT NULL)")
.group("topics.category_id")
.visible.to_sql
Category.exec_sql <<SQL
UPDATE categories c
SET topic_count = x.topic_count,
post_count = x.post_count
FROM (#{topics_with_post_count}) x
WHERE x.category_id = c.id AND
(c.topic_count <> x.topic_count OR c.post_count <> x.post_count)
SQL
# Yes, there are a lot of queries happening below.
# Performing a lot of queries is actually faster than using one big update
# statement with sub-selects on large databases with many categories,
# topics, and posts.
#
# The old method with the one query is here:
# https://github.com/discourse/discourse/blob/5f34a621b5416a53a2e79a145e927fca7d5471e8/app/models/category.rb
#
# If you refactor this, test performance on a large database.
Category.all.each do |c|
topics = c.topics.where(['topics.id <> ?', c.topic_id]).visible
c.topics_year = topics.created_since(1.year.ago).count
c.topics_month = topics.created_since(1.month.ago).count
c.topics_week = topics.created_since(1.week.ago).count
c.topics_day = topics.created_since(1.day.ago).count
posts = c.visible_posts
c.posts_year = posts.created_since(1.year.ago).count
c.posts_month = posts.created_since(1.month.ago).count
c.posts_week = posts.created_since(1.week.ago).count
c.posts_day = posts.created_since(1.day.ago).count
c.save if c.changed?
end
end
# Internal: Update category stats: # of topics and posts in past year, month, week for # Internal: Update category stats: # of topics and posts in past year, month, week for
# all categories. # all categories.
def self.update_stats def self.update_stats_OLD
topics = Topic topics = Topic
.select("COUNT(*) topic_count") .select("COUNT(*) topic_count")
.where("topics.category_id = categories.id") .where("topics.category_id = categories.id")
@ -115,6 +159,7 @@ class Category < ActiveRecord::Base
topics_year = topics.created_since(1.year.ago).to_sql topics_year = topics.created_since(1.year.ago).to_sql
topics_month = topics.created_since(1.month.ago).to_sql topics_month = topics.created_since(1.month.ago).to_sql
topics_week = topics.created_since(1.week.ago).to_sql topics_week = topics.created_since(1.week.ago).to_sql
topics_day = topics.created_since(1.day.ago).to_sql
Category.exec_sql <<SQL Category.exec_sql <<SQL
@ -138,14 +183,17 @@ SQL
posts_year = posts.created_since(1.year.ago).to_sql posts_year = posts.created_since(1.year.ago).to_sql
posts_month = posts.created_since(1.month.ago).to_sql posts_month = posts.created_since(1.month.ago).to_sql
posts_week = posts.created_since(1.week.ago).to_sql posts_week = posts.created_since(1.week.ago).to_sql
posts_day = posts.created_since(1.day.ago).to_sql
# TODO don't update unchanged data # TODO don't update unchanged data
Category.update_all("topics_year = (#{topics_year}), Category.update_all("topics_year = (#{topics_year}),
topics_month = (#{topics_month}), topics_month = (#{topics_month}),
topics_week = (#{topics_week}), topics_week = (#{topics_week}),
topics_day = (#{topics_day}),
posts_year = (#{posts_year}), posts_year = (#{posts_year}),
posts_month = (#{posts_month}), posts_month = (#{posts_month}),
posts_week = (#{posts_week})") posts_week = (#{posts_week}),
posts_day = (#{posts_day})")
end end
def visible_posts def visible_posts
@ -157,33 +205,33 @@ SQL
self.topic_id ? query.where(['topics.id <> ?', self.topic_id]) : query self.topic_id ? query.where(['topics.id <> ?', self.topic_id]) : query
end end
def topics_day # def topics_day
if val = $redis.get(topics_day_key) # if val = $redis.get(topics_day_key)
val.to_i # val.to_i
else # else
val = self.topics.where(['topics.id <> ?', self.topic_id]).created_since(1.day.ago).visible.count # val = self.topics.where(['topics.id <> ?', self.topic_id]).created_since(1.day.ago).visible.count
$redis.setex topics_day_key, 30.minutes.to_i, val # $redis.setex topics_day_key, 30.minutes.to_i, val
val # val
end # end
end # end
def topics_day_key # def topics_day_key
"topics_day:cat-#{self.id}" # "topics_day:cat-#{self.id}"
end # end
def posts_day # def posts_day
if val = $redis.get(posts_day_key) # if val = $redis.get(posts_day_key)
val.to_i # val.to_i
else # else
val = self.visible_posts.created_since(1.day.ago).count # val = self.visible_posts.created_since(1.day.ago).count
$redis.setex posts_day_key, 30.minutes.to_i, val # $redis.setex posts_day_key, 30.minutes.to_i, val
val # val
end # end
end # end
def posts_day_key # def posts_day_key
"posts_day:cat-#{self.id}" # "posts_day:cat-#{self.id}"
end # end
# Internal: Generate the text of post prompting to enter category # Internal: Generate the text of post prompting to enter category
# description. # description.

View file

@ -0,0 +1,17 @@
class AddCreatedAtIndexToPosts < ActiveRecord::Migration
def up
execute "CREATE INDEX idx_posts_created_at_topic_id ON posts(created_at, topic_id) WHERE deleted_at IS NULL"
add_column :categories, :topics_day, :integer, default: 0
add_column :categories, :posts_day, :integer, default: 0
execute "DROP INDEX index_topics_on_deleted_at_and_visible_and_archetype_and_id"
add_index :topics, [:deleted_at, :visible, :archetype, :category_id, :id], name: "idx_topics_front_page"
end
def down
execute "DROP INDEX idx_topics_front_page"
add_index :topics, [:deleted_at, :visible, :archetype, :id]
remove_column :categories, :posts_day
remove_column :categories, :topics_day
execute "DROP INDEX idx_posts_created_at_topic_id"
end
end