PERF: calculate scores for topics/posts faster

Only look at topics that changed in the last day for the 15-minute schedule
Do a full recalc weekly, just in case
This commit is contained in:
Sam 2014-02-27 11:45:20 +11:00
parent e4909c017c
commit 9551f4aeea
7 changed files with 139 additions and 36 deletions

View file

@ -9,8 +9,8 @@ module Jobs
def execute(args) def execute(args)
# Update the average times # Update the average times
Post.calculate_avg_time Post.calculate_avg_time(1.day.ago)
Topic.calculate_avg_time Topic.calculate_avg_time(1.day.ago)
# Feature topics in categories # Feature topics in categories
CategoryFeaturedTopic.feature_topics CategoryFeaturedTopic.feature_topics
@ -19,8 +19,7 @@ module Jobs
UserStat.update_view_counts UserStat.update_view_counts
# Update the scores of posts # Update the scores of posts
ScoreCalculator.new.calculate ScoreCalculator.new.calculate(1.day.ago)
# Automatically close stuff that we missed # Automatically close stuff that we missed
Topic.auto_close Topic.auto_close

View file

@ -0,0 +1,16 @@
require_dependency 'score_calculator'
module Jobs
# Weekly scheduled job that performs a full recalculation of average read
# times and scores. Every call below is made without an age cutoff argument,
# so no "recently bumped" filter is requested.
# NOTE(review): presumably a safety net for rows that the more frequent,
# age-limited recalculation skips - confirm against the periodical job.
class Weekly < Jobs::Scheduled
# Run once per week.
every 1.week
# Runs the three recalculations in sequence.
# args - job arguments; not used by this method.
def execute(args)
# Post averages are refreshed before topic averages; the topic query
# aggregates posts.avg_time, so this order should be kept.
Post.calculate_avg_time
Topic.calculate_avg_time
# Recalculate scores with no age filter.
ScoreCalculator.new.calculate
end
end
end

View file

@ -311,9 +311,9 @@ class Post < ActiveRecord::Base
# This calculates the geometric mean of the post timings and stores it along with # This calculates the geometric mean of the post timings and stores it along with
# each post. # each post.
def self.calculate_avg_time def self.calculate_avg_time(min_topic_age=nil)
retry_lock_error do retry_lock_error do
exec_sql("UPDATE posts builder = SqlBuilder.new("UPDATE posts
SET avg_time = (x.gmean / 1000) SET avg_time = (x.gmean / 1000)
FROM (SELECT post_timings.topic_id, FROM (SELECT post_timings.topic_id,
post_timings.post_number, post_timings.post_number,
@ -324,9 +324,18 @@ class Post < ActiveRecord::Base
AND p2.topic_id = post_timings.topic_id AND p2.topic_id = post_timings.topic_id
AND p2.user_id <> post_timings.user_id AND p2.user_id <> post_timings.user_id
GROUP BY post_timings.topic_id, post_timings.post_number) AS x GROUP BY post_timings.topic_id, post_timings.post_number) AS x
WHERE x.topic_id = posts.topic_id /*where*/")
builder.where("x.topic_id = posts.topic_id
AND x.post_number = posts.post_number AND x.post_number = posts.post_number
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)") AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
if min_topic_age
builder.where("posts.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
bumped_at: min_topic_age)
end
builder.exec
end end
end end

View file

@ -362,15 +362,25 @@ class Topic < ActiveRecord::Base
end end
# This calculates the geometric mean of the posts and stores it with the topic # This calculates the geometric mean of the posts and stores it with the topic
def self.calculate_avg_time def self.calculate_avg_time(min_topic_age=nil)
exec_sql("UPDATE topics builder = SqlBuilder.new("UPDATE topics
SET avg_time = x.gmean SET avg_time = x.gmean
FROM (SELECT topic_id, FROM (SELECT topic_id,
round(exp(avg(ln(avg_time)))) AS gmean round(exp(avg(ln(avg_time)))) AS gmean
FROM posts FROM posts
WHERE avg_time > 0 AND avg_time IS NOT NULL WHERE avg_time > 0 AND avg_time IS NOT NULL
GROUP BY topic_id) AS x GROUP BY topic_id) AS x
WHERE x.topic_id = topics.id AND (topics.avg_time <> x.gmean OR topics.avg_time IS NULL)") /*where*/")
builder.where("x.topic_id = topics.id AND
(topics.avg_time <> x.gmean OR topics.avg_time IS NULL)")
if min_topic_age
builder.where("topics.bumped_at > :bumped_at",
bumped_at: min_topic_age)
end
builder.exec
end end
def changed_to_category(cat) def changed_to_category(cat)

View file

@ -16,22 +16,60 @@ class ScoreCalculator
end end
# Calculate the score for all posts based on the weightings # Calculate the score for all posts based on the weightings
def calculate def calculate(min_topic_age=nil)
# First update the scores of the posts update_posts_score(min_topic_age)
exec_sql(post_score_sql, @weightings)
# Update the percent rankings of the posts update_posts_rank(min_topic_age)
exec_sql("UPDATE posts SET percent_rank = x.percent_rank
update_topics_rank(min_topic_age)
update_topics_percent_rank(min_topic_age)
end
private
def update_posts_score(min_topic_age)
components = []
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
components = components.join(" + ")
builder = SqlBuilder.new(
"UPDATE posts SET score = x.score
FROM (SELECT id, #{components} as score FROM posts) AS x
/*where*/"
)
builder.where("x.id = posts.id
AND (posts.score IS NULL OR x.score <> posts.score)", @weightings)
filter_topics(builder, min_topic_age)
builder.exec
end
def update_posts_rank(min_topic_age)
builder = SqlBuilder.new("UPDATE posts SET percent_rank = x.percent_rank
FROM (SELECT id, percent_rank() FROM (SELECT id, percent_rank()
OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank
FROM posts) AS x FROM posts) AS x
WHERE x.id = posts.id AND /*where*/")
builder.where("x.id = posts.id AND
(posts.percent_rank IS NULL OR x.percent_rank <> posts.percent_rank)") (posts.percent_rank IS NULL OR x.percent_rank <> posts.percent_rank)")
# Update the topics filter_topics(builder, min_topic_age)
exec_sql "UPDATE topics AS t
builder.exec
end
def update_topics_rank(min_topic_age)
builder = SqlBuilder.new("UPDATE topics AS t
SET has_summary = (t.like_count >= :likes_required AND SET has_summary = (t.like_count >= :likes_required AND
t.posts_count >= :posts_required AND t.posts_count >= :posts_required AND
x.max_score >= :score_required), x.max_score >= :score_required),
@ -41,7 +79,9 @@ class ScoreCalculator
AVG(p.score) AS avg_score AVG(p.score) AS avg_score
FROM posts AS p FROM posts AS p
GROUP BY p.topic_id) AS x GROUP BY p.topic_id) AS x
WHERE x.topic_id = t.id AND /*where*/")
builder.where("x.topic_id = t.id AND
( (
(t.score <> x.avg_score OR t.score IS NULL) OR (t.score <> x.avg_score OR t.score IS NULL) OR
(t.has_summary IS NULL OR t.has_summary <> ( (t.has_summary IS NULL OR t.has_summary <> (
@ -53,31 +93,45 @@ class ScoreCalculator
", ",
likes_required: SiteSetting.summary_likes_required, likes_required: SiteSetting.summary_likes_required,
posts_required: SiteSetting.summary_posts_required, posts_required: SiteSetting.summary_posts_required,
score_required: SiteSetting.summary_score_threshold score_required: SiteSetting.summary_score_threshold)
# Update percentage rank of topics if min_topic_age
exec_sql("UPDATE topics SET percent_rank = x.percent_rank builder.where("t.bumped_at > :bumped_at ",
bumped_at: min_topic_age)
end
builder.exec
end
def update_topics_percent_rank(min_topic_age)
builder = SqlBuilder.new("UPDATE topics SET percent_rank = x.percent_rank
FROM (SELECT id, percent_rank() FROM (SELECT id, percent_rank()
OVER (ORDER BY SCORE DESC) as percent_rank OVER (ORDER BY SCORE DESC) as percent_rank
FROM topics) AS x FROM topics) AS x
WHERE x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)") /*where*/")
builder.where("x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
if min_topic_age
builder.where("topics.bumped_at > :bumped_at ",
bumped_at: min_topic_age)
end
builder.exec
end end
private def filter_topics(builder, min_topic_age)
if min_topic_age
def exec_sql(sql, params=nil) builder.where('posts.topic_id IN
ActiveRecord::Base.exec_sql(sql, params) (SELECT id FROM topics WHERE bumped_at > :bumped_at)',
bumped_at: min_topic_age)
end end
# Generate a SQL statement to update the scores of all posts builder
def post_score_sql end
components = []
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
components = components.join(" + ")
"UPDATE posts SET score = x.score
FROM (SELECT id, #{components} as score FROM posts) AS x
WHERE x.id = posts.id AND (posts.score IS NULL OR x.score <> posts.score)"
end
end end

View file

@ -787,4 +787,12 @@ describe Post do
end end
end end
# Smoke test for Post.calculate_avg_time: calls it once without arguments and
# once with a one-day cutoff. There are no assertions; the example passes as
# long as neither call raises.
describe "calculate_avg_time" do
it "should not crash" do
Post.calculate_avg_time
Post.calculate_avg_time(1.day.ago)
end
end
end end

View file

@ -1318,4 +1318,11 @@ describe Topic do
end end
end end
# Smoke test for Topic.calculate_avg_time: calls it once without arguments and
# once with a one-day cutoff. There are no assertions; the example passes as
# long as neither call raises.
describe "calculate_avg_time" do
it "does not explode" do
Topic.calculate_avg_time
Topic.calculate_avg_time(1.day.ago)
end
end
end end