PERF: make score calculator cheaper when site has long topics

This commit is contained in:
Sam 2016-07-22 09:48:26 +10:00
parent 440558517f
commit 12dc511fea
3 changed files with 61 additions and 43 deletions

View file

@ -7,12 +7,22 @@ module Jobs
class PeriodicalUpdates < Jobs::Scheduled class PeriodicalUpdates < Jobs::Scheduled
every 15.minutes every 15.minutes
# Reports whether the current run should also rescore long topics.
#
# Every call advances a counter held in a class-level instance variable.
# The answer is true on calls 1, 25, 49, ... — with the job scheduled
# every 15 minutes, that works out to once every 6 hours.
def self.should_update_long_topics?
  # nil.to_i is 0, so the very first call starts the counter at 1
  @call_count = @call_count.to_i + 1
  @call_count.modulo(24) == 1
end
def execute(args) def execute(args)
# Feature topics in categories # Feature topics in categories
CategoryFeaturedTopic.feature_topics CategoryFeaturedTopic.feature_topics
# Update the scores of posts # Update the scores of posts
ScoreCalculator.new.calculate(1.day.ago) args = {min_topic_age: 1.day.ago}
args[:max_topic_length] = 500 unless self.class.should_update_long_topics?
ScoreCalculator.new.calculate(args)
# Automatically close stuff that we missed # Automatically close stuff that we missed
Topic.auto_close Topic.auto_close

View file

@ -16,48 +16,44 @@ class ScoreCalculator
end end
# Calculate the score for all posts based on the weightings # Calculate the score for all posts based on the weightings
def calculate(min_topic_age=nil) def calculate(opts=nil)
update_posts_score(opts)
update_posts_score(min_topic_age) update_posts_rank(opts)
update_topics_rank(opts)
update_posts_rank(min_topic_age) update_topics_percent_rank(opts)
update_topics_rank(min_topic_age)
update_topics_percent_rank(min_topic_age)
end end
private private
def update_posts_score(min_topic_age) def update_posts_score(opts)
limit = 20000 limit = 20000
components = [] components = []
@weightings.each_key { |k| components << "COALESCE(#{k}, 0) * :#{k}" } @weightings.each_key { |k| components << "COALESCE(posts.#{k}, 0) * :#{k}" }
components = components.join(" + ") components = components.join(" + ")
builder = SqlBuilder.new <<SQL builder = SqlBuilder.new <<SQL
UPDATE posts p UPDATE posts p
SET score = x.score SET score = x.score
FROM ( FROM (
SELECT id, #{components} as score FROM posts SELECT posts.id, #{components} as score FROM posts
join topics on posts.topic_id = topics.id
/*where*/ /*where*/
limit #{limit} limit #{limit}
) AS x ) AS x
WHERE x.id = p.id WHERE x.id = p.id
SQL SQL
builder.where("score IS NULL OR score <> #{components}", @weightings) builder.where("posts.score IS NULL OR posts.score <> #{components}", @weightings)
filter_topics(builder, min_topic_age) filter_topics(builder, opts)
while builder.exec.cmd_tuples == limit while builder.exec.cmd_tuples == limit
end end
end end
def update_posts_rank(min_topic_age) def update_posts_rank(opts)
limit = 20000 limit = 20000
builder = SqlBuilder.new <<SQL builder = SqlBuilder.new <<SQL
@ -71,6 +67,7 @@ FROM (
OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank
FROM posts FROM posts
) Y ON Y.id = posts.id ) Y ON Y.id = posts.id
JOIN topics ON posts.topic_id = topics.id
/*where*/ /*where*/
LIMIT #{limit} LIMIT #{limit}
) AS X ) AS X
@ -79,17 +76,17 @@ SQL
builder.where("posts.percent_rank IS NULL OR Y.percent_rank <> posts.percent_rank") builder.where("posts.percent_rank IS NULL OR Y.percent_rank <> posts.percent_rank")
filter_topics(builder, min_topic_age) filter_topics(builder, opts)
while builder.exec.cmd_tuples == limit while builder.exec.cmd_tuples == limit
end end
end end
def update_topics_rank(min_topic_age) def update_topics_rank(opts)
builder = SqlBuilder.new("UPDATE topics AS t builder = SqlBuilder.new("UPDATE topics AS topics
SET has_summary = (t.like_count >= :likes_required AND SET has_summary = (topics.like_count >= :likes_required AND
t.posts_count >= :posts_required AND topics.posts_count >= :posts_required AND
x.max_score >= :score_required), x.max_score >= :score_required),
score = x.avg_score score = x.avg_score
FROM (SELECT p.topic_id, FROM (SELECT p.topic_id,
@ -99,12 +96,12 @@ SQL
GROUP BY p.topic_id) AS x GROUP BY p.topic_id) AS x
/*where*/") /*where*/")
builder.where("x.topic_id = t.id AND builder.where("x.topic_id = topics.id AND
( (
(t.score <> x.avg_score OR t.score IS NULL) OR (topics.score <> x.avg_score OR topics.score IS NULL) OR
(t.has_summary IS NULL OR t.has_summary <> ( (topics.has_summary IS NULL OR topics.has_summary <> (
t.like_count >= :likes_required AND topics.like_count >= :likes_required AND
t.posts_count >= :posts_required AND topics.posts_count >= :posts_required AND
x.max_score >= :score_required x.max_score >= :score_required
)) ))
) )
@ -113,15 +110,13 @@ SQL
posts_required: SiteSetting.summary_posts_required, posts_required: SiteSetting.summary_posts_required,
score_required: SiteSetting.summary_score_threshold) score_required: SiteSetting.summary_score_threshold)
if min_topic_age
builder.where("t.bumped_at > :bumped_at ", filter_topics(builder, opts)
bumped_at: min_topic_age)
end
builder.exec builder.exec
end end
def update_topics_percent_rank(min_topic_age) def update_topics_percent_rank(opts)
builder = SqlBuilder.new("UPDATE topics SET percent_rank = x.percent_rank builder = SqlBuilder.new("UPDATE topics SET percent_rank = x.percent_rank
FROM (SELECT id, percent_rank() FROM (SELECT id, percent_rank()
@ -131,22 +126,22 @@ SQL
builder.where("x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)") builder.where("x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
filter_topics(builder, opts)
if min_topic_age
builder.where("topics.bumped_at > :bumped_at ",
bumped_at: min_topic_age)
end
builder.exec builder.exec
end end
def filter_topics(builder, min_topic_age) def filter_topics(builder, opts)
if min_topic_age return builder unless opts
builder.where('posts.topic_id IN
(SELECT id FROM topics WHERE bumped_at > :bumped_at)', if min_topic_age = opts[:min_topic_age]
bumped_at: min_topic_age) builder.where("topics.bumped_at > :bumped_at ",
bumped_at: min_topic_age)
end
if max_topic_length = opts[:max_topic_length]
builder.where("topics.posts_count < :max_topic_length",
max_topic_length: max_topic_length)
end end
builder builder

View file

@ -49,6 +49,19 @@ describe ScoreCalculator do
expect(topic.has_summary).to eq(false) expect(topic.has_summary).to eq(false)
end end
# The topic is bumped a month ago and calculate is limited to topics newer
# than 20 days, so the pre-set has_summary flag is expected to stay true.
# NOTE(review): `topic` is not reloaded after calculate; the expectation
# presumably reads the in-memory value written by update_columns rather
# than the database row — confirm whether a `topic.reload` is needed.
it "respects the min_topic_age" do
topic.update_columns(has_summary: true, bumped_at: 1.month.ago)
ScoreCalculator.new(reads: 3).calculate(min_topic_age: 20.days.ago)
expect(topic.has_summary).to eq(true)
end
# Adds a post to the topic, pre-sets has_summary, and runs calculate with
# max_topic_length: 1; the flag is expected to stay true.
# NOTE(review): this presumably relies on the topic's posts_count being at
# least 1 so the topic is excluded — verify against the fixtures.
# NOTE(review): `topic` is not reloaded after calculate, so the expectation
# may only be checking the in-memory value — confirm whether a
# `topic.reload` is needed.
it "respects the max_topic_length" do
Fabricate(:post, topic_id: topic.id)
topic.update_columns(has_summary: true)
ScoreCalculator.new(reads: 3).calculate(max_topic_length: 1)
expect(topic.has_summary).to eq(true)
end
it "won't update the site settings when the site settings don't match" do it "won't update the site settings when the site settings don't match" do
SiteSetting.expects(:summary_likes_required).returns(0) SiteSetting.expects(:summary_likes_required).returns(0)
SiteSetting.expects(:summary_posts_required).returns(1) SiteSetting.expects(:summary_posts_required).returns(1)