mirror of
https://github.com/codeninjasllc/discourse.git
synced 2024-11-23 15:48:43 -05:00
PERF: calculate scores for topics/posts faster
Only look at topics that changed in last day for 15 minuted schedule Do a full recalc weekly, just in case
This commit is contained in:
parent
e4909c017c
commit
9551f4aeea
7 changed files with 139 additions and 36 deletions
|
@ -9,8 +9,8 @@ module Jobs
|
||||||
|
|
||||||
def execute(args)
|
def execute(args)
|
||||||
# Update the average times
|
# Update the average times
|
||||||
Post.calculate_avg_time
|
Post.calculate_avg_time(1.day.ago)
|
||||||
Topic.calculate_avg_time
|
Topic.calculate_avg_time(1.day.ago)
|
||||||
|
|
||||||
# Feature topics in categories
|
# Feature topics in categories
|
||||||
CategoryFeaturedTopic.feature_topics
|
CategoryFeaturedTopic.feature_topics
|
||||||
|
@ -19,8 +19,7 @@ module Jobs
|
||||||
UserStat.update_view_counts
|
UserStat.update_view_counts
|
||||||
|
|
||||||
# Update the scores of posts
|
# Update the scores of posts
|
||||||
ScoreCalculator.new.calculate
|
ScoreCalculator.new.calculate(1.day.ago)
|
||||||
|
|
||||||
|
|
||||||
# Automatically close stuff that we missed
|
# Automatically close stuff that we missed
|
||||||
Topic.auto_close
|
Topic.auto_close
|
||||||
|
|
16
app/jobs/scheduled/weekly.rb
Normal file
16
app/jobs/scheduled/weekly.rb
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
require_dependency 'score_calculator'
|
||||||
|
|
||||||
|
module Jobs
|
||||||
|
|
||||||
|
# This job will run on a regular basis to update statistics and denormalized data.
|
||||||
|
# If it does not run, the site will not function properly.
|
||||||
|
class Weekly < Jobs::Scheduled
|
||||||
|
every 1.week
|
||||||
|
|
||||||
|
def execute(args)
|
||||||
|
Post.calculate_avg_time
|
||||||
|
Topic.calculate_avg_time
|
||||||
|
ScoreCalculator.new.calculate
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -311,9 +311,9 @@ class Post < ActiveRecord::Base
|
||||||
|
|
||||||
# This calculates the geometric mean of the post timings and stores it along with
|
# This calculates the geometric mean of the post timings and stores it along with
|
||||||
# each post.
|
# each post.
|
||||||
def self.calculate_avg_time
|
def self.calculate_avg_time(min_topic_age=nil)
|
||||||
retry_lock_error do
|
retry_lock_error do
|
||||||
exec_sql("UPDATE posts
|
builder = SqlBuilder.new("UPDATE posts
|
||||||
SET avg_time = (x.gmean / 1000)
|
SET avg_time = (x.gmean / 1000)
|
||||||
FROM (SELECT post_timings.topic_id,
|
FROM (SELECT post_timings.topic_id,
|
||||||
post_timings.post_number,
|
post_timings.post_number,
|
||||||
|
@ -324,9 +324,18 @@ class Post < ActiveRecord::Base
|
||||||
AND p2.topic_id = post_timings.topic_id
|
AND p2.topic_id = post_timings.topic_id
|
||||||
AND p2.user_id <> post_timings.user_id
|
AND p2.user_id <> post_timings.user_id
|
||||||
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
|
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
|
||||||
WHERE x.topic_id = posts.topic_id
|
/*where*/")
|
||||||
|
|
||||||
|
builder.where("x.topic_id = posts.topic_id
|
||||||
AND x.post_number = posts.post_number
|
AND x.post_number = posts.post_number
|
||||||
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
|
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
|
||||||
|
|
||||||
|
if min_topic_age
|
||||||
|
builder.where("posts.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
|
||||||
|
bumped_at: min_topic_age)
|
||||||
|
end
|
||||||
|
|
||||||
|
builder.exec
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -362,15 +362,25 @@ class Topic < ActiveRecord::Base
|
||||||
end
|
end
|
||||||
|
|
||||||
# This calculates the geometric mean of the posts and stores it with the topic
|
# This calculates the geometric mean of the posts and stores it with the topic
|
||||||
def self.calculate_avg_time
|
def self.calculate_avg_time(min_topic_age=nil)
|
||||||
exec_sql("UPDATE topics
|
builder = SqlBuilder.new("UPDATE topics
|
||||||
SET avg_time = x.gmean
|
SET avg_time = x.gmean
|
||||||
FROM (SELECT topic_id,
|
FROM (SELECT topic_id,
|
||||||
round(exp(avg(ln(avg_time)))) AS gmean
|
round(exp(avg(ln(avg_time)))) AS gmean
|
||||||
FROM posts
|
FROM posts
|
||||||
WHERE avg_time > 0 AND avg_time IS NOT NULL
|
WHERE avg_time > 0 AND avg_time IS NOT NULL
|
||||||
GROUP BY topic_id) AS x
|
GROUP BY topic_id) AS x
|
||||||
WHERE x.topic_id = topics.id AND (topics.avg_time <> x.gmean OR topics.avg_time IS NULL)")
|
/*where*/")
|
||||||
|
|
||||||
|
builder.where("x.topic_id = topics.id AND
|
||||||
|
(topics.avg_time <> x.gmean OR topics.avg_time IS NULL)")
|
||||||
|
|
||||||
|
if min_topic_age
|
||||||
|
builder.where("topics.bumped_at > :bumped_at",
|
||||||
|
bumped_at: min_topic_age)
|
||||||
|
end
|
||||||
|
|
||||||
|
builder.exec
|
||||||
end
|
end
|
||||||
|
|
||||||
def changed_to_category(cat)
|
def changed_to_category(cat)
|
||||||
|
|
|
@ -16,22 +16,60 @@ class ScoreCalculator
|
||||||
end
|
end
|
||||||
|
|
||||||
# Calculate the score for all posts based on the weightings
|
# Calculate the score for all posts based on the weightings
|
||||||
def calculate
|
def calculate(min_topic_age=nil)
|
||||||
|
|
||||||
# First update the scores of the posts
|
update_posts_score(min_topic_age)
|
||||||
exec_sql(post_score_sql, @weightings)
|
|
||||||
|
|
||||||
# Update the percent rankings of the posts
|
update_posts_rank(min_topic_age)
|
||||||
exec_sql("UPDATE posts SET percent_rank = x.percent_rank
|
|
||||||
|
update_topics_rank(min_topic_age)
|
||||||
|
|
||||||
|
update_topics_percent_rank(min_topic_age)
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def update_posts_score(min_topic_age)
|
||||||
|
components = []
|
||||||
|
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
|
||||||
|
components = components.join(" + ")
|
||||||
|
|
||||||
|
builder = SqlBuilder.new(
|
||||||
|
"UPDATE posts SET score = x.score
|
||||||
|
FROM (SELECT id, #{components} as score FROM posts) AS x
|
||||||
|
/*where*/"
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
builder.where("x.id = posts.id
|
||||||
|
AND (posts.score IS NULL OR x.score <> posts.score)", @weightings)
|
||||||
|
|
||||||
|
filter_topics(builder, min_topic_age)
|
||||||
|
|
||||||
|
builder.exec
|
||||||
|
end
|
||||||
|
|
||||||
|
def update_posts_rank(min_topic_age)
|
||||||
|
|
||||||
|
builder = SqlBuilder.new("UPDATE posts SET percent_rank = x.percent_rank
|
||||||
FROM (SELECT id, percent_rank()
|
FROM (SELECT id, percent_rank()
|
||||||
OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank
|
OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank
|
||||||
FROM posts) AS x
|
FROM posts) AS x
|
||||||
WHERE x.id = posts.id AND
|
/*where*/")
|
||||||
|
|
||||||
|
builder.where("x.id = posts.id AND
|
||||||
(posts.percent_rank IS NULL OR x.percent_rank <> posts.percent_rank)")
|
(posts.percent_rank IS NULL OR x.percent_rank <> posts.percent_rank)")
|
||||||
|
|
||||||
|
|
||||||
# Update the topics
|
filter_topics(builder, min_topic_age)
|
||||||
exec_sql "UPDATE topics AS t
|
|
||||||
|
builder.exec
|
||||||
|
end
|
||||||
|
|
||||||
|
def update_topics_rank(min_topic_age)
|
||||||
|
builder = SqlBuilder.new("UPDATE topics AS t
|
||||||
SET has_summary = (t.like_count >= :likes_required AND
|
SET has_summary = (t.like_count >= :likes_required AND
|
||||||
t.posts_count >= :posts_required AND
|
t.posts_count >= :posts_required AND
|
||||||
x.max_score >= :score_required),
|
x.max_score >= :score_required),
|
||||||
|
@ -41,7 +79,9 @@ class ScoreCalculator
|
||||||
AVG(p.score) AS avg_score
|
AVG(p.score) AS avg_score
|
||||||
FROM posts AS p
|
FROM posts AS p
|
||||||
GROUP BY p.topic_id) AS x
|
GROUP BY p.topic_id) AS x
|
||||||
WHERE x.topic_id = t.id AND
|
/*where*/")
|
||||||
|
|
||||||
|
builder.where("x.topic_id = t.id AND
|
||||||
(
|
(
|
||||||
(t.score <> x.avg_score OR t.score IS NULL) OR
|
(t.score <> x.avg_score OR t.score IS NULL) OR
|
||||||
(t.has_summary IS NULL OR t.has_summary <> (
|
(t.has_summary IS NULL OR t.has_summary <> (
|
||||||
|
@ -53,31 +93,45 @@ class ScoreCalculator
|
||||||
",
|
",
|
||||||
likes_required: SiteSetting.summary_likes_required,
|
likes_required: SiteSetting.summary_likes_required,
|
||||||
posts_required: SiteSetting.summary_posts_required,
|
posts_required: SiteSetting.summary_posts_required,
|
||||||
score_required: SiteSetting.summary_score_threshold
|
score_required: SiteSetting.summary_score_threshold)
|
||||||
|
|
||||||
# Update percentage rank of topics
|
if min_topic_age
|
||||||
exec_sql("UPDATE topics SET percent_rank = x.percent_rank
|
builder.where("t.bumped_at > :bumped_at ",
|
||||||
|
bumped_at: min_topic_age)
|
||||||
|
end
|
||||||
|
|
||||||
|
builder.exec
|
||||||
|
end
|
||||||
|
|
||||||
|
def update_topics_percent_rank(min_topic_age)
|
||||||
|
|
||||||
|
builder = SqlBuilder.new("UPDATE topics SET percent_rank = x.percent_rank
|
||||||
FROM (SELECT id, percent_rank()
|
FROM (SELECT id, percent_rank()
|
||||||
OVER (ORDER BY SCORE DESC) as percent_rank
|
OVER (ORDER BY SCORE DESC) as percent_rank
|
||||||
FROM topics) AS x
|
FROM topics) AS x
|
||||||
WHERE x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
|
/*where*/")
|
||||||
|
|
||||||
|
builder.where("x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
|
||||||
|
|
||||||
|
|
||||||
|
if min_topic_age
|
||||||
|
builder.where("topics.bumped_at > :bumped_at ",
|
||||||
|
bumped_at: min_topic_age)
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
builder.exec
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
private
|
def filter_topics(builder, min_topic_age)
|
||||||
|
if min_topic_age
|
||||||
def exec_sql(sql, params=nil)
|
builder.where('posts.topic_id IN
|
||||||
ActiveRecord::Base.exec_sql(sql, params)
|
(SELECT id FROM topics WHERE bumped_at > :bumped_at)',
|
||||||
|
bumped_at: min_topic_age)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Generate a SQL statement to update the scores of all posts
|
builder
|
||||||
def post_score_sql
|
end
|
||||||
components = []
|
|
||||||
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
|
|
||||||
components = components.join(" + ")
|
|
||||||
|
|
||||||
"UPDATE posts SET score = x.score
|
|
||||||
FROM (SELECT id, #{components} as score FROM posts) AS x
|
|
||||||
WHERE x.id = posts.id AND (posts.score IS NULL OR x.score <> posts.score)"
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -787,4 +787,12 @@ describe Post do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "calculate_avg_time" do
|
||||||
|
|
||||||
|
it "should not crash" do
|
||||||
|
Post.calculate_avg_time
|
||||||
|
Post.calculate_avg_time(1.day.ago)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -1318,4 +1318,11 @@ describe Topic do
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "calculate_avg_time" do
|
||||||
|
it "does not explode" do
|
||||||
|
Topic.calculate_avg_time
|
||||||
|
Topic.calculate_avg_time(1.day.ago)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue