Add score, percent_rank to topics. Adds HotTopic model and consolidated job to calculate

hotness. Note: People on Heroku will have to update their jobs to the new structure
in Heroku.md
This commit is contained in:
Robin Ward 2013-03-28 13:02:59 -04:00
parent 9b103e6d97
commit 473a64d39d
20 changed files with 224 additions and 109 deletions

60
app/models/hot_topic.rb Normal file
View file

@ -0,0 +1,60 @@
class HotTopic < ActiveRecord::Base
belongs_to :topic
belongs_to :category
# Here's the current idea behind the implementaiton of hot: random can produce good results!
# Hot is currently made up of a random selection of high percentile topics. It includes mostly
# new topics, but also some old ones for variety.
def self.refresh!
transaction do
exec_sql "DELETE FROM hot_topics"
# TODO, move these to site settings once we're sure this is how we want to figure out hot
max_hot_topics = 200 # how many hot topics we want
hot_percentile = 0.2 # What percentile of topics we consider good
older_percentage = 0.2 # how many old topics we want as a percentage
new_days = 21 # how many days old we consider old
exec_sql("INSERT INTO hot_topics (topic_id, category_id, score)
SELECT t.id,
t.category_id,
RANDOM()
FROM topics AS t
WHERE t.deleted_at IS NULL
AND t.visible
AND (NOT t.closed)
AND (NOT t.archived)
AND t.archetype <> :private_message
AND created_at >= (CURRENT_TIMESTAMP - INTERVAL ':days_ago' DAY)
AND t.percent_rank < :hot_percentile
ORDER BY 3 DESC
LIMIT :limit",
hot_percentile: hot_percentile,
limit: ((1.0 - older_percentage) * max_hot_topics).round,
private_message: Archetype::private_message,
days_ago: new_days)
# Add a sprinkling of random older topics
exec_sql("INSERT INTO hot_topics (topic_id, category_id, score)
SELECT t.id,
t.category_id,
RANDOM()
FROM topics AS t
WHERE t.deleted_at IS NULL
AND t.visible
AND (NOT t.closed)
AND (NOT t.archived)
AND t.archetype <> :private_message
AND created_at < (CURRENT_TIMESTAMP - INTERVAL ':days_ago' DAY)
AND t.percent_rank < :hot_percentile
LIMIT :limit",
hot_percentile: hot_percentile,
limit: (older_percentage * max_hot_topics).round,
private_message: Archetype::private_message,
days_ago: new_days)
end
end
end

View file

@ -38,6 +38,8 @@ class Topic < ActiveRecord::Base
has_many :posts
has_many :topic_allowed_users
has_many :allowed_users, through: :topic_allowed_users, source: :user
has_one :hot_topic
belongs_to :user
belongs_to :last_poster, class_name: 'User', foreign_key: :last_post_user_id
belongs_to :featured_user1, class_name: 'User', foreign_key: :featured_user1_id

View file

@ -12,10 +12,8 @@ module Clockwork
every(1.day, 'enqueue_digest_emails', at: '06:00')
every(1.day, 'category_stats', at: '04:00')
every(10.minutes, 'calculate_avg_time')
every(10.minutes, 'feature_topics')
every(1.minute, 'calculate_score')
every(20.minutes, 'calculate_view_counts')
every(10.minutes, 'periodical_updates')
every(1.day, 'version_check')
every(1.minute, 'clockwork_heartbeat')
end

View file

@ -0,0 +1,12 @@
class CreateHotTopics < ActiveRecord::Migration
def change
create_table :hot_topics, force: true do |t|
t.integer :topic_id, null: false
t.integer :category_id, null: true
t.float :score, null: false
end
add_index :hot_topics, :topic_id, unique: true
add_index :hot_topics, :score, order: 'desc'
end
end

View file

@ -0,0 +1,6 @@
class AddScoreToTopics < ActiveRecord::Migration
def change
add_column :topics, :score, :float
add_column :topics, :percent_rank, :float, null: false, default: 1.0
end
end

View file

@ -142,13 +142,7 @@ For details on how to reduce the monthly cost of your application, see the Advan
rake category_stats Daily 04:00
rake calculate_avg_time Every 10 minutes --:--
rake feature_topics Every 10 minutes --:--
rake calculate_score Every 10 minutes --:--
rake calculate_view_counts Every 10 minutes --:--
rake periodical_updates Every 10 minutes --:--
rake version_check Daily 01:00

View file

@ -1,12 +0,0 @@
module Jobs
class CalculateAvgTime < Jobs::Base
def execute(args)
Post.calculate_avg_time
Topic.calculate_avg_time
end
end
end

View file

@ -1,13 +0,0 @@
require_dependency 'score_calculator'
module Jobs
class CalculateScore < Jobs::Base
def execute(args)
ScoreCalculator.new.calculate
end
end
end

View file

@ -1,13 +0,0 @@
require_dependency 'score_calculator'
module Jobs
class CalculateViewCounts < Jobs::Base
def execute(args)
User.update_view_counts
end
end
end

View file

@ -1,11 +0,0 @@
module Jobs
class FeatureTopics < Jobs::Base
def execute(args)
CategoryFeaturedTopic.feature_topics
end
end
end

View file

@ -0,0 +1,31 @@
require_dependency 'score_calculator'
module Jobs
# This job will run on a regular basis to update statistics and denormalized data.
# If it does not run, the site will not function properly.
class PeriodicalUpdates < Jobs::Base
def execute(args)
# Update the average times
Post.calculate_avg_time
Topic.calculate_avg_time
# Feature topics in categories
CategoryFeaturedTopic.feature_topics
# Update view counts for users
User.update_view_counts
# Update the scores of posts
ScoreCalculator.new.calculate
# Refresh Hot Topics
HotTopic.refresh!
end
end
end

View file

@ -30,21 +30,28 @@ class ScoreCalculator
WHERE x.id = posts.id")
# Update the best of flag
exec_sql "
UPDATE topics SET has_best_of =
CASE
WHEN like_count >= :likes_required AND
posts_count >= :posts_required AND
EXISTS(SELECT * FROM posts AS p
WHERE p.topic_id = topics.id
AND p.score >= :score_required) THEN true
ELSE false
END",
likes_required: SiteSetting.best_of_likes_required,
posts_required: SiteSetting.best_of_posts_required,
score_required: SiteSetting.best_of_score_threshold
# Update the topics
exec_sql "UPDATE topics AS t
SET has_best_of = (t.like_count >= :likes_required AND
t.posts_count >= :posts_required AND
x.min_score >= :score_required),
score = x.avg_score
FROM (SELECT p.topic_id,
MIN(p.score) AS min_score,
AVG(p.score) AS avg_score
FROM posts AS p
GROUP BY p.topic_id) AS x
WHERE x.topic_id = t.id",
likes_required: SiteSetting.best_of_likes_required,
posts_required: SiteSetting.best_of_posts_required,
score_required: SiteSetting.best_of_score_threshold
# Update percentage rank of topics
exec_sql("UPDATE topics SET percent_rank = x.percent_rank
FROM (SELECT id, percent_rank()
OVER (ORDER BY SCORE DESC) as percent_rank
FROM topics) AS x
WHERE x.id = topics.id")
end

View file

@ -11,23 +11,8 @@ task :category_stats => :environment do
end
# Every 10 minutes
task :calculate_avg_time => :environment do
Jobs::CalculateAvgTime.new.execute(nil)
end
# Every 10 minutes
task :feature_topics => :environment do
Jobs::FeatureTopics.new.execute(nil)
end
# Every 10 minutes
task :calculate_score => :environment do
Jobs::CalculateScore.new.execute(nil)
end
# Every 10 minutes
task :calculate_view_counts => :environment do
Jobs::CalculateViewCounts.new.execute(nil)
task :periodical_updates => :environment do
Jobs::PeriodicalUpdates.new.execute(nil)
end
# Every day

View file

@ -123,11 +123,9 @@ class TopicQuery
def list_hot
return_list(unordered: true) do |list|
# Let's not include topic categories on hot
list = list.where("categories.topic_id <> topics.id")
list =list.order("coalesce(categories.hotness, 5) desc, topics.bumped_at desc")
# Find hot topics
list = list.joins(:hot_topic)
.order('hot_topics.score + (COALESCE(categories.hotness, 5.0) / 11.0) desc')
end
end

View file

@ -1,12 +0,0 @@
require 'spec_helper'
require 'jobs'
describe Jobs::CalculateViewCounts do
it "delegates to User" do
User.expects(:update_view_counts)
Jobs::CalculateViewCounts.new.execute({})
end
end

View file

@ -0,0 +1,36 @@
require 'spec_helper'
require 'jobs/periodical_updates'
describe Jobs::PeriodicalUpdates do
after do
Jobs::PeriodicalUpdates.new.execute(nil)
end
it "calculates avg post time" do
Post.expects(:calculate_avg_time).once
end
it "calculates avg topic time" do
Topic.expects(:calculate_avg_time).once
end
it "features topics" do
CategoryFeaturedTopic.expects(:feature_topics).once
end
it "updates view counts" do
User.expects(:update_view_counts).once
end
it "calculates scores" do
calculator = mock()
ScoreCalculator.expects(:new).once.returns(calculator)
calculator.expects(:calculate)
end
it "refreshes hot topics" do
HotTopic.expects(:refresh!).once
end
end

View file

@ -23,6 +23,15 @@ describe ScoreCalculator do
another_post.percent_rank.should == 0.0
post.percent_rank.should == 1.0
end
it "gives the topic a score" do
topic.score.should be_present
end
it "gives the topic a percent_rank" do
topic.percent_rank.should_not == 1.0
end
end
context 'best_of' do

View file

@ -71,7 +71,7 @@ describe TopicQuery do
end
end
context 'hot' do
pending 'hot' do
let(:cold_category) { Fabricate(:category, name: 'brrrrrr', hotness: 5) }
let(:hot_category) { Fabricate(:category, name: 'yeeouch', hotness: 10) }

View file

@ -0,0 +1,34 @@
require 'spec_helper'
describe HotTopic do
it { should belong_to :topic }
it { should belong_to :category }
context "refresh!" do
let!(:t1) { Fabricate(:topic) }
let!(:t2) { Fabricate(:topic) }
it "begins blank" do
HotTopic.all.should be_blank
end
context "after calculating" do
before do
# Calculate the scores before we calculate hot
ScoreCalculator.new.calculate
HotTopic.refresh!
end
it "should have hot topics" do
HotTopic.pluck(:topic_id).should =~ [t1.id, t2.id]
end
end
end
end

View file

@ -873,6 +873,10 @@ describe Topic do
topic.has_best_of.should be_false
end
it "is the 1.0 percent rank" do
topic.percent_rank.should == 1.0
end
it 'is not invisible' do
topic.should be_visible
end