FEATURE: store history for scheduled job execution

This commit is contained in:
Sam 2016-05-30 11:38:08 +10:00
parent 089b1d164c
commit c9dcffe434
8 changed files with 138 additions and 1 deletions

View file

@ -11,6 +11,7 @@ module Jobs
Post.calculate_avg_time
Topic.calculate_avg_time
ScoreCalculator.new.calculate
SchedulerStat.purge_old
Draft.cleanup!
end
end

View file

@ -0,0 +1,20 @@
# ActiveRecord model backed by the scheduler_stats table (see the schema
# information below for its columns).
class SchedulerStat < ActiveRecord::Base
  # Deletes every row whose started_at is more than three months in the past.
  # Uses delete_all, so rows are removed without instantiating records.
  def self.purge_old
    cutoff = 3.months.ago
    where('started_at < ?', cutoff).delete_all
  end
end
# == Schema Information
#
# Table name: scheduler_stats
#
# id :integer not null, primary key
# name :string not null
# hostname :string not null
# pid :integer not null
# duration_ms :integer
# live_slots_start :integer
# live_slots_finish :integer
# started_at :datetime not null
# success :boolean
#

View file

@ -0,0 +1,14 @@
# Creates the scheduler_stats table. Columns of the same type and options are
# declared together; the resulting column order is unchanged.
class CreateSchedulerStats < ActiveRecord::Migration
  def change
    create_table :scheduler_stats do |t|
      # Required identification of the run.
      t.string :name, :hostname, null: false
      t.integer :pid, null: false
      # Nullable measurements.
      t.integer :duration_ms, :live_slots_start, :live_slots_finish
      t.datetime :started_at, null: false
      t.boolean :success
    end
  end
end

View file

@ -50,6 +50,14 @@ module Scheduler
Discourse.handle_job_exception(ex, {message: "Scheduling manager orphan rescheduler"})
end
# Returns the name of the machine this process runs on, memoized in @hostname.
#
# The value comes from shelling out to the `hostname` command. The command's
# output ends with a newline, so it is stripped before being cached; otherwise
# the newline would be carried along wherever the hostname is used.
#
# Falls back to "unknown" when the command cannot be run or prints nothing.
def hostname
  @hostname ||= begin
    name = `hostname`.strip
    name.empty? ? "unknown" : name
  rescue
    "unknown"
  end
end
def process_queue
klass = @queue.deq
# hack alert, I need to both deq and set @running atomically.
@ -57,9 +65,17 @@ module Scheduler
failed = false
start = Time.now.to_f
info = @mutex.synchronize { @manager.schedule_info(klass) }
stat = nil
begin
info.prev_result = "RUNNING"
@mutex.synchronize { info.write! }
stat = SchedulerStat.create!(
name: klass.to_s,
hostname: hostname,
pid: Process.pid,
started_at: Time.zone.now,
live_slots_start: GC.stat[:heap_live_slots]
)
klass.new.perform
rescue Jobs::HandledExceptionWrapper
# Discourse.handle_exception was already called, and we don't have any extra info to give
@ -72,6 +88,11 @@ module Scheduler
info.prev_duration = duration
info.prev_result = failed ? "FAILED" : "OK"
info.current_owner = nil
# stat is still nil when SchedulerStat.create! raised before the job ran.
# Guard against that so the failure does not become a NoMethodError here,
# which would prevent the info.write! that follows from running.
if stat
  stat.update_columns(
    duration_ms: duration,
    live_slots_finish: GC.stat[:heap_live_slots],
    success: !failed
  )
end
attempts(3) do
@mutex.synchronize { info.write! }
end

View file

@ -0,0 +1,45 @@
<%# Scheduler history page: renders one table row per entry in @scheduler_stats. %>
<header class="row">
  <div class="col-sm-12">
    <h3>Scheduler History</h3>
  </div>
</header>
<div class="container">
  <div class="row">
    <div class="col-md-9">
      <% if @scheduler_stats.length > 0 %>
        <table class="table table-striped table-bordered table-white" style="width: 100%; margin: 0; table-layout:fixed;">
          <thead>
            <%# Header cells live in a row; the widths add up to 100%. %>
            <tr>
              <th style="width: 30%">Job Name</th>
              <th style="width: 15%">Hostname:Pid</th>
              <th style="width: 15%">Live Slots delta</th>
              <th style="width: 15%">Started At</th>
              <th style="width: 15%">Duration (ms)</th>
              <th style="width: 10%"></th>
            </tr>
          </thead>
          <tbody>
            <% @scheduler_stats.each do |stat| %>
              <tr>
                <td><%= stat.name %></td>
                <td><%= stat.hostname %>:<%= stat.pid %></td>
                <td>
                  <%# Both slot counts are nullable; show the delta only when both exist. %>
                  <% if stat.live_slots_start && stat.live_slots_finish %>
                    <%= stat.live_slots_finish - stat.live_slots_start %>
                  <% end %>
                </td>
                <td><%= relative_time stat.started_at %></td>
                <td><%= stat.duration_ms %></td>
                <td>
                  <%# success is nullable: NULL means no result was recorded (e.g. the job is still running), so only an explicit false counts as a failure. %>
                  <% if stat.success == false %>
                    <span>FAILED</span>
                  <% end %>
                </td>
              </tr>
            <% end %>
          </tbody>
        </table>
      <% end %>
    </div>
  </div>
</div>

View file

@ -7,7 +7,7 @@
</div>
<% end %>
<div class="col-sm-12">
<h3>Recurring Jobs</h3>
<h3>Recurring Jobs <a style='font-size:50%; margin-left: 30px' href='scheduler/history'>history</a></h3>
</div>
</header>

View file

@ -22,6 +22,11 @@ module Scheduler
end
end
# Renders the scheduler history page from the 200 most recently started
# job executions, newest first.
app.get "/scheduler/history" do
  @scheduler_stats = SchedulerStat.order('started_at desc').limit(200)

  template = File.read(File.join(VIEWS, 'history.erb'))
  erb template, locals: {view_path: VIEWS}
end
app.post "/scheduler/:name/trigger" do
halt 404 unless (name = params[:name])

View file

@ -133,6 +133,37 @@ describe Scheduler::Manager do
expect(info.next_run).to be <= Time.now.to_i
end
# After a job has run to completion, a stat row exists with a positive
# duration and success set to true.
it 'should log when job finishes running' do
  Testing::RandomJob.runs = 0

  schedule = manager.schedule_info(Testing::RandomJob)
  schedule.next_run = Time.now.to_i - 1
  schedule.write!

  # A second manager instance drives the tick; it has its own name so it
  # does not shadow the outer `manager` used above.
  ticking_manager = Scheduler::Manager.new(DiscourseRedis.new)
  ticking_manager.blocking_tick
  ticking_manager.stop!

  recorded = SchedulerStat.first
  expect(recorded).to be_present
  expect(recorded.duration_ms).to be > 0
  expect(recorded.success).to be true
end
# A stat row is present once the manager has ticked a due job, without
# waiting for that job to complete.
it 'should log when jobs start running' do
  schedule = manager.schedule_info(Testing::SuperLongJob)
  schedule.next_run = Time.now.to_i - 1
  schedule.write!

  manager.tick
  manager.stop!

  expect(SchedulerStat.first).to be_present
end
it 'should only run pending job once' do
Testing::RandomJob.runs = 0