Merge pull request #2532 from riking/sidekiq

Pass more context from Sidekiq jobs to Logster
This commit is contained in:
Sam 2014-07-18 10:24:51 +10:00
commit 1359a02128
13 changed files with 129 additions and 46 deletions

View file

@ -385,7 +385,7 @@ class TopicsController < ApplicationController
user_id = (current_user.id if current_user)
track_visit = should_track_visit_to_topic?
Scheduler::Defer.later do
Scheduler::Defer.later "Track Visit" do
View.create_for_parent(Topic, topic_id, ip, user_id)
if track_visit
TopicUser.track_visit! topic_id, user_id

View file

@ -55,6 +55,23 @@ module Jobs
true
end
# Construct an error context object for Discourse.handle_exception
# Subclasses are encouraged to use this!
#
# `opts` is the arguments passed to execute().
# `code_desc` is a short string describing what the code was doing (optional).
# `extra` is for any other context you logged.
# Note that, when building your `extra`, that :opts, :job, and :code are used by this method,
# and :current_db and :current_hostname are used by handle_exception.
def error_context(opts, code_desc = nil, extra = {})
ctx = {}
ctx[:opts] = opts
ctx[:job] = self.class
ctx[:message] = code_desc if code_desc
ctx.merge!(extra) if extra != nil
ctx
end
def self.delayed_perform(opts={})
self.new.perform(opts)
end
@ -75,6 +92,7 @@ module Jobs
end
def perform(*args)
total_db_time = 0
ensure_db_instrumented
opts = args.extract_options!.with_indifferent_access
@ -88,7 +106,12 @@ module Jobs
if opts.has_key?(:current_site_id) && opts[:current_site_id] != RailsMultisite::ConnectionManagement.current_db
raise ArgumentError.new("You can't connect to another database when executing a job synchronously.")
else
return execute(opts)
begin
retval = execute(opts)
rescue => exc
Discourse.handle_exception(exc, error_context(opts))
end
return retval
end
end
@ -100,11 +123,10 @@ module Jobs
RailsMultisite::ConnectionManagement.all_dbs
end
total_db_time = 0
exceptions = []
dbs.each do |db|
begin
thread_exception = nil
thread_exception = {}
# NOTE: This looks odd, in fact it looks crazy but there is a reason
# A bug in therubyracer means that under certain conditions running in a fiber
# can cause the whole v8 context to corrupt so much that it will hang sidekiq
@ -128,9 +150,15 @@ module Jobs
begin
RailsMultisite::ConnectionManagement.establish_connection(db: db)
I18n.locale = SiteSetting.default_locale
execute(opts)
begin
execute(opts)
rescue => e
thread_exception[:ex] = e
end
rescue => e
thread_exception = e
thread_exception[:ex] = e
thread_exception[:message] = "While establishing database connection to #{db}"
thread_exception[:other] = { problem_db: db }
ensure
ActiveRecord::Base.connection_handler.clear_active_connections!
total_db_time += Instrumenter.stats.duration_ms
@ -138,17 +166,19 @@ module Jobs
end
t.join
exceptions << thread_exception if thread_exception
exceptions << thread_exception unless thread_exception.empty?
end
end
if exceptions.length > 0
exceptions[1..-1].each do |exception|
Discourse.handle_exception(exception, opts)
exceptions.each do |exception_hash|
Discourse.handle_exception(exception_hash[:ex],
error_context(opts, exception_hash[:code], exception_hash[:other]))
end
raise exceptions[0]
raise HandledExceptionWrapper.new exceptions[0][:ex]
end
nil
ensure
ActiveRecord::Base.connection_handler.clear_active_connections!
@db_duration = total_db_time
@ -156,6 +186,14 @@ module Jobs
end
class HandledExceptionWrapper < Exception
attr_accessor :wrapped
def initialize(ex)
super("Wrapped #{ex.class}: #{ex.message}")
@wrapped = ex
end
end
class Scheduled < Base
extend Scheduler::Schedule
end

View file

@ -29,7 +29,10 @@ module Jobs
# Forces rebake of old posts where needed, as long as no system avatars need updating
unless UserAvatar.where("last_gravatar_download_attempt IS NULL").limit(1).first
Post.rebake_old(250)
problems = Post.rebake_old(250)
problems.each do |hash|
Discourse.handle_exception(hash[:ex], error_context(args, "Rebaking post id #{hash[:post].id}", post_id: hash[:post].id))
end
end
end

View file

@ -13,6 +13,7 @@ module Jobs
include Email::BuildEmailHelper
def execute(args)
@args = args
if SiteSetting.pop3s_polling_enabled?
poll_pop3s
end
@ -47,7 +48,7 @@ module Jobs
client_message = RejectionMailer.send_rejection(message.from, message.body, message.subject, message.to, message_template)
Email::Sender.new(client_message, message_template).send
else
Discourse.handle_exception(e, { code: "unknown error for incoming email", mail: mail_string} )
Discourse.handle_exception(e, error_context(@args, "Unrecognized error type when processing incoming email", mail: mail_string))
end
ensure
mail.delete
@ -70,7 +71,7 @@ module Jobs
pop.finish
end
rescue Net::POPAuthenticationError => e
Discourse.handle_exception(e, { code: "signing in for incoming email" } )
Discourse.handle_exception(e, error_context(@args, "Signing in to poll incoming email"))
end
end

View file

@ -319,14 +319,16 @@ class Post < ActiveRecord::Base
end
def self.rebake_old(limit)
problems = []
Post.where('baked_version IS NULL OR baked_version < ?', BAKED_VERSION)
.limit(limit).each do |p|
begin
p.rebake!
rescue => e
Discourse.handle_exception(e)
problems << {post: p, ex: e}
end
end
problems
end
def rebake!(opts={})

View file

@ -33,7 +33,7 @@ if Sidekiq.server?
manager.tick
rescue => e
# the show must go on
Discourse.handle_exception(e)
Discourse.handle_exception(e, {message: "While ticking scheduling manager"})
end
sleep 1
end
@ -43,18 +43,29 @@ end
Sidekiq.logger.level = Logger::WARN
class LogsterErrorHandler
def call(ex, hash={})
text = "exception: #{ex}\ncontext: #{hash.inspect}\n"
if ex.backtrace
text << "backtrace: #{ex.backtrace.join("\n")}"
class SidekiqLogsterReporter < Sidekiq::ExceptionHandler::Logger
def call(ex, context = {})
# Pass context to Logster
fake_env = {}
context.each do |key, value|
Logster.add_to_env(fake_env, key, value)
end
Rails.logger.error(text)
text = "Job exception: #{ex}\n"
if ex.backtrace
Logster.add_to_env(fake_env, :backtrace, ex.backtrace)
end
Thread.current[Logster::Logger::LOGSTER_ENV] = fake_env
Logster.logger.error(text)
rescue => e
Rails.logger.fatal("Failed to log exception #{ex} #{hash}\nReason: #{e}\n#{e.backtrace.join("\n")}")
Logster.logger.fatal("Failed to log exception #{ex} #{hash}\nReason: #{e.class} #{e}\n#{e.backtrace.join("\n")}")
ensure
Thread.current[Logster::Logger::LOGSTER_ENV] = nil
end
end
Sidekiq.error_handlers << LogsterErrorHandler.new
Sidekiq.error_handlers.clear
Sidekiq.error_handlers << SidekiqLogsterReporter.new

View file

@ -34,7 +34,7 @@ class Auth::DefaultCurrentUserProvider
if current_user && should_update_last_seen?
u = current_user
Scheduler::Defer.later do
Scheduler::Defer.later "Updating Last Seen" do
u.update_last_seen!
u.update_ip_address!(request.ip)
end

View file

@ -9,7 +9,13 @@ module Discourse
extend Sidekiq::ExceptionHandler
end
def self.handle_exception(ex, context=nil, parent_logger = nil)
# Log an exception.
#
# If your code is in a scheduled job, it is recommended to use the
# error_context() method in Jobs::Base to pass the job arguments and any
# other desired context.
# See app/jobs/base.rb for the error_context function.
def self.handle_exception(ex, context = {}, parent_logger = nil)
context ||= {}
parent_logger ||= SidekiqExceptionHandler

View file

@ -128,7 +128,7 @@ module Oneboxer
}
}
rescue => e
Discourse.handle_exception(e, url: url)
Discourse.handle_exception(e, message: "While trying to onebox a URL", url: url)
# return a blank hash, so rest of the code works
{preview: "", onebox: ""}
end

View file

@ -14,10 +14,10 @@ module Scheduler
@async = val
end
def later(&blk)
def later(desc = nil, &blk)
if @async
start_thread unless @thread.alive?
@queue << [RailsMultisite::ConnectionManagement.current_db, blk]
@queue << [RailsMultisite::ConnectionManagement.current_db, blk, desc]
else
blk.call
end
@ -46,11 +46,15 @@ module Scheduler
end
def do_work
db, job = @queue.deq
RailsMultisite::ConnectionManagement.establish_connection(db: db)
job.call
db, job, desc = @queue.deq
begin
RailsMultisite::ConnectionManagement.establish_connection(db: db)
job.call
rescue => ex
Discourse.handle_exception(ex, {message: "Running deferred code '#{desc}'"})
end
rescue => ex
Discourse.handle_exception(ex)
Discourse.handle_exception(ex, {message: "Processing deferred code queue"})
ensure
ActiveRecord::Base.connection_handler.clear_active_connections!
end

View file

@ -42,13 +42,13 @@ module Scheduler
def keep_alive
@manager.keep_alive
rescue => ex
Discourse.handle_exception(ex)
Discourse.handle_exception(ex, {message: "Scheduling manager keep-alive"})
end
def reschedule_orphans
@manager.reschedule_orphans!
rescue => ex
Discourse.handle_exception(ex)
Discourse.handle_exception(ex, {message: "Scheduling manager orphan rescheduler"})
end
def process_queue
@ -62,8 +62,11 @@ module Scheduler
info.prev_result = "RUNNING"
@mutex.synchronize { info.write! }
klass.new.perform
rescue Jobs::HandledExceptionWrapper
# Discourse.handle_exception was already called, and we don't have any extra info to give
failed = true
rescue => e
Discourse.handle_exception(e)
Discourse.handle_exception(e, {message: "Running a scheduled job", job: klass})
failed = true
end
duration = ((Time.now.to_f - start) * 1000).to_i
@ -74,7 +77,7 @@ module Scheduler
@mutex.synchronize { info.write! }
end
rescue => ex
Discourse.handle_exception(ex)
Discourse.handle_exception(ex, {message: "Processing scheduled job queue"})
ensure
@running = false
end

View file

@ -117,22 +117,37 @@ describe Discourse do
end
context "#handle_exception" do
class TempLogger
class TempSidekiqLogger < Sidekiq::ExceptionHandler::Logger
attr_accessor :exception, :context
def handle_exception(exception, context)
self.exception = exception
self.context = context
def call(ex, ctx)
self.exception = ex
self.context = ctx
end
end
let!(:logger) { TempSidekiqLogger.new }
before do
Sidekiq.error_handlers.clear
Sidekiq.error_handlers << logger
end
it "should not fail when called" do
logger = TempLogger.new
exception = StandardError.new
Discourse.handle_exception(exception, nil, logger)
Discourse.handle_exception(exception, nil, nil)
logger.exception.should == exception
logger.context.keys.should == [:current_db, :current_hostname]
end
it "correctly passes extra context" do
exception = StandardError.new
Discourse.handle_exception(exception, {message: "Doing a test", post_id: 31}, nil)
logger.exception.should == exception
logger.context.keys.sort.should == [:current_db, :current_hostname, :message, :post_id].sort
end
end
end

View file

@ -27,13 +27,13 @@ describe Jobs::Base do
end
it 'handles errors in multisite' do
RailsMultisite::ConnectionManagement.expects(:all_dbs).returns(['default','default'])
# just stub so logs are not noisy
Discourse.expects(:handle_exception).returns(nil)
RailsMultisite::ConnectionManagement.expects(:all_dbs).returns(['default','default','default'])
# one exception per database
Discourse.expects(:handle_exception).times(3)
bad = BadJob.new
expect{bad.perform({})}.to raise_error
bad.fail_count.should == 2
bad.fail_count.should == 3
end
it 'delegates the process call to execute' do