require 'date'

class LogAnalyzer

  class LineParser
    # log_format log_discourse '[$time_local] $remote_addr "$request" "$http_user_agent" "$sent_http_x_discourse_route" $status $bytes_sent "$http_referer" $upstream_response_time $request_time "$sent_http_x_discourse_username"';
    attr_accessor :time, :ip_address, :url, :route, :user_agent, :rails_duration, :total_duration,
                  :username, :status, :bytes_sent, :referer

    PATTERN = /\[(.*)\] (\S+) \"(.*)\" \"(.*)\" \"(.*)\" ([0-9]+) ([0-9]+) \"(.*)\" ([0-9.]+) ([0-9.]+) "(.*)"/
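
    # Illustrative (made-up) line in the format above, which PATTERN is meant
    # to match:
    #   [17/Jun/2015:22:29:06 -0400] 10.0.0.1 "GET /latest HTTP/1.1" "Mozilla/5.0 (X11; Linux)" "list/latest" 200 1234 "https://example.com/" 0.080 0.120 "sam"
    # Captures, in order: time, ip_address, url (the full request line),
    # user_agent, route, status, bytes_sent, referer, rails_duration
    # ($upstream_response_time), total_duration ($request_time), username.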

    TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %Z"
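    # e.g. parses nginx $time_local values like "17/Jun/2015:22:29:06 -0400"
    # (Ruby's strptime accepts a numeric offset for %Z).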

    def self.parse(line)
      result = new
      _, result.time, result.ip_address, result.url, result.user_agent,
        result.route, result.status, result.bytes_sent, result.referer,
        result.rails_duration, result.total_duration, result.username = line.match(PATTERN).to_a

      result.rails_duration = result.rails_duration.to_f
      result.total_duration = result.total_duration.to_f

      verb = result.url[0..3] if result.url

      if verb && verb == "POST"
        result.route += " (POST)"
      end

      if verb && verb == "PUT"
        result.route += " (PUT)"
      end

      # guard against lines that did not match PATTERN (url would be nil)
      result.url = self.sanitize_url(result.url) if result.url

      result
    end
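
    # Note on parse above: `line.match(PATTERN).to_a` is an empty array when a
    # line does not match, so every attribute stays nil; callers detect such
    # lines via `parsed.time` being nil (see LogAnalyzer#analyze below).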

    def is_mobile?
      user_agent =~ /Mobile|Android|webOS/ && !(user_agent =~ /iPad|Nexus (7|10)/)
    end
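    # (The check above counts iPads and Nexus 7/10 tablets as desktop.)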

    def parsed_time
      DateTime.strptime(time, TIME_FORMAT) if time
    end

    private

    def self.sanitize_url(url)
      url.gsub(/(api_key|api_user)=(\w+)/, '\1=[FILTERED]')
    end
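
    # e.g. (illustrative) sanitize_url("/admin/users?api_key=abc123")
    #   #=> "/admin/users?api_key=[FILTERED]"
    # Note: `private` above has no effect on `def self.` singleton methods, so
    # sanitize_url remains callable from outside; parse only uses it internally.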
  end

  attr_reader :total_requests, :message_bus_requests, :filenames,
              :ip_to_rails_duration, :username_to_rails_duration,
              :route_to_rails_duration, :url_to_rails_duration,
              :status_404_to_count, :from_time, :to_time

  def self.analyze(filenames, args)
    new(filenames, args).analyze
  end

  class Aggeregator
    attr_accessor :aggregate_type

    def initialize
      @data = {}
      @aggregate_type = :duration
    end
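
    # Internal layout (as used by #add and #top below):
    #   @data[id] = [total_duration, request_count, optional_aggregate_hash]
    # where the aggregate hash maps an aggregate key (e.g. a route, or
    # "mobile"/"desktop") to either a summed duration or a count, depending on
    # @aggregate_type.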

    def add(id, duration, aggregate=nil)
      ary = (@data[id] ||= [0,0])
      ary[0] += duration
      ary[1] += 1
      unless aggregate.nil?
        ary[2] ||= Hash.new(0)
        if @aggregate_type == :duration
          ary[2][aggregate] += duration
        elsif @aggregate_type == :count
          ary[2][aggregate] += 1
        end
      end
    end

    def top(n, aggregator_formatter=nil)
      @data.sort{|a,b| b[1][0] <=> a[1][0]}.first(n).map do |metric, ary|
        metric = metric.to_s
        metric = "[empty]" if metric.length == 0
        result = [metric, ary[0], ary[1]]

        # handle aggregate
        if ary[2]
          if aggregator_formatter
            result.push aggregator_formatter.call(ary[2], ary[0], ary[1])
          else
            result.push ary[2].sort{|a,b| b[1] <=> a[1]}.first(5).map{|k,v|
              v = "%.2f" % v if Float === v
              "#{k}(#{v})"}.join(" ")
          end
        end

        result
      end
    end
  end
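
  # Illustrative use of Aggeregator (not taken from the script itself):
  #   agg = Aggeregator.new
  #   agg.add("1.2.3.4", 0.25)
  #   agg.add("1.2.3.4", 0.5)
  #   agg.top(10)  #=> [["1.2.3.4", 0.75, 2]]
  # Rows come back sorted by total duration; a fourth, formatted column is
  # appended only when aggregates were recorded via the third argument to #add.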

  def initialize(filenames, args={})
    @filenames = filenames

    @ip_to_rails_duration = Aggeregator.new
    @username_to_rails_duration = Aggeregator.new

    @route_to_rails_duration = Aggeregator.new
    @route_to_rails_duration.aggregate_type = :count

    @url_to_rails_duration = Aggeregator.new
    @status_404_to_count = Aggeregator.new

    @total_requests = 0
    @message_bus_requests = 0
    @limit = args[:limit]
  end

  def analyze
    now = DateTime.now

    @filenames.each do |filename|
      File.open(filename).each_line do |line|
        @total_requests += 1
        parsed = LineParser.parse(line)

        next unless parsed.time
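        # DateTime subtraction yields a difference in days, so * 24 * 60 below
        # converts it to minutes; --limit is therefore a number of minutes of
        # recent log history to include.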
        next if @limit && ((now - parsed.parsed_time) * 24 * 60).to_i > @limit

        @from_time ||= parsed.time
        @to_time = parsed.time

        if parsed.url =~ /(POST|GET) \/message-bus/
          @message_bus_requests += 1
          next
        end

        @ip_to_rails_duration.add(parsed.ip_address, parsed.rails_duration)

        username = parsed.username == "-" ? "[Anonymous]" : parsed.username
        @username_to_rails_duration.add(username, parsed.rails_duration, parsed.route)

        @route_to_rails_duration.add(parsed.route, parsed.rails_duration, parsed.is_mobile? ? "mobile" : "desktop")

        @url_to_rails_duration.add(parsed.url, parsed.rails_duration)

        @status_404_to_count.add(parsed.url, 1) if parsed.status == "404"
      end
    end
    self
  end
end

args = ARGV.dup

if args[0] == "--limit"
  args.shift
  limit = args.shift.to_i
end
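
# Illustrative invocation (the script file name here is assumed):
#   ruby nginx_analyze.rb --limit 60 /var/log/nginx/access.log
# would analyze only the last 60 minutes of the given log. With no arguments
# the default nginx access logs below are used.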

filenames = args if args[0]
filenames ||= ["/var/log/nginx/access.log", "/var/log/nginx/access.log.1"]

analyzer = LogAnalyzer.analyze(filenames, limit: limit)

SPACER = "-" * 100

# don't feel like pulling in active support
def map_with_index(ary, &block)
  idx = 0
  ary.map do |item|
    v = block.call(item, idx)
    idx += 1
    v
  end
end
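# e.g. map_with_index(%w[a b]) { |item, idx| "#{idx}:#{item}" } #=> ["0:a", "1:b"]
# (plain Ruby's ary.map.with_index would do the same without this helper)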

def top(cols, aggregator, count, aggregator_formatter = nil)
  sorted = aggregator.top(count, aggregator_formatter)

  col_just = []

  col_widths = map_with_index(cols) do |name, idx|
    max_width = name.length

    if cols[idx].respond_to? :align
      col_just[idx] = cols[idx].align
      skip_just_detection = true
    else
      col_just[idx] = :ljust
    end

    sorted.each do |row|
      col_just[idx] = :rjust unless (String === row[idx] || row[idx].nil?) && !skip_just_detection
      row[idx] = '%.2f' % row[idx] if Float === row[idx]
      row[idx] = row[idx].to_s
      max_width = row[idx].length if row[idx].length > max_width
    end

    [max_width, 80].min
  end
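
  # At this point col_widths holds the widest cell in each column (capped at
  # 80 characters) and col_just holds a per-column justification: :rjust for
  # numeric columns and for any Column header that requested it, :ljust otherwise.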

  puts(map_with_index(cols) do |name, idx|
    name.ljust(col_widths[idx])
  end.join(" "))

  puts(map_with_index(cols) do |name, idx|
    ("-" * name.length).ljust(col_widths[idx])
  end.join(" "))

  sorted.each do |raw_row|
    rows = []
    idx = 0
    raw_row.each do |col|
      j = 0
      col.to_s.scan(/(.{1,80}($|\s)|.{1,80})/).each do |r|
        rows[j] ||= []
        rows[j][idx] = r[0]
        j += 1
      end
      idx += 1
    end
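
    # The scan above wraps any cell longer than 80 characters onto additional
    # physical lines, breaking at whitespace when possible; rows[j] holds the
    # j-th physical line of this logical row.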

    if rows.length > 1
      puts
    end

    rows.each do |row|
      cols.length.times do |i|
        print row[i].to_s.send(col_just[i], col_widths[i])
        print " "
      end
      puts
    end

    if rows.length > 1
      puts
    end
  end
end

class Column < String
  attr_accessor :align

  def initialize(val, align)
    super(val)
    @align = align
  end
end
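
# e.g. Column.new("Mobile", :rjust) is used below to right-align the Mobile
# column of the routes table.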

puts
puts "Analyzed: #{analyzer.filenames.join(",")} on #{`hostname`}"
if limit
  puts "Limited to #{DateTime.now - (limit.to_f / (60*24.0))} - #{DateTime.now}"
end
puts SPACER
puts "#{analyzer.from_time} - #{analyzer.to_time}"
puts SPACER
puts "Total Requests: #{analyzer.total_requests} ( MessageBus: #{analyzer.message_bus_requests} )"
puts SPACER
puts "Top 30 IPs by Server Load"
puts
top(["IP Address", "Duration", "Reqs"], analyzer.ip_to_rails_duration, 30)
puts SPACER
puts
puts "Top 30 users by Server Load"
puts
top(["Username", "Duration", "Reqs", "Routes"], analyzer.username_to_rails_duration, 30)
puts SPACER
puts
puts "Top 100 routes by Server Load"
puts
top(["Route", "Duration", "Reqs", Column.new("Mobile", :rjust)], analyzer.route_to_rails_duration, 100,
    lambda { |hash, name, total|
      "#{hash["mobile"] || 0} (#{"%.2f" % (((hash["mobile"] || 0) / (total + 0.0)) * 100)})%"
    })
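# The lambda above replaces the default aggregate column with the number of
# requests for the route that came from mobile user agents, plus the share of
# that route's requests they represent.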
|
2015-06-17 01:06:42 -04:00
|
|
|
puts SPACER
|
|
|
|
puts
|
|
|
|
puts "Top 30 urls by Server Load"
|
|
|
|
puts
|
2015-06-17 22:29:06 -04:00
|
|
|
top(["Url", "Duration", "Reqs"], analyzer.url_to_rails_duration, 30)

puts "(all durations in seconds)"
puts SPACER
puts
puts "Top 30 not found urls (404s)"
puts
top(["Url", "Count"], analyzer.status_404_to_count, 30)