diff --git a/script/nginx_analyze.rb b/script/nginx_analyze.rb
index f83d6425d..177d45c48 100644
--- a/script/nginx_analyze.rb
+++ b/script/nginx_analyze.rb
@@ -1,3 +1,5 @@
+require 'date'
+
 class LogAnalyzer
 
   class LineParser
@@ -10,6 +12,8 @@ class LogAnalyzer
 
     PATTERN = /\[(.*)\] (\S+) \"(.*)\" \"(.*)\" \"(.*)\" ([0-9]+) ([0-9]+) \"(.*)\" ([0-9.]+) ([0-9.]+) "(.*)"/
 
+    TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %Z"
+
     def self.parse(line)
       result = new
       _, result.time, result.ip_address, result.url, result.user_agent,
@@ -21,24 +25,61 @@ class LogAnalyzer
 
       result
     end
+
+    def parsed_time
+      DateTime.strptime(time, TIME_FORMAT)
+    end
   end
 
   attr_reader :total_requests, :message_bus_requests, :filename,
               :ip_to_rails_duration, :username_to_rails_duration,
               :route_to_rails_duration, :url_to_rails_duration,
-              :status_404_to_count
+              :status_404_to_count, :from_time, :to_time
 
   def self.analyze(filename)
     new(filename).analyze
   end
 
+  class Aggeregator
+
+    def initialize
+      @data = {}
+    end
+
+    def add(id, duration, aggregate=nil)
+      ary = (@data[id] ||= [0,0])
+      ary[0] += duration
+      ary[1] += 1
+      if aggregate
+        ary[2] ||= Hash.new(0)
+        ary[2][aggregate] += duration
+      end
+    end
+
+    def top(n)
+      @data.sort{|a,b| b[1][0] <=> a[1][0]}.first(n).map do |metric, ary|
+        metric = metric.to_s
+        metric = "[empty]" if metric.length == 0
+        result = [metric, ary[0], ary[1]]
+        # handle aggregate
+        if ary[2]
+          result.push ary[2].sort{|a,b| b[1] <=> a[1]}.first(5).map{|k,v|
+            v = "%.2f" % v if Float === v
+            "#{k}(#{v})"}.join(" ")
+        end
+
+        result
+      end
+    end
+  end
+
   def initialize(filename)
     @filename = filename
-    @ip_to_rails_duration = Hash.new(0)
-    @username_to_rails_duration = Hash.new(0)
-    @route_to_rails_duration = Hash.new(0)
-    @url_to_rails_duration = Hash.new(0)
-    @status_404_to_count = Hash.new(0)
+    @ip_to_rails_duration = Aggeregator.new
+    @username_to_rails_duration = Aggeregator.new
+    @route_to_rails_duration = Aggeregator.new
+    @url_to_rails_duration = Aggeregator.new
+    @status_404_to_count = Aggeregator.new
   end
 
   def analyze
@@ -48,21 +89,24 @@ class LogAnalyzer
       @total_requests += 1
       parsed = LineParser.parse(line)
 
+      @from_time ||= parsed.time
+      @to_time = parsed.time
+
      if parsed.url =~ /(POST|GET) \/message-bus/
        @message_bus_requests += 1
        next
      end
 
-      @ip_to_rails_duration[parsed.ip_address] += parsed.rails_duration
+      @ip_to_rails_duration.add(parsed.ip_address, parsed.rails_duration)
 
      username = parsed.username == "-" ? "[Anonymous]" : parsed.username
-      @username_to_rails_duration[username] += parsed.rails_duration
+      @username_to_rails_duration.add(username, parsed.rails_duration, parsed.route)
 
-      @route_to_rails_duration[parsed.route] += parsed.rails_duration
+      @route_to_rails_duration.add(parsed.route, parsed.rails_duration)
 
-      @url_to_rails_duration[parsed.url] += parsed.rails_duration
+      @url_to_rails_duration.add(parsed.url, parsed.rails_duration)
 
-      @status_404_to_count[parsed.url] += 1 if parsed.status == "404"
+      @status_404_to_count.add(parsed.url,1) if parsed.status == "404"
    end
    self
  end
@@ -72,46 +116,101 @@ end
 
 filename = ARGV[0] || "/var/log/nginx/access.log"
 analyzer = LogAnalyzer.analyze(filename)
 
-SPACER = "-" * 80
+SPACER = "-" * 100
 
-def top(cols, hash, count)
-  sorted = hash.sort{|a,b| b[1] <=> a[1]}.first(30)
+# don't feel like pulling in active support
+def map_with_index(ary, &block)
+  idx = 0
+  ary.map do |item|
+    v = block.call(item, idx)
+    idx += 1
+    v
+  end
+end
 
-  longest_0 = [cols[0].length, sorted.map{|a,b| a.to_s.length}.max ].max
+def top(cols, aggregator, count)
+  sorted = aggregator.top(30)
 
-  puts "#{cols[0].ljust(longest_0)} #{cols[1]}"
-  puts "#{("-"*(cols[0].length)).ljust(longest_0)} #{"-"*cols[1].length}"
+  col_just = []
+
+  col_widths = map_with_index(cols) do |name,idx|
+    max_width = name.length
+    col_just[idx] = :ljust
+    sorted.each do |row|
+      col_just[idx] = :rjust unless String === row[idx] || row[idx].nil?
+      row[idx] = '%.2f' % row[idx] if Float === row[idx]
+      row[idx] = row[idx].to_s
+      max_width = row[idx].length if row[idx].length > max_width
+    end
+    [max_width,80].min
+  end
+
+  puts(map_with_index(cols) do |name,idx|
+    name.ljust(col_widths[idx])
+  end.join(" "))
+
+  puts(map_with_index(cols) do |name,idx|
+    ("-" * name.length).ljust(col_widths[idx])
+  end.join(" "))
+
+  sorted.each do |raw_row|
+
+    rows = []
+    idx = 0
+    raw_row.each do |col|
+      j = 0
+      col.to_s.scan(/(.{1,80}($|\s)|.{1,80})/).each do |r|
+        rows[j] ||= []
+        rows[j][idx] = r[0]
+        j += 1
+      end
+      idx += 1
+    end
+
+    if rows.length > 1
+      puts
+    end
+
+    rows.each do |row|
+      cols.length.times do |i|
+        print row[i].to_s.send(col_just[i], col_widths[i])
+        print " "
+      end
+      puts
+    end
+
+    if rows.length > 1
+      puts
+    end
 
-  sorted.each do |val, duration|
-    next unless val && val.length > 1
-    n = Fixnum === duration ? duration : '%.2f' % duration
-    puts "#{val.to_s.ljust(longest_0)} #{n.to_s.rjust(cols[1].length)}"
  end
 end
 
 puts
 puts "Analyzed: #{analyzer.filename}"
 puts SPACER
+puts "#{analyzer.from_time} - #{analyzer.to_time}"
+puts SPACER
 puts "Total Requests: #{analyzer.total_requests} ( MessageBus: #{analyzer.message_bus_requests} )"
 puts SPACER
 puts "Top 30 IPs by Server Load"
 puts
-top(["IP Address", "Duration"], analyzer.ip_to_rails_duration, 30)
+top(["IP Address", "Duration", "Reqs"], analyzer.ip_to_rails_duration, 30)
 puts SPACER
 puts
 puts "Top 30 users by Server Load"
 puts
-top(["Username", "Duration"], analyzer.username_to_rails_duration, 30)
+top(["Username", "Duration", "Reqs", "Routes"], analyzer.username_to_rails_duration, 30)
 puts SPACER
 puts
 puts "Top 30 routes by Server Load"
 puts
-top(["Route", "Duration"], analyzer.route_to_rails_duration, 30)
+top(["Route", "Duration", "Reqs"], analyzer.route_to_rails_duration, 30)
 puts SPACER
 puts
 puts "Top 30 urls by Server Load"
 puts
-top(["Url", "Duration"], analyzer.url_to_rails_duration, 30)
+top(["Url", "Duration", "Reqs"], analyzer.url_to_rails_duration, 30)
 puts "(all durations in seconds)"
 puts SPACER