Sign up to create your own snipts, or login.

Public snipts » tweakt's snipts » Log file error histogram analysis

posted on Oct 08, 2009 at 6:23 p.m. EDT in 
  • #!/usr/bin/python
    
    # Useful for spotting patterns in errors logged to files. Intended to be used 
    # on rolled log files where the filename contains the date or some other 
    # unique index or sequence.
    #
    # Example:
    #  foo-2009-04-23.log
    #  foo-2009-04-24.log
    #  foo-2009-04-25.log
    #
    # Use grep to generate the input for this script. Use a pattern which will 
    # match a variety of possible events. This works best when only one such 
    # event is reported per line.
    #
    # Use the '-o' flag to output only the matched section:
    #
    # > grep -o "\b\w*Exception\b" *
    #
    #   foo-2009-04-23.log:RuntimeException
    #   foo-2009-04-23.log:IOException
    #   foo-2009-04-23.log:IOException
    #   foo-2009-04-23.log:IOException
    #   foo-2009-04-24.log:FileNotFoundException
    #   foo-2009-04-24.log:IOException
    #   foo-2009-04-25.log:OutOfMemoryException
    #
    # Pipe this output to this script and you will get cross-referenced 
    # histograms of:
    #
    # By input file (date):
    #   Count for each error type
    #
    # By Error type:
    #   Count for each day of occurrence
    #
    # Number of errors (per file/date) - descending order
    #
    # Number of errors (per error type) - descending order
    
    
    import sys
    from math import log
    
    # Module-level tallies filled in by splitlines().
    # date_exception[date][exception] -> number of occurrences
    date_exception = dict()
    # exception_date[exception][date] -> number of occurrences
    exception_date = dict()
    # exception_all[exception] -> total occurrences across all dates
    exception_all = dict()
    # date_all[date] -> total occurrences of any exception on that date
    date_all = dict()

    def splitlines(lines=None):
        """Tally "<date>:<exception>" records into the module-level dicts.

        Each record is split on its FIRST colon; the part before it is used
        as the date (file name) key and the remainder as the exception key.
        Lines that contain no colon after stripping (including blank lines)
        are skipped instead of aborting the whole run.

        lines -- optional iterable of text lines; when omitted, records are
                 read from sys.stdin.

        Returns nothing; the results are accumulated in date_all,
        exception_all, date_exception and exception_date.
        """
        if lines is None:
            lines = sys.stdin

        for line in lines:
            fields = line.strip().split(":", 1)
            if len(fields) != 2:
                # No separator at all: nothing to count on this line.
                continue
            date, exception = fields

            # Flat totals.
            date_all[date] = date_all.get(date, 0) + 1
            exception_all[exception] = exception_all.get(exception, 0) + 1

            # Cross-referenced totals, one nested dict per outer key.
            per_date = date_exception.setdefault(date, dict())
            per_date[exception] = per_date.get(exception, 0) + 1

            per_exception = exception_date.setdefault(exception, dict())
            per_exception[date] = per_exception.get(date, 0) + 1
    
    def print_bar(prefix, label, value):
        """Print one histogram row to standard output.

        The row consists of `prefix`, then `label` left-aligned in 30
        columns, the count `value` in parentheses, and a bar of '#'
        characters.

        prefix -- string written verbatim before the row (used for indenting)
        label  -- text identifying the row
        value  -- integer count for the row
        """
        # Bar length grows with the logarithm of the count; the +1 makes a
        # count of zero valid (log(1) == 0 gives an empty bar).
        bar = "#" * int(log(value + 1) * 3)
        # Single-argument parenthesised form prints the same text whether
        # `print` is a statement (Python 2) or a function (Python 3).
        print(prefix + "%-30s (%5d) %s" % (label, value, bar))
    
    if __name__ == '__main__':
        # Read the "<file>:<match>" records from stdin and fill the tallies.
        splitlines()

        def count_then_name(item):
            """Sort key for (name, count) pairs: by count, ties by name."""
            return (item[1], item[0])

        # Cross-referenced histograms: first per date, then per exception.
        # Outer keys are sorted so the report order is deterministic; the
        # rows under each key go from the highest count to the lowest.
        for nested in (date_exception, exception_date):
            print("\n")
            for key in sorted(nested):
                print(key)
                rows = sorted(nested[key].items(), key=count_then_name,
                              reverse=True)
                for name, count in rows:
                    print_bar("    ", name, count)

        # Overall totals: first per exception, then per date, both in
        # descending order of count.
        for totals in (exception_all, date_all):
            print("\n")
            rows = sorted(totals.items(), key=count_then_name, reverse=True)
            for name, count in rows:
                print_bar("", name, count)
    
            
    

    copy | embed

0 Comments

Sign up, or login to leave a comment.