#!/usr/bin/python
"""Summarize the contents of a syslog log file.

The syslog(3) service writes system log messages in a certain format:

    Jan 17 19:21:50 zeus kernel: klogd 1.3-3, log source = /proc/kmsg started.

This program summarizes the contents of such a file, by displaying each
unique (except for the time) line once, and also the number of times such
a line occurs in the input. The lines are displayed in the order they occur
in the input.

Command line options:

    -i FILE, --ignore=FILE   read regular expressions of lines to ignore
    -s FILE, --state=FILE    remember how far each log file was processed
    -r, --repeat             expand "last message repeated N times" lines
    -q, --quiet              print nothing for files without new content

Lars Wirzenius
"""

# NOTE: every print() call below is given exactly one pre-formatted string.
# That form produces identical output whether the interpreter treats
# ``print`` as a statement (Python 2) or as a function (Python 3).

import getopt
import hashlib
import re
import string
import sys

IGNORE_FILENAME = "/etc/syslog-summary/ignore"
STATE_FILENAME = None
REPEAT = 0
QUIET = 0

# Timestamp prefixes recognised at the start of a log line.
# NOTE(review): the second and third patterns have no separator between the
# day-of-month and the hour fields -- confirm against real input before
# changing them.
datepats = [
    re.compile(r"^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [ 0-9][0-9] [ 0-9][0-9]:[0-9][0-9]:[0-9][0-9] "),
    re.compile(r"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [ 0-9][0-9][0-9][0-9]:[0-9][0-9] "),
    re.compile(r"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [ 0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9] "),
]

# "host program[1234]: " -- the pid is removed so that messages from
# different processes of the same program are counted together.
pidpat = re.compile(r"^([^ ]* [^ ]*)\[[0-9][0-9]*\]: ")

# "host last message repeated N times"
repeatpat = re.compile(r"^[^ ]* last message repeated (\d+) times$")

# Compiled ignore patterns; replaced by main() from IGNORE_FILENAME.
ignore_pats = []


def read_patterns(filename):
    """Return the compiled regular expressions listed in *filename*.

    The file holds one regular expression per line.  Blank lines are
    skipped: an empty pattern matches every line and would silently cause
    the whole log to be ignored.  A file that cannot be opened yields an
    empty list.  An invalid regular expression raises ``re.error``.
    """
    try:
        f = open(filename, "r")
    except IOError:
        return []
    pats = []
    with f:
        for line in f:
            if line[-1:] == "\n":
                line = line[:-1]
            if line:
                pats.append(re.compile(line))
    return pats


def read_states(filename):
    """Load the per-log-file processing state saved by save_states().

    Returns a dict mapping a log file name to a ``(line_count, md5_hex)``
    tuple.  An empty dict is returned when *filename* is false or the file
    cannot be opened.  Lines that do not parse are skipped; the only
    consequence is that the affected log file is summarized from the start.
    """
    states = {}
    if not filename:
        return states
    try:
        f = open(filename, "r")
    except IOError:
        return states
    with f:
        for line in f:
            # Split from the right so that a log file name containing
            # spaces is read back the way save_states() wrote it.
            fields = line.rsplit(None, 2)
            if len(fields) != 3:
                continue
            logname, linecount, digest = fields
            try:
                states[logname] = (int(linecount), digest)
            except ValueError:
                continue
    return states


def save_states(filename, states):
    """Write *states* to *filename*, one "name linecount md5" line per entry.

    Does nothing when *filename* is false or the file cannot be opened for
    writing.
    """
    if not filename:
        return
    try:
        f = open(filename, "w")
    except IOError:
        return
    with f:
        for logname, (linecount, digest) in states.items():
            f.write("%s %d %s\n" % (logname, linecount, digest))


def should_be_ignored(line):
    """Return a true value if *line* matches any pattern in ignore_pats."""
    return any(pat.search(line) for pat in ignore_pats)


def printable_md5(str):
    """Return the lowercase hexadecimal form of a raw digest.

    The parameter keeps its historical name ``str``.  It is expected to be
    the byte string returned by a hash object's ``digest()`` method.
    """
    # bytearray() yields integers on both Python 2 and Python 3.
    return "".join("%02x" % byte for byte in bytearray(str))


def split_date(line):
    """Split *line* into ``(timestamp, rest)``.

    The timestamp is the prefix matched by the first pattern in datepats
    that matches.  When none matches, a diagnostic is printed and
    ``(None, line)`` is returned.
    """
    for pat in datepats:
        m = pat.match(line)
        if m:
            return line[:m.end()], line[m.end():]
    print("line has bad date <" + line.rstrip() + ">")
    return None, line


def _to_text(raw):
    """Return a raw log line as the interpreter's native string type."""
    if isinstance(raw, str):
        # Python 2: lines read in binary mode already are native strings.
        return raw
    return raw.decode("utf-8", "replace")


def summarize(filename, states):
    """Print a summary of the log file *filename* and update *states*.

    If *states* has an entry for *filename*, that many leading lines are
    skipped, provided their MD5 checksum still equals the recorded one;
    otherwise the file is processed from the beginning.  On return,
    ``states[filename]`` holds ``(lines_processed, md5_hex)`` for everything
    read so far.

    Honours the module-level REPEAT and QUIET flags and ignore_pats.
    Raises IOError if the log file cannot be opened.
    """
    counts = {}
    order = []
    ignored_count = 0
    if not QUIET:
        print("Summarizing %s" % filename)

    linecount = 0
    md5obj = hashlib.md5()
    # Binary mode: the checksum has to cover the bytes exactly as stored.
    with open(filename, "rb") as logfile:
        if filename in states:
            oldlines, oldmd5 = states[filename]
            skipped = 0
            while skipped < oldlines:
                md5obj.update(logfile.readline())
                skipped += 1
            if printable_md5(md5obj.digest()) != oldmd5:
                # The file was rotated or rewritten: start over.
                logfile.seek(0, 0)
                md5obj = hashlib.md5()
            else:
                linecount = oldlines
        if not QUIET:
            print("%8d Lines skipped (already processed)" % linecount)

        previous = None
        # readline() (rather than file iteration) is safe after seek().
        for raw in iter(logfile.readline, b""):
            md5obj.update(raw)
            linecount += 1

            date, rest = split_date(_to_text(raw))
            if not date:
                continue

            found = pidpat.search(rest)
            if found:
                rest = found.group(1) + ": " + rest[found.end():]

            count = 1
            repeated = None
            if REPEAT:
                repeated = repeatpat.search(rest)
                if repeated and previous:
                    # Credit the repetitions to the message they refer to.
                    count = int(repeated.group(1))
                    rest = previous

            if should_be_ignored(rest):
                ignored_count += count
            else:
                if rest not in counts:
                    order.append(rest)
                counts[rest] = counts.get(rest, 0) + count

            if not repeated:
                previous = rest

    states[filename] = (linecount, printable_md5(md5obj.digest()))

    if QUIET and order:
        print("Summarizing %s" % filename)
    if not QUIET or order:
        print("%8d Patterns to ignore" % len(ignore_pats))
        print("%8d Ignored lines" % ignored_count)
    for rest in order:
        # Each message still carries its own line terminator.
        sys.stdout.write("%8d %s" % (counts[rest], rest))
    if not QUIET or order:
        print("")


def main():
    """Parse the command line, summarize each named log file, save state."""
    global ignore_pats, IGNORE_FILENAME, STATE_FILENAME, REPEAT, QUIET

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "i:qs:r",
            ["ignore=", "quiet", "state=", "repeat"],
        )
    except getopt.GetoptError as err:
        sys.stderr.write("%s: %s\n" % (sys.argv[0], err))
        sys.exit(2)

    for opt, optarg in opts:
        if opt in ("-i", "--ignore"):
            IGNORE_FILENAME = optarg
        elif opt in ("-s", "--state"):
            STATE_FILENAME = optarg
        elif opt in ("-r", "--repeat"):
            REPEAT = 1
        elif opt in ("-q", "--quiet"):
            QUIET = 1

    ignore_pats = read_patterns(IGNORE_FILENAME)
    states = read_states(STATE_FILENAME)
    for filename in args:
        summarize(filename, states)
    save_states(STATE_FILENAME, states)


if __name__ == "__main__":
    main()