slow_query.py — Python Source, 6 kB (6435 bytes)
#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (c) Pilot Systems and Libération, 2011 # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. import sys, re, os import cPickle as pickle from optparse import OptionParser # Parse options usage = "usage: %prog [options] <logfile>" parser = OptionParser(usage = usage) parser.add_option("-t", "--threshold", dest="threshold", default=1.0, type="float", help="Threshold of duration (in seconds)",) parser.add_option("-n", "--nb", dest="nb", type="int", default=10, help="Number of slowest queries to display",) parser.add_option("-i", "--individual", dest="individual", default=False, action="store_true", help="Look at individual queries, and not patterns",) parser.add_option("-s", "--sort", dest="sort", default="total", help="Sort by : total, average, nb, max",) parser.add_option("-d", "--date", dest="date", default="", help="Only logs of this date (starting with this string, actually)",) (options, args) = parser.parse_args() if len(args) != 1: parser.print_help() sys.exit(1) class Pattern(object): def __init__(self, pattern, example, duration): self.pattern = pattern self.example = example self.queries = [] self.nb = 0 self.total = 0.0 self.max = 0.0 self.add(duration, example) def add(self, duration, query): self.queries.append(query) self.nb += 1 self.total += duration self.max = max(self.max, duration) @property def average(self): return self.total / self.nb @property def value(self): return getattr(self, options.sort) def __cmp__(self, other): return cmp(self.value, other.value) def __str__(self): vals = [ "", "-- Stats --", "Nb occurences : %d" % self.nb, "Average duration : %.2f s" % self.average, "Total duration : %.2f s" % self.total, "Max duration : %.2f s" % self.max, "-- Query --", "Pattern : %s" % self.pattern, "Example : %s" % self.example, "" ] return '\n'.join(vals) class Parser(object): pattern = re.compile('.*duration: (.*) ms statement: (.*)') invalid = [ re.compile('.* LOG.*unexpected EOF on client connection'), re.compile('.* LOG.*could not receive data from client'), re.compile('.* LOG.*incomplete startup packet'), ] def __init__(self, logfile): self.logfile = logfile self.vals = [] self.queries = {} self.buffer = None def simplify(self, query): """ That's the tricky part... simplify/patternize a SQL query ! """ # Hide query values query = re.sub("E'[^']*'", "%%%", query) query = re.sub("IN \(%%%( *, *%%%)*\)", 'IN (%%%)', query) query = re.sub("IN \([0-9]+( *, *[0-9]+)*\)", 'IN (%%%)', query) query = re.sub("VALUES \(.*\)", 'VALUES (%%%)', query) query = re.sub("= [0-9]+", '= %%%', query) # Simplify the SELECT clause select = re.search('SELECT (.*) FROM', query) if select: select = select.groups()[0] items = select.split(',') res = set() for item in items: item = item.strip() if "." in item: item = item.split('.')[0] + '.%%%' res.add(item) select = ", ".join(res) query = re.sub('SELECT .* FROM', 'SELECT %s FROM' % select, query) return query def got_query(self): if not self.buffer: return duration, query = self.buffer self.buffer = None if options.individual: self.vals.append(Pattern(query, query, duration)) else: pattern = self.simplify(query) if not pattern in self.queries: self.queries[pattern] = Pattern(pattern, query, duration) else: self.queries[pattern].add(duration, query) def parse_line(self, line): for inv in self.invalid: if inv.match(line): self.got_query() return m = self.pattern.match(line) if m: self.got_query() if not line.startswith(options.date): return duration = float(m.groups()[0]) / 1000.0 if duration > options.threshold: self.buffer = [duration, m.groups()[1]] elif self.buffer: self.buffer[1] += " " + line.strip() def parse(self): self.previous = None for line in open(self.logfile): self.parse_line(line) self.got_query() if not options.individual: self.vals = self.queries.values() self.vals.sort() self.nb = len(self.vals) def print_above(self, threshold): count = len([ v for v in self.vals if v.value > threshold ]) print "%d above %.2f s (%.2f %%)" % (count, threshold, count * 100.0 / self.nb) def disp_stats(self): nb = len(self.vals) total = sum([ v.value for v in self.vals ]) average = total / nb print "%d vals, average is %.3f s, total is %.2f s" % (nb, average, total) self.print_above(options.threshold * 100) self.print_above(options.threshold * 50) self.print_above(options.threshold * 10) self.print_above(options.threshold * 5) self.print_above(options.threshold * 2) def disp_top(self): print "top %d: " % (options.nb) for val in self.vals[-options.nb:]: print val parser = Parser(args[0]) parser.parse() parser.disp_stats() parser.disp_top()
Actions sur le document