slow_query.py
—
Python Source,
6 kB (6435 bytes)
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) Pilot Systems and Libération, 2011
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
import sys, re, os
import cPickle as pickle
from optparse import OptionParser
# Parse options
usage = "usage: %prog [options] <logfile>"
parser = OptionParser(usage = usage)
parser.add_option("-t", "--threshold", dest="threshold", default=1.0,
type="float",
help="Threshold of duration (in seconds)",)
parser.add_option("-n", "--nb", dest="nb", type="int", default=10,
help="Number of slowest queries to display",)
parser.add_option("-i", "--individual", dest="individual", default=False,
action="store_true",
help="Look at individual queries, and not patterns",)
parser.add_option("-s", "--sort", dest="sort", default="total",
help="Sort by : total, average, nb, max",)
parser.add_option("-d", "--date", dest="date", default="",
help="Only logs of this date (starting with this string, actually)",)
(options, args) = parser.parse_args()
if len(args) != 1:
parser.print_help()
sys.exit(1)
class Pattern(object):
def __init__(self, pattern, example, duration):
self.pattern = pattern
self.example = example
self.queries = []
self.nb = 0
self.total = 0.0
self.max = 0.0
self.add(duration, example)
def add(self, duration, query):
self.queries.append(query)
self.nb += 1
self.total += duration
self.max = max(self.max, duration)
@property
def average(self):
return self.total / self.nb
@property
def value(self):
return getattr(self, options.sort)
def __cmp__(self, other):
return cmp(self.value, other.value)
def __str__(self):
vals = [
"",
"-- Stats --",
"Nb occurences : %d" % self.nb,
"Average duration : %.2f s" % self.average,
"Total duration : %.2f s" % self.total,
"Max duration : %.2f s" % self.max,
"-- Query --",
"Pattern : %s" % self.pattern,
"Example : %s" % self.example,
""
]
return '\n'.join(vals)
class Parser(object):
pattern = re.compile('.*duration: (.*) ms statement: (.*)')
invalid = [ re.compile('.* LOG.*unexpected EOF on client connection'),
re.compile('.* LOG.*could not receive data from client'),
re.compile('.* LOG.*incomplete startup packet'), ]
def __init__(self, logfile):
self.logfile = logfile
self.vals = []
self.queries = {}
self.buffer = None
def simplify(self, query):
"""
That's the tricky part... simplify/patternize a SQL query !
"""
# Hide query values
query = re.sub("E'[^']*'", "%%%", query)
query = re.sub("IN \(%%%( *, *%%%)*\)", 'IN (%%%)', query)
query = re.sub("IN \([0-9]+( *, *[0-9]+)*\)", 'IN (%%%)', query)
query = re.sub("VALUES \(.*\)", 'VALUES (%%%)', query)
query = re.sub("= [0-9]+", '= %%%', query)
# Simplify the SELECT clause
select = re.search('SELECT (.*) FROM', query)
if select:
select = select.groups()[0]
items = select.split(',')
res = set()
for item in items:
item = item.strip()
if "." in item:
item = item.split('.')[0] + '.%%%'
res.add(item)
select = ", ".join(res)
query = re.sub('SELECT .* FROM', 'SELECT %s FROM' % select, query)
return query
def got_query(self):
if not self.buffer:
return
duration, query = self.buffer
self.buffer = None
if options.individual:
self.vals.append(Pattern(query, query, duration))
else:
pattern = self.simplify(query)
if not pattern in self.queries:
self.queries[pattern] = Pattern(pattern, query, duration)
else:
self.queries[pattern].add(duration, query)
def parse_line(self, line):
for inv in self.invalid:
if inv.match(line):
self.got_query()
return
m = self.pattern.match(line)
if m:
self.got_query()
if not line.startswith(options.date):
return
duration = float(m.groups()[0]) / 1000.0
if duration > options.threshold:
self.buffer = [duration, m.groups()[1]]
elif self.buffer:
self.buffer[1] += " " + line.strip()
def parse(self):
self.previous = None
for line in open(self.logfile):
self.parse_line(line)
self.got_query()
if not options.individual:
self.vals = self.queries.values()
self.vals.sort()
self.nb = len(self.vals)
def print_above(self, threshold):
count = len([ v for v in self.vals if v.value > threshold ])
print "%d above %.2f s (%.2f %%)" % (count, threshold,
count * 100.0 / self.nb)
def disp_stats(self):
nb = len(self.vals)
total = sum([ v.value for v in self.vals ])
average = total / nb
print "%d vals, average is %.3f s, total is %.2f s" % (nb, average, total)
self.print_above(options.threshold * 100)
self.print_above(options.threshold * 50)
self.print_above(options.threshold * 10)
self.print_above(options.threshold * 5)
self.print_above(options.threshold * 2)
def disp_top(self):
print "top %d: " % (options.nb)
for val in self.vals[-options.nb:]:
print val
parser = Parser(args[0])
parser.parse()
parser.disp_stats()
parser.disp_top()
Actions sur le document