Vous êtes ici : Accueil / 2011 / Août / Script d'analyse des logs de "slow query"

Script d'analyse des logs de "slow query"

Python Source icon slow_query.py — Python Source, 6 kB (6435 bytes)

Contenu du fichier

#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (c) Pilot Systems and Libération, 2011

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.


import sys, re, os
import cPickle as pickle
from optparse import OptionParser


# Parse options
usage = "usage: %prog [options] <logfile>"
parser = OptionParser(usage = usage)
parser.add_option("-t", "--threshold", dest="threshold", default=1.0,
                  type="float",
                  help="Threshold of duration (in seconds)",)
parser.add_option("-n", "--nb", dest="nb", type="int", default=10,
                  help="Number of slowest queries to display",)
parser.add_option("-i", "--individual", dest="individual", default=False,
                  action="store_true",
                  help="Look at individual queries, and not patterns",)
parser.add_option("-s", "--sort", dest="sort", default="total",
                  help="Sort by : total, average, nb, max",)
parser.add_option("-d", "--date", dest="date", default="",
                  help="Only logs of this date (starting with this string, actually)",)
(options, args) = parser.parse_args()

if len(args) != 1:
    parser.print_help()
    sys.exit(1)

class Pattern(object):
    def __init__(self, pattern, example, duration):
        self.pattern = pattern
        self.example = example
        self.queries = []
        self.nb = 0
        self.total = 0.0
        self.max = 0.0
        self.add(duration, example)

    def add(self, duration, query):
        self.queries.append(query)
        self.nb += 1
        self.total += duration
        self.max = max(self.max, duration)

    @property
    def average(self):
        return self.total / self.nb

    @property
    def value(self):
        return getattr(self, options.sort)

    def __cmp__(self, other):
        return cmp(self.value, other.value)

    def __str__(self):
        vals = [
            "",
            "-- Stats --",
            "Nb occurences : %d" % self.nb,
            "Average duration : %.2f s" % self.average,
            "Total duration : %.2f s" % self.total,
            "Max duration : %.2f s" % self.max,
            "-- Query --",
            "Pattern : %s" % self.pattern,            
            "Example : %s" % self.example,
            ""
            ]
        return '\n'.join(vals)
    
class Parser(object):
    pattern = re.compile('.*duration: (.*) ms  statement: (.*)')
    invalid = [ re.compile('.* LOG.*unexpected EOF on client connection'),
                re.compile('.* LOG.*could not receive data from client'),
                re.compile('.* LOG.*incomplete startup packet'), ]

    def __init__(self, logfile):
        self.logfile = logfile
        self.vals = []
        self.queries = {}
        self.buffer = None

    def simplify(self, query):
        """
        That's the tricky part... simplify/patternize a SQL query !
        """
        # Hide query values
        query = re.sub("E'[^']*'", "%%%", query)
        query = re.sub("IN \(%%%( *, *%%%)*\)", 'IN (%%%)', query)
        query = re.sub("IN \([0-9]+( *, *[0-9]+)*\)", 'IN (%%%)', query)
        query = re.sub("VALUES \(.*\)", 'VALUES (%%%)', query)
        query = re.sub("= [0-9]+", '= %%%', query)
        # Simplify the SELECT clause
        select = re.search('SELECT (.*) FROM', query)
        if select:
            select = select.groups()[0]
            items = select.split(',')
            res = set()
            for item in items:
                item = item.strip()
                if "." in item:
                    item = item.split('.')[0] + '.%%%'
                res.add(item)
            select = ", ".join(res)
            query = re.sub('SELECT .* FROM', 'SELECT %s FROM' % select, query)
        return query

    def got_query(self):
        if not self.buffer:
            return
        duration, query = self.buffer
        self.buffer = None

        if options.individual:
            self.vals.append(Pattern(query, query, duration))
        else:
            pattern = self.simplify(query)
            if not pattern in self.queries:
                self.queries[pattern] = Pattern(pattern, query, duration)
            else:
                self.queries[pattern].add(duration, query)

    def parse_line(self, line):
        for inv in self.invalid:
            if inv.match(line):
                self.got_query()
                return
            
        m = self.pattern.match(line)
        if m:
            self.got_query()
            if not line.startswith(options.date):
                return
            duration = float(m.groups()[0]) / 1000.0
            if duration > options.threshold:
                self.buffer = [duration, m.groups()[1]]
        elif self.buffer:
            self.buffer[1] += " " + line.strip()

    def parse(self):
        self.previous = None
        for line in open(self.logfile):
            self.parse_line(line)
        self.got_query()

        if not options.individual:
            self.vals = self.queries.values()
        
        self.vals.sort()
        self.nb = len(self.vals)

    def print_above(self, threshold):
        count = len([ v for v in self.vals if v.value > threshold ])
        print "%d above %.2f s (%.2f %%)" % (count, threshold,
                                             count * 100.0 / self.nb)

    def disp_stats(self):
        nb = len(self.vals)
        total = sum([ v.value for v in self.vals ])
        average = total / nb

        print "%d vals, average is %.3f s, total is %.2f s" % (nb, average, total)
        self.print_above(options.threshold * 100)
        self.print_above(options.threshold * 50)
        self.print_above(options.threshold * 10)
        self.print_above(options.threshold * 5)
        self.print_above(options.threshold * 2)

    def disp_top(self):
        print "top %d: " % (options.nb)
        for val in self.vals[-options.nb:]:
            print val



parser = Parser(args[0])
parser.parse()
parser.disp_stats()
parser.disp_top()



Actions sur le document