As an update to my prior post, I’ve added a form to the workbench plugin. Now, the user can select a slow query log file and generate statistics from it. The plugin provides information to answer the following questions:
- What type of queries run most often?
- What type of queries are the slowest?
- Which queries access the most rows?
- Which queries send the most data?
The plugin scans the slow query log, aggregates similar queries, and provides summary statistics for each group. It’s very similar to the mysqldumpslow perl utility, which is included in the mysql bin folder. However, as a plugin, it’s easier to use on a Windows server as it doesn’t require you to install perl.
If you’re working to create a MySQL Workbench plugin, this mform code will help provide an example.
This form prompts the user for the output type of the slow query log (file or table), for the location of the slow query log file, and for the sort order: count, query_time, rows_examined, rows_sent. See Figure 1. Sample plugin form.
The plugin generates output similar to the following for queries in the slow query log:
Statement: SELECT data, created, headers, expire, serialized FROM cache_menu WHERE cid = ‘STRING’
count: 116
average time: 0:00:00.001269
slowest sql: SELECT data, created, headers, expire, serialized FROM cache_menu WHERE cid = ‘links:secondary-links:tree-data:5d6d3aaaaef5fba302ce62698fa37bbe’
slowest time: 0:00:00.024912<
average rows sent: 0
average rows examined: 0
average lock time: 0:00:00.000071
. . .
. . .
. . .
To add the plugin to your workbench install, create a file named slowquerystats_grt.py, click on the “show source” magnifying glass icon below, and copy the code block into the file. From the Scripting menu in Workbench, call install Plugin/Module File and select the file to be installed. This plugin requires Workbench version 2.7 or newer. Once you’ve installed the plugin, you can run it by opening the SQL Editor and selecting Slow Log Stats from the Plugins|Utilities menu.
import os
import re
from wb import *
import grt
from operator import itemgetter
from mforms import newBox, newButton, newPanel, newTextBox, newRadioButton, newLabel, newSelector, newTextEntry, newCheckBox, SelectorCombobox
from mforms import Utilities, FileChooser
import mforms
import datetime
ModuleInfo = DefineModule(name= "slowlogstats", author= "lstigile", version="0.5")
@ModuleInfo.plugin("slowlogstats", caption= "Slow Log Stats", input= [wbinputs.currentQueryBuffer()], pluginMenu= "SQL/Utilities")
@ModuleInfo.export(grt.INT, grt.classes.db_query_QueryBuffer)
#---------------------------------------------
# Entry point, creates form instance, sets choice for sourcetype (table or file), file location
# and sort order of results
#---------------------------------------------
def optionPrompt(qbuffer):
lines = []
sort = {"query_time":1, "count":0, "rows_sent":3,"rows_examined":4, "lock_time":2}
f = LogParamFrm()
editor= qbuffer.owner
editor.addToOutput("\n", 1)
if f.result == mforms.ResultOk:
if f.rbfile.get_active():
resultsets = LogFileResultset(f.logfile_path)
ok= resultsets.open()
tuples = parseSlowQueryTable(resultsets)
else:
editor= qbuffer.owner
sql = "SELECT * FROM mysql.slow_log order by start_time desc limit 10000;"
resultsets = editor.executeScript(sql)
tuples = parseSlowQueryTable(resultsets[0])
if len(tuples) < 12:
editor.addToOutput( tuples[0], 1)
return 0
tuples = sorted(tuples, key=itemgetter(sort[f.sort_method]), reverse=True)
lines = printFormat(tuples)
editor.addToOutput('\n'.join(lines), 0)
if f.rbfile.get_active(): resultsets.close()
return 0
#-----------------------------------
# format info to print to screen
#------------------------------------
def printFormat(tuples):
lines=[]
for item in tuples:
(count, ttime, tlock, tsent, texam, maxtime, maxstmt, atime, asent, aexam, alock, statement ) = item
lines.append('Statement: ' + statement + '\n' + 'count: ' + str(count) + '\n' +
'average time:' + str(atime) + '\n' +
'slowest sql: ' + maxstmt + '\n' +
'slowest time: ' + str(maxtime) + '\n'
'average rows sent: ' + str(asent) + '\n'
'average rows examined: ' + str(aexam) + '\n'
'average lock time: ' + str(alock) + '\n\n'
)
return lines
#----------------------------
def parseSlowQueryTable(result):
row_dict = {}
lines= []
ok = result.goToFirstRow()
if ok == 0:
lines.append('invalid file format, User@Host not found in first 20 entries')
return lines
while ok:
sql = result.stringFieldValueByName('sql_text')
time = result.stringFieldValueByName('query_time')
if time[9:]!='':
time = datetime.timedelta(minutes=int(time[0:2]),
seconds=int(time[3:5]), microseconds=int(time[6:]))
else:
time = datetime.timedelta(hours=int(time[0:2]), minutes=int(time[3:5]),
seconds=int(time[6:8]))
timelock = result.stringFieldValueByName('lock_time')
if timelock[9:]!='':
timelock = datetime.timedelta(minutes=int(timelock[0:2]),
seconds=int(timelock[3:5]), microseconds=int(timelock[6:]))
else:
timelock = datetime.timedelta(hours=int(timelock[0:2]), minutes=int(timelock[3:5]),
seconds=int(timelock[6:8]))
sent = int(result.stringFieldValueByName('rows_sent'))
examined = int(result.stringFieldValueByName('rows_examined'))
#Use python regular expression engine to replace variables with STRING or
#DIGIT For readability, notes on expression are in comments at bottom of this
#script. Modify this expression if SQL statements are not correctly grouped.
sql = re.sub(r'(?<=[\s\(,<>=][\'\"]).*?(?=[\'\"]\)$|[\'\"]$|[\'\"][\,\)]\s|[\'\"]\s[FROM|' +
'ORDER|GROUP|AND|WHERE])', 'STRING', sql)
# replace all digits with 'DIGIT'
sql = re.sub(r'(?<=[-+=\s\(,<>])\d.*?(?=[\s\),\)]|$)','DIGIT', sql)
#determine if SQL is in row_count dict, if not add statement and count, else increment
#existing entries
if not sql in row_dict:
avgtime = time
avgrowssent = sent
avgrowsexamined = examined
avglocktime = timelock
totaltime = time
maxsql = result.stringFieldValueByName('sql_text')
row = [1,totaltime, timelock, sent, examined, time, maxsql, avgtime,
avgrowssent, avgrowsexamined, avglocktime, sql]
row_dict[sql]=row
else:
(count, ttime, tlock, tsent, texam, maxtime, maxstmt, atime, asent, aexam,
alock, sql) = tuple(row_dict[sql])
count = count + 1
ttime = ttime + time
tlock = tlock + timelock
tsent = tsent + sent
texam = texam + examined
if maxtime < time:
maxtime = time
maxstmt = result.stringFieldValueByName('sql_text')
atime = ttime/count
asent = tsent/count
aexam = texam/count
alock = tlock/count
row = [count, ttime, tlock, tsent, texam, maxtime, maxstmt, atime, asent, aexam, alock, sql]
row_dict[sql] = row
ok = result.nextRow()
for row in row_dict.values():
lines.append(tuple(row))
return tuple(lines)
#------function to format time coming from file into 00:00:00000"
def filetimeformat(s):
parts = s.split('.')
sformat = ('%(minutes)02d:%(seconds)02d:%(microseconds)05d'
%{'minutes':int(parts[0])//60,'seconds':int(parts[0])%60,
'microseconds':int(parts[1])})
return sformat
########################################################################
##
## LogFileResultset class
##
##
########################################################################
class LogFileResultset:
def __init__(self, location ):
self.location = location
self.__user = ''
def open(self):
try:
self.file = open(self.location, 'r')
return 1
except IOError, e:
return "Error in file IO: ", e
def goToFirstRow(self):
ok = self.nextRow()
return ok
def __matchTest(self, pattern, text, groupno):
match = re.search(pattern, text)
if match:
return match.group(groupno)
else:
return ''
def nextRow(self):
x = 0
sqlline = []
self.row = {}
# walk through 20 lines looking for first instance of User@Host
# if no entry, return 0
while self.__user == '':
x = x+1
self.__user = self.__matchTest('#\sUser@Host:\s(.*)\n', self.file.readline(),1)
if x == 20:
return 0
line = self.file.readline()
query_time = self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' +
'\s\sRows_examined:\s(.*)', line,1)
if query_time != '':
query_time = filetimeformat(query_time)
lock_time= self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' +
'\s\sRows_examined:\s(.*)', line,2)
if lock_time != '':
lock_time = filetimeformat(lock_time)
rows_sent= self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' +
'\s\sRows_examined:\s(.*)', line,3)
rows_examined= self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' +
'\s\sRows_examined:\s(.*)', line,4)
# if use databasename in log, the timestamp skips a line
timestamp = self.__matchTest('SET\stimestamp=(\d*);', self.file.readline(),1)
if timestamp == '':
timestamp = self.__matchTest('SET\stimestamp=(\d*);', self.file.readline(),1)
##this pseudo do..until loop is designed to find the first User@Host entry, which marks a new record.
##When we hit User@Host, we save the record in a class variable since it belongs to the next
##record.
while True:
line = self.file.readline()
sqlmatch = re.search(r'#\sTime:\s(.*)\n', line)
if sqlmatch: #skip Time: entry for SHOW STATUS type statements
line = self.file.readline()
sqlmatch = re.search(r'#\sUser@Host:\s(.*)\n', line)
if sqlmatch:
self.__user = sqlmatch.group(1)
break
if line == '':
break
sqlline.append(line)
sql = ' '.join(sqlline)
self.row = {'user': self.__user, 'query_time': query_time, 'lock_time':lock_time, 'rows_sent':rows_sent,
'rows_examined':rows_examined, 'timestamp':timestamp, 'sql_text':sql.rstrip(';\n')}
#if any value is missing, empty the dict
for r in self.row:
if self.row[r] == '':
empty = True
else: empty = False
if empty: return 0
# if empty: self.row.clear()
return 1
def close(self):
self.file.close()
def stringFieldValueByName(self, name):
return self.row[name]
########################################################################
##
## Form class
##
##
########################################################################
class LogParamFrm(mforms.Form):
def __init__(self):
mforms.Form.__init__(self, None)
self.logfile_path = ''
self.sort_method = 'query_time'
self.result = None
self.set_title("Log Parameters")
#bx -- first box as container for others
bx = mforms.newBox(False)
bx.set_padding(12)
bx.set_spacing(12)
self.set_content(bx)
#bx components
lbtitle = mforms.newLabel()
lbtitle.set_text("Slow Query Log Analyze Options")
self.rbfile = newRadioButton(1)
self.rbfile.add_clicked_callback(lambda: self.set_box())
self.rbfile.set_text("Analyze log file on disk (output=file)")
self.rbfile.set_active(True)
self.lbdetail = newLabel("Select an option to analyze a log file for the slow_log table:")
self.lbdetail.set_enabled(True)
# self.lbdetail.set_style(mforms.SmallStyle)
self.rbtable = newRadioButton(1)
self.rbtable.add_clicked_callback(lambda: self.set_box())
self.rbtable.set_text("Analyze slow_log table (output=table)")
bx.add(self.lbdetail, False, True)
bx.add(self.rbfile, False, True)
bx.add(self.rbtable, False, True)
bx.add(lbtitle, False, True)
#bxfile_path -- list path selection and radio buttons
bxfile_path = newBox(True)
bxfile_path.set_spacing(4)
self.tbfile = newTextEntry()
self.lbfile = newLabel(" File Path")
self.btfile = newButton()
self.btfile.set_text("...")
self.btfile.enable_internal_padding(False)
self.btfile.set_enabled(True)
self.btfile.add_clicked_callback(self.open_file_chooser)
bxfile_path.add(self.lbfile, False, False)
bxfile_path.add(self.tbfile, True, True)
bxfile_path.add(self.btfile, False, False)
bx.add(bxfile_path, False, True)
#bxOptions -- sorting and other obtions
bxoptions = mforms.newBox(True)
bxoptions.set_spacing(4)
bx.add(bxoptions, False, True)
lbsort = newLabel(" Sort")
self.cbsort = newSelector()
self.cbsort.set_size(150, -1)
sort_items = ("query_time", "count", "rows_sent","rows_examined", "lock_time")
for item in sort_items:
self.cbsort.add_item(item)
self.cbsort.add_changed_callback(lambda: self.get_sort_method())
bxoptions.add(lbsort, False, False)
bxoptions.add(self.cbsort, False, False)
#bxOK --- last items on window
bxOK = mforms.newBox(True)
bx.add(bxOK, False, True)
bxOK.set_spacing(4)
self.ok = mforms.newButton()
self.ok.set_text("OK")
self.ok.set_enabled(False)
bxOK.add_end(self.ok, False, False)
cancel = mforms.newButton()
cancel.set_text("Cancel")
bxOK.add_end(cancel, False, True)
self.set_size(400, 300)
self.center()
self.result = self.run_modal(self.ok, cancel)
#-------open_file_chooser method---------
def open_file_chooser(self):
filechooser = FileChooser(mforms.OpenFile)
filechooser.set_directory(self.logfile_path)
if filechooser.run_modal():
self.logfile_path = filechooser.get_path()
# self.tbfile.set_value(self.logfile_path)
self.tbfile.set_value(self.logfile_path)
if self.logfile_path != '':
self.ok.set_enabled(True)
#-------file path box visibility toggle method------
def set_box(self):
if self.rbfile.get_active():
self.tbfile.show(True)
self.lbfile.show(True)
self.btfile.show(True)
self.ok.set_enabled(True)
self.ok.set_enabled(False)
else:
self.ok.set_enabled(True)
self.tbfile.show(False)
self.lbfile.show(False)
self.btfile.show(False)
#---------------------------------------------------------------------------
def get_sort_method(self):
self.sort_method = self.cbsort.get_string_value()

Wow, looks like fake Visual Studio.
Comment by Vladimir Vassiliev — September 29, 2010 @ 4:19 pm |
When I try to open the file (downloaded from the database server to my local ubuntu machine) I get the following error:
Error during execution of “Slow Log Stats”
error calling slowlogstats.optionPrompt: see output for details
Now output is displayed.
Comment by Gez Page — August 26, 2011 @ 2:20 pm |