##================================================================##
先來個笑話:
剛交的女朋友讓我去他家玩,我覺得空手不好,於是告訴她我買點東西就過去,誰知道她突然來一句:不用買,家裡還有一盒!
##================================================================##
昨天在折騰一台磁盤出現性能問題的MySQL時,根據監控去慢日志裡找,發現短短10分鐘有好幾萬行慢日志,由於對慢日志結構不熟悉,打開一個600+MB的文件拖著滾動條看,好多慢SQL一條SQL就有幾百行,看來看去都沒找到問題最根本原因,總算功夫不負有心人,總算在不經意間找到,要不然讓我在無知中慢慢老去???
問題SQL如下:

造成的影響:

懷疑造成問題的原因:
由於子查詢中沒有任何過濾條件,進行全表掃描,而服務器內存不夠,導致掃描出來的數據有寫到臨時文件中,同時服務器上還有其他業務運行,服務器無法及時處理完所有請求,造成積壓,而積壓請求又增加服務器壓力,最終導致相互影響惡性循環。
也不知道研發為啥蛋疼地這麼套一下,反正Mysql語句優化處理能力這麼差,大家盡可能簡化SQL吧
##===================================================##
言歸正傳,由於這一個SQL執行頻率較低,使用pt工具分析不能准確定位到該SQL,而使用肉眼看又比較蛋疼,因此寫個小工具來處理,同時還可以解決慢日志較大閱讀不便的問題。
python腳本如下:
# coding: utf-8
import datetime
from optparse import OptionParser
MAX_QUERY_TIME_SECONDS = 10
START_DATETIME = datetime.datetime.min
END_DATETIME = datetime.datetime.max
SLOW_LOG_PATH = ""
class EventInfo(object):
def __init__(self):
self.exec_time = None
self.login_user = ""
self.query_time_seconds = 0
self.lock_time_seconds = 0
self.rows_sent = 0
self.rows_examined = 0
self.command_text = ""
self.file_line_num = 0
def get_datetime_by_time_line(time_line):
if time_line.startswith("# Time:"):
time_str = "20" + time_line.replace("# Time:", "").replace(" ", " ").strip()
return datetime.datetime.strptime(time_str, "%Y%m%d %H:%M:%S")
else:
return None
def set_query_info(event_info, query_info_line):
line = query_info_line
if line.startswith("# Query_time:"):
tmp_line = line.replace("#", "").replace(": ", ":").replace(" ", " ").replace(" ", " ")
tmp_items = tmp_line.split(" ")
for tmp_item in tmp_items:
# print(tmp_item)
if tmp_item.strip().startswith("Query_time:"):
event_info.query_time_seconds = float(tmp_item.replace("Query_time:", ""))
if tmp_item.strip().startswith("Lock_time:"):
event_info.lock_time_seconds = float(tmp_item.replace("Lock_time:", ""))
if tmp_item.strip().startswith("Rows_sent:"):
event_info.rows_sent = float(tmp_item.replace("Rows_sent:", ""))
if tmp_item.strip().startswith("Rows_examined:"):
event_info.rows_examined = float(tmp_item.replace("Rows_examined:", ""))
def set_login_user(event_info, login_user_line):
line = login_user_line
if line.startswith("# User@Host:"):
event_info.login_user = line.replace("# User@Host:", "").strip()
def set_command_text(event_info, command_text_line):
line = command_text_line
event_info.command_text += chr(13) + line
def print_event_info(event_info):
print_info = """
file_line_num: {7}
Time: {0}
User@Host: {1}
Query_time: {2:.5f} seconds
Lock_time: {3:.5f} seconds
Rows_sent: {4}
Rows_examined: {5}
command_text:
{6}
""".format(event_info.exec_time,
event_info.login_user,
event_info.query_time_seconds,
event_info.lock_time_seconds,
event_info.rows_sent,
event_info.rows_examined,
event_info.command_text,
event_info.file_line_num)
print("*" * 70)
print(print_info)
def get_user_paras():
try:
opt = OptionParser()
opt.add_option('--slow_log',
dest='slow_log',
type=str,
help='the file path of the slow log ')
opt.add_option('--long_query_seconds',
dest='long_query_seconds',
type=float,
help='long query seconds')
opt.add_option('--start_datetime',
dest="start_datetime",
type=str,
help="start datetime, format is %Y-%m-%d %H:%M:%S")
opt.add_option('--end_datetime',
dest="end_datetime",
type=str,
help="end datetime, format is %Y-%m-%d %H:%M:%S")
(options, args) = opt.parse_args()
error_messages = []
slow_log = options.slow_log
start_datetime = options.start_datetime
long_query_seconds = options.long_query_seconds
end_datetime = options.end_datetime
if not slow_log:
error_messages.append("slow_log must be set;")
else:
global SLOW_LOG_PATH
SLOW_LOG_PATH = slow_log
if not long_query_seconds:
error_messages.append("long_query_seconds must be set;")
else:
global MAX_QUERY_TIME_SECONDS
MAX_QUERY_TIME_SECONDS = long_query_seconds
try:
if start_datetime is not None:
tmp_start_datetime = datetime.datetime.strptime(start_datetime, "%Y-%m-%d %H:%M:%S")
global START_DATETIME
START_DATETIME = tmp_start_datetime
except:
error_messages.append("the format of start_datetime was wrong ;")
try:
if end_datetime is not None:
tmp_end_datetime = datetime.datetime.strptime(end_datetime, "%Y-%m-%d %H:%M:%S")
global END_DATETIME
END_DATETIME = tmp_end_datetime
except:
error_messages.append("the format of end_datetime was wrong ;")
if len(error_messages) > 0:
opt.print_help()
for error_message in error_messages:
print(error_message)
return False
else:
return True
except Exception as ex:
print("exception :{0}".format(str(ex)))
return False
def read_log():
global MAX_QUERY_TIME_SECONDS
file_path = SLOW_LOG_PATH
file_handler = open(file_path)
line = file_handler.readline()
line_num = 1
find_event_count = 0
current_event_info = EventInfo()
current_time = ""
has_time = False
while line:
if line.startswith("# Time:"):
current_time = get_datetime_by_time_line(line)
if current_time < START_DATETIME:
line = file_handler.readline()
line_num += 1
continue
if current_time > END_DATETIME:
break
line = file_handler.readline()
line_num += 1
has_time = True
continue
if has_time:
if line.startswith("# User@Host:"):
if current_event_info.query_time_seconds > MAX_QUERY_TIME_SECONDS:
print_event_info(event_info=current_event_info)
find_event_count += 1
current_event_info = EventInfo()
current_event_info.file_line_num = line_num
current_event_info.exec_time = current_time
set_login_user(event_info=current_event_info,
login_user_line=line)
elif line.startswith("# Query_time:"):
set_query_info(event_info=current_event_info,
query_info_line=line)
else:
set_command_text(event_info=current_event_info,
command_text_line=line)
line = file_handler.readline()
line_num += 1
print("*" * 70)
print("parse completely")
print("find {0} events".format(find_event_count))
def main():
if get_user_paras():
read_log()
if __name__ == '__main__':
main()
腳本調用方式如:
C:\Python27\python.exe D:/GGA/Code/slow_query_find/find_slow_quer.py --slow_log="D:\slow.log.2016010900" --long_query_seconds=4 --start_datetime="2016-01-08 15:46:00" --end_datetime="2016-01-08 15:50:00"
顯示結果如:

##==================================================================##
