DELIMITER $$
USE `xxx`$$
DROP FUNCTION IF EXISTS `F_getBuluDates`$$
CREATE DEFINER=`root`@`localhost` FUNCTION `F_getBuluDates`(`PuserId` INT(11)) RETURNS VARCHAR(3000) CHARSET gbk
BEGIN
DECLARE buluDate,buluDateTime,LiWaibuluDates,nowdate,bldate VARCHAR(3000) DEFAULT "";
DECLARE rtMsg,RuserName,maxRiZhiDate,btjrqs,blyqrq,tmpdate,yxblsj VARCHAR(255) DEFAULT "";
DECLARE Ra51count INT(30);
DECLARE RbeginDate,RendDate,RtmpDate VARCHAR(2550);
DECLARE u,gqcount,a21Count INT(30) DEFAULT 0;
DECLARE allReadRows,readRows,mySYL,blCount,recordCount,btjryCount,btjrqCount,yxblts,i INT(11) DEFAULT 0;
SELECT a5204,a5205 INTO yxblts,yxblsj FROM a52 WHERE a5218="xxxxxxx" LIMIT 0,1;
SELECT username INTO RuserName FROM USER WHERE id=PuserId;
SELECT COUNT(id) INTO btjryCount FROM a52 WHERE a5214=PuserId AND a5218='xxxxx';
IF btjryCount=0 THEN
SELECT GROUP_CONCAT(a5215) INTO btjrqs FROM a52 WHERE a5218='btjrq';
SET tmpdate=CURDATE();
SET nowdate=DATE_SUB(CURDATE(),INTERVAL yxblts DAY);
REPEAT
SELECT MAX(a2104) INTO maxRiZhiDate FROM a21 WHERE issubmit='y' AND a2104<tmpdate AND FIND_IN_SET(a2104,bldate)=0;
SET tmpdate=maxRiZhiDate;
IF tmpdate>=nowdate THEN
IF F_isWorkDay(maxRiZhiDate)=1 OR ISNULL(maxRiZhiDate) THEN
IF NOT ISNULL(maxRiZhiDate) AND LENGTH(maxRiZhiDate)>0 THEN
IF FIND_IN_SET(maxRiZhiDate,btjrqs)=0 THEN
SELECT COUNT(id) INTO recordCount FROM a21 WHERE a2104=maxRiZhiDate AND creatorid=PuserId;
IF recordCount=0 THEN
IF DAYOFWEEK(maxRiZhiDate)=6 THEN
SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+2+yxblts)," ",yxblsj);
ELSEIF DAYOFWEEK(maxRiZhiDate)=7 THEN
SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+1+yxblts)," ",yxblsj);
ELSE
SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+yxblts)," ",yxblsj);
END IF;
IF(CURRENT_TIMESTAMP()<buluDateTime) THEN
SET buluDate=maxRiZhiDate;
END IF;
END IF;
END IF;
END IF;
END IF;
IF CHAR_LENGTH(buluDate)>0 THEN
IF CHAR_LENGTH(bldate)>0 THEN
SET bldate=CONCAT(buluDate,",",bldate);
ELSE
SET bldate=buluDate;
END IF;
END IF;
END IF;
SET buluDate='';
UNTIL tmpdate<=nowdate
END REPEAT;
SELECT COUNT(id) INTO gqcount FROM a52 WHERE a5216=CURDATE() AND a5218='xxxxxx';
IF gqcount>0 THEN
SELECT a5215 INTO blyqrq FROM a52 WHERE a5216=CURDATE() AND a5218='xxxxxx';
SELECT COUNT(id) INTO a21Count FROM a21 WHERE a2104=blyqrq AND creatorid=PuserId;
IF a21Count=0 THEN
IF FIND_IN_SET(blyqrq,bldate)=0 THEN
IF CHAR_LENGTH(bldate)>0 THEN
SET bldate=CONCAT(blyqrq,",",bldate);
ELSE
SET bldate=blyqrq;
END IF;
END IF;
END IF;
END IF;
IF ISNULL(btjrqs) THEN
SET btjrqs="";
END IF;
SELECT GROUP_CONCAT(a5108) INTO LiWaibuluDates FROM a51 WHERE a5110='y' AND F_ifInSet(a5108,btjrqs)=0 AND a5106 LIKE RuserName AND a5112='否' AND CONCAT(a5108," ",a5113)<CURRENT_TIMESTAMP() AND CURRENT_TIMESTAMP()<CONCAT(a5114," ",a5113);
IF NOT ISNULL(LiWaibuluDates) AND LENGTH(TRIM(LiWaibuluDates))>0 THEN
IF CHAR_LENGTH(bldate)>0 THEN
SET bldate=CONCAT(LiWaibuluDates,",",bldate);
ELSE
SET bldate=LiWaibuluDates;
END IF;
END IF;
SELECT COUNT(id) INTO RA51count FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId;
IF RA51count>0 THEN
SET u=0;
WHILE u< RA51count DO
SELECT a5108,a5109 INTO RbeginDate,RendDate FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId ORDER BY id LIMIT u,1;
SET RtmpDate=RbeginDate;
WHILE RtmpDate<=RendDate DO
IF FIND_IN_SET(RtmpDate,bldate)>0 THEN
SET bldate=F_removeFromSet(RtmpDate,bldate);
END IF;
SET RtmpDate=DATE_ADD(RtmpDate, INTERVAL 1 DAY);
END WHILE;
SET u=u+1;
END WHILE;
END IF;
END IF;
RETURN bldate;
END$$
DELIMITER ;
在老系統中該函數調用一次需要話20多秒到30秒左右。
拿到sql之後,首先要確定思路。不能著急這下手。
1. 首先查看各個表的數據量:
select count(*) from xxx;
發現只有 a21 的數據量達到了十幾萬,其他表數據量都比較小。所以重點是 a21表,仔細閱讀了一遍函數的定義,發現涉及 到 a21 的有一處循環:
REPEAT SELECT MAX(a2104) INTO maxRiZhiDate FROM a21 WHERE issubmit='y' AND a2104<tmpdate AND FIND_IN_SET(a2104,bldate)=0;
很顯然使用了 find_in_set 函數,所以改語句無法使用索引。所以想要簡單的通過增加索引來解決問題,應該是行不通的。
這樣就定位到了函數運行慢的問題。結合業務理解改函數的含義。
為了使用索引,我們需要去掉 find_in_set 函數,理解了業務和原函數的功能之後,對改函數的 repeat 循環部分進行了重寫:
DELIMITER $$
USE `oa`$$
DROP FUNCTION IF EXISTS `F_getBuluDates_inner`$$
CREATE DEFINER=`root`@`localhost` FUNCTION `F_getBuluDates_inner`(`PuserId` INT(11), tmpdate VARCHAR(32), nowdate VARCHAR(32)) RETURNS VARCHAR(3000) CHARSET gbk
BEGIN
DECLARE buluDate,buluDateTime,LiWaibuluDates,bldate VARCHAR(3000) DEFAULT "";
DECLARE rtMsg,RuserName,maxRiZhiDate,btjrqs,blyqrq,yxblsj VARCHAR(255) DEFAULT "";
DECLARE Ra51count INT(30);
DECLARE RbeginDate,RendDate,RtmpDate VARCHAR(2550);
DECLARE u,gqcount,a21Count INT(30) DEFAULT 0;
DECLARE allReadRows,readRows,mySYL,blCount,recordCount,btjryCount,btjrqCount,yxblts,i INT(11) DEFAULT 0;
DECLARE no_more_data INT DEFAULT 0;
DECLARE my_cursor CURSOR FOR SELECT DISTINCT a2104 FROM a21 WHERE issubmit='y' AND a2104 < tmpdate AND a2104 >= nowdate ORDER BY a2104 DESC;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET no_more_data = 1;
SELECT a5204,a5205 INTO yxblts,yxblsj FROM a52 WHERE a5218="gzrz" LIMIT 0,1;
SELECT username INTO RuserName FROM USER WHERE id=PuserId;
SELECT COUNT(id) INTO btjryCount FROM a52 WHERE a5214=PuserId AND a5218='xxxxx';
IF btjryCount=0 THEN
SELECT GROUP_CONCAT(a5215) INTO btjrqs FROM a52 WHERE a5218='xxx';
OPEN my_cursor;
FETCH my_cursor INTO maxRiZhiDate;
REPEAT
IF F_isWorkDay(maxRiZhiDate)=1 OR ISNULL(maxRiZhiDate) THEN
IF NOT ISNULL(maxRiZhiDate) AND LENGTH(maxRiZhiDate)>0 THEN
IF FIND_IN_SET(maxRiZhiDate,btjrqs)=0 THEN
SELECT COUNT(id) INTO recordCount FROM a21 WHERE a2104=maxRiZhiDate AND creatorid=PuserId;
IF recordCount=0 THEN
IF DAYOFWEEK(maxRiZhiDate)=6 THEN
SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+2+yxblts)," ",yxblsj);
ELSEIF DAYOFWEEK(maxRiZhiDate)=7 THEN
SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+1+yxblts)," ",yxblsj);
ELSE
SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+yxblts)," ",yxblsj);
END IF;
IF(CURRENT_TIMESTAMP()<buluDateTime) THEN
SET buluDate=maxRiZhiDate;
END IF;
END IF;
END IF;
END IF;
END IF;
IF CHAR_LENGTH(buluDate)>0 THEN
IF CHAR_LENGTH(bldate)>0 THEN
SET bldate=CONCAT(buluDate,",",bldate);
ELSE
SET bldate=buluDate;
END IF;
END IF;
FETCH my_cursor INTO maxRiZhiDate;
SET buluDate='';
UNTIL no_more_data = 1
END REPEAT;
SELECT COUNT(id) INTO gqcount FROM a52 WHERE a5216=CURDATE() AND a5218='xxx';
IF gqcount>0 THEN
SELECT a5215 INTO blyqrq FROM a52 WHERE a5216=CURDATE() AND a5218='xxxxxxxxxx';
SELECT COUNT(id) INTO a21Count FROM a21 WHERE a2104=blyqrq AND creatorid=PuserId;
IF a21Count=0 THEN
IF FIND_IN_SET(blyqrq,bldate)=0 THEN
IF CHAR_LENGTH(bldate)>0 THEN
SET bldate=CONCAT(blyqrq,",",bldate);
ELSE
SET bldate=blyqrq;
END IF;
END IF;
END IF;
END IF;
IF ISNULL(btjrqs) THEN
SET btjrqs="";
END IF;
SELECT GROUP_CONCAT(a5108) INTO LiWaibuluDates FROM a51 WHERE a5110='y' AND F_ifInSet(a5108,btjrqs)=0 AND a5106 LIKE RuserName AND a5112='否' AND CONCAT(a5108," ",a5113)<CURRENT_TIMESTAMP() AND CURRENT_TIMESTAMP()<CONCAT(a5114," ",a5113);
IF NOT ISNULL(LiWaibuluDates) AND LENGTH(TRIM(LiWaibuluDates))>0 THEN
IF CHAR_LENGTH(bldate)>0 THEN
SET bldate=CONCAT(LiWaibuluDates,",",bldate);
ELSE
SET bldate=LiWaibuluDates;
END IF;
END IF;
SELECT COUNT(id) INTO RA51count FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId;
IF RA51count>0 THEN
SET u=0;
WHILE u< RA51count DO
SELECT a5108,a5109 INTO RbeginDate,RendDate FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId ORDER BY id LIMIT u,1;
SET RtmpDate=RbeginDate;
WHILE RtmpDate<=RendDate DO
IF FIND_IN_SET(RtmpDate,bldate)>0 THEN
SET bldate=F_removeFromSet(RtmpDate,bldate);
END IF;
SET RtmpDate=DATE_ADD(RtmpDate, INTERVAL 1 DAY);
END WHILE;
SET u=u+1;
END WHILE;
END IF;
END IF;
RETURN bldate;
END$$
DELIMITER ;
然後為了讓新的 sql 使用索引:
DECLARE my_cursor CURSOR FOR
SELECT DISTINCT a2104 FROM a21 WHERE issubmit='y' AND a2104 < tmpdate AND a2104 >= nowdate ORDER BY a2104 DESC;
新增了索引:
add index issubmit_a2104(issubmit,a2104);
最終的提供給外部的調用函數:
DELIMITER $$
CREATE FUNCTION F_getBuluDates_new(`PuserId` INT(11)) RETURNS VARCHAR(3000) CHARSET gbk
BEGIN
DECLARE bldate VARCHAR(3000) DEFAULT "";
DECLARE tmpdate,nowdate VARCHAR(32) DEFAULT "";
DECLARE yxblts INT;
SELECT a5204 INTO yxblts FROM a52 WHERE a5218="gzrz" LIMIT 0,1;
SET tmpdate = CURDATE();
SET nowdate = DATE_SUB(tmpdate,INTERVAL yxblts DAY);
SET bldate = F_getBuluDates_inner(PuserId,tmpdate,nowdate);
RETURN bldate;
END
$$
測試效果:
mysql> select F_getBuluDates_new(10687); +---------------------------------------------------------------------------------------------------------------+ | F_getBuluDates_new(10687) | +---------------------------------------------------------------------------------------------------------------+ | 2016-08-04,2016-08-05,2016-08-08,2016-08-09,2016-08-10,2016-08-11,2016-08-12,2016-08-15,2016-08-16,2016-08-17 | +---------------------------------------------------------------------------------------------------------------+ 1 row in set (1.10 sec) mysql> select F_getBuluDates(10687); +---------------------------------------------------------------------------------------------------------------+ | F_getBuluDates(10687) | +---------------------------------------------------------------------------------------------------------------+ | 2016-08-04,2016-08-05,2016-08-08,2016-08-09,2016-08-10,2016-08-11,2016-08-12,2016-08-15,2016-08-16,2016-08-17 | +---------------------------------------------------------------------------------------------------------------+ 1 row in set (13.10 sec)
性能提升了 11 倍多。客戶對效果很滿意。
新的執行計劃:
mysql> explain SELECT DISTINCT a2104 FROM a21 WHERE issubmit='y' AND a2104 < '2016-08-16' AND a2104 >= '2016-08-10' ORDER BY a2104 DESC; +----+-------------+-------+-------+----------------------+----------------+---------+------+------+--------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +----+-------------+-------+-------+----------------------+----------------+---------+------+------+--------------------------+ | 1 | SIMPLE | a21 | range | a2104,issubmit_a2104 | issubmit_a2104 | 516 | NULL | 834 | Using where; Using index | +----+-------------+-------+-------+----------------------+----------------+---------+------+------+--------------------------+ 1 row in set (0.00 sec)
總結:
從原函數實現來看,在數量小時,還是可以的。但是一旦數據量大了,就會出現性能問題。主要問題是在 where 條件中使用了太多的自定義函數,而且這個函數的參數還是表的數據列。
導致無法使用索引。無法使用索引就會進行全表掃描,所以數據量大時會導致性能問題。