MySql-索引優化
索引分析
單表
# Demo table for the single-table index analysis, seeded with three rows.
CREATE TABLE IF NOT EXISTS `article` (
    `id`          INT(10) UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
    `author_id`   INT(10) UNSIGNED NOT NULL,
    `category_id` INT(10) UNSIGNED NOT NULL,
    `views`       INT(10) UNSIGNED NOT NULL,
    `comments`    INT(10) UNSIGNED NOT NULL,
    `title`       VARBINARY(255)   NOT NULL,
    `content`     TEXT             NOT NULL
);
INSERT INTO `article`
    (`author_id`, `category_id`, `views`, `comments`, `title`, `content`)
VALUES
    (1, 1, 1, 1, '1', '1'),
    (2, 2, 2, 2, '2', '2'),
    (1, 1, 3, 3, '3', '3');
1、查詢
# Goal: among articles with category_id = 1 and comments > 1, find the one with the most views.
EXPLAIN
SELECT id, author_id
FROM article
WHERE category_id = 1
  AND comments > 1
ORDER BY views DESC
LIMIT 1;
# Conclusion: type is ALL (the worst case) and Extra also shows Using filesort (also the worst case), so optimization is required.
2、優化
# Create a composite index on (category_id, comments, views).
# Each statement is on its own line so that it is not swallowed by a leading `#` comment;
# the later DROP INDEX idx_article_ccv requires this index to actually exist.
CREATE INDEX idx_article_ccv ON article (category_id, comments, views);
# Second EXPLAIN
EXPLAIN
SELECT id, author_id
FROM `article`
WHERE category_id = 1
  AND comments > 1
ORDER BY views DESC
LIMIT 1;
# Conclusion: type becomes range, which is tolerable, but Using filesort in Extra is still unacceptable.
# Why is the index we just created not used for the sort?
# A BTree index orders entries by category_id first; entries with the same category_id are then
# ordered by comments, and entries with the same comments are then ordered by views.
# comments sits in the middle of the composite index, and comments > 1 is a range condition
# (range means >, <, BETWEEN ... AND ...), so MySQL cannot use the index for the views part that
# follows it: index columns after a range-queried column are not usable.
# With an equality condition on comments instead, the problem does not occur:
EXPLAIN
SELECT id, author_id
FROM `article`
WHERE category_id = 1
  AND comments = 3
ORDER BY views DESC
LIMIT 1;
3、再優化
# Replace the three-column index with one that skips the range column (comments).
DROP INDEX idx_article_ccv ON article;
CREATE INDEX idx_article_cv ON article (category_id, views);
# Third EXPLAIN (placed on its own line so it is executed rather than hidden behind a `#` comment)
EXPLAIN
SELECT id, author_id
FROM article
WHERE category_id = 1
  AND comments > 1
ORDER BY views DESC
LIMIT 1;
# Conclusion: type becomes ref and Using filesort disappears from Extra; the result is ideal.
兩表
# Table used on the left side of the LEFT JOIN examples.
CREATE TABLE IF NOT EXISTS `class` (
    `id`   INT(10) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    `card` INT(10) UNSIGNED NOT NULL
);
# Table joined to class on the card column in the LEFT JOIN examples.
CREATE TABLE IF NOT EXISTS `book` (
    `bookid` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    `card`   INT(10) UNSIGNED NOT NULL
);
# Seed class with 20 rows; each card is a random integer from 1 to 20.
INSERT INTO class (card)
VALUES
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20)));
# Seed book with 20 rows; each card is a random integer from 1 to 20.
INSERT INTO book (card)
VALUES
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20)));
1、查詢
# Start the EXPLAIN analysis: class LEFT JOIN book on card.
EXPLAIN
SELECT *
FROM class
LEFT JOIN book
    ON class.card = book.card;
# Conclusion: type shows ALL.
2、優化
# Optimize by adding an index on book.card, the join column of the right table.
CREATE INDEX Y ON `book` (`card`);
# Second EXPLAIN
EXPLAIN
SELECT *
FROM class
LEFT JOIN book
    ON class.card = book.card;
# The second row's type becomes ref, and rows is reduced noticeably.
# This follows from how LEFT JOIN works: the join condition determines how rows are looked up
# in the right table, while every row of the left table is always returned,
# so the right table is the key point and must have the index.
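左連線時,若把索引加在左表(class)的 card 上,是錯誤的:例如在 book 上沒有索引的情況下只執行 ALTER TABLE `class` ADD INDEX X (`card`); 再做同樣的 EXPLAIN,右表 book 的 type 仍然是 ALL,因為左表的資料無論如何都要全部讀取,索引幫不上忙。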
左連線索引應該加在右表上,因為左邊全都有,右表是關鍵,右表是被驅動表
三表
# Third table for the three-table join example; joined to book on card.
CREATE TABLE IF NOT EXISTS `phone` (
    `phoneid` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    `card`    INT(10) UNSIGNED NOT NULL
) ENGINE = INNODB;
# Seed phone with 20 rows; each card is a random integer from 1 to 20.
INSERT INTO phone (card)
VALUES
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))),
    (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20))), (FLOOR(1 + (RAND() * 20)));
建立索引查詢
# Index the join column card on both phone and book.
CREATE INDEX z ON `phone` (`card`);
CREATE INDEX Y ON `book` (`card`); # the previous case already created an identical index
EXPLAIN
SELECT *
FROM class
LEFT JOIN book
    ON class.card = book.card
LEFT JOIN phone
    ON book.card = phone.card;
# The last two rows both have type ref and the total rows is well optimized; the effect is good.
# Indexes are therefore best placed on columns that are queried frequently.
==================================================================================
【結論】
Join語句的優化
儘可能減少Join語句中的巢狀迴圈的迴圈總次數;“永遠用小結果集驅動大的結果集”。
優先優化巢狀迴圈的內層迴圈;
保證Join語句中被驅動表上Join條件欄位已經被索引;
當無法保證被驅動表的Join條件欄位被索引且記憶體資源充足的前提下,不要太吝惜JoinBuffer的設定;
索引失效的問題
# Employee table for the index-invalidation examples, with two sample rows
# and a composite index on (name, age, pos).
CREATE TABLE staffs (
    id       INT         AUTO_INCREMENT PRIMARY KEY,
    NAME     VARCHAR(24) NOT NULL DEFAULT '' COMMENT '姓名',
    age      INT         NOT NULL DEFAULT 0 COMMENT '年齡',
    pos      VARCHAR(20) NOT NULL DEFAULT '' COMMENT '職位',
    add_time TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '入職時間'
) CHARSET utf8 COMMENT '員工記錄表';
INSERT INTO staffs (NAME, age, pos, add_time)
VALUES
    ('z3', 22, 'manager', NOW()),
    ('July', 23, 'dev', NOW());
SELECT * FROM staffs;
CREATE INDEX idx_staffs_nameAgePos ON staffs (name, age, pos);
如何避免索引失效
1.全值匹配我最愛(where 條件最好涵蓋索引中的所有欄位;等值條件的書寫順序不影響索引使用,優化器會自動按索引順序調整)
# Equality predicates on one, two, then all three columns of the (name, age, pos) index.
EXPLAIN
SELECT *
FROM staffs
WHERE NAME = 'July';
EXPLAIN
SELECT *
FROM staffs
WHERE NAME = 'July'
  AND age = 25;
EXPLAIN
SELECT *
FROM staffs
WHERE NAME = 'July'
  AND age = 25
  AND pos = 'dev';
2.最佳左字首法則(帶頭大哥不能死):如果索引了多列,要遵守最左字首法則。指的是查詢從索引的最左前列開始並且不跳過索引中的列。
# Neither query filters on name, the leading column of the (name, age, pos) index.
EXPLAIN
SELECT *
FROM staffs
WHERE age = 25
  AND pos = 'dev';
EXPLAIN
SELECT *
FROM staffs
WHERE pos = 'dev';
3.不在索引列上做任何操作(計算、函式、(自動或者手動)型別轉換),會導致索引失效而轉向全表掃描
# Applies a function (LEFT) to the indexed column NAME inside the predicate.
EXPLAIN
SELECT *
FROM staffs
WHERE LEFT(NAME, 4) = 'July';
索引列上使用了表示式,如 where substr(a, 1, 3) = 'hhh'、where a + 1 = 10,都會讓索引失效;對索引列做運算是一大忌諱,再簡單的表示式 MySQL 也不認。
有時資料量不是大到嚴重影響速度時,一般可以先查出來,比如先查所有有訂單記錄的資料,再在程式中去篩選
4.儲存引擎不能使用索引中範圍條件右邊的列
5.儘量使用覆蓋索引(只訪問索引的查詢(索引列和查詢列一致)),減少select *
6.mysql 在使用不等於(!= 或者<>)的時候無法使用索引會導致全表掃描
7.is null ,is not null 也無法使用索引
8.like以萬用字元開頭('%abc...')mysql索引失效會變成全表掃描的操作
如果必須用到%開頭匹配的需求,用覆蓋索引來解決
9.字串不加單引號索引失效
10.少用or,用它來連線時會索引失效
總結:
索引為複合索引:index(a,b,c)
練習
# Practice table: five CHAR(10) columns, five sample rows,
# and a composite index on (c1, c2, c3, c4); c5 is not indexed.
CREATE TABLE test03 (
    id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    c1 CHAR(10),
    c2 CHAR(10),
    c3 CHAR(10),
    c4 CHAR(10),
    c5 CHAR(10)
);
INSERT INTO test03 (c1, c2, c3, c4, c5)
VALUES
    ('a1', 'a2', 'a3', 'a4', 'a5'),
    ('b1', 'b2', 'b3', 'b4', 'b5'),
    ('c1', 'c2', 'c3', 'c4', 'c5'),
    ('d1', 'd2', 'd3', 'd4', 'd5'),
    ('e1', 'e2', 'e3', 'e4', 'e5');
ALTER TABLE test03 ADD INDEX idx_test03_c1234 (c1, c2, c3, c4);
SHOW INDEX FROM test03;
1.正常語句
2.mysql的優化器會按照索引順序優化
3.mysql的優化器會按照索引順序優化
4.用到了 '>' 所以type是range,範圍後的c4索引用不到
5.mysql優化器會按索引順序優化成 where c1='a1' and c2='a2' and c3='a3' and c4>'a4';用到四個索引,有'>' 所以type是range
6. 用到了c1、c2索引c3作用在排序而不是查詢,用不到c4索引
7.和6效果一樣
8.跨過了c3 直接order by c4產生了內排序
9. 只用c1一個欄位索引,但是c2、c3用於排序,是按照索引順序的,所以無filesort
10. 用到了c1索引,order by c3 c2 顛倒了,它沒有按照順序來,出現了filesort
11.正常
12.正常
13.雖然order by c3,c2 沒有按照索引順序,但是前面where c2='a2' 已經確定c2是常量固定值,所以c2 order by沒有任何影響
14.出現內排序: filesort
15.查詢用到索引c1 group by c2,c3也是按照索引順序來的 用來分組排序 正常
16.用到索引c1 但是group by c3,c2沒有按照索引順序,所以出現Using temporary用到了臨時表,但是group by前必排序(order by)所以出現了Using filesort
17.用到了索引c1;c4 因為中間斷了,沒有用到;c2、c3用來分組排序
order by索引練習
# Table for the ORDER BY index exercises, with three sample rows and a
# composite index on (age, birth). The id primary-key column is commented out,
# so the table consists of age and birth only.
CREATE TABLE tblA (
    # id INT PRIMARY KEY NOT NULL AUTO_INCREMENT,
    age   INT,
    birth TIMESTAMP NOT NULL
);
INSERT INTO tblA (age, birth)
VALUES
    (22, NOW()),
    (23, NOW()),
    (24, NOW());
ALTER TABLE tblA ADD INDEX idx_A_ageBirth (age, birth);
group by索引
1.group by實質是先排序後進行分組,遵照索引建的最佳左字首
2.當無法使用索引列,增大max_length_for_sort_data引數的設定+增大sort_buffer_size引數的設定
3.where高於having,能寫在where限定的條件就不要去having限定了。
小表驅動大表
類似巢狀迴圈(Nested Loop):外層迴圈每執行一次就要建立一次資料庫連線,所以應該讓外層迴圈的次數少(連線次數少),讓每次連線在內層迴圈中查詢較多的資料。
例子:表emp代表員工表,表dep代表部門表
# When the subquery table (dep, referred to as B) has fewer rows than the outer table
# (emp, referred to as A), IN is preferable to EXISTS.
SELECT *
FROM emp e
WHERE e.depId IN (SELECT d.id FROM dep d);
# Conceptually equivalent to the following nested loop (pseudo-code, kept as comments
# because it is not valid SQL):
#   for select d.id from dep d;                        (outer loop: d is the small driving table)
#     for select * from emp e where e.depId = d.id;    (inner loop: e is the large driven table)

# When the outer table (emp, A) has fewer rows than the subquery table (dep, B),
# EXISTS is preferable to IN.
SELECT *
FROM emp e
WHERE EXISTS (SELECT 1 FROM dep d WHERE d.id = e.depId);
# Conceptually equivalent to the following nested loop (pseudo-code, not valid SQL):
#   for select * from emp e;                                 (outer loop: e is the small driving table)
#     for select * (or 1) from dep d where d.id = e.depId    (inner loop: d is the large driven table)
# EXISTS statement (syntax template only; kept as a comment because it is not executable SQL):
#   SELECT ... FROM table WHERE EXISTS (subquery);
# Read it as: each row of the main query is passed into the subquery for a condition check,
# and the check result (true or false) decides whether that row is kept.
# Notes
# 1. EXISTS (subquery) only returns true or false, so SELECT * in the subquery can just as well be
#    SELECT 1 or anything else; the select list is ignored at execution time, so there is no difference.
# 2. The actual execution of an EXISTS subquery may have been optimized and need not be the row-by-row
#    comparison we imagine; if efficiency is a concern, measure it to find out whether there is a problem.
# 3. An EXISTS subquery can often be replaced by a conditional expression, another subquery, or a JOIN;
#    which one is best has to be analyzed case by case.