主键是直接按照表的行数来估计的,表的行数,优化器直接使用show table status like 't'的值
手动触发索引统计:
-- 重新统计索引信息
mysql> analyze table t;
排序对索引选择的影响
-- 创建表
mysql> CREATE TABLE `t` (
`id` int(11) NOT NULL,
`a` int(11) DEFAULT NULL,
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `a` (`a`),
KEY `b` (`b`)
) ENGINE=InnoDB;
-- 定义测试数据存储过程
mysql> delimiter ;
CREATE PROCEDURE idata ()
BEGIN
DECLARE i INT ;
SET i = 1 ;
WHILE (i <= 100000) DO
INSERT INTO t
VALUES
(i, i, i) ;
SET i = i + 1 ;
END
WHILE ;
END;
delimiter ;
-- 执行存储过程,插入测试数据
mysql> CALL idata ();
-- 查看执行计划,使用了字段a上的索引
mysql> explain select * from t where a between 10000 and 20000;
+----+-------------+-------+-------+---------------+-----+---------+------+-------+-----------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+-------+---------------+-----+---------+------+-------+-----------------------+
| 1 | SIMPLE | t | range | a | a | 5 | NULL | 10000 | Using index condition |
+----+-------------+-------+-------+---------------+-----+---------+------+-------+-----------------------+
-- 由于需要进行字段b排序,虽然索引b需要扫描更多的行数,但本身是有序的,综合扫描行数和排序,优化器选择了索引b,认为代价更小
mysql> explain select * from t where (a between 1 and 1000) and (b between 50000 and 100000) order by b limit 1;
+----+-------------+-------+-------+---------------+-----+---------+------+-------+------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+-------+---------------+-----+---------+------+-------+------------------------------------+
| 1 | SIMPLE | t | range | a,b | b | 5 | NULL | 50128 | Using index condition; Using where |
+----+-------------+-------+-------+---------------+-----+---------+------+-------+------------------------------------+
-- 方案1:通过force index强制走索引a,纠正优化器错误的选择,不建议使用(不通用,且索引名称更变语句也需要变)
mysql> explain select * from t force index(a) where (a between 1 and 1000) and (b between 50000 and 100000) order by b limit 1;
+----+-------------+-------+-------+---------------+-----+---------+------+------+----------------------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+-------+---------------+-----+---------+------+------+----------------------------------------------------+
| 1 | SIMPLE | t | range | a | a | 5 | NULL | 999 | Using index condition; Using where; Using filesort |
+----+-------------+-------+-------+---------------+-----+---------+------+------+----------------------------------------------------+
-- 方案2:引导 MySQL 使用我们期望的索引,按b,a排序,优化器需要考虑a排序的代价
mysql> explain select * from t where (a between 1 and 1000) and (b between 50000 and 100000) order by b,a limit 1;
+----+-------------+-------+-------+---------------+-----+---------+------+------+----------------------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+-------+---------------+-----+---------+------+------+----------------------------------------------------+
| 1 | SIMPLE | t | range | a,b | a | 5 | NULL | 999 | Using index condition; Using where; Using filesort |
+----+-------------+-------+-------+---------------+-----+---------+------+------+----------------------------------------------------+
-- 方案3:有些场景下,我们可以新建一个更合适的索引,来提供给优化器做选择,或删掉误用的索引
ALTER TABLE `t`
DROP INDEX `a`,
DROP INDEX `b`,
ADD INDEX `ab` (`a`,`b`) ;
索引优化
索引选择性
索引选择性 = 基数 / 总行数
-- 表t中字段xxx的索引选择性
select count(distinct xxx)/count(id) from t;
-- 只需要查 ID 的值,而 ID 的值已经在 k 索引树上了,因此可以直接提供查询结果,不需要回表
select ID from T where k between 3 and 5
-- 增加字段V,每次查询需要返回V,可考虑把k、v做成联合索引
select ID,V from T where k between 3 and 5
最左前缀原则+索引下推
-- id、name、age三列,name、age上创建联合索引
-- 满足最左前缀原则,name、age均走索引
select * from T where name='xxx' and age=12
-- Mysql自动优化,调整name、age顺序,,name、age均走索引
select * from T where age=12 and name='xxx'
-- name满足最左前缀原则走索引,MySQL5.6引入索引下推优化(index condition pushdown),即索引中先过滤掉不满足age=12的记录再回表
select * from T where name like 'xxx%' and age=12
-- 不满足最左前缀原则,均不走索引
select * from T where name like '%xxx%' and age=12
-- 满足最左前缀原则,name走索引
select * from T where name='xxx'
-- 不满足最左前缀原则,不走索引
select * from T where age=12
mysql> create table SUser(
ID bigint unsigned primary key,
name varchar(64),
email varchar(64),
...
)engine=innodb;
-- 以下查询场景
mysql> select name from SUser where email='xxx';
-- 方案1:全文本索引,回表次数由符合条件的数据量决定
mysql> alter table SUser add index index1(email);
-- 方案2:前缀索引,回表次数由前缀匹配结果决定
mysql> alter table SUser add index index2(email(6));
-- 查询时字符串反转查询
mysql> select field_list from t where id_card = reverse('input_id_card_string');
使用hash字段
-- 创建一个整数字段,来保存身份证的校验码,同时在这个字段上创建索引
mysql> alter table t add id_card_crc int unsigned, add index(id_card_crc);
-- 查询时使用hash字段走索引查询,再使用原字段精度过滤
mysql> select field_list from t where id_card_crc=crc32('input_id_card_string') and id_card='input_id_card_string'
以上两种方式的缺点:
不支持范围查询
使用hash字段需要额外占用空间,新增了一个字段
读写时需要额外的处理,reverse或者crc32等
前缀索引对覆盖索引的影响?
-- 使用前缀索引就用不上覆盖索引对查询性能的优化
select id,email from SUser where email='xxx';