utf8mb4的大小寫敏感性測試及其修改方法


utf8mb4的大小寫敏感性測試及其修改方法

utf8mb4_unicode_ci 與 utf8mb4_general_ci 如何選擇
字符除了需要存儲,還需要排序或比較大小,涉及到與編碼字符集對應的排序字符集(collation)。utf8mb4 對應的排序字符集常用的有 utf8mb4_unicode_ci 、 utf8mb4_general_ci ,到底采用哪個在 stackoverflow 上有個討論: What’s the difference between utf8_general_ci and utf8_unicode_ci
主要從排序准確性和性能兩方面看:
    准確性
    utf8mb4_unicode_ci 是基於標准的Unicode來排序和比較,能夠在各種語言之間精確排序
    utf8mb4_general_ci 沒有實現Unicode排序規則,在遇到某些特殊語言或字符時,排序結果可能不是所期望的。
    但是在絕大多數情況下,這種特殊字符的順序一定要那么精確嗎?比如Unicode把 ß 、 Œ 當成 ss 和 OE 來看;而general會把它們當成 s 、 e ,再如 ÀÁÅåāă 各自都與 A 相等。
    性能
    utf8mb4_general_ci 在比較和排序的時候更快
    utf8mb4_unicode_ci 在特殊情況下,Unicode排序規則為了能夠處理特殊字符的情況,實現了略微復雜的排序算法。
    但是在絕大多數情況下,不會發生此類復雜比較。general理論上比Unicode可能快些,但相比現在的CPU來說,它遠遠不足以成為考慮性能的因素,索引設計、SQL設計才是。 我個人推薦 utf8mb4_unicode_ci ,將來 8.0 里也極有可能變為默認的規則。

# 測試utf8mb4的大小寫敏感性及其修改方法

-- Case-insensitive utf8mb4: use collation utf8mb4_general_ci.
-- Database level:
alter database database_name
    character set utf8mb4
    collate utf8mb4_general_ci;
-- Table level:
alter table table_name
    convert to character set utf8mb4 collate utf8mb4_general_ci;
-- Column level (the column keeps its name, so MODIFY is equivalent to
-- CHANGE column_name column_name):
alter table table_name
    modify column_name varchar(191) character set utf8mb4 collate utf8mb4_general_ci not null;

-- Case-sensitive utf8mb4: use collation utf8mb4_bin.
-- Database level:
alter database database_name
    character set utf8mb4
    collate utf8mb4_bin;
-- Table level:
alter table table_name
    convert to character set utf8mb4 collate utf8mb4_bin;
-- Column level:
alter table table_name
    modify column_name varchar(191) character set utf8mb4 collate utf8mb4_bin not null;

 

-- 1、刪除庫 drop database if exists db2020; 
mysql> drop database if exists db2020; 
Query OK, 0 rows affected, 1 warning (0.00 sec) 
-- 2、創建字符集為utf8mb4的庫 create database db2020 DEFAULT CHARACTER SET utf8mb4; 
mysql> create database db2020 DEFAULT CHARACTER SET utf8mb4; 
Query OK, 1 row affected (0.00 sec) 
-- 3、查看建庫語句 show create database db2020; 
mysql> show create database db2020; 
+----------+--------------------------------------------------------------------+ 
| Database | Create Database                                                     | 
+----------+--------------------------------------------------------------------+ 
| db2020   | CREATE DATABASE `db2020` /*!40100 DEFAULT CHARACTER SET utf8mb4 */ | 
+----------+--------------------------------------------------------------------+ 
1 row in set (0.00 sec) 
-- 4. Create the test table inside db2020.
-- USE has to be a statement of its own: written after "--" on a comment line
-- it is never executed and the table is created in whatever schema is current.
use db2020;
-- drop table if exists tbl_test;  -- optional reset when re-running this step
create table tbl_test (
    id   bigint(20)  not null auto_increment,
    name varchar(20) not null,
    primary key (id),
    key idx_name (name)  -- index on the column the case-sensitivity lookups filter on
) engine = InnoDB default charset = utf8mb4;  -- no COLLATE: the charset's default collation applies

-- 5. Show the statement that recreates the table.
-- use db2020;
-- In the mysql client \G already terminates the statement (vertical output).
-- A ";" after it submits an empty second statement, which the client reports
-- as "ERROR: No query specified", so no semicolon is written here.
show create table tbl_test\G
mysql> show create table tbl_test\G; 
*************************** 1. row ***************************
       Table: tbl_test
Create Table: CREATE TABLE `tbl_test` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `name` varchar(20) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `idx_name` (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
1 row in set (0.05 sec)

ERROR: 
No query specified
-- 6、查看默認字符集 
-- 方法1、show variables like '%character%'; 
mysql> show variables like '%character%';
+--------------------------+----------------------------------------------------------------+
| Variable_name            | Value                                                          |
+--------------------------+----------------------------------------------------------------+
| character_set_client     | utf8                                                           |
| character_set_connection | utf8                                                           |
| character_set_database   | utf8mb4                                                        |
| character_set_filesystem | binary                                                         |
| character_set_results    | utf8                                                           |
| character_set_server     | utf8mb4                                                        |
| character_set_system     | utf8                                                           |
| character_sets_dir       | /opt/mysql/mysql-5.6.43-linux-glibc2.12-x86_64/share/charsets/ |
+--------------------------+----------------------------------------------------------------+
8 rows in set (0.00 sec)

-- 方法2、show variables like 'collation%'; 
mysql> show variables like 'collation%';
+----------------------+--------------------+
| Variable_name        | Value              |
+----------------------+--------------------+
| collation_connection | utf8_general_ci    |
| collation_database   | utf8mb4_general_ci |
| collation_server     | utf8mb4_general_ci |
+----------------------+--------------------+
3 rows in set (0.00 sec)

-- 8、查看默認校對規則 show collation like 'utf8mb4%'; 
mysql> show collation like 'utf8mb4%';
+------------------------+---------+-----+---------+----------+---------+
| Collation              | Charset | Id  | Default | Compiled | Sortlen |
+------------------------+---------+-----+---------+----------+---------+
| utf8mb4_general_ci     | utf8mb4 |  45 | Yes     | Yes      |       1 |
| utf8mb4_bin            | utf8mb4 |  46 |         | Yes      |       1 |
| utf8mb4_unicode_ci     | utf8mb4 | 224 |         | Yes      |       8 |
......
......
+------------------------+---------+-----+---------+----------+---------+
26 rows in set (0.52 sec)
-- 9. Insert test data: the same two words in lower case and in upper case.
-- use db2020;
insert into tbl_test (name)
values
    ('aaa'),
    ('bbb'),
    ('AAA'),
    ('BBB');
mysql> select * from tbl_test; 
+----+------+
| id | name |
+----+------+
|  1 | aaa  |
|  3 | AAA  |
|  2 | bbb  |
|  4 | BBB  |
+----+------+
4 rows in set (0.08 sec)

mysql>  select * from tbl_test where name='aaa'; 
+----+------+
| id | name |
+----+------+
|  1 | aaa  |
|  3 | AAA  |
+----+------+
2 rows in set (0.04 sec)

-- NOTE(review): these statements repeat the step-9 inserts. The SELECT output
-- that follows still lists only ids 1-4, so presumably this is a second copy of
-- the same run rather than an additional batch; confirm before executing both.
use db2020; 
insert into tbl_test(name) values('aaa'); 
insert into tbl_test(name) values('bbb'); 
insert into tbl_test(name) values('AAA'); 
insert into tbl_test(name) values('BBB'); 
mysql> select * from tbl_test; 
+----+------+ 
| id | name | 
+----+------+ 
| 1 | aaa | 
| 3 | AAA | 
| 2 | bbb | 
| 4 | BBB | 
+----+------+ 
4 rows in set (0.00 sec) 
mysql> select * from tbl_test where name='aaa';
+----+------+ 
| id | name | 
+----+------+ 
| 1 | aaa | 
| 3 | AAA | 
+----+------+ 
2 rows in set (0.00 sec) 
-- 10. The default collation is case-insensitive; switch to utf8mb4_bin to make
--     string comparisons case-sensitive.

-- Database level: changes the database default only. The existing table is not
-- converted, so lookups on tbl_test stay case-insensitive after this statement.
alter database db2020 character set = utf8mb4 collate = utf8mb4_bin;

-- Table level: converts the existing table's character columns.
alter table tbl_test convert to character set utf8mb4 collate utf8mb4_bin;

-- Column level: changing only this column is already enough to make lookups on
-- name case-sensitive. The column keeps its name, so one MODIFY suffices
-- (CHANGE name name ... is the same operation and need not be run as well).
alter table tbl_test modify name varchar(20) character set utf8mb4 collate utf8mb4_bin not null;

-- To go back to case-insensitive matching:
-- alter database db2020 character set = utf8mb4;
-- alter table tbl_test convert to character set utf8mb4;
-- alter table tbl_test modify name varchar(20) character set utf8mb4 collate utf8mb4_general_ci not null;
mysql> alter database db2020 character set=utf8mb4 collate=utf8mb4_bin; 
Query OK, 1 row affected (0.00 sec) 
mysql> show create database db2020; 
+----------+----------------------------------------------------------------------------------------+ 
| Database | Create Database | 
+----------+----------------------------------------------------------------------------------------+ 
| db2020 | CREATE DATABASE `db2020` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_bin */ | 
+----------+----------------------------------------------------------------------------------------+ 
1 row in set (0.00 sec) 
mysql> select * from tbl_test where name='aaa'; 
+----+------+ 
| id | name | 
+----+------+ 
| 1 | aaa | 
| 3 | AAA | 
+----+------+ 
2 rows in set (0.00 sec) 
-- 此時只修改庫級別的還不行,仍然還需要修改表級別的 
mysql> alter table tbl_test convert to character set utf8mb4 collate utf8mb4_bin; 
Query OK, 4 rows affected (0.08 sec) Records: 4 Duplicates: 0 Warnings: 0 
mysql> select * from tbl_test where name='aaa'; 
+----+------+ 
| id | name | 
+----+------+ 
| 1 | aaa | 
+----+------+ 
1 row in set (0.00 sec) 
-- 附錄 修改MySQL配置文件,新增如下參數: 
# Client-side default character set.
[client] 
default-character-set = utf8mb4 

# Same default for the mysql section.
[mysql] 
default-character-set = utf8mb4 

# Server-side character set and collation.
[mysqld] 
# NOTE(review): presumably makes the server ignore the character set requested
# by the client; confirm against the MySQL manual for the version in use.
character-set-client-handshake = FALSE 
character-set-server = utf8mb4 
# NOTE(review): the transcript earlier in this file reports
# collation_server = utf8mb4_general_ci; this setting selects a different collation.
collation-server = utf8mb4_unicode_ci 
# NOTE(review): presumably executed for each new client connection; confirm.
init_connect='SET NAMES utf8mb4'

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM