-- 商品表(舊系統 —— utf8 + utf8_general_ci) DROPTABLE IF EXISTS product;
CREATE TABLE product ( id INT AUTO_INCREMENT PRIMARY KEY, product_code VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, product_name VARCHAR(100) NOT NULL, category VARCHAR(50) NOT NULL, price DECIMAL(10,2) NOT NULL, INDEX idx_product_code (product_code) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_general_ci;
-- 訂單明細表(新系統 —— utf8mb4 + utf8mb4_bin) DROPTABLE IF EXISTS order_item;
CREATE TABLE order_item ( id INT AUTO_INCREMENT PRIMARY KEY, order_no VARCHAR(48) NOT NULL, product_code VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL, quantity INTNOT NULL, unit_price DECIMAL(10,2) NOT NULL, created_at DATETIME NOT NULL, INDEX idx_product_code (product_code) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
# 重點是這個 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. 'utf8mb3_general_ci' is a collation of the deprecated character set UTF8MB3. Please consider using UTF8MB4 with an appropriate collation instead. 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. 'utf8mb3_general_ci' is a collation of the deprecated character set UTF8MB3. Please consider using UTF8MB4 with an appropriate collation instead.
MySQL 官方已宣告 utf8mb3 將被移除, 所以其實在 MySQL 8 的環境下跑, 自然就會收到警示訊息, 不過我們現在是模擬新舊系統, 所以理解後忽略即可.
執行完後可以在 ide 內看到字元集 product 是 utf8mb3, order_item 是 utf8mb4.
SHOWFULL COLUMNS FROM product LIKE'product_code'; -- utf8mb3_general_ci
SHOWFULL COLUMNS FROM order_item LIKE'product_code'; -- utf8mb4_bin
Step 2:EXPLAIN 觀察問題
1 2 3 4 5 6 7 8 9 10 11
EXPLAIN SELECT p.product_code, p.product_name, p.category, SUM(oi.quantity) AS total_qty, SUM(oi.quantity * oi.unit_price) AS total_sales FROM order_item oi LEFTJOIN product p ON oi.product_code = p.product_code GROUPBY p.product_code, p.product_name, p.category ORDERBY total_sales DESC;
結果(問題狀態)
table
type
possible_keys
key
rows
Extra
oi
ALL
NULL
NULL
199,500
Using temporary; Using filesort
p
ALL
idx_product_code
NULL
3,690
Range checked for each record (index map: 0x2)
首先看 p 表的 possible_keys 顯示 idx_product_code, 等於有索引,但 key 是 NULL, 表示實際沒用到. 雖然 MySQL 知道有索引可用,但因為 charset/collation 不一致,實際上無法有效利用, 最壞情況下的比較量級是 200,000 × 50,000 = 100 億次 (至於 rows 數量差異是 MySQL 估算值, 不用太在意).
再來看 Extra 跑出來的內容不再是 Using where; Using join buffer (Block Nested Loop), 取而代之的是 Range checked for each record (index map: 0x2).
在 MySQL 5.7 中 Block Nested Loop, 直接放棄索引,把資料塞進 join buffer 暴力比對, 而 MySQL 8.0 Range checked for each record 是對每一筆 order_item 的資料都重新評估一次這筆值用索引划不划算.
本質上還是因為 collation mismatch 導致無法直接走 B-Tree lookup
另外看 SHOW WARNINGS 的輸出, 幫大家畫重點:
Cannot use ref access on index ‘idx_product_code’ due to type or collation conversion on field ‘product_code’
Cannot use ref access on index 'idx_product_code' due to type or collation conversion on field 'product_code' /* select#1 */ select `charset_demo`.`p`.`product_code` AS `product_code`,`charset_demo`.`p`.`product_name` AS `product_name`,`charset_demo`.`p`.`category` AS `category`,sum(`charset_demo`.`oi`.`quantity`) AS `total_qty`,sum((`charset_demo`.`oi`.`quantity` * `charset_demo`.`oi`.`unit_price`)) AS `total_sales` from `charset_demo`.`order_item` `oi` left join `charset_demo`.`product` `p` on((`charset_demo`.`oi`.`product_code` = `charset_demo`.`p`.`product_code`)) where true group by `charset_demo`.`p`.`product_code`,`charset_demo`.`p`.`product_name`,`charset_demo`.`p`.`category` order by `total_sales` desc
ALTER TABLE product MODIFY COLUMN product_code VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL;
Step 4:修復後 EXPLAIN
1 2 3 4 5 6 7 8 9
EXPLAIN SELECT p.product_code, p.product_name, p.category, SUM(oi.quantity) AS total_qty, SUM(oi.quantity * oi.unit_price) AS total_sales FROM order_item oi LEFTJOIN product p ON oi.product_code = p.product_code GROUPBY p.product_code, p.product_name, p.category ORDERBY total_sales DESC;
修復後結果
table
type
key
rows
Extra
oi
ALL
NULL
199,500
Using temporary; Using filesort
p
ref
idx_product_code
1
NULL
p 表從掃描 50,000 行變為每次只查 1 行,Range checked for each record 消失。
-- 商品表(utf8mb4_general_ci) DROPTABLE IF EXISTS product_b; CREATE TABLE product_b ( id INT AUTO_INCREMENT PRIMARY KEY, product_code VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, product_name VARCHAR(100) NOT NULL, category VARCHAR(50) NOT NULL, price DECIMAL(10,2) NOT NULL, INDEX idx_product_code (product_code) ) ENGINE=InnoDB;
-- 訂單明細表(utf8mb4_bin) DROPTABLE IF EXISTS order_item_b; CREATE TABLE order_item_b ( id INT AUTO_INCREMENT PRIMARY KEY, order_no VARCHAR(48) NOT NULL, product_code VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL, quantity INTNOT NULL, unit_price DECIMAL(10,2) NOT NULL, created_at DATETIME NOT NULL, INDEX idx_product_code (product_code) ) ENGINE=InnoDB;
-- 商品表 B:50,000 筆 INSERT INTO product_b (product_code, product_name, category, price) SELECT CONCAT('SKU', LPAD(n, 8, '0')), CONCAT('Product-', n), ELT(1+FLOOR(RAND()*10), 'Electronics','Clothing','Food','Furniture','Sports', 'Books','Toys','Beauty','Health','Garden'), ROUND(10+ RAND()*9990, 2) FROM ( SELECT@row :=@row+1AS n FROM information_schema.columns, (SELECT@row :=0) t LIMIT 50000 ) x;
-- 訂單明細表 B:200,000 筆 INSERT INTO order_item_b (order_no, product_code, quantity, unit_price, created_at) SELECT CONCAT('ORD', LPAD(FLOOR(1+ RAND()*80000), 8, '0')), CONCAT('SKU', LPAD(FLOOR(1+ RAND()*50000), 8, '0')), FLOOR(1+ RAND()*10), ROUND(10+ RAND()*9990, 2), DATE_ADD('2024-01-01', INTERVALFLOOR(RAND()*365) DAY) FROM ( SELECT@row :=@row+1AS n FROM information_schema.columns c1, information_schema.columns c2, (SELECT@row :=0) t LIMIT 200000 ) x;
Step 1:確認 Collation 不一致
1 2 3 4 5
SHOWFULL COLUMNS FROM product_b LIKE'product_code'; -- utf8mb4_general_ci
SHOWFULL COLUMNS FROM order_item_b LIKE'product_code'; -- utf8mb4_bin
Step 2:EXPLAIN 觀察
1 2 3 4 5 6 7 8 9
EXPLAIN SELECT p.product_code, p.product_name, p.category, SUM(oi.quantity) AS total_qty, SUM(oi.quantity * oi.unit_price) AS total_sales FROM order_item_b oi LEFTJOIN product_b p ON oi.product_code = p.product_code GROUPBY p.product_code, p.product_name, p.category ORDERBY total_sales DESC;
table
type
possible_keys
key
rows
Extra
oi
ALL
NULL
NULL
199,500
Using temporary; Using filesort
p
ALL
idx_product_code
NULL
3,701
Range checked for each record (index map: 0x2)
可以看到結果一樣是 Range checked for each record (index map: 0x2).
Step 3:修復
1 2
ALTER TABLE product_b MODIFY COLUMN product_code VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL;
-- 1. 檢查 JOIN 兩側的 charset 和 collation SHOWFULL COLUMNS FROM table_a LIKE'join_column'; SHOWFULL COLUMNS FROM table_b LIKE'join_column';
-- 2. EXPLAIN 後看 SHOW WARNINGS EXPLAIN SELECT ... ; SHOW WARNINGS; -- 看有沒有 convert() 或 collation 轉換
-- 3. 批量掃描整個資料庫中同名欄位的不一致 SELECT t1.TABLE_NAME AS table_1, t1.COLUMN_NAME AS column_name, t1.CHARACTER_SET_NAME AS charset_1, t1.COLLATION_NAME AS collation_1, t2.TABLE_NAME AS table_2, t2.CHARACTER_SET_NAME AS charset_2, t2.COLLATION_NAME AS collation_2 FROM information_schema.COLUMNS t1 JOIN information_schema.COLUMNS t2 ON t1.COLUMN_NAME = t2.COLUMN_NAME AND t1.TABLE_SCHEMA = t2.TABLE_SCHEMA AND t1.TABLE_NAME < t2.TABLE_NAME WHERE t1.TABLE_SCHEMA ='my_database' AND (t1.CHARACTER_SET_NAME != t2.CHARACTER_SET_NAME OR t1.COLLATION_NAME != t2.COLLATION_NAME);
整體建議
由於這次是回去摸舊的系統, 現在較新的專案都推薦一律用 utf8mb4, 因為 MySQL 的 utf8mb3 只支援 3 字節,不是真正的 UTF-8, 然後在設計 DB 時也應該要有詳細的規章與規範, 來統一整個專案的 collation, 通常是推薦 utf8mb4_0900_ai_ci (語義正確的排序)或 utf8mb4_bin(精確比對).