-- 中國工信部：三大運營商號段
-- 正則匹配
-- Classify every number in temp by its carrier prefix.
-- A number is classified only when its trimmed length is exactly 11.
-- Anything else falls through to the unknown label.
-- Every regexp alternative is anchored with ^ so that only the leading
-- digits decide the carrier. REGEXP matches any substring, so an
-- alternative without ^ would match digits in the middle of a number.
-- Branch order matters: the satellite prefix 1349 is tested last and is
-- reachable only because no earlier pattern matches it.
SELECT
    t.cellphone_num,
    CASE
        WHEN TRIM(t.cellphone_num) REGEXP '^13[5-9]|^134[0-8]|^14[78]|^15[012789]|^172|^178|^170[356]|^18[23478]|^198|^1440'
             AND LENGTH(TRIM(t.cellphone_num)) = 11 THEN '中國移動'
        WHEN TRIM(t.cellphone_num) REGEXP '^13[0-2]|^145|^146|^15[56]|^166|^17[156]|^170[4789]|^18[56]'
             AND LENGTH(TRIM(t.cellphone_num)) = 11 THEN '中國聯通'
        WHEN TRIM(t.cellphone_num) REGEXP '^133|^1410|^149|^153|^170[012]|^17[347]|^18[019]|^19[139]'
             AND LENGTH(TRIM(t.cellphone_num)) = 11 THEN '中國電信'
        WHEN TRIM(t.cellphone_num) REGEXP '^1349'
             AND LENGTH(TRIM(t.cellphone_num)) = 11 THEN '中國衛星電話'
        ELSE '未知運營商'
    END AS provider
FROM
    temp t;
-- 正則匹配測試
-- Smoke test of the carrier classification against one literal number.
-- 17002341234 starts with 1700, which only the third branch matches,
-- so the expected provider is 中國電信.
-- Every regexp alternative is anchored with ^ because REGEXP matches any
-- substring. The 19[139] alternative was previously unanchored.
SELECT
    "17002341234",
    CASE
        WHEN TRIM("17002341234") REGEXP '^13[5-9]|^134[0-8]|^14[78]|^15[012789]|^172|^178|^170[356]|^18[23478]|^198|^1440'
             AND LENGTH(TRIM("17002341234")) = 11 THEN '中國移動'
        WHEN TRIM("17002341234") REGEXP '^13[0-2]|^145|^146|^15[56]|^166|^17[156]|^170[4789]|^18[56]'
             AND LENGTH(TRIM("17002341234")) = 11 THEN '中國聯通'
        WHEN TRIM("17002341234") REGEXP '^133|^1410|^149|^153|^170[012]|^17[347]|^18[019]|^19[139]'
             AND LENGTH(TRIM("17002341234")) = 11 THEN '中國電信'
        WHEN TRIM("17002341234") REGEXP '^1349'
             AND LENGTH(TRIM("17002341234")) = 11 THEN '中國衛星電話'
        ELSE '未知運營商'
    END AS provider;
-- Lookup table holding one prefix regexp per carrier.
-- All four columns are declared as plain strings, including the id and
-- the update time. The table description is stored through the comment
-- table property.
CREATE TABLE dw.dim_phone_segment_regexp (
    vendor_id           STRING COMMENT '運營商,1:移動,2:聯通,3:電信,4:衛星電話,5:其他',
    vendor_name         STRING COMMENT '運營商名稱',
    vendor_regexp       STRING COMMENT '運營商號段正則表達式',
    bi_update_datetime  STRING COMMENT "更新時間"
)
TBLPROPERTIES ("comment"="手機號碼段正則表達式 by zhangjiqiang");
-- Seed rows for dw.dim_phone_segment_regexp, one regexp per carrier.
-- Values are positional: vendor_id, vendor_name, vendor_regexp,
-- bi_update_datetime.
-- Every alternative is anchored with ^ so that only the leading digits of
-- a number can match. The 19[139] alternative of row 3 was previously
-- unanchored and therefore matched those digits anywhere in a number.
INSERT INTO TABLE dw.dim_phone_segment_regexp VALUES
    (1, "中國移動", "^13[5-9]|^134[0-8]|^14[78]|^15[012789]|^172|^178|^170[356]|^18[23478]|^198|^1440", "2019-09-16 18:02:47"),
    (2, "中國聯通", "^13[0-2]|^145|^146|^15[56]|^166|^17[156]|^170[4789]|^18[56]", "2019-09-16 18:02:47"),
    (3, "中國電信", "^133|^1410|^149|^153|^170[012]|^17[347]|^18[019]|^19[139]", "2019-09-16 18:02:47"),
    (4, "中國衛星電話", "^1349", "2019-09-16 18:02:47")
;
-- 運營商號碼段維表
-- 表結構
-- hive
-- Dimension table mapping a phone number prefix to its carrier name.
-- Both columns are plain strings. The table description is stored
-- through the comment table property.
CREATE TABLE test.dim_mobile_phone_segment (
    phone_num_segment  STRING COMMENT '手機號碼段',
    vendor_name        STRING COMMENT '運營商'
)
TBLPROPERTIES ("comment"="手機號碼段 by zhangjiqiang");
-- 初始化數據
-- init date: 2019-09-16
-- Seed rows for test.dim_mobile_phone_segment, one row per number prefix.
-- Values are positional: phone_num_segment, vendor_name.
-- Prefixes are written as quoted strings to match the string column type.
-- Each prefix appears exactly once. The earlier list repeated 145, 147 and
-- 166 and labelled 171 with two different vendors, which fans out any join
-- on phone_num_segment. 171 keeps the virtual operator label, in line with
-- the 170x rows.
-- NOTE(review): the regexp rules elsewhere in this file also cover 1410,
-- 1440, 146, 148, 174, 191 and 193, which have no row here, and they label
-- 1349 as 中國衛星電話 rather than 衛星通信 - confirm whether to align.
INSERT INTO TABLE test.dim_mobile_phone_segment VALUES
    ("133", "中國電信"),
    ("149", "中國電信"),
    ("153", "中國電信"),
    ("173", "中國電信"),
    ("177", "中國電信"),
    ("180", "中國電信"),
    ("181", "中國電信"),
    ("189", "中國電信"),
    ("199", "中國電信"),
    ("130", "中國聯通"),
    ("131", "中國聯通"),
    ("132", "中國聯通"),
    ("145", "中國聯通"),
    ("155", "中國聯通"),
    ("156", "中國聯通"),
    ("166", "中國聯通"),
    ("175", "中國聯通"),
    ("176", "中國聯通"),
    ("185", "中國聯通"),
    ("186", "中國聯通"),
    ("1340", "中國移動"),
    ("1341", "中國移動"),
    ("1342", "中國移動"),
    ("1343", "中國移動"),
    ("1344", "中國移動"),
    ("1345", "中國移動"),
    ("1346", "中國移動"),
    ("1347", "中國移動"),
    ("1348", "中國移動"),
    ("135", "中國移動"),
    ("136", "中國移動"),
    ("137", "中國移動"),
    ("138", "中國移動"),
    ("139", "中國移動"),
    ("147", "中國移動"),
    ("150", "中國移動"),
    ("151", "中國移動"),
    ("152", "中國移動"),
    ("157", "中國移動"),
    ("158", "中國移動"),
    ("159", "中國移動"),
    ("172", "中國移動"),
    ("178", "中國移動"),
    ("182", "中國移動"),
    ("183", "中國移動"),
    ("184", "中國移動"),
    ("187", "中國移動"),
    ("188", "中國移動"),
    ("198", "中國移動"),
    ("1700", "中國電信_虛擬運營商"),
    ("1701", "中國電信_虛擬運營商"),
    ("1702", "中國電信_虛擬運營商"),
    ("1703", "中國移動_虛擬運營商"),
    ("1705", "中國移動_虛擬運營商"),
    ("1706", "中國移動_虛擬運營商"),
    ("1704", "中國聯通_虛擬運營商"),
    ("1707", "中國聯通_虛擬運營商"),
    ("1708", "中國聯通_虛擬運營商"),
    ("1709", "中國聯通_虛擬運營商"),
    ("171", "中國聯通_虛擬運營商"),
    ("1349", "衛星通信");