1.1 DWD 層(業務數據)
1.1.1 商品維度表(全量表)
1)建表語句
-- DWD product (SKU) dimension table.
-- External Parquet table partitioned by dt, with LZO compression requested
-- through the parquet.compression table property.
DROP TABLE IF EXISTS `dwd_dim_sku_info`;

CREATE EXTERNAL TABLE `dwd_dim_sku_info` (
    `id` string COMMENT '商品 id',
    `spu_id` string COMMENT 'spuid',
    `price` double COMMENT '商品價格',
    `sku_name` string COMMENT '商品名稱',
    `sku_desc` string COMMENT '商品描述',
    `weight` double COMMENT '重量',
    `tm_id` string COMMENT '品牌 id',
    `tm_name` string COMMENT '品牌名稱',
    `category3_id` string COMMENT '三級分類 id',
    `category2_id` string COMMENT '二級分類 id',
    `category1_id` string COMMENT '一級分類 id',
    `category3_name` string COMMENT '三級分類名稱',
    `category2_name` string COMMENT '二級分類名稱',
    `category1_name` string COMMENT '一級分類名稱',
    `spu_name` string COMMENT 'spu 名稱',
    `create_time` string COMMENT '創建時間'
)
COMMENT '商品維度表'
PARTITIONED BY (`dt` string)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_dim_sku_info/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)數據裝載
-- Rebuild the 2020-03-10 partition of the SKU dimension.
-- Each SKU row is widened with its trademark name, SPU name and the
-- three-level category chain (category3 -> category2 -> category1).
-- All joins are inner joins, so a SKU missing any lookup row is dropped.
INSERT OVERWRITE TABLE dwd_dim_sku_info PARTITION (dt = '2020-03-10')
SELECT
    sku.id,
    sku.spu_id,
    sku.price,
    sku.sku_name,
    sku.sku_desc,
    sku.weight,
    sku.tm_id,
    tm.tm_name,
    sku.category3_id,
    c2.id AS category2_id,
    c1.id AS category1_id,
    c3.name AS category3_name,
    c2.name AS category2_name,
    c1.name AS category1_name,
    spu.spu_name,
    sku.create_time
FROM (
    SELECT * FROM ods_sku_info WHERE dt = '2020-03-10'
) sku
JOIN (
    SELECT * FROM ods_base_trademark WHERE dt = '2020-03-10'
) tm
    ON sku.tm_id = tm.tm_id
JOIN (
    SELECT * FROM ods_spu_info WHERE dt = '2020-03-10'
) spu
    ON spu.id = sku.spu_id
JOIN (
    SELECT * FROM ods_base_category3 WHERE dt = '2020-03-10'
) c3
    ON sku.category3_id = c3.id
JOIN (
    SELECT * FROM ods_base_category2 WHERE dt = '2020-03-10'
) c2
    ON c3.category2_id = c2.id
JOIN (
    SELECT * FROM ods_base_category1 WHERE dt = '2020-03-10'
) c1
    ON c2.category1_id = c1.id;
1.1.2 優惠券信息表(全量)
把 ODS 層 ods_coupon_info 表數據導入到 DWD 層優惠券信息表,在導入過程中可以做適當的清洗
1)建表語句
-- DWD coupon dimension table.
-- External Parquet table partitioned by dt. The delimited row format clause
-- is carried over unchanged from the original definition.
DROP TABLE IF EXISTS dwd_dim_coupon_info;

CREATE EXTERNAL TABLE dwd_dim_coupon_info (
    `id` string COMMENT '購物券編號',
    `coupon_name` string COMMENT '購物券名稱',
    `coupon_type` string COMMENT '購物券類型 1 現金券 2 折扣券 3 滿減券 4 滿件打折券',
    `condition_amount` string COMMENT '滿額數',
    `condition_num` string COMMENT '滿件數',
    `activity_id` string COMMENT '活動編號',
    `benefit_amount` string COMMENT '減金額',
    `benefit_discount` string COMMENT '折扣',
    `create_time` string COMMENT '創建時間',
    `range_type` string COMMENT '范圍類型 1、商品 2、品類 3、品牌',
    `spu_id` string COMMENT '商品 id',
    `tm_id` string COMMENT '品牌 id',
    `category3_id` string COMMENT '品類 id',
    `limit_num` string COMMENT '最多領用次數',
    `operate_time` string COMMENT '修改時間',
    `expire_time` string COMMENT '過期時間'
)
COMMENT '優惠券信息表'
PARTITIONED BY (`dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_dim_coupon_info/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)數據裝載
-- Copy the 2020-03-10 ODS coupon rows into the matching DWD partition,
-- column for column, with no transformation.
INSERT OVERWRITE TABLE dwd_dim_coupon_info PARTITION (dt = '2020-03-10')
SELECT
    id,
    coupon_name,
    coupon_type,
    condition_amount,
    condition_num,
    activity_id,
    benefit_amount,
    benefit_discount,
    create_time,
    range_type,
    spu_id,
    tm_id,
    category3_id,
    limit_num,
    operate_time,
    expire_time
FROM ods_coupon_info
WHERE dt = '2020-03-10';
3)查詢加載結果
-- Inspect the rows written to the 2020-03-10 coupon partition.
SELECT *
FROM dwd_dim_coupon_info
WHERE dt = '2020-03-10';
1.1.3 活動維度表(全量)
1)建表語句
-- DWD activity dimension table.
-- External Parquet table partitioned by dt. It combines activity header
-- fields with the rule fields (condition and benefit columns).
DROP TABLE IF EXISTS dwd_dim_activity_info;

CREATE EXTERNAL TABLE dwd_dim_activity_info (
    `id` string COMMENT '編號',
    `activity_name` string COMMENT '活動名稱',
    `activity_type` string COMMENT '活動類型',
    `condition_amount` string COMMENT '滿減金額',
    `condition_num` string COMMENT '滿減件數',
    `benefit_amount` string COMMENT '優惠金額',
    `benefit_discount` string COMMENT '優惠折扣',
    `benefit_level` string COMMENT '優惠級別',
    `start_time` string COMMENT '開始時間',
    `end_time` string COMMENT '結束時間',
    `create_time` string COMMENT '創建時間'
)
COMMENT '活動信息表'
PARTITIONED BY (`dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_dim_activity_info/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)數據裝載
-- Rebuild the 2020-03-10 activity partition.
-- The left join keeps every activity even when it has no rule row, in which
-- case the condition and benefit columns are NULL.
INSERT OVERWRITE TABLE dwd_dim_activity_info PARTITION (dt = '2020-03-10')
SELECT
    act.id,
    act.activity_name,
    act.activity_type,
    rl.condition_amount,
    rl.condition_num,
    rl.benefit_amount,
    rl.benefit_discount,
    rl.benefit_level,
    act.start_time,
    act.end_time,
    act.create_time
FROM (
    SELECT * FROM ods_activity_info WHERE dt = '2020-03-10'
) act
LEFT JOIN (
    SELECT * FROM ods_activity_rule WHERE dt = '2020-03-10'
) rl
    ON act.id = rl.activity_id;
3)查詢加載結果
-- Inspect the rows written to the 2020-03-10 activity partition.
SELECT *
FROM dwd_dim_activity_info
WHERE dt = '2020-03-10';
1.1.4 地區維度表(特殊)
1)建表語句
-- DWD province dimension table.
-- Unpartitioned external Parquet table holding each province together with
-- the name of its region.
DROP TABLE IF EXISTS `dwd_dim_base_province`;

CREATE EXTERNAL TABLE `dwd_dim_base_province` (
    `id` string COMMENT 'id',
    `province_name` string COMMENT '省市名稱',
    `area_code` string COMMENT '地區編碼',
    `iso_code` string COMMENT 'ISO 編碼',
    `region_id` string COMMENT '地區 id',
    `region_name` string COMMENT '地區名稱'
)
COMMENT '地區省市表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_dim_base_province/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)數據裝載
-- Replace the whole province dimension with provinces joined to their region.
-- Neither source table is filtered by dt here.
INSERT OVERWRITE TABLE dwd_dim_base_province
SELECT
    prov.id,
    prov.name,
    prov.area_code,
    prov.iso_code,
    prov.region_id,
    reg.region_name
FROM ods_base_province prov
JOIN ods_base_region reg
    ON prov.region_id = reg.id;
1.1.5 時間維度表(特殊)(預留)
1)建表語句
-- DWD date dimension table, unpartitioned external Parquet table.
-- NOTE(review): the is_workday column comment text says it marks weekends -
-- confirm which meaning the data actually uses.
DROP TABLE IF EXISTS `dwd_dim_date_info`;

CREATE EXTERNAL TABLE `dwd_dim_date_info` (
    `date_id` string COMMENT '日',
    `week_id` int COMMENT '周',
    `week_day` int COMMENT '周的第幾天',
    `day` int COMMENT '每月的第幾天',
    `month` int COMMENT '第幾月',
    `quarter` int COMMENT '第幾季度',
    `year` int COMMENT '年',
    `is_workday` int COMMENT '是否是周末',
    `holiday_id` int COMMENT '是否是節假日'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_dim_date_info/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)把 date_info.txt 文件上傳到 node01 的 /opt/modules/db_log/路徑
3)數據裝載
-- dwd_dim_date_info is stored as Parquet, but date_info.txt is a plain text
-- file. LOAD DATA only copies the file into the table directory and does not
-- convert it, so loading the text file straight into the Parquet table leaves
-- an unreadable file behind. The file is therefore loaded into a text-format
-- staging table first and then rewritten into the Parquet table.
-- The staging delimiter is assumed to be a tab, matching the delimiter
-- declared on dwd_dim_date_info - confirm against the actual file.
DROP TABLE IF EXISTS `dwd_dim_date_info_tmp`;

CREATE EXTERNAL TABLE `dwd_dim_date_info_tmp` (
    `date_id` string COMMENT '日',
    `week_id` int COMMENT '周',
    `week_day` int COMMENT '周的第幾天',
    `day` int COMMENT '每月的第幾天',
    `month` int COMMENT '第幾月',
    `quarter` int COMMENT '第幾季度',
    `year` int COMMENT '年',
    `is_workday` int COMMENT '是否是周末',
    `holiday_id` int COMMENT '是否是節假日'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/warehouse/gmall/dwd/dwd_dim_date_info_tmp/';

-- OVERWRITE keeps the staging table idempotent: dropping an external table
-- leaves its files in place, so a plain INTO would append duplicates on rerun.
LOAD DATA LOCAL INPATH '/opt/modules/db_log/date_info.txt'
OVERWRITE INTO TABLE dwd_dim_date_info_tmp;

-- Rewrite the staged rows into the Parquet-backed dimension table.
INSERT OVERWRITE TABLE dwd_dim_date_info
SELECT
    date_id,
    week_id,
    week_day,
    day,
    month,
    quarter,
    year,
    is_workday,
    holiday_id
FROM dwd_dim_date_info_tmp;
4)查詢加載結果
-- Inspect the loaded date dimension rows.
SELECT *
FROM dwd_dim_date_info;
1.1.6 訂單明細事實表(事務型快照事實表)

1)建表語句
-- DWD order detail fact table.
-- External Parquet table partitioned by dt, one row per order line, carrying
-- the province of the parent order and a line total amount.
DROP TABLE IF EXISTS dwd_fact_order_detail;

CREATE EXTERNAL TABLE dwd_fact_order_detail (
    `id` string COMMENT '訂單編號',
    `order_id` string COMMENT '訂單號',
    `user_id` string COMMENT '用戶 id',
    `sku_id` string COMMENT 'sku 商品 id',
    `sku_name` string COMMENT '商品名稱',
    `order_price` decimal(10,2) COMMENT '商品價格',
    `sku_num` bigint COMMENT '商品數量',
    `create_time` string COMMENT '創建時間',
    `province_id` string COMMENT '省份 ID',
    `total_amount` decimal(20,2) COMMENT '訂單總金額'
)
PARTITIONED BY (`dt` string)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_fact_order_detail/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)數據裝載
-- Rebuild the 2020-03-10 order detail partition.
-- province_id comes from the parent order, and the last column is the line
-- amount computed as order_price times sku_num.
INSERT OVERWRITE TABLE dwd_fact_order_detail PARTITION (dt = '2020-03-10')
SELECT
    detail.id,
    detail.order_id,
    detail.user_id,
    detail.sku_id,
    detail.sku_name,
    detail.order_price,
    detail.sku_num,
    detail.create_time,
    ord.province_id,
    detail.order_price * detail.sku_num
FROM (
    SELECT * FROM ods_order_detail WHERE dt = '2020-03-10'
) detail
JOIN (
    SELECT * FROM ods_order_info WHERE dt = '2020-03-10'
) ord
    ON detail.order_id = ord.id;
3)查詢加載結果
-- Inspect the rows written to the 2020-03-10 order detail partition.
SELECT *
FROM dwd_fact_order_detail
WHERE dt = '2020-03-10';
1.1.7 支付事實表(事務型快照事實表)

1)建表語句
-- DWD payment fact table.
-- External Parquet table partitioned by dt, one row per payment, carrying the
-- province of the paid order.
DROP TABLE IF EXISTS dwd_fact_payment_info;

CREATE EXTERNAL TABLE dwd_fact_payment_info (
    `id` string COMMENT '',
    `out_trade_no` string COMMENT '對外業務編號',
    `order_id` string COMMENT '訂單編號',
    `user_id` string COMMENT '用戶編號',
    `alipay_trade_no` string COMMENT '支付寶交易流水編號',
    `payment_amount` decimal(16,2) COMMENT '支付金額',
    `subject` string COMMENT '交易內容',
    `payment_type` string COMMENT '支付類型',
    `payment_time` string COMMENT '支付時間',
    `province_id` string COMMENT '省份 ID'
)
PARTITIONED BY (`dt` string)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_fact_payment_info/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)數據裝載
-- Rebuild the 2020-03-10 payment partition.
-- The source total_amount column lands in payment_amount, and province_id is
-- looked up from the order with the same id in the same dt partition.
INSERT OVERWRITE TABLE dwd_fact_payment_info PARTITION (dt = '2020-03-10')
SELECT
    pay.id,
    pay.out_trade_no,
    pay.order_id,
    pay.user_id,
    pay.alipay_trade_no,
    pay.total_amount,
    pay.subject,
    pay.payment_type,
    pay.payment_time,
    ord.province_id
FROM (
    SELECT * FROM ods_payment_info WHERE dt = '2020-03-10'
) pay
JOIN (
    SELECT id, province_id FROM ods_order_info WHERE dt = '2020-03-10'
) ord
    ON pay.order_id = ord.id;
3)查詢加載結果
-- Inspect the rows written to the 2020-03-10 payment partition.
SELECT *
FROM dwd_fact_payment_info
WHERE dt = '2020-03-10';
1.1.8 退款事實表(事務型快照事實表)
把 ODS 層 ods_order_refund_info 表數據導入到 DWD 層退款事實表,在導入過程中可以做適當的清洗
1)建表語句
-- DWD refund fact table.
-- External tab-delimited table partitioned by dt. No STORED AS clause is
-- given, so the default file format of the Hive installation applies.
DROP TABLE IF EXISTS dwd_fact_order_refund_info;

CREATE EXTERNAL TABLE dwd_fact_order_refund_info (
    `id` string COMMENT '編號',
    `user_id` string COMMENT '用戶 ID',
    `order_id` string COMMENT '訂單 ID',
    `sku_id` string COMMENT '商品 ID',
    `refund_type` string COMMENT '退款類型',
    `refund_num` bigint COMMENT '退款件數',
    `refund_amount` decimal(16,2) COMMENT '退款金額',
    `refund_reason_type` string COMMENT '退款原因類型',
    `create_time` string COMMENT '退款時間'
)
COMMENT '退款事實表'
PARTITIONED BY (`dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/dwd/dwd_fact_order_refund_info/';
2)數據裝載
-- Copy the 2020-03-10 ODS refund rows into the matching DWD partition.
INSERT OVERWRITE TABLE dwd_fact_order_refund_info PARTITION (dt = '2020-03-10')
SELECT
    id,
    user_id,
    order_id,
    sku_id,
    refund_type,
    refund_num,
    refund_amount,
    refund_reason_type,
    create_time
FROM ods_order_refund_info
WHERE dt = '2020-03-10';
3)查詢加載結果
-- Inspect the rows written to the 2020-03-10 refund partition.
SELECT *
FROM dwd_fact_order_refund_info
WHERE dt = '2020-03-10';
1.1.9 評價事實表(事務型快照事實表)
把 ODS 層 ods_comment_info 表數據導入到 DWD 層評價事實表,在導入過程中可以做適當的清洗
1)建表語句
-- DWD comment (review) fact table.
-- External tab-delimited table partitioned by dt.
DROP TABLE IF EXISTS dwd_fact_comment_info;

CREATE EXTERNAL TABLE dwd_fact_comment_info (
    `id` string COMMENT '編號',
    `user_id` string COMMENT '用戶 ID',
    `sku_id` string COMMENT '商品 sku',
    `spu_id` string COMMENT '商品 spu',
    `order_id` string COMMENT '訂單 ID',
    `appraise` string COMMENT '評價',
    `create_time` string COMMENT '評價時間'
)
COMMENT '評價事實表'
PARTITIONED BY (`dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/dwd/dwd_fact_comment_info/';
2)數據裝載
-- Copy the 2020-03-10 ODS comment rows into the matching DWD partition.
INSERT OVERWRITE TABLE dwd_fact_comment_info PARTITION (dt = '2020-03-10')
SELECT
    id,
    user_id,
    sku_id,
    spu_id,
    order_id,
    appraise,
    create_time
FROM ods_comment_info
WHERE dt = '2020-03-10';
1.1.10 加購事實表(周期型快照事實表,每日快照)
由於購物車的數量是會發生變化,所以導增量不合適
每天做一次快照,導入的數據是全量,區別於事務型事實表是每天導入新增
周期型快照事實表劣勢:存儲的數據量會比較大
解決方案:周期型快照事實表存儲的數據比較講究時效性,時間太久了的意義不大,可以刪除以前的數據
1)建表語句
-- DWD cart fact table (daily snapshot), external tab-delimited table
-- partitioned by dt.
-- NOTE(review): the is_ordered comment literal contains a semicolon - Hive CLI
-- releases that split statements on every semicolon would need it escaped,
-- confirm against the target version.
DROP TABLE IF EXISTS dwd_fact_cart_info;

CREATE EXTERNAL TABLE dwd_fact_cart_info (
    `id` string COMMENT '編號',
    `user_id` string COMMENT '用戶 id',
    `sku_id` string COMMENT 'skuid',
    `cart_price` string COMMENT '放入購物車時價格',
    `sku_num` string COMMENT '數量',
    `sku_name` string COMMENT 'sku 名稱 (冗余)',
    `create_time` string COMMENT '創建時間',
    `operate_time` string COMMENT '修改時間',
    `is_ordered` string COMMENT '是否已經下單。1 為已下單;0 為未下單',
    `order_time` string COMMENT '下單時間'
)
COMMENT '加購事實表'
PARTITIONED BY (`dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/dwd/dwd_fact_cart_info/';
2)數據裝載
-- Copy the 2020-03-10 ODS cart snapshot into the matching DWD partition.
INSERT OVERWRITE TABLE dwd_fact_cart_info PARTITION (dt = '2020-03-10')
SELECT
    id,
    user_id,
    sku_id,
    cart_price,
    sku_num,
    sku_name,
    create_time,
    operate_time,
    is_ordered,
    order_time
FROM ods_cart_info
WHERE dt = '2020-03-10';
3)查詢加載結果
-- Inspect the rows written to the 2020-03-10 cart partition.
SELECT *
FROM dwd_fact_cart_info
WHERE dt = '2020-03-10';
1.1.11 收藏事實表(周期型快照事實表,每日快照)
收藏的標記,是否取消,會發生變化,做增量不合適
每天做一次快照,導入的數據是全量,區別於事務型事實表是每天導入新增
1)建表語句
-- DWD favorites fact table (daily snapshot), external tab-delimited table
-- partitioned by dt.
DROP TABLE IF EXISTS dwd_fact_favor_info;

CREATE EXTERNAL TABLE dwd_fact_favor_info (
    `id` string COMMENT '編號',
    `user_id` string COMMENT '用戶 id',
    `sku_id` string COMMENT 'skuid',
    `spu_id` string COMMENT 'spuid',
    `is_cancel` string COMMENT '是否取消',
    `create_time` string COMMENT '收藏時間',
    `cancel_time` string COMMENT '取消時間'
)
COMMENT '收藏事實表'
PARTITIONED BY (`dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/dwd/dwd_fact_favor_info/';
2)數據裝載
-- Copy the 2020-03-10 ODS favorites snapshot into the matching DWD partition.
INSERT OVERWRITE TABLE dwd_fact_favor_info PARTITION (dt = '2020-03-10')
SELECT
    id,
    user_id,
    sku_id,
    spu_id,
    is_cancel,
    create_time,
    cancel_time
FROM ods_favor_info
WHERE dt = '2020-03-10';
3)查詢加載結果
-- Inspect the rows written to the 2020-03-10 favorites partition.
SELECT *
FROM dwd_fact_favor_info
WHERE dt = '2020-03-10';
1.1.12 優惠券領用事實表(累積型快照事實表)
優惠券的生命周期:領取優惠券-》用優惠券下單-》優惠券參與支付
累積型快照事實表使用:統計優惠券領取次數、優惠券下單次數、優惠券參與支付次數
1)建表語句
-- DWD coupon usage fact table (accumulating snapshot), external tab-delimited
-- table partitioned by dt. One row per issued coupon with its get, order and
-- payment timestamps.
DROP TABLE IF EXISTS dwd_fact_coupon_use;

CREATE EXTERNAL TABLE dwd_fact_coupon_use (
    `id` string COMMENT '編號',
    `coupon_id` string COMMENT '優惠券 ID',
    `user_id` string COMMENT 'userid',
    `order_id` string COMMENT '訂單 id',
    `coupon_status` string COMMENT '優惠券狀態',
    `get_time` string COMMENT '領取時間',
    `using_time` string COMMENT '使用時間(下單)',
    `used_time` string COMMENT '使用時間(支付)'
)
COMMENT '優惠券領用事實表'
PARTITIONED BY (`dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/dwd/dwd_fact_coupon_use/';
注意:dt 是按照優惠券領用時間 get_time 做為分區
2)數據裝載

-- Merge the 2020-03-10 coupon usage changes into the accumulating snapshot.
-- hist holds the existing DWD rows from every partition touched by the new
-- data (partitions are keyed by the get_time date).
-- inc holds the rows delivered in the 2020-03-10 ODS partition.
-- The full outer join keeps untouched history rows and brand new rows alike.
-- For each column the incoming value wins unless it is NULL.
-- The last select column feeds the dynamic dt partition.
SET hive.exec.dynamic.partition.mode=nonstrict;

INSERT OVERWRITE TABLE dwd_fact_coupon_use PARTITION (dt)
SELECT
    coalesce(inc.id, hist.id),
    coalesce(inc.coupon_id, hist.coupon_id),
    coalesce(inc.user_id, hist.user_id),
    coalesce(inc.order_id, hist.order_id),
    coalesce(inc.coupon_status, hist.coupon_status),
    coalesce(inc.get_time, hist.get_time),
    coalesce(inc.using_time, hist.using_time),
    coalesce(inc.used_time, hist.used_time),
    date_format(coalesce(inc.get_time, hist.get_time), 'yyyy-MM-dd')
FROM (
    SELECT
        id,
        coupon_id,
        user_id,
        order_id,
        coupon_status,
        get_time,
        using_time,
        used_time
    FROM dwd_fact_coupon_use
    WHERE dt IN (
        SELECT date_format(get_time, 'yyyy-MM-dd')
        FROM ods_coupon_use
        WHERE dt = '2020-03-10'
    )
) hist
FULL OUTER JOIN (
    SELECT
        id,
        coupon_id,
        user_id,
        order_id,
        coupon_status,
        get_time,
        using_time,
        used_time
    FROM ods_coupon_use
    WHERE dt = '2020-03-10'
) inc
    ON hist.id = inc.id;
1.1.13 訂單事實表(累積型快照事實表)
1)concat 函數
concat 函數在連接字符串的時候,只要其中一個是 NULL,那么將返回 NULL
hive> select concat('a','b'); ab hive> select concat('a','b',null); NULL
2)concat_ws 函數
concat_ws 函數在連接字符串的時候,只要有一個字符串不是 NULL,就不會返回 NULL。concat_ws 函數需要指定分隔符
hive> select concat_ws('-','a','b'); a-b hive> select concat_ws('-','a','b',null); a-b hive> select concat_ws('','a','b',null); ab
3)STR_TO_MAP 函數
- (1)語法描述
STR_TO_MAP(VARCHAR text, VARCHAR listDelimiter, VARCHAR keyValueDelimiter)
- (2)功能描述
使用 listDelimiter 將 text 分隔成 K-V 對,然后使用 keyValueDelimiter 分隔每個 K-V 對,
組裝成 MAP 返回。默認 listDelimiter 為( ,),keyValueDelimiter 為(=)。
- (3)案例
str_to_map('1001=2020-03-10,1002=2020-03-10', ',', '=')
輸出 {"1001":"2020-03-10","1002":"2020-03-10"}
4)建表語句
訂單生命周期:創建時間=》支付時間=》取消時間=》完成時間=》退款時間=》退款完成時間
由於 ODS 層訂單表只有創建時間和操作時間兩個狀態,不能表達所有時間含義,所以需要關聯訂單狀態表。訂單事實表里面增加了活動 id,所以需要關聯活動訂單表
-- DWD order fact table (accumulating snapshot), external Parquet table
-- partitioned by dt. One row per order with a timestamp column for each
-- lifecycle state (create, payment, cancel, finish, refund, refund finish).
DROP TABLE IF EXISTS dwd_fact_order_info;

CREATE EXTERNAL TABLE dwd_fact_order_info (
    `id` string COMMENT '訂單編號',
    `order_status` string COMMENT '訂單狀態',
    `user_id` string COMMENT '用戶 id',
    `out_trade_no` string COMMENT '支付流水號',
    `create_time` string COMMENT '創建時間(未支付狀態)',
    `payment_time` string COMMENT '支付時間(已支付狀態)',
    `cancel_time` string COMMENT '取消時間(已取消狀態)',
    `finish_time` string COMMENT '完成時間(已完成狀態)',
    `refund_time` string COMMENT '退款時間(退款中狀態)',
    `refund_finish_time` string COMMENT '退款完成時間(退款完成狀態)',
    `province_id` string COMMENT '省份 ID',
    `activity_id` string COMMENT '活動 ID',
    `original_total_amount` string COMMENT '原價金額',
    `benefit_reduce_amount` string COMMENT '優惠金額',
    `feight_fee` string COMMENT '運費',
    `final_total_amount` decimal(10,2) COMMENT '訂單金額'
)
PARTITIONED BY (`dt` string)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_fact_order_info/'
TBLPROPERTIES ("parquet.compression"="lzo");
5)常用函數
更多函數請點擊博客【Hive】Hive入門解析(五)
6)數據裝載
-- Merge the 2020-03-10 order changes into the accumulating order snapshot.
-- old holds the existing DWD rows from every partition touched by the new
-- data (partitions are keyed by the create_time date).
-- new holds the orders of the 2020-03-10 ODS partition, joined to a map of
-- status code to operate_time built from the status log (tms) and left
-- joined to the activity link table.
-- For each column the incoming value wins unless it is NULL.
-- The statement is laid out over several lines so that the line comment on
-- status 1001 ends at its own line break instead of hiding the rest of the
-- statement.
SET hive.exec.dynamic.partition.mode=nonstrict;

INSERT OVERWRITE TABLE dwd_fact_order_info PARTITION (dt)
SELECT
    if(new.id is null, old.id, new.id),
    if(new.order_status is null, old.order_status, new.order_status),
    if(new.user_id is null, old.user_id, new.user_id),
    if(new.out_trade_no is null, old.out_trade_no, new.out_trade_no),
    -- status 1001 is the unpaid state, its time is the order create time
    if(new.tms['1001'] is null, old.create_time, new.tms['1001']),
    if(new.tms['1002'] is null, old.payment_time, new.tms['1002']),
    if(new.tms['1003'] is null, old.cancel_time, new.tms['1003']),
    if(new.tms['1004'] is null, old.finish_time, new.tms['1004']),
    if(new.tms['1005'] is null, old.refund_time, new.tms['1005']),
    if(new.tms['1006'] is null, old.refund_finish_time, new.tms['1006']),
    if(new.province_id is null, old.province_id, new.province_id),
    if(new.activity_id is null, old.activity_id, new.activity_id),
    if(new.original_total_amount is null, old.original_total_amount, new.original_total_amount),
    if(new.benefit_reduce_amount is null, old.benefit_reduce_amount, new.benefit_reduce_amount),
    if(new.feight_fee is null, old.feight_fee, new.feight_fee),
    if(new.final_total_amount is null, old.final_total_amount, new.final_total_amount),
    -- dynamic partition value: the date part of the create time
    date_format(if(new.tms['1001'] is null, old.create_time, new.tms['1001']), 'yyyy-MM-dd')
FROM (
    SELECT
        id,
        order_status,
        user_id,
        out_trade_no,
        create_time,
        payment_time,
        cancel_time,
        finish_time,
        refund_time,
        refund_finish_time,
        province_id,
        activity_id,
        original_total_amount,
        benefit_reduce_amount,
        feight_fee,
        final_total_amount
    FROM dwd_fact_order_info
    WHERE dt IN (
        SELECT date_format(create_time, 'yyyy-MM-dd')
        FROM ods_order_info
        WHERE dt = '2020-03-10'
    )
) old
FULL OUTER JOIN (
    SELECT
        info.id,
        info.order_status,
        info.user_id,
        info.out_trade_no,
        info.province_id,
        act.activity_id,
        log.tms,
        info.original_total_amount,
        info.benefit_reduce_amount,
        info.feight_fee,
        info.final_total_amount
    FROM (
        -- one row per order: map of order_status to operate_time
        SELECT
            order_id,
            str_to_map(concat_ws(',', collect_set(concat(order_status, '=', operate_time))), ',', '=') tms
        FROM ods_order_status_log
        WHERE dt = '2020-03-10'
        GROUP BY order_id
    ) log
    JOIN (
        SELECT * FROM ods_order_info WHERE dt = '2020-03-10'
    ) info
        ON log.order_id = info.id
    LEFT JOIN (
        SELECT * FROM ods_activity_order WHERE dt = '2020-03-10'
    ) act
        ON log.order_id = act.order_id
) new
    ON old.id = new.id;
1.1.14 用戶維度表(拉鏈表)
用戶表中的數據每日既有可能新增,也有可能修改,但修改頻率並不高,屬於緩慢變化維度,此處采用拉鏈表存儲用戶維度數據
1)什么是拉鏈表
2)為什么要做拉鏈表
<ignore_js_op>
3)拉鏈表形成過程
4)拉鏈表制作過程圖
5)拉鏈表制作過程
步驟 0:初始化拉鏈表(首次獨立執行)
(1)建立拉鏈表
-- DWD user dimension zipper (history) table, unpartitioned external Parquet
-- table. Each row is one version of a user, valid from start_date to end_date.
-- The table comment previously described an order zipper table, it now names
-- the user dimension this table actually stores.
DROP TABLE IF EXISTS dwd_dim_user_info_his;

CREATE EXTERNAL TABLE dwd_dim_user_info_his (
    `id` string COMMENT '用戶 id',
    `name` string COMMENT '姓名',
    `birthday` string COMMENT '生日',
    `gender` string COMMENT '性別',
    `email` string COMMENT '郵箱',
    `user_level` string COMMENT '用戶等級',
    `create_time` string COMMENT '創建時間',
    `operate_time` string COMMENT '操作時間',
    `start_date` string COMMENT '有效開始日期',
    `end_date` string COMMENT '有效結束日期'
)
COMMENT '用戶拉鏈表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_dim_user_info_his/'
TBLPROPERTIES ("parquet.compression"="lzo");
(2)初始化拉鏈表
-- One-off initialisation of the user zipper table from the 2020-03-10 ODS
-- partition. Every row starts on 2020-03-10 and gets the open-ended marker
-- 9999-99-99 as its end date.
INSERT OVERWRITE TABLE dwd_dim_user_info_his
SELECT
    id,
    name,
    birthday,
    gender,
    email,
    user_level,
    create_time,
    operate_time,
    '2020-03-10',
    '9999-99-99'
FROM ods_user_info
WHERE dt = '2020-03-10';
步驟 1:制作當日變動數據(包括新增,修改)每日執行
(1)如何獲得每日變動表
- a.最好表內有創建時間和變動時間(Lucky!)
- b.如果沒有,可以利用第三方工具監控比如 canal,監控 MySQL 的實時變化進行記錄(麻煩)
- c.逐行對比前后兩天的數據,檢查 md5(concat(全部有可能變化的字段))是否相同(low)
- d.要求業務數據庫提供變動流水(人品,顏值)
(2)因為 ods_user_info 本身導入過來就是新增變動明細的表,所以不用處理
- a)數據庫中新增 2020-03-11 一天的數據
- b)通過 Sqoop 把 2020-03-11 日所有數據導入: mysqlTohdfs.sh all 2020-03-11
- c)ods 層數據導入: hdfs_to_ods_db.sh all 2020-03-11
步驟 2:先合並變動信息,再追加新增信息,插入到臨時表中
1)建立臨時表
-- Staging table for the user zipper merge, same columns as
-- dwd_dim_user_info_his. Unpartitioned external Parquet table.
-- The table comment previously described an order zipper staging table, it
-- now names the user dimension this table actually stores.
DROP TABLE IF EXISTS dwd_dim_user_info_his_tmp;

CREATE EXTERNAL TABLE dwd_dim_user_info_his_tmp (
    `id` string COMMENT '用戶 id',
    `name` string COMMENT '姓名',
    `birthday` string COMMENT '生日',
    `gender` string COMMENT '性別',
    `email` string COMMENT '郵箱',
    `user_level` string COMMENT '用戶等級',
    `create_time` string COMMENT '創建時間',
    `operate_time` string COMMENT '操作時間',
    `start_date` string COMMENT '有效開始日期',
    `end_date` string COMMENT '有效結束日期'
)
COMMENT '用戶拉鏈臨時表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_dim_user_info_his_tmp/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)導入腳本
-- Build the next version of the user zipper table in the staging table.
-- First branch: every user row in the 2020-03-11 ODS partition becomes a new
-- open version starting 2020-03-11.
-- Second branch: every existing history row is kept. If the user appears in
-- the 2020-03-11 ODS partition and the row is still open (end date
-- 9999-99-99), it is closed on the day before that partition date.
INSERT OVERWRITE TABLE dwd_dim_user_info_his_tmp
SELECT *
FROM (
    SELECT
        id,
        name,
        birthday,
        gender,
        email,
        user_level,
        create_time,
        operate_time,
        '2020-03-11' AS start_date,
        '9999-99-99' AS end_date
    FROM ods_user_info
    WHERE dt = '2020-03-11'

    UNION ALL

    SELECT
        uh.id,
        uh.name,
        uh.birthday,
        uh.gender,
        uh.email,
        uh.user_level,
        uh.create_time,
        uh.operate_time,
        uh.start_date,
        if(ui.id is not null and uh.end_date = '9999-99-99', date_add(ui.dt, -1), uh.end_date) AS end_date
    FROM dwd_dim_user_info_his uh
    LEFT JOIN (
        SELECT * FROM ods_user_info WHERE dt = '2020-03-11'
    ) ui
        ON uh.id = ui.id
) his
ORDER BY his.id, start_date;
1.1.15 DWD 層數據導入腳本
1)vim ods_to_dwd_db.sh
#!/bin/bash
# Loads one business date of ODS data into the DWD layer of the gmall warehouse.
#
# Usage: ods_to_dwd_db.sh {first|all} [yyyy-MM-dd]
#   first  run the daily statements (sql1), then the province dimension load (sql2)
#   all    run the daily statements (sql1) only
# When the date argument is omitted, the previous calendar day is used.
#
# Fixes relative to the earlier version of this script:
#   - the str_to_map key/value delimiter in the order fact load was ' =' (with a
#     leading space), which never matches the 'status=time' pairs built by
#     concat, so every status lookup returned NULL; it is now '='
#   - the SQL is laid out over several lines and carries no inline SQL comment,
#     so nothing after a comment marker can be swallowed
#   - an unknown mode now prints a usage message and exits non-zero instead of
#     silently doing nothing

APP=gmall
hive=/opt/modules/hive/bin/hive

# Use the supplied date when given, otherwise fall back to the previous day.
if [ -n "$2" ]; then
    do_date=$2
else
    do_date=$(date -d "-1 day" +%F)
fi

# sql1: daily loads for the partitioned dimension and fact tables, the two
# accumulating snapshot merges (coupon use, order info) and the user zipper
# table rebuild.
# In the order info merge, tms is a map of order status code to operate time;
# codes 1001..1006 feed create, payment, cancel, finish, refund and
# refund-finish time respectively (1001 is the unpaid state).
sql1="
set hive.exec.dynamic.partition.mode=nonstrict;

insert overwrite table ${APP}.dwd_dim_sku_info partition(dt='$do_date')
select
    sku.id,
    sku.spu_id,
    sku.price,
    sku.sku_name,
    sku.sku_desc,
    sku.weight,
    sku.tm_id,
    ob.tm_name,
    sku.category3_id,
    c2.id category2_id,
    c1.id category1_id,
    c3.name category3_name,
    c2.name category2_name,
    c1.name category1_name,
    spu.spu_name,
    sku.create_time
from
(
    select * from ${APP}.ods_sku_info where dt='$do_date'
)sku
join
(
    select * from ${APP}.ods_base_trademark where dt='$do_date'
)ob on sku.tm_id=ob.tm_id
join
(
    select * from ${APP}.ods_spu_info where dt='$do_date'
)spu on spu.id = sku.spu_id
join
(
    select * from ${APP}.ods_base_category3 where dt='$do_date'
)c3 on sku.category3_id=c3.id
join
(
    select * from ${APP}.ods_base_category2 where dt='$do_date'
)c2 on c3.category2_id=c2.id
join
(
    select * from ${APP}.ods_base_category1 where dt='$do_date'
)c1 on c2.category1_id=c1.id;

insert overwrite table ${APP}.dwd_dim_coupon_info partition(dt='$do_date')
select
    id,
    coupon_name,
    coupon_type,
    condition_amount,
    condition_num,
    activity_id,
    benefit_amount,
    benefit_discount,
    create_time,
    range_type,
    spu_id,
    tm_id,
    category3_id,
    limit_num,
    operate_time,
    expire_time
from ${APP}.ods_coupon_info
where dt='$do_date';

insert overwrite table ${APP}.dwd_dim_activity_info partition(dt='$do_date')
select
    info.id,
    info.activity_name,
    info.activity_type,
    rule.condition_amount,
    rule.condition_num,
    rule.benefit_amount,
    rule.benefit_discount,
    rule.benefit_level,
    info.start_time,
    info.end_time,
    info.create_time
from
(
    select * from ${APP}.ods_activity_info where dt='$do_date'
)info
left join
(
    select * from ${APP}.ods_activity_rule where dt='$do_date'
)rule on info.id = rule.activity_id;

insert overwrite table ${APP}.dwd_fact_order_detail partition(dt='$do_date')
select
    od.id,
    od.order_id,
    od.user_id,
    od.sku_id,
    od.sku_name,
    od.order_price,
    od.sku_num,
    od.create_time,
    oi.province_id,
    od.order_price*od.sku_num
from
(
    select * from ${APP}.ods_order_detail where dt='$do_date'
) od
join
(
    select * from ${APP}.ods_order_info where dt='$do_date'
) oi
on od.order_id=oi.id;

insert overwrite table ${APP}.dwd_fact_payment_info partition(dt='$do_date')
select
    pi.id,
    pi.out_trade_no,
    pi.order_id,
    pi.user_id,
    pi.alipay_trade_no,
    pi.total_amount,
    pi.subject,
    pi.payment_type,
    pi.payment_time,
    oi.province_id
from
(
    select * from ${APP}.ods_payment_info where dt='$do_date'
)pi
join
(
    select id, province_id from ${APP}.ods_order_info where dt='$do_date'
)oi
on pi.order_id = oi.id;

insert overwrite table ${APP}.dwd_fact_order_refund_info partition(dt='$do_date')
select
    id,
    user_id,
    order_id,
    sku_id,
    refund_type,
    refund_num,
    refund_amount,
    refund_reason_type,
    create_time
from ${APP}.ods_order_refund_info
where dt='$do_date';

insert overwrite table ${APP}.dwd_fact_comment_info partition(dt='$do_date')
select
    id,
    user_id,
    sku_id,
    spu_id,
    order_id,
    appraise,
    create_time
from ${APP}.ods_comment_info
where dt='$do_date';

insert overwrite table ${APP}.dwd_fact_cart_info partition(dt='$do_date')
select
    id,
    user_id,
    sku_id,
    cart_price,
    sku_num,
    sku_name,
    create_time,
    operate_time,
    is_ordered,
    order_time
from ${APP}.ods_cart_info
where dt='$do_date';

insert overwrite table ${APP}.dwd_fact_favor_info partition(dt='$do_date')
select
    id,
    user_id,
    sku_id,
    spu_id,
    is_cancel,
    create_time,
    cancel_time
from ${APP}.ods_favor_info
where dt='$do_date';

insert overwrite table ${APP}.dwd_fact_coupon_use partition(dt)
select
    if(new.id is null,old.id,new.id),
    if(new.coupon_id is null,old.coupon_id,new.coupon_id),
    if(new.user_id is null,old.user_id,new.user_id),
    if(new.order_id is null,old.order_id,new.order_id),
    if(new.coupon_status is null,old.coupon_status,new.coupon_status),
    if(new.get_time is null,old.get_time,new.get_time),
    if(new.using_time is null,old.using_time,new.using_time),
    if(new.used_time is null,old.used_time,new.used_time),
    date_format(if(new.get_time is null,old.get_time,new.get_time),'yyyy-MM-dd')
from
(
    select
        id,
        coupon_id,
        user_id,
        order_id,
        coupon_status,
        get_time,
        using_time,
        used_time
    from ${APP}.dwd_fact_coupon_use
    where dt in
    (
        select date_format(get_time,'yyyy-MM-dd')
        from ${APP}.ods_coupon_use
        where dt='$do_date'
    )
)old
full outer join
(
    select
        id,
        coupon_id,
        user_id,
        order_id,
        coupon_status,
        get_time,
        using_time,
        used_time
    from ${APP}.ods_coupon_use
    where dt='$do_date'
)new
on old.id=new.id;

insert overwrite table ${APP}.dwd_fact_order_info partition(dt)
select
    if(new.id is null,old.id,new.id),
    if(new.order_status is null,old.order_status,new.order_status),
    if(new.user_id is null,old.user_id,new.user_id),
    if(new.out_trade_no is null,old.out_trade_no,new.out_trade_no),
    if(new.tms['1001'] is null,old.create_time,new.tms['1001']),
    if(new.tms['1002'] is null,old.payment_time,new.tms['1002']),
    if(new.tms['1003'] is null,old.cancel_time,new.tms['1003']),
    if(new.tms['1004'] is null,old.finish_time,new.tms['1004']),
    if(new.tms['1005'] is null,old.refund_time,new.tms['1005']),
    if(new.tms['1006'] is null,old.refund_finish_time,new.tms['1006']),
    if(new.province_id is null,old.province_id,new.province_id),
    if(new.activity_id is null,old.activity_id,new.activity_id),
    if(new.original_total_amount is null,old.original_total_amount,new.original_total_amount),
    if(new.benefit_reduce_amount is null,old.benefit_reduce_amount,new.benefit_reduce_amount),
    if(new.feight_fee is null,old.feight_fee,new.feight_fee),
    if(new.final_total_amount is null,old.final_total_amount,new.final_total_amount),
    date_format(if(new.tms['1001'] is null,old.create_time,new.tms['1001']),'yyyy-MM-dd')
from
(
    select
        id,
        order_status,
        user_id,
        out_trade_no,
        create_time,
        payment_time,
        cancel_time,
        finish_time,
        refund_time,
        refund_finish_time,
        province_id,
        activity_id,
        original_total_amount,
        benefit_reduce_amount,
        feight_fee,
        final_total_amount
    from ${APP}.dwd_fact_order_info
    where dt in
    (
        select date_format(create_time,'yyyy-MM-dd')
        from ${APP}.ods_order_info
        where dt='$do_date'
    )
)old
full outer join
(
    select
        info.id,
        info.order_status,
        info.user_id,
        info.out_trade_no,
        info.province_id,
        act.activity_id,
        log.tms,
        info.original_total_amount,
        info.benefit_reduce_amount,
        info.feight_fee,
        info.final_total_amount
    from
    (
        select
            order_id,
            str_to_map(concat_ws(',',collect_set(concat(order_status,'=',operate_time))),',','=') tms
        from ${APP}.ods_order_status_log
        where dt='$do_date'
        group by order_id
    )log
    join
    (
        select * from ${APP}.ods_order_info where dt='$do_date'
    )info
    on log.order_id=info.id
    left join
    (
        select * from ${APP}.ods_activity_order where dt='$do_date'
    )act
    on log.order_id=act.order_id
)new
on old.id=new.id;

insert overwrite table ${APP}.dwd_dim_user_info_his_tmp
select * from
(
    select
        id,
        name,
        birthday,
        gender,
        email,
        user_level,
        create_time,
        operate_time,
        '$do_date' start_date,
        '9999-99-99' end_date
    from ${APP}.ods_user_info
    where dt='$do_date'

    union all

    select
        uh.id,
        uh.name,
        uh.birthday,
        uh.gender,
        uh.email,
        uh.user_level,
        uh.create_time,
        uh.operate_time,
        uh.start_date,
        if(ui.id is not null and uh.end_date='9999-99-99', date_add(ui.dt,-1), uh.end_date) end_date
    from ${APP}.dwd_dim_user_info_his uh
    left join
    (
        select * from ${APP}.ods_user_info where dt='$do_date'
    ) ui
    on uh.id=ui.id
)his
order by his.id, start_date;

insert overwrite table ${APP}.dwd_dim_user_info_his
select * from ${APP}.dwd_dim_user_info_his_tmp;
"

# sql2: province dimension load, which has no date filter and is only run in
# the first mode.
sql2="
insert overwrite table ${APP}.dwd_dim_base_province
select
    bp.id,
    bp.name,
    bp.area_code,
    bp.iso_code,
    bp.region_id,
    br.region_name
from ${APP}.ods_base_province bp
join ${APP}.ods_base_region br
on bp.region_id=br.id;
"

case "$1" in
    "first")
        $hive -e "$sql1"
        $hive -e "$sql2"
        ;;
    "all")
        $hive -e "$sql1"
        ;;
    *)
        # Unknown or missing mode: report it rather than exiting silently.
        echo "Usage: $0 {first|all} [yyyy-MM-dd]" >&2
        exit 1
        ;;
esac
2)增加腳本執行權限
# Grant read, write and execute on the script to its owner and group only.
chmod 770 ods_to_dwd_db.sh
3)執行腳本導入數據
# Run the loader in "all" mode for business date 2020-03-11.
ods_to_dwd_db.sh all 2020-03-11
4)查看導入數據
-- Inspect the 2020-03-11 partitions of four of the fact tables after the
-- script run.
SELECT *
FROM dwd_fact_order_info
WHERE dt = '2020-03-11';

SELECT *
FROM dwd_fact_order_detail
WHERE dt = '2020-03-11';

SELECT *
FROM dwd_fact_comment_info
WHERE dt = '2020-03-11';

SELECT *
FROM dwd_fact_order_refund_info
WHERE dt = '2020-03-11';
