描述的是訂單各個階段的狀態
| | 用戶 | 地區 | 時間 | 商品 | 優惠券 | 活動 | 度量值 |
|---|---|---|---|---|---|---|---|
| 訂單 | √ | √ | √ | | | √ | 一次 |
訂單的生命周期
下單時間=》支付時間=》取消時間=》完成時間=》退款時間=》退款完成時間
訂單事實表的創建
-- Order fact table (accumulating snapshot, loaded incrementally).
-- A daily partition only stores the orders whose create_time falls on that day,
-- so create_time and the partition column dt agree. Per the author's note,
-- accumulating snapshot fact tables here are partitioned by their create_time.
DROP TABLE IF EXISTS dwd_fact_order_info;
CREATE EXTERNAL TABLE dwd_fact_order_info (
    -- From order_info (order_status is the current status of the order)
    `id`                    STRING         COMMENT '訂單編號',
    `order_status`          STRING         COMMENT '訂單狀態',
    `user_id`               STRING         COMMENT '用戶 id',
    `out_trade_no`          STRING         COMMENT '支付流水號',
    -- From the order status log: one timestamp per life-cycle milestone
    `create_time`           STRING         COMMENT '創建時間(未支付狀態)',
    `payment_time`          STRING         COMMENT '支付時間(已支付狀態)',
    `cancel_time`           STRING         COMMENT '取消時間(已取消狀態)',
    `finish_time`           STRING         COMMENT '完成時間(已完成狀態)',
    `refund_time`           STRING         COMMENT '退款時間(退款中狀態)',
    `refund_finish_time`    STRING         COMMENT '退款完成時間(退款完成狀態)',
    -- From order_info
    `province_id`           STRING         COMMENT '省份 ID',
    -- From the ODS order/activity link table
    `activity_id`           STRING         COMMENT '活動 ID',
    -- Measures, all from order_info
    `original_total_amount` DECIMAL(16,2)  COMMENT '原價金額',
    `benefit_reduce_amount` DECIMAL(16,2)  COMMENT '優惠金額',
    `feight_fee`            DECIMAL(16,2)  COMMENT '運費',
    `final_total_amount`    DECIMAL(16,2)  COMMENT '訂單金額'
)
COMMENT '訂單事實表'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_fact_order_info/'
TBLPROPERTIES ("parquet.compression"="lzo");
-- Order table (ODS layer, incremental): orders whose create time or operate
-- time falls on the load date are imported. It only carries the create time,
-- the order status and the operate time, i.e. the latest state of each order
-- with no history of its earlier states.
-- NOTE(review): unlike ods_activity_order, no row format / storage / location
-- clause is declared here, so Hive defaults apply - confirm that is intended.
drop table if exists ods_order_info;
create external table ods_order_info (
`id` string COMMENT '訂單號',
`final_total_amount` decimal(16,2) COMMENT '訂單金額',
`order_status` string COMMENT '訂單狀態',
`user_id` string COMMENT '用戶 id',
`out_trade_no` string COMMENT '支付流水號',
`create_time` string COMMENT '創建時間',
`operate_time` string COMMENT '操作時間',
`province_id` string COMMENT '省份 ID',
`benefit_reduce_amount` decimal(16,2) COMMENT '優惠金額',
`original_total_amount` decimal(16,2) COMMENT '原價金額',
`feight_fee` decimal(16,2) COMMENT '運費'
) COMMENT '訂單表'
-- Partitioned by load date. The terminator below was missing, which made this
-- statement run into the following drop table.
PARTITIONED BY (`dt` string);
-- Order status log (ODS layer, incremental, partitioned): rows whose
-- operate_time falls on the load date are imported, and operate_time may belong
-- to any step of the order life cycle. The table keeps every status an order
-- went through together with the time that status was set.
-- NOTE(review): unlike ods_activity_order, no row format / storage / location
-- clause is declared here, so Hive defaults apply - confirm that is intended.
drop table if exists ods_order_status_log;
create external table ods_order_status_log (
`id` string COMMENT '編號',
`order_id` string COMMENT '訂單 ID',
`order_status` string COMMENT '訂單狀態',
`operate_time` string COMMENT '修改時間'
) COMMENT '訂單狀態表'
-- The terminator below was missing, which made this statement run into the
-- following drop table.
PARTITIONED BY (`dt` string);
-- Activity/order link table (ODS layer, incremental): rows created on the
-- load date are imported into that day's partition.
DROP TABLE IF EXISTS ods_activity_order;
CREATE EXTERNAL TABLE ods_activity_order (
    `id`          STRING COMMENT '編號',
    `activity_id` STRING COMMENT '活動ID',
    `order_id`    STRING COMMENT '訂單id',
    `create_time` STRING COMMENT '領取時間'
)
COMMENT '活動訂單關聯表'
PARTITIONED BY (`dt` STRING)
-- Tab-delimited text read through the LZO text input format
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '/warehouse/gmall/ods/ods_activity_order/';
數據示例:
最終的sql
-- Daily load of dwd_fact_order_info for load date 20201206.
--   old: rows already stored in the partitions that the day's orders belong to
--   new: the day's rows from ods_order_info, joined with a per-order map of
--        status code to operate time built from ods_order_status_log, plus the
--        activity id from ods_activity_order
-- For every column the value from new wins and old fills the gaps, then each
-- touched partition is rewritten through the dynamic partition column dt.
-- The insert maps select columns to table columns by position, so the select
-- list must follow the column order of dwd_fact_order_info exactly.
-- NOTE(review): dt is taken from create_time as-is, and the old partitions are
-- looked up with dt in (select create_time ...). Both only work if create_time
-- uses the same format as dt (for example 20201206). Confirm, or convert
-- create_time to the dt format in both places.
-- NOTE(review): the join condition between old and new is not part of this
-- section. It is expected to match new.id with old.id.

-- partition(dt) is fully dynamic, which Hive rejects in strict mode
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_fact_order_info
partition(dt)
select
    nvl(new.id, old.id) as id,
    nvl(new.order_status, old.order_status) as order_status,
    nvl(new.user_id, old.user_id) as user_id,
    nvl(new.out_trade_no, old.out_trade_no) as out_trade_no,
    nvl(new.create_time, old.create_time) as create_time,
    nvl(new.payment_time, old.payment_time) as payment_time,
    nvl(new.cancel_time, old.cancel_time) as cancel_time,
    nvl(new.finish_time, old.finish_time) as finish_time,
    nvl(new.refund_time, old.refund_time) as refund_time,
    nvl(new.refund_finish_time, old.refund_finish_time) as refund_finish_time,
    nvl(new.province_id, old.province_id) as province_id,
    nvl(new.activity_id, old.activity_id) as activity_id,
    nvl(new.original_total_amount, old.original_total_amount) as original_total_amount,
    nvl(new.benefit_reduce_amount, old.benefit_reduce_amount) as benefit_reduce_amount,
    nvl(new.feight_fee, old.feight_fee) as feight_fee,
    nvl(new.final_total_amount, old.final_total_amount) as final_total_amount,
    -- dynamic partition value, must stay the last column
    nvl(new.create_time, old.create_time) as dt
from
(
    select
        *
    from dwd_fact_order_info
    where dt in (
        select create_time from ods_order_info where dt = '20201206'
    )
) old
full outer join
(
    select
        order_info.id,
        order_info.order_status,
        order_info.user_id,
        order_info.out_trade_no,
        order_info.province_id,
        order_info.original_total_amount,
        order_info.benefit_reduce_amount,
        order_info.feight_fee,
        order_info.final_total_amount,
        -- status codes 1001..1006 are mapped to the six milestone timestamps
        order_status.tmp_map['1001'] as create_time,
        order_status.tmp_map['1002'] as payment_time,
        order_status.tmp_map['1003'] as cancel_time,
        order_status.tmp_map['1004'] as finish_time,
        order_status.tmp_map['1005'] as refund_time,
        order_status.tmp_map['1006'] as refund_finish_time,
        order_activity.activity_id as activity_id
    from
    (
        select
            id,
            order_status,
            user_id,
            out_trade_no,
            province_id,
            original_total_amount,
            benefit_reduce_amount,
            feight_fee,
            final_total_amount
        from ods_order_info
        where dt = '20201206'
    ) order_info
    join
    (
        -- one row per order: map of order_status -> operate_time for the day
        select
            order_id,
            str_to_map(concat_ws(',', collect_set(concat(order_status, '=', operate_time))), ',', '=') as tmp_map
        from ods_order_status_log
        where dt = '20201206'
        group by order_id
    ) order_status
    on order_info.id = order_status.order_id
    left join
    (
        -- only the activity links of orders created on the load date are needed,
        -- links of earlier orders were handled by earlier loads
        select
            order_id,
            activity_id
        from ods_activity_order
        where dt = '20201206'
    ) order_activity
    on order_info.id = order_activity.order_id
) new