經典場景: 訂單明細表中分攤金額的問題
ods層數據:
-- ODS layer: order detail table (loaded incrementally).
-- External table over tab-delimited text read through the LZO text input format,
-- partitioned by `dt`.
DROP TABLE IF EXISTS ods_order_detail;
CREATE EXTERNAL TABLE ods_order_detail (
    `id`          STRING        COMMENT '編號',
    `order_id`    STRING        COMMENT '訂單號',
    `user_id`     STRING        COMMENT '用戶 id',
    `sku_id`      STRING        COMMENT '商品 id',
    `sku_name`    STRING        COMMENT '商品名稱',
    `order_price` DECIMAL(16,2) COMMENT '商品價格',
    `sku_num`     BIGINT        COMMENT '商品數量',
    `create_time` STRING        COMMENT '創建時間',
    `source_type` STRING        COMMENT '來源類型',
    `source_id`   STRING        COMMENT '來源編號'
)
COMMENT '訂單詳情表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '/warehouse/gmall/ods/ods_order_detail/';
-- ODS layer: order header table (loaded incrementally).
DROP TABLE IF EXISTS ods_order_info;
CREATE EXTERNAL TABLE ods_order_info (
    `id`                    STRING        COMMENT '訂單號',
    `final_total_amount`    DECIMAL(16,2) COMMENT '訂單金額',
    `order_status`          STRING        COMMENT '訂單狀態',
    `user_id`               STRING        COMMENT '用戶 id',
    `out_trade_no`          STRING        COMMENT '支付流水號',
    `create_time`           STRING        COMMENT '創建時間',
    `operate_time`          STRING        COMMENT '操作時間',
    `province_id`           STRING        COMMENT '省份 ID',
    `benefit_reduce_amount` DECIMAL(16,2) COMMENT '優惠金額',
    `original_total_amount` DECIMAL(16,2) COMMENT '原價金額',
    `feight_fee`            DECIMAL(16,2) COMMENT '運費'
)
COMMENT '訂單表'
-- Partitions are created by time.
PARTITIONED BY (`dt` STRING)
-- Fields are separated by a tab character.
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
-- Storage format: data is read with LzoTextInputFormat and written with a text output format.
STORED AS
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-- Where the table data lives on HDFS.
LOCATION '/warehouse/gmall/ods/ods_order_info/'
;
場景: 有一筆訂單包含多個商品
洗面奶 ×2,小計 50;洗衣液 ×3,小計 30;酸奶 ×1,小計 20(三項小計合計 100,即原始總金額)。訂單詳情中有商品的原始單價。
運費:10 原始總金額:100 優惠金額:20 最終支付金額90, 求這筆訂單中每種商品分攤運費多少、分攤支付金額多少、分攤優惠金額多少。
按照一般的邏輯:各商品按其原始金額佔訂單原始總金額的比例分攤,結果四捨五入保留兩位小數。當分攤結果除不盡時(例如三種金額相同的商品分攤 10 元運費,每種商品分攤的運費是3.33),dwd層的數據匯總得到的運費是9.99,與訂單運費 10 相差 0.01,會產生誤差。為了消除這種誤差,將誤差金額加到最貴的商品上。
-- DWD layer: order detail fact table.
-- Carries the detail-line columns plus province_id and four per-line
-- apportioned amounts (the *_d columns), which are wider (decimal(20,2))
-- than the decimal(16,2) source amounts.
-- Stored as Parquet with the 'parquet.compression' table property set to 'lzo'.
CREATE EXTERNAL TABLE dwd_fact_order_detail (
    `id`                      STRING        COMMENT '訂單編號',
    `order_id`                STRING        COMMENT '訂單號',
    `user_id`                 STRING        COMMENT '用戶id',
    `sku_id`                  STRING        COMMENT '商品id',
    `sku_name`                STRING        COMMENT '商品名稱',
    `order_price`             DECIMAL(16,2) COMMENT '商品價格',
    `sku_num`                 BIGINT        COMMENT '商品數量',
    `create_time`             STRING        COMMENT '創建時間',
    `province_id`             STRING        COMMENT '省份id',
    `source_type`             STRING        COMMENT '來源類型',
    `source_id`               STRING        COMMENT '來源編號',
    `original_amount_d`       DECIMAL(20,2) COMMENT '原始價格分攤',
    `final_amount_d`          DECIMAL(20,2) COMMENT '購買價格分攤',
    `feight_fee_d`            DECIMAL(20,2) COMMENT '運費分攤',
    `benefit_reduce_amount_d` DECIMAL(20,2) COMMENT '優惠分攤'
)
COMMENT '訂單明細事實表'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_fact_order_detail'
TBLPROPERTIES ('parquet.compression' = 'lzo')
-- Statement terminator: without it the next statement in the script is parsed
-- as a continuation of this CREATE TABLE.
;
不考慮誤差的分攤金額
-- Load dwd_fact_order_detail for dt='20201031' WITHOUT rounding correction.
--
-- Each order-level amount (final amount, freight, discount) is apportioned to a
-- detail line by that line's share of the order's original total:
--     order_price * sku_num / original_total_amount * <order-level amount>
-- Every share is rounded to 2 decimals on its own, so the shares of one order
-- may not add up exactly to the order-level amount.
with order_detail_tmp as (
    -- Detail lines of the target partition.
    select
        id,          -- detail row id
        order_id,    -- order id
        user_id,
        sku_id,
        sku_name,
        order_price,
        sku_num,
        create_time,
        source_type,
        source_id
    from ods_order_detail
    where dt = '20201031'
),
order_info_tmp as (
    -- Order headers of the target partition.
    select
        id,          -- order id
        final_total_amount,
        province_id,
        benefit_reduce_amount,
        original_total_amount,
        feight_fee
    from ods_order_info
    where dt = '20201031'
)
insert overwrite table dwd_fact_order_detail partition (dt = '20201031')
select
    od.id          as id,
    od.order_id    as order_id,
    od.user_id     as user_id,
    od.sku_id      as sku_id,
    od.sku_name    as sku_name,
    od.order_price as order_price,
    od.sku_num     as sku_num,
    od.create_time as create_time,
    oi.province_id as province_id,
    od.source_type as source_type,
    od.source_id   as source_id,
    round(od.order_price * od.sku_num, 2) as original_amount_d,
    round(od.order_price * od.sku_num / oi.original_total_amount * oi.final_total_amount, 2) as final_amount_d,
    round(od.order_price * od.sku_num / oi.original_total_amount * oi.feight_fee, 2) as feight_fee_d,
    round(od.order_price * od.sku_num / oi.original_total_amount * oi.benefit_reduce_amount, 2) as benefit_reduce_amount_d
from order_detail_tmp od
-- Left join keeps detail lines whose order header is not in this partition;
-- their province_id and apportioned shares come out NULL.
left join order_info_tmp oi
    on od.order_id = oi.id
;
考慮誤差,將誤差加到原始價格最高的商品上
-- Load dwd_fact_order_detail for dt='20201031' WITH rounding correction.
--
-- Step 1 (line_share): apportion each order-level amount to a detail line by
--   order_price * sku_num / original_total_amount * <order-level amount>,
--   rounded to 2 decimals.
-- Step 2 (line_share_ranked): per order, sum the rounded shares and number the
--   lines from the highest original amount downwards.
-- Step 3 (final select): add the residual (order-level amount minus the sum of
--   rounded shares) to the line numbered 1, i.e. the line with the highest
--   original amount, so the shares of an order add up to the order-level amount.
with order_detail_tmp as (
    -- Detail lines of the target partition.
    select
        id,          -- detail row id
        order_id,    -- order id
        user_id,
        sku_id,
        sku_name,
        order_price,
        sku_num,
        create_time,
        source_type,
        source_id
    from ods_order_detail
    where dt = '20201031'
),
order_info_tmp as (
    -- Order headers of the target partition.
    select
        id,          -- order id
        final_total_amount,
        province_id,
        benefit_reduce_amount,
        original_total_amount,
        feight_fee
    from ods_order_info
    where dt = '20201031'
),
line_share as (
    -- One row per detail line with its individually rounded shares.
    select
        od.id          as id,
        od.order_id    as order_id,
        od.user_id     as user_id,
        od.sku_id      as sku_id,
        od.sku_name    as sku_name,
        od.order_price as order_price,
        od.sku_num     as sku_num,
        od.create_time as create_time,
        oi.province_id as province_id,
        od.source_type as source_type,
        od.source_id   as source_id,
        oi.final_total_amount    as final_total_amount,
        oi.benefit_reduce_amount as benefit_reduce_amount,
        oi.original_total_amount as original_total_amount,
        oi.feight_fee            as feight_fee,
        round(od.order_price * od.sku_num, 2) as original_amount_d,
        round(od.order_price * od.sku_num / oi.original_total_amount * oi.final_total_amount, 2) as final_amount_d,
        round(od.order_price * od.sku_num / oi.original_total_amount * oi.feight_fee, 2) as feight_fee_d,
        round(od.order_price * od.sku_num / oi.original_total_amount * oi.benefit_reduce_amount, 2) as benefit_reduce_amount_d
    from order_detail_tmp od
    -- Left join keeps detail lines whose order header is not in this partition.
    left join order_info_tmp oi
        on od.order_id = oi.id
),
line_share_ranked as (
    -- Per-order totals of the rounded shares, plus the line that absorbs the residual.
    select
        id,
        order_id,
        user_id,
        sku_id,
        sku_name,
        order_price,
        sku_num,
        create_time,
        province_id,
        source_type,
        source_id,
        final_total_amount,
        benefit_reduce_amount,
        original_total_amount,
        feight_fee,
        original_amount_d,
        final_amount_d,
        feight_fee_d,
        benefit_reduce_amount_d,
        sum(original_amount_d)       over (partition by order_id) as sum_original_amount_d,
        sum(final_amount_d)          over (partition by order_id) as sum_final_amount_d,
        sum(feight_fee_d)            over (partition by order_id) as sum_feight_fee_d,
        sum(benefit_reduce_amount_d) over (partition by order_id) as sum_benefit_reduce_amount_d,
        -- DESC so that rn = 1 is the line with the highest original amount;
        -- id breaks ties so the choice is deterministic between runs.
        row_number() over (
            partition by order_id
            order by original_amount_d desc, id
        ) as rn
    from line_share
)
insert overwrite table dwd_fact_order_detail partition (dt = '20201031')
select
    id,
    order_id,
    user_id,   -- must be present: the target table has 15 columns in this order
    sku_id,
    sku_name,
    order_price,
    sku_num,
    create_time,
    province_id,
    source_type,
    source_id,
    -- coalesce(..., 0): when the order header is missing the residual is NULL;
    -- treat it as "nothing to correct" instead of nulling out the line's own share.
    if(rn = 1, original_amount_d + coalesce(original_total_amount - sum_original_amount_d, 0), original_amount_d) as original_amount_d,
    if(rn = 1, final_amount_d + coalesce(final_total_amount - sum_final_amount_d, 0), final_amount_d) as final_amount_d,
    if(rn = 1, feight_fee_d + coalesce(feight_fee - sum_feight_fee_d, 0), feight_fee_d) as feight_fee_d,
    if(rn = 1, benefit_reduce_amount_d + coalesce(benefit_reduce_amount - sum_benefit_reduce_amount_d, 0), benefit_reduce_amount_d) as benefit_reduce_amount_d
from line_share_ranked
;