-- 1、Doris建表
use dim;
CREATE TABLE IF NOT EXISTS `dim_tab_name` (
`inc_day` date NULL COMMENT "日期(分區)"
,`dept_code` varchar(100) NULL COMMENT "網點代碼"
,`dept_name` varchar(100) NULL COMMENT "網點名稱"
,.............
,`valid_dt` varchar(100) NULL COMMENT "生效日期"
,`invalid_tm` varchar(100) NULL COMMENT "失效日期"
) ENGINE=OLAP
DUPLICATE KEY(`inc_day`, `dept_code`)
COMMENT "...的維表(歷史快照)"
PARTITION BY RANGE(`inc_day`)(
START ("20210801") END ("20211011") EVERY (INTERVAL 1 day)
)
DISTRIBUTED BY HASH(`dept_code`) BUCKETS 32
PROPERTIES(
"storage_medium" = "SSD",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.end" = "1",
"dynamic_partition.prefix" = "p",
"dynamic_partition.buckets" = "32"
);
二、brokerload批量導數
-- 查看brokerload批量導數任務的日志
use dim;SHOW LOAD WHERE LABEL = 'doris_dim_tab_name'
--執行brokerload批量導數
load label dim.doris_dim_tab_name
(
--tez的文件目錄不匹配,問題,導致數據導入不成功
data infile("hdfs://ip:port/hive/warehouse/dim/dim_tab_name/inc_day=2021[08|09|10]*/*")
INTO TABLE `dim_tab_name`
FORMAT AS "parquet"
-- 這里的順序需要和hive中保持一致
(
dept_code,dept_name,.......,valid_dt,invalid_tm
)
COLUMNS FROM PATH AS (inc_day) --從hdfs路徑中獲取分區的字段值
set (
--建議字段映射
inc_day=inc_day
,dept_code=dept_code
,dept_name=dept_name
..................
,valid_dt=valid_dt
,invalid_tm=invalid_tm
)
)
-- 通過SHOW BROKER 查看目前集群的Broker名稱
WITH BROKER hdfs_broker
PROPERTIES
(
"max_filter_ratio" = "0.01"
);
三、創建etl調度任務
----目標源,導入前准備語句:
truncate table dim.dim_tab_name partition(p$[time(yyyyMMdd,-1d)])
----目標源,Stream Load參數:
"strict_mode":"false","max_filter_ratio":"0.10","exec_mem_limit":"12884901888","timeout":"10000"