Hive外部表修改location時遇到的問題
一、有一次建外部表的時候,忘記設置location
-- External table holding per-course feature metrics, partitioned by day (pt_d).
-- NOTE: no LOCATION clause is set on this table (this is the situation the
-- walkthrough is about), so Hive chooses the storage path itself.
CREATE EXTERNAL TABLE test.ads_education_course_feature_dm (
    course_id                     STRING COMMENT '課程id',
    course_name                   STRING COMMENT '課程name',
    detail_browser_times          STRING COMMENT '詳情頁瀏覽次數',
    exposure_convert_borwser_rate STRING COMMENT '曝光轉化率',
    pay_flow_7day                 STRING COMMENT '七日流水',
    complete_course_rate          STRING COMMENT '完課率',
    learn_times                   STRING COMMENT '學習次數',
    learn_users                   STRING COMMENT '學習用戶數'
)
COMMENT '課程特征表'
PARTITIONED BY (pt_d STRING COMMENT '天分區')
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS ORC; -- NOTE: LOCATION is not set
外部表的表結構
有一個臨時表 tmp_educenter_course_feature_dm(內部表,測試需要)
-- Managed (internal) staging table used only to feed test rows into the
-- external table; it has the same columns and the same pt_d partitioning.
-- Stored as TEXTFILE because the test data is loaded from a plain text file
-- with LOAD DATA, which only copies/moves files and does not convert them
-- to another storage format such as ORC. With TEXTFILE the '\001' field
-- delimiter below is what is used to parse the loaded file.
CREATE TABLE test.tmp_educenter_course_feature_dm (
    course_id                     STRING COMMENT '課程id',
    course_name                   STRING COMMENT '課程name',
    detail_browser_times          STRING COMMENT '詳情頁瀏覽次數',
    exposure_convert_borwser_rate STRING COMMENT '曝光轉化率',
    pay_flow_7day                 STRING COMMENT '七日流水',
    complete_course_rate          STRING COMMENT '完課率',
    learn_times                   STRING COMMENT '學習次數',
    learn_users                   STRING COMMENT '學習用戶數'
)
COMMENT '課程特征表'
PARTITIONED BY (pt_d STRING COMMENT '天分區')
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS TEXTFILE;
插入測試數據
-- Load the local file /root/test.txt into partition pt_d='20201031'
-- of the staging table.
LOAD DATA LOCAL INPATH '/root/test.txt'
INTO TABLE test.tmp_educenter_course_feature_dm
PARTITION (pt_d = '20201031');
二、直接關聯其他表的數據再插入到ads_education_course_feature_dm中
-- Overwrite partition pt_d='20201031' of the external table with the
-- rows of the same day taken from the staging table.
INSERT OVERWRITE TABLE test.ads_education_course_feature_dm PARTITION (pt_d = '20201031')
SELECT
    course_id,                      -- course id
    course_name,                    -- course name
    detail_browser_times,           -- detail-page view count
    exposure_convert_borwser_rate,  -- exposure conversion rate
    pay_flow_7day,                  -- 7-day payment flow
    complete_course_rate,           -- course completion rate
    learn_times,                    -- number of learning sessions
    learn_users                     -- number of learning users
FROM test.tmp_educenter_course_feature_dm
WHERE pt_d = '20201031';
查看外部表的分區
-- List the partitions registered for the external table.
SHOW PARTITIONS test.ads_education_course_feature_dm;
查看外部表路徑下的分區文件
# List the contents of the external table's directory on HDFS
# (presumably the default warehouse path, since the table was created without LOCATION).
hdfs dfs -ls hdfs://node01:8020/user/hive/warehouse/test.db/ads_education_course_feature_dm
三、直接修改外部表的location
-- Point the table at a new location. This changes the table-level location
-- only; as this walkthrough goes on to observe, the already existing
-- partition pt_d='20201031' still writes to its original path afterwards.
ALTER TABLE test.ads_education_course_feature_dm
SET LOCATION '/myhive/ads/ads_education_course_feature_dm';
四、再次執行數據插入
-- Re-run the same overwrite of partition pt_d='20201031' after the table
-- location has been changed, to see where the files end up.
INSERT OVERWRITE TABLE test.ads_education_course_feature_dm PARTITION (pt_d = '20201031')
SELECT
    course_id,                      -- course id
    course_name,                    -- course name
    detail_browser_times,           -- detail-page view count
    exposure_convert_borwser_rate,  -- exposure conversion rate
    pay_flow_7day,                  -- 7-day payment flow
    complete_course_rate,           -- course completion rate
    learn_times,                    -- number of learning sessions
    learn_users                     -- number of learning users
FROM test.tmp_educenter_course_feature_dm
WHERE pt_d = '20201031';
五、查看修改後的location下的分區文件,發現並沒有分區20201031的文件;覆蓋寫入的文件仍然寫到了原來的location地址。這是因為已經存在的分區保存了自己的location,只修改表的location並不會改變它。
六、對於已經存在分區文件的外部表,如果要修改location,除了修改表的location之外,還要對每個已經存在的分區單獨設置location;之後再執行insert overwrite時,數據文件才會寫到新location路徑下
-- Step 1: change the table-level location.
ALTER TABLE test.ads_education_course_feature_dm
SET LOCATION '/myhive/ads/ads_education_course_feature_dm';

-- Step 2: an existing partition keeps its own location, so repoint it too.
-- The partition gets its own pt_d=... subdirectory under the table path:
-- pointing it at the table root itself would make INSERT OVERWRITE of this
-- one partition write into (and clear) the directory that holds the whole table.
-- NOTE(review): SET LOCATION only updates metadata; files already written to
-- the old path are presumably not moved — copy them or re-run the insert.
ALTER TABLE test.ads_education_course_feature_dm PARTITION (pt_d = '20201031')
SET LOCATION '/myhive/ads/ads_education_course_feature_dm/pt_d=20201031';