hive向表格中插入數據並分析語句


-- 1. 導入 mds_imei_month_info

-- Monthly IMEI roll-up for 201705.
-- Reads mds_engine_basic and writes one row per imei_p into
-- mds_imei_month_info, using a dynamic partition on the month column.

-- Maximum number of dynamic partitions that may be created in total.
set hive.exec.max.dynamic.partitions=100000;
-- Whether concurrency is supported (switched off here).
set hive.support.concurrency=false;
-- Maximum number of dynamic partitions each mapper or reducer may create.
set hive.exec.max.dynamic.partitions.pernode=100000;
-- strict mode refuses inserts where every partition column is dynamic, at
-- least one must be given a static value. nonstrict lifts that restriction.
set hive.exec.dynamic.partition.mode=nonstrict;

insert into table mds_imei_month_info partition(month)
select
    imei_p,
    dt,
    cnt,
    month
from (
    select
        imei_p,
        month,
        dt,
        cnt
    from (
        -- Collapse the per-day rows into one row per IMEI.
        --   dt  : sum of pow(2, day - 1) over the per-day rows, i.e. each
        --         day of the month contributes 2 to the power (day - 1)
        --   cnt : number of rows the IMEI has in the month
        select
            imei_p,
            '201705' as month,
            sum(pow(2, (dt - 1))) as dt,
            sum(cnt) as cnt
        from (
            -- The dt value has 8 characters. The 2 characters starting at
            -- position 7 are the day of month (01 to 31).
            select
                imei_p,
                cast(substring(dt, 7, 2) as int) as dt,
                count(*) as cnt
            from mds_engine_basic
            where dt >= 20170501
              and dt <= 20170531
              and length(dt) = 8
            group by imei_p, dt
        ) per_day
        group by imei_p
    ) per_month
    -- Keep IMEIs that are at least 14 characters long and whose leading
    -- regex match covers the whole value (pattern check on the IMEI).
    where length(imei_p) >= 14
      and regexp_extract(imei_p, '([a-z,,.,A-Z,0-9,_,\\-]*)', 0) = imei_p
) checked;


-- 2. 導入 mds_ip_month_info

-- Monthly IP roll-up for 201705.
-- Reads mds_engine_basic and writes one row per user_ip (plus its looked-up
-- location attributes) into mds_ip_month_info, dynamic partition on month.

-- Session settings for the dynamic-partition insert.
set hive.exec.max.dynamic.partitions=100000;
set hive.support.concurrency=false;
set hive.exec.max.dynamic.partitions.pernode=100000;
set hive.exec.dynamic.partition.mode=nonstrict;

insert into table mds_ip_month_info partition(month)
select
    user_ip,
    country,
    province,
    city,
    longtitude,
    latitude,
    isp,
    dt,
    cnt,
    month
from (
    -- One row per user_ip and attribute set.
    --   dt  : sum of pow(2, day - 1) over the per-day rows
    --   cnt : number of source rows for the IP in the month
    select
        user_ip,
        '201705' as month,
        sum(pow(2, (dt - 1))) as dt,
        sum(cnt) as cnt,
        country,
        city,
        province,
        latitude,
        longtitude,
        isp
    from (
        -- Per-day counts. ipaddressquery is a UDF defined outside this
        -- script. NOTE(review): presumably a geo-IP lookup where the first
        -- argument selects the field (1 city, 2 country, 3 province, 4 isp,
        -- 5 a comma-separated pair read here as latitude then longitude).
        -- Confirm against the UDF.
        select
            user_ip,
            cast(substring(dt, 7, 2) as int) as dt,
            count(*) as cnt,
            ipaddressquery(2, user_ip) as country,
            ipaddressquery(3, user_ip) as province,
            ipaddressquery(1, user_ip) as city,
            split(ipaddressquery(5, user_ip), ',')[1] as longtitude,
            split(ipaddressquery(5, user_ip), ',')[0] as latitude,
            ipaddressquery(4, user_ip) as isp
        from mds_engine_basic
        where dt >= 20170501
          and dt <= 20170531
          and length(dt) = 8
          -- Skip values that contain a comma.
          and user_ip not like '%,%'
          -- Shape checks on the address text before the first comma.
          and split(user_ip, ',')[0] like '%.%.%'
          and regexp_extract(split(user_ip, ',')[0], '\.([0-9]{0,7})\.([0-9]{0,7})\.([0-9]{0,7})\.([0-9]{0,7})', 0) = split(user_ip, ',')[0]
          -- The part before the first dot must compare below 300.
          and split(user_ip, '\\.')[0] < 300
        group by
            user_ip,
            dt,
            ipaddressquery(2, user_ip),
            ipaddressquery(3, user_ip),
            ipaddressquery(1, user_ip),
            split(ipaddressquery(5, user_ip), ',')[1],
            split(ipaddressquery(5, user_ip), ',')[0],
            ipaddressquery(4, user_ip)
    ) per_day
    group by
        user_ip,
        country,
        city,
        province,
        latitude,
        longtitude,
        isp
) per_month;


-- 3. 導入 mds_id_month_info

-- Monthly id roll-up for 201705.
-- Reads mds_engine_basic and writes one row per id into mds_id_month_info,
-- using a dynamic partition on the month column.

-- Session settings for the dynamic-partition insert.
set hive.exec.max.dynamic.partitions=100000;
set hive.support.concurrency=false;
set hive.exec.max.dynamic.partitions.pernode=100000;
set hive.exec.dynamic.partition.mode=nonstrict;

insert into table mds_id_month_info partition(month)
select
    id,
    dt,
    cnt,
    month
from (
    -- One row per id.
    --   dt  : sum of pow(2, day - 1) over the per-day rows
    --   cnt : number of source rows for the id in the month
    select
        id,
        '201705' as month,
        sum(pow(2, (dt - 1))) as dt,
        sum(cnt) as cnt
    from (
        -- Per-day row counts. The day of month is the 2 characters of dt
        -- starting at position 7.
        select
            id,
            cast(substring(dt, 7, 2) as int) as dt,
            count(*) as cnt
        from mds_engine_basic
        where dt >= 20170501
          and dt <= 20170531
          and length(dt) = 8
        group by id, dt
    ) per_day
    group by id
) per_month
-- Keep ids that are exactly 16 characters long and whose leading regex
-- match covers the whole value.
where length(id) = 16
  and regexp_extract(id, '([a-z,,.,A-Z,0-9,_,\\-]*)', 0) = id;

 

-- 4. 導入 mds_bssid_month_info

-- Monthly BSSID roll-up for 201709.
-- Reads mds_engine_wifi and writes one row per bssid into
-- mds_bssid_month_info, using a dynamic partition on the month column.

-- Session settings for the dynamic-partition insert.
set hive.exec.max.dynamic.partitions=100000;
set hive.support.concurrency=false;
set hive.exec.max.dynamic.partitions.pernode=100000;
set hive.exec.dynamic.partition.mode=nonstrict;

insert into table mds_bssid_month_info partition(month)
select
    bssid,
    dt,
    cnt,
    month
from (
    -- One row per bssid.
    --   dt  : sum of pow(2, day - 1) over the per-day rows
    --   cnt : number of source rows for the bssid in the month
    select
        bssid,
        '201709' as month,
        sum(pow(2, (dt - 1))) as dt,
        sum(cnt) as cnt
    from (
        -- Per-day row counts. The day of month is the 2 characters of dt
        -- starting at position 7.
        select
            bssid,
            cast(substring(dt, 7, 2) as int) as dt,
            count(*) as cnt
        from mds_engine_wifi
        where dt >= 20170901
          and dt <= 20170930
          and length(dt) = 8
        group by bssid, dt
    ) per_day
    group by bssid
) per_month
-- Keep bssid values that are at least 14 characters long.
where length(bssid) >= 14;

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM