1 內部表
-- List the available databases, then switch to the one used by these notes.
SHOW DATABASES;
USE hive_data;
- 1.1 創建內部表
-- Internal table for the Sogou query log.
-- Rows are tab-delimited text, one record per line.
CREATE TABLE SOGOUQ2 (
    DT         STRING,
    WEBSESSION STRING,
    WORD       STRING,
    S_SEQ      INT,
    C_SEQ      INT,
    WEBSITE    STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';
- 1.2 加載數據
Load local data:
-- Load a file from the local file system into SOGOUQ2.
LOAD DATA LOCAL INPATH '/data/software/sougou/SogouQ2.txt'
INTO TABLE SOGOUQ2;

-- Load HDFS data:
-- The path literal must not carry trailing whitespace inside the quotes,
-- otherwise it does not match the file name.
LOAD DATA INPATH 'hdfs://shulaibao2:9010/home/hadoop/upload/test/sougou/SogouQ1.txt'
INTO TABLE SOGOUQ2;
- 1.3 查看hdfs數據
# List the HDFS warehouse path of the hive_data database to see the table data.
hadoop fs -ls /user/hive/warehouse/hive_data.db
- 1.4 操作數據庫
-- Total number of rows in SOGOUQ2.
SELECT COUNT(*)
FROM SOGOUQ2;

-- Number of rows whose WEBSITE value contains the text baidu.
SELECT COUNT(*)
FROM SOGOUQ2
WHERE WEBSITE LIKE '%baidu%';
2 外部表
- 2.1 創建hdfs數據存儲目錄
# Create the HDFS directory used as the external table LOCATION,
# then list it to confirm it exists. These are two separate commands.
hadoop fs -mkdir -p /home/hadoop/upload/hive/sogouq1
hdfs dfs -ls /home/hadoop/upload/hive/sogouq1
- 2.2 創建表
-- External table over tab-delimited text files stored under the given
-- HDFS directory.
CREATE EXTERNAL TABLE SOGOUQ1 (
    DT         STRING,
    WEBSESSION STRING,
    WORD       STRING,
    S_SEQ      INT,
    C_SEQ      INT,
    WEBSITE    STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/home/hadoop/upload/hive/sogouq1';

-- Confirm the table now exists.
SHOW TABLES;
- 2.3 加載數據
# Copy the data file into the external table directory (HDFS to HDFS).
# The command name is lowercase and the copy subcommand is -cp.
hadoop fs -cp /home/hadoop/upload/test/sougou/SogouQ1.txt /home/hadoop/upload/hive/sogouq1
# HDFS-to-HDFS transfer: cp, mv
# Local file system to HDFS: copyFromLocal
- 2.4 操作數據庫
-- Total number of rows visible through the external table.
SELECT COUNT(*)
FROM SOGOUQ1;
總結:【注】在刪除表的時候,內部表將刪除表的元數據和數據文件;而刪除外部表的時候,僅僅刪除外部表的元數據,不刪除數據文件
3 交易數據統計實戰
- 3.1 數據準備
tbDate:日期、年月、年、月、日、周幾、第幾周、季度、旬、半月;
tbStock:訂單號、交易位置、交易日期;
tbStockDetail:訂單號、行號、貨品、數量、金額;
- 3.2 創建表
-- Date dimension, one row per calendar date. Comma-delimited text.
CREATE TABLE tbDate (
    dateID       STRING,
    theyearmonth STRING,
    theyear      STRING,
    themonth     STRING,
    thedate      STRING,
    theweek      STRING,
    theweeks     STRING,
    thequot      STRING,
    thetenday    STRING,
    thehalfmonth STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

-- Order header: order number, location and transaction date.
CREATE TABLE tbStock (
    ordernumber STRING,
    locationid  STRING,
    dateID      STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

-- Order lines: one row per item within an order.
CREATE TABLE tbStockDetail (
    ordernumber STRING,
    rownum      INT,
    itemid      STRING,
    qty         INT,
    price       INT,
    amount      INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
- 3.3 加載數據
-- Populate the three trading tables from files on the local file system.
LOAD DATA LOCAL INPATH '/data/software/testdata/tbDate.txt'
INTO TABLE tbDate;

LOAD DATA LOCAL INPATH '/data/software/testdata/tbStock.txt'
INTO TABLE tbStock;

LOAD DATA LOCAL INPATH '/data/software/testdata/tbStockDetail.txt'
INTO TABLE tbStockDetail;
- 3.4 數據統計分析
- 3.4.1 按年統計銷售額,年份升序
-- Total sales amount per year, in ascending year order.
-- Orders are kept even when they have no detail rows or no matching date.
SELECT
    dt.theyear,
    SUM(sd.amount)
FROM tbStock AS s
LEFT JOIN tbStockDetail AS sd
    ON s.ordernumber = sd.ordernumber
LEFT JOIN tbDate AS dt
    ON s.dateid = dt.dateid
GROUP BY dt.theyear
ORDER BY dt.theyear;
- 3.4.2按交易日期-訂單號分組統計銷售額
-- Sales amount of each order, grouped by transaction date and order number.
SELECT
    s.dateid,
    s.ordernumber,
    SUM(sd.amount) AS sumofamount
FROM tbStock AS s
LEFT JOIN tbStockDetail AS sd
    ON s.ordernumber = sd.ordernumber
GROUP BY
    s.dateid,
    s.ordernumber;
- 3.4.3統計年度銷售額最大的交易日期-訂單號
-- Largest single-order sales amount in each year, in ascending year order.
-- The inner query totals each order by date and order number, and the outer
-- query keeps the maximum total per year.
-- ORDER BY replaces SORT BY: in Hive, SORT BY only orders rows within each
-- reducer, so the overall output is not guaranteed to be sorted by year.
SELECT
    c.theyear,
    MAX(d.sumofamount)
FROM tbDate c
INNER JOIN (
    SELECT
        a.dateid,
        a.ordernumber,
        SUM(b.amount) AS sumofamount
    FROM tbStock a
    LEFT JOIN tbStockDetail b
        ON a.ordernumber = b.ordernumber
    GROUP BY
        a.dateid,
        a.ordernumber
) d
    ON c.dateid = d.dateid
GROUP BY c.theyear
ORDER BY c.theyear;
- 3.4.4統計季度銷售額前10位
-- The ten year and quarter combinations with the highest total sales amount.
SELECT
    dt.theyear,
    dt.thequot,
    SUM(sd.amount) AS sumofamount
FROM tbStock AS s
LEFT JOIN tbStockDetail AS sd
    ON s.ordernumber = sd.ordernumber
LEFT JOIN tbDate AS dt
    ON s.dateid = dt.dateid
GROUP BY
    dt.theyear,
    dt.thequot
ORDER BY sumofamount DESC
LIMIT 10;
- 3.4.5銷售金額在100000以上的單據
-- Orders whose total sales amount exceeds 100000.
-- Orders without detail rows have a NULL total and are filtered out by HAVING.
SELECT
    s.ordernumber,
    SUM(sd.amount) AS sumofamount
FROM tbStock AS s
LEFT JOIN tbStockDetail AS sd
    ON s.ordernumber = sd.ordernumber
GROUP BY s.ordernumber
HAVING SUM(sd.amount) > 100000;
- 3.4.6 按年度、貨品統計銷售額
-- Total sales amount of each item within each year.
SELECT
    dt.theyear,
    sd.itemid,
    SUM(sd.amount) AS sumofamount
FROM tbStock AS s
LEFT JOIN tbStockDetail AS sd
    ON s.ordernumber = sd.ordernumber
LEFT JOIN tbDate AS dt
    ON s.dateid = dt.dateid
GROUP BY
    dt.theyear,
    sd.itemid;
- 3.4.7 統計每個年度單個貨品的最大銷售額
-- Highest per-item sales total in each year.
-- The inner query totals sales per year and item, and the outer query keeps
-- the maximum of those totals for every year.
-- The subquery must not end with a statement terminator: a terminator inside
-- the parentheses cuts the statement short and causes a parse error.
SELECT
    d.theyear,
    MAX(d.sumofamount) AS maxofamount
FROM (
    SELECT
        c.theyear,
        b.itemid,
        SUM(b.amount) AS sumofamount
    FROM tbStock a
    LEFT JOIN tbStockDetail b
        ON a.ordernumber = b.ordernumber
    LEFT JOIN tbDate c
        ON a.dateid = c.dateid
    GROUP BY
        c.theyear,
        b.itemid
) d
GROUP BY d.theyear;
- 3.4.8統計年度最暢銷的商品
-- Best-selling item of each year, in ascending year order.
-- item_totals holds the sales total per year and item.
-- year_best holds the highest of those totals per year.
-- Joining the two on year and amount yields the item or items that reach
-- the yearly maximum.
SELECT DISTINCT
    item_totals.theyear,
    item_totals.itemid,
    year_best.maxofamount
FROM (
    SELECT
        dt.theyear,
        sd.itemid,
        SUM(sd.amount) AS sumofamount
    FROM tbStock AS s
    INNER JOIN tbStockDetail AS sd
        ON s.ordernumber = sd.ordernumber
    INNER JOIN tbDate AS dt
        ON s.dateid = dt.dateid
    GROUP BY
        dt.theyear,
        sd.itemid
) AS item_totals
INNER JOIN (
    SELECT
        per_item.theyear,
        MAX(per_item.sumofamount) AS maxofamount
    FROM (
        SELECT
            dt.theyear,
            sd.itemid,
            SUM(sd.amount) AS sumofamount
        FROM tbStock AS s
        INNER JOIN tbStockDetail AS sd
            ON s.ordernumber = sd.ordernumber
        INNER JOIN tbDate AS dt
            ON s.dateid = dt.dateid
        GROUP BY
            dt.theyear,
            sd.itemid
    ) AS per_item
    GROUP BY per_item.theyear
) AS year_best
    ON item_totals.theyear = year_best.theyear
    AND item_totals.sumofamount = year_best.maxofamount
ORDER BY item_totals.theyear;