創建表:
hive>create table tablename(id int,name string,password string);
創建一個名字為tablename的表,表的屬性有int id; string name; string password;
創建表時指定分隔符
hive> create table test1(name string,count int) row format delimited fields terminated by '\t';
加載表
hive> load data inpath '/user/hadoop/output7/part-r-00000' into table test1;
創建一個新表,結構與其他一樣
hive> create table table1 like table2;
創建一個表table1,表結構跟table2一樣;
創建分區表
hive> create table table1(id int,line string) partitioned by (dt string,country string);
顯示表裡有多少條記錄(count 數大於50的有多少條記錄)
hive>select count(*) from tablename where count>50;
排序用法order by (查詢count 數大於50並排序)
select * from test2 where count > 50 order by count;
顯示表中有多少分區
hive> show partitions table1;
顯示所有表
hive> show tables;
顯示所有以u開頭的表
hive> show tables 'u*';
顯示表的結構信息
hive> describe test1;
修改表名字
hive> alter table table1 rename to test3;
在原表上新添加一列
hive> alter table test1 add columns(new_col2 int comment 'a comment');
hive> alter table test1 add columns(new_col3 int);
刪除表
hive> drop table test3;
從本地文件加載數據:
hive> LOAD DATA LOCAL INPATH '/home/hadoop/input/ncdc/micro-tab/sample.txt' OVERWRITE INTO TABLE records;
加載分區表
hive> load data inpath '/user/hive/warehouse/clickstream_log/dt=2016-11-29/part-r-00000' overwrite into table clickstream_log PARTITION(dt = '2016-11-30');
顯示所有函數
hive> show functions;
查看函數的用法
hive> describe function substr;
查看數組、map、結構
hive> select col1[0],col2['b'],col3.c from complex;
查看數組、map、結構
hive> select col1[0],col2['b'],col3.c from complex;
內連接:
hive> SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
查看hive為某個查詢使用多少個MapReduce作業
hive> Explain SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
外連接:
hive> SELECT sales.*, things.* FROM sales LEFT OUTER JOIN things ON (sales.id = things.id);
hive> SELECT sales.*, things.* FROM sales RIGHT OUTER JOIN things ON (sales.id = things.id);
hive> SELECT sales.*, things.* FROM sales FULL OUTER JOIN things ON (sales.id = things.id);
IN子查詢:Hive 0.13之前的版本不支持,但可以使用LEFT SEMI JOIN
hive> SELECT * FROM things LEFT SEMI JOIN sales ON (sales.id = things.id);
Map連接:Hive可以把較小的表放入每個Mapper的內存來執行連接操作
hive> SELECT /*+ MAPJOIN(things) */ sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
INSERT OVERWRITE TABLE ... SELECT:新表預先存在
hive> FROM records2
> INSERT OVERWRITE TABLE stations_by_year SELECT year, COUNT(DISTINCT station) GROUP BY year
> INSERT OVERWRITE TABLE records_by_year SELECT year, COUNT(1) GROUP BY year
> INSERT OVERWRITE TABLE good_records_by_year SELECT year, COUNT(1) WHERE temperature != 9999 AND (quality = 0 OR quality = 1 OR quality = 4 OR quality = 5 OR quality = 9) GROUP BY year;
CREATE TABLE ... AS SELECT:新表預先不存在
hive>CREATE TABLE target AS SELECT col1,col2 FROM source;
創建視圖:
hive> CREATE VIEW valid_records AS SELECT * FROM records2 WHERE temperature !=9999;
查看視圖詳細信息:
hive> DESCRIBE EXTENDED valid_records;
-------------------------------------------------------------------------------------------------------------------------------------
傳統數據庫:
添加:
insert into 表名 values();
修改:
update 表名 set a=b where b=c;
刪除:
delete from 表名 where a=b;
查詢:
select * from 表名 where a=b;