一.启动
1.启动hadoop
2.直接在命令行下输入hive就可以换启动hive
二.创建
1.创建普通表(注意各种分隔符的写法)
create table employees( name string, salary float, subordiantes array<string>, deductions map<string,float>, address struct<street:string,city:string,state:string,zip:int>) row format delimited fields terminated by ' ' collection items terminated by ',' map keys terminated by ':' lines terminated by '\n';
2.创建分区表(注意partitioned by (country string,state string)的位置)
create table employees_partition( name string, salary float, subordiantes array<string>, deductions map<string,float>, address struct<street:string,city:string,state:string,zip:int>) partitioned by (country string,state string) row format delimited fields terminated by ' ' collection items terminated by ',' map keys terminated by ':' lines terminated by '\n';
3.向hive中加载数据,指定了不存在的分区就相当于创建分区
load data local inpath '[本地文件路径]'
into table employees_partition partition (country='China',state='Liaoning');
三.分区表
1向分区表中添加分区
alter table employees_partition add [if not exists] partition(country='usa',state='ca');
2删除分区
alter table employees_partition drop [if exists] partition(country='usa',state='ca');
四.查看信息
1查看一个分区表的分区
show partitions employees_partition;
2查看分区表的详细信息
describe formatted employees_partition;
五.重命名表
alter table employees_partition rename to employees_par;
六.修改列
1增加列
alter table employees_partition add columns(app_name string,session_id int);
2删除列(其实是重新指定列,如果没有某列,那么就删除了)
alter table employees_partition replace columns(name string,salary float,subordinates array<string>,deductions map<string,float>,address struct<street:string,city:string,state:string,zip:int>);
七.桶表
1创建桶表
create table employees_bucket(id int,name string,age int) clustered by(id) into 3 buckets row format delimited fields terminated by ',';
2向桶表中插入数据(不能从文件中直接load)
insert into employees_bucket select * from employees_temp;
八.装载数据
1从文件中装载数据
load data local inpath '[本地文件路径]' [overwrite]
into table employees_temp [partition (country='china',state='ca')];
2通过查询语句装载数据
insert into employees_bucket [partition (country='us',state='ca')]
select * from employees_temp;
九.查询语句
1查询集合中的元素
查询数组中的元素:
select name,subordinates[0] from employees_partition;
查询map中的元素:
select name,deductions["a"] from employees_partition;
查询struct中的元素:
select name,address.city from employees_partition;
2在查询中掺入计算
select upper(name),salary,deductions["a"],round(salary*(1-deductions["a"])) from employees_partition;
3聚合查询(每个查询都是针对某一列进行聚合查询)
select count(*),avg(salary) from employees_partition;
4在分区表的某个分区内查询
select * from [table] where [分区字段名] = 'xxx'
十.视图
1创建视图(使用视图限制查询表的一部分)(创建过后是可以在show tables中查到shorter_view的)
create view shorter_view as
> select * from employees_temp > where id > 2;
2创建视图(将表中的map等集合元素单独列成一个表)
create view shorter_view2(street,city,state) as
> select deductions["a"],deductions["b"],deductions["c"]
> from employees_partition;
十一.其他
1列出查询计划
explain select sum(age) from employees_temp;
2显示所有函数
show functions
3 显示函数帮助
describe function split;