0.創建表並加載數據
hive>create table hive.test(id int);
hive>load data local inpath '/home/hyxy/test_order.txt' into table hive.test;
1.order by 全局排序
hive>select * from hive.test order by id;
2.sort by 局部排序(只保證每個reduce內部有序,不保證全局有序)
hive>set mapreduce.job.reduces=3;
hive>select * from hive.test sort by id;
索引
1.創建索引
hive>create index test_id_index on table hive.test(id) as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' with deferred rebuild;
2.查詢默認索引表:創建索引後,hive數據庫下會自動生成一張索引表(表類型為INDEX_TABLE),默認表名格式為 <庫名>__<表名>_<索引名>__
hive>select * from hive.hive__test_test_id_index__;
說明:默認生成的索引表無數據,空白狀態;
索引表有三個字段:
id:表示index字段
_bucketname:表示數據所在的location位置
_offsets:表示當前數據所處的偏移量
3.重建(rebuild)索引,目的是生成索引表中的數據
hive>alter index test_id_index on hive.test rebuild;
hive>select * from hive.hive__test_test_id_index__;
4.刪除索引
hive>drop index test_id_index on hive.test;
